Skip to content
Snippets Groups Projects
Commit c6c54391 authored by Nicolas Pope's avatar Nicolas Pope
Browse files

Add an adaptive penalty path aggregation

parent 11746e43
No related branches found
No related tags found
No related merge requests found
Showing
with 376 additions and 27 deletions
......@@ -5,6 +5,7 @@ project(libstereo)
option(BUILD_MIDDLEBURY "Build Middlebury evaluation" OFF)
option(BUILD_TESTS "Build unit tests" ON)
option(LIBSTEREO_SHARED "Build a shared library" OFF)
option(USE_GPU "Use the GPU instead of CPU" ON)
find_package(OpenCV REQUIRED)
find_package(OpenMP REQUIRED)
......@@ -21,6 +22,9 @@ set(CMAKE_CXX_FLAGS_RELEASE)
if (CMAKE_COMPILER_IS_GNUCXX)
set(CMAKE_CUDA_FLAGS "--gpu-architecture=compute_61 -std=c++14 -Xcompiler -fPIC -Xcompiler ${OpenMP_CXX_FLAGS}")
set(CMAKE_CUDA_FLAGS_RELEASE "-O3")
else()
set(CMAKE_CUDA_FLAGS "--gpu-architecture=compute_61 -std=c++14")
set(CMAKE_CUDA_FLAGS_RELEASE "-O3")
endif()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
......@@ -50,6 +54,7 @@ else()
src/stereo_misgm.cu
src/stereo_misgm2.cu
src/stereo_censussgm.cu
src/stereo_census_adaptive.cu
src/stereo_adcensussgm.cu
src/stereo_adsgm.cu
#src/stereo_sgmp.cpp
......
......@@ -84,6 +84,32 @@ private:
Impl *impl_;
};
class StereoCensusAdaptive {
public:
StereoCensusAdaptive();
~StereoCensusAdaptive();
void compute(cv::InputArray l, cv::InputArray r, cv::OutputArray disparity);
void setPrior(cv::InputArray disp) {};
struct Parameters {
int d_min = 0;
int d_max = 0;
unsigned short P1 = 5;
float uniqueness = std::numeric_limits<unsigned short>::max();
int subpixel = 1; // subpixel interpolation method
int paths = AggregationDirections::HORIZONTAL |
AggregationDirections::VERTICAL |
AggregationDirections::DIAGONAL;
bool debug = false;
};
Parameters params;
private:
struct Impl;
Impl *impl_;
};
class StereoMiSgm {
public:
StereoMiSgm();
......
#ifndef _FTL_LIBSTEREO_TYPES_HPP_
#define _FTL_LIBSTEREO_TYPES_HPP_
enum AggregationDirections {
enum AggregationDirections : int {
LEFTRIGHT = 1,
RIGHTLEFT = 2,
HORIZONTAL = 1+2,
......
......@@ -6,6 +6,8 @@
#include "middlebury.hpp"
#include "stereo.hpp"
#include "../../components/common/cpp/include/ftl/config.h"
/**
* @param disp disparity map
* @param out output parameter
......@@ -21,8 +23,11 @@ void colorize(const cv::Mat &disp, cv::Mat &out, int ndisp=-1) {
disp.convertTo(dispf, CV_32FC1);
dispf.convertTo(dispc, CV_8UC1, (1.0f / (ndisp > 0 ? (float) ndisp : dmax)) * 255.0f);
#if OPENCV_VERSION >= 40102
cv::applyColorMap(dispc, out, cv::COLORMAP_TURBO);
//cv::applyColorMap(dispc, out, cv::COLORMAP_INFERNO);
#else
cv::applyColorMap(dispc, out, cv::COLORMAP_INFERNO);
#endif
}
////////////////////////////////////////////////////////////////////////////////
......@@ -62,18 +67,18 @@ int main(int argc, char* argv[]) {
int ndisp = calib.vmax - calib.vmin;
auto stereo = StereoCensusSgm();
stereo.params.P1 = 4;
stereo.params.P2 = 25;
auto stereo = StereoCensusAdaptive();
stereo.params.P1 = 8;
//stereo.params.P2 = 25;
stereo.params.d_min = calib.vmin;
stereo.params.d_max = calib.vmax;
stereo.params.subpixel = 1;
stereo.params.debug = true;
//stereo.params.paths = AggregationDirections::ALL;
stereo.params.uniqueness = 40;
stereo.params.uniqueness = 80;
int i_max = 5;
int i_max = 1;
float t = 4.0f;
if (imL.empty() || imR.empty() || gtL.empty()) {
......
#ifndef _FTL_LIBSTEREO_AGGREGATIONS_ADAPTIVE_HPP_
#define _FTL_LIBSTEREO_AGGREGATIONS_ADAPTIVE_HPP_
#include "../dsi.hpp"
#include "../array2d.hpp"
namespace ftl {
namespace stereo {
namespace aggregations {
template <typename DSIIN>
struct AdaptivePenaltySGM {
typedef typename DSIIN::Type Type;
typedef typename DSIIN::Type costtype_t;
// Provided externally
const DSIIN in;
typename Array2D<costtype_t>::Data min_cost_all;
const int P1;
// Provided internally
typename DisparitySpaceImage<costtype_t>::DataType out;
typename DisparitySpaceImage<costtype_t>::DataType previous;
typename DisparitySpaceImage<costtype_t>::DataType updates;
typename Array2D<uint8_t>::Data penalties;
struct PathData : BasePathData<costtype_t> {
// Custom path data goes here...
costtype_t previous_cost_min;
costtype_t *previous;
costtype_t *current;
};
struct DirectionData {
DisparitySpaceImage<costtype_t> previous;
DisparitySpaceImage<costtype_t> updates;
Array2D<uint8_t> penalties;
};
/* Initialise buffers for a new path direction. */
void direction(DirectionData &data, DisparitySpaceImage<costtype_t> &buffer) {
out = buffer.data();
data.previous.create(out.width+out.height, 1, out.disp_min, out.disp_max);
data.updates.create(out.width+out.height, 1, out.disp_min, out.disp_max);
previous = data.previous.data();
updates = data.updates.data();
data.penalties.create(out.width, out.height); // Note: should already exist
penalties = data.penalties.data();
}
/* Reset buffers to start a new path */
__host__ __device__ inline void startPath(ushort2 pixel, ushort thread, ushort stride, PathData &data) {
data.previous = &previous(0,data.pathix,previous.disp_min);
data.current = &updates(0,data.pathix,updates.disp_min);
for (int d=thread; d<=previous.disp_min; d+=stride) {
data.previous[d] = 0;
}
// To ensure all threads have finished clearing the buffer
#ifdef __CUDA_ARCH__
__syncwarp();
#endif
}
__host__ __device__ inline void endPath(ushort2 pixel, ushort thread, ushort stride, PathData &data) {}
/* Main SGM cost aggregation function */
__host__ __device__ inline costtype_t calculateCost(ushort2 pixel, int d, costtype_t *previous, int size, costtype_t previous_cost_min) {
const costtype_t L_min =
min(previous[d],
min(costtype_t(previous_cost_min + penalties(pixel.y, pixel.x)),
min(costtype_t(previous[min(d+1,size)] + P1),
costtype_t(previous[max(d-1,0)] + P1))
)
);
// Note: This clamping to min 0 does make a tiny difference
auto cost = L_min + in(pixel.y,pixel.x,d+in.disp_min);
return (cost > previous_cost_min) ? cost - previous_cost_min : 0;
}
/* Stride over each disparity and calculate minimum cost */
__host__ __device__ inline void operator()(ushort2 pixel, ushort thread, ushort stride, PathData &data) {
#ifndef __CUDA_ARCH__
using std::min;
using std::max;
#endif
const int d_stop = int(out.disp_max)-int(out.disp_min);
costtype_t min_cost = 255;
// For each disparity (striding the threads)
for (int d=thread; d<=d_stop; d+=stride) {
auto c = calculateCost(pixel, d, data.previous, d_stop, data.previous_cost_min);
out(pixel.y,pixel.x,d+in.disp_min) += c;
data.current[d] = c;
// Record the smallest disparity cost for this pixel
min_cost = (c < min_cost) ? c : min_cost;
}
// WARP Aggregate on GPU only (assumes stride = 1 on CPU)
// Min cost from each thread must be combined for overall minimum
// Each thread then obtains thread global minimum
#ifdef __CUDA_ARCH__
min_cost = warpMin(min_cost);
#else
// add assert
#endif
data.previous_cost_min = min_cost;
min_cost_all(pixel.y,pixel.x) += min_cost; // atomic?
// Swap current and previous cost buffers
costtype_t *tmp_ptr = const_cast<costtype_t *>(data.previous);
data.previous = data.current;
data.current = tmp_ptr;
}
};
}
}
}
#endif
......@@ -48,6 +48,14 @@ public:
if (needs_free_ && data_.data) freeMemory(data_.data);
}
Array2D<T> &operator=(const Array2D<T> &c) {
data_ = c.data_;
width = c.width;
height = c.height;
needs_free_ = false;
return *this;
}
struct Data {
__host__ __device__ inline T& operator() (const int y, const int x) {
return data[x + y*pitch];
......
......@@ -83,22 +83,6 @@ template <typename F>
class PathAggregator {
public:
void queuePath(int id, F &f) {
F f1 = f;
f1.direction(path_data[id], out);
switch (id) {
case 0 : parallel1DWarp<algorithms::Aggregator<F, 1, 0>>({f1}, f.in.height, f.in.disparityRange()); break;
case 1 : parallel1DWarp<algorithms::Aggregator<F,-1, 0>>({f1}, f.in.height, f.in.disparityRange()); break;
case 2 : parallel1DWarp<algorithms::Aggregator<F, 0, 1>>({f1}, f.in.width, f.in.disparityRange()); break;
case 3 : parallel1DWarp<algorithms::Aggregator<F, 0,-1>>({f1}, f.in.width, f.in.disparityRange()); break;
case 4 : parallel1DWarp<algorithms::Aggregator<F, 1, 1>>({f1}, f.in.height+f.in.width, f.in.disparityRange()); break;
case 5 : parallel1DWarp<algorithms::Aggregator<F,-1,-1>>({f1}, f.in.height+f.in.width, f.in.disparityRange()); break;
case 6 : parallel1DWarp<algorithms::Aggregator<F, 1,-1>>({f1}, f.in.height+f.in.width, f.in.disparityRange()); break;
case 7 : parallel1DWarp<algorithms::Aggregator<F,-1, 1>>({f1}, f.in.height+f.in.width, f.in.disparityRange()); break;
}
}
DisparitySpaceImage<typename F::Type> &operator()(F &f, int paths) {
out.create(f.in.width, f.in.height, f.in.disp_min, f.in.disp_max);
out.clear();
......@@ -113,8 +97,38 @@ class PathAggregator {
return out;
}
typename F::DirectionData &getDirectionData(AggregationDirections d) {
switch (d) {
case AggregationDirections::LEFTRIGHT : return path_data[0];
case AggregationDirections::RIGHTLEFT : return path_data[1];
case AggregationDirections::UPDOWN : return path_data[2];
case AggregationDirections::DOWNUP : return path_data[3];
case AggregationDirections::TOPLEFTBOTTOMRIGHT : return path_data[4];
case AggregationDirections::BOTTOMRIGHTTOPLEFT : return path_data[5];
case AggregationDirections::BOTTOMLEFTTOPRIGHT : return path_data[6];
case AggregationDirections::TOPRIGHTBOTTOMLEFT : return path_data[7];
default: throw std::exception();
}
}
private:
DisparitySpaceImage<typename F::Type> out;
typename F::DirectionData path_data[8];
DisparitySpaceImage<typename F::Type> buffers[8];
void queuePath(int id, F &f) {
F f1 = f;
f1.direction(path_data[id], out);
switch (id) {
case 0 : parallel1DWarp<algorithms::Aggregator<F, 1, 0>>({f1}, f.in.height, f.in.disparityRange()); break;
case 1 : parallel1DWarp<algorithms::Aggregator<F,-1, 0>>({f1}, f.in.height, f.in.disparityRange()); break;
case 2 : parallel1DWarp<algorithms::Aggregator<F, 0, 1>>({f1}, f.in.width, f.in.disparityRange()); break;
case 3 : parallel1DWarp<algorithms::Aggregator<F, 0,-1>>({f1}, f.in.width, f.in.disparityRange()); break;
case 4 : parallel1DWarp<algorithms::Aggregator<F, 1, 1>>({f1}, f.in.height+f.in.width, f.in.disparityRange()); break;
case 5 : parallel1DWarp<algorithms::Aggregator<F,-1,-1>>({f1}, f.in.height+f.in.width, f.in.disparityRange()); break;
case 6 : parallel1DWarp<algorithms::Aggregator<F, 1,-1>>({f1}, f.in.height+f.in.width, f.in.disparityRange()); break;
case 7 : parallel1DWarp<algorithms::Aggregator<F,-1, 1>>({f1}, f.in.height+f.in.width, f.in.disparityRange()); break;
}
}
};
......@@ -25,6 +25,8 @@ namespace impl {
return ((r_val > min(l_val_n,l_val) && r_val < max(l_val_n,l_val)) || (r_val > min(l_val_p,l_val) && r_val < max(l_val_p,l_val))) ? 0 : abs(l_val-r_val);
}
static constexpr unsigned short COST_MAX = 255;
Array2D<uchar>::Data l;
Array2D<uchar>::Data r;
};
......
......@@ -28,6 +28,8 @@ namespace impl {
#endif
}
static constexpr unsigned short COST_MAX = 64;
Array2D<uint64_t>::Data ct_l;
Array2D<uint64_t>::Data ct_r;
};
......
......@@ -90,7 +90,7 @@ public:
this->data().weights_r = weights_r.data();
}
static constexpr short COST_MAX = A::COST_MAX + B::COST_MAX;
static constexpr short COST_MAX = A::DataType::COST_MAX + B::DataType::COST_MAX;
protected:
A &cost_a;
......
......@@ -25,6 +25,8 @@ namespace impl {
#endif
}
static constexpr unsigned short COST_MAX = 255;
Array2D<short>::Data l_dx;
Array2D<short>::Data r_dx;
Array2D<short>::Data l_dy;
......
......@@ -23,6 +23,8 @@ namespace impl {
return -(H1+H2-H12);
}
static constexpr unsigned short COST_MAX = 255;
Array2D<unsigned char>::Data l;
Array2D<unsigned char>::Data r;
Array2D<float>::Data h1;
......
......@@ -18,7 +18,7 @@ namespace impl {
uint16_t disp_min;
uint16_t disp_max;
static constexpr T COST_MAX = std::numeric_limits<T>::max();
//static constexpr T COST_MAX = std::numeric_limits<T>::max();
__host__ __device__ inline uint16_t disparityRange() const { return disp_max-disp_min+1; }
__host__ __device__ inline uint32_t size() const { return width * disparityRange() * height; }
......
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/core/cuda/common.hpp>
#include <opencv2/cudaarithm.hpp>
#include <opencv2/highgui.hpp>
#include "stereo.hpp"
#include "util_opencv.hpp"
#include "costs/census.hpp"
#include "dsi.hpp"
#include "wta.hpp"
#include "cost_aggregation.hpp"
#include "aggregations/adaptive_penalty.hpp"
#include "median_filter.hpp"
#ifdef __GNUG__
#include <chrono>
#include <iostream>
static std::chrono::time_point<std::chrono::system_clock> start;
static void timer_set() {
start = std::chrono::high_resolution_clock::now();
}
static void timer_print(const std::string &msg, const bool reset=true) {
auto stop = std::chrono::high_resolution_clock::now();
char buf[24];
snprintf(buf, sizeof(buf), "%5i ms ",
(int) std::chrono::duration_cast<std::chrono::milliseconds>(stop-start).count());
std::cout << buf << msg << "\n" << std::flush;
if (reset) { timer_set(); }
}
#else
static void timer_set() {}
static void timer_print(const std::string &msg, const bool reset=true) {}
#endif
using cv::Mat;
using cv::Size;
using ftl::stereo::aggregations::AdaptivePenaltySGM;
struct StereoCensusAdaptive::Impl {
//DisparitySpaceImage<unsigned short> dsi;
CensusMatchingCost cost;
Array2D<unsigned short> cost_min_paths;
Array2D<unsigned short> uncertainty;
Array2D<float> disparity_r;
Array2D<uchar> l;
Array2D<uchar> r;
Array2D<uchar> penalty;
PathAggregator<AdaptivePenaltySGM<CensusMatchingCost::DataType>> aggr;
WinnerTakesAll<DSImage16U,float> wta;
Impl(int width, int height, int min_disp, int max_disp) :
cost(width, height, min_disp, max_disp),
cost_min_paths(width, height),
uncertainty(width, height),
disparity_r(width, height), l(width, height), r(width, height),
penalty(width, height) {}
};
StereoCensusAdaptive::StereoCensusAdaptive() : impl_(nullptr) {
impl_ = new Impl(0, 0, 0, 0);
}
void StereoCensusAdaptive::compute(cv::InputArray l, cv::InputArray r, cv::OutputArray disparity) {
cudaSetDevice(0);
if (l.rows() != impl_->cost.height() || r.cols() != impl_->cost.width()) {
delete impl_; impl_ = nullptr;
impl_ = new Impl(l.cols(), l.rows(), params.d_min, params.d_max);
}
mat2gray(l, impl_->l);
mat2gray(r, impl_->r);
timer_set();
// CT
impl_->cost.set(impl_->l, impl_->r);
cudaSafeCall(cudaDeviceSynchronize());
if (params.debug) { timer_print("census transform"); }
impl_->penalty.toGpuMat().setTo(params.P1*4);
auto canny = cv::cuda::createCannyEdgeDetector(30,120);
cv::cuda::GpuMat edges;
canny->detect(impl_->l.toGpuMat(), edges);
impl_->penalty.toGpuMat().setTo(int(float(params.P1)*1.5f), edges);
cv::Mat penalties;
impl_->penalty.toGpuMat().download(penalties);
cv::imshow("Penalties", penalties);
// cost aggregation
AdaptivePenaltySGM<CensusMatchingCost::DataType> func = {impl_->cost.data(), impl_->cost_min_paths.data(), params.P1};
impl_->aggr.getDirectionData(AggregationDirections::LEFTRIGHT).penalties = impl_->penalty;
impl_->aggr.getDirectionData(AggregationDirections::RIGHTLEFT).penalties = impl_->penalty;
impl_->aggr.getDirectionData(AggregationDirections::UPDOWN).penalties = impl_->penalty;
impl_->aggr.getDirectionData(AggregationDirections::DOWNUP).penalties = impl_->penalty;
impl_->aggr.getDirectionData(AggregationDirections::TOPLEFTBOTTOMRIGHT).penalties = impl_->penalty;
impl_->aggr.getDirectionData(AggregationDirections::BOTTOMRIGHTTOPLEFT).penalties = impl_->penalty;
impl_->aggr.getDirectionData(AggregationDirections::BOTTOMLEFTTOPRIGHT).penalties = impl_->penalty;
impl_->aggr.getDirectionData(AggregationDirections::TOPRIGHTBOTTOMLEFT).penalties = impl_->penalty;
auto &out = impl_->aggr(func, params.paths);
cudaSafeCall(cudaDeviceSynchronize());
if (params.debug) { timer_print("Aggregation"); }
impl_->wta(out, 0);
cudaSafeCall(cudaDeviceSynchronize());
if (params.debug) { timer_print("WTA"); }
// Drory, A., Haubold, C., Avidan, S., & Hamprecht, F. A. (2014).
// Semi-global matching: A principled derivation in terms of
// message passing. Lecture Notes in Computer Science (Including Subseries
// Lecture Notes in Artificial Intelligence and Lecture Notes in
// Bioinformatics). https://doi.org/10.1007/978-3-319-11752-2_4
if (disparity.isGpuMat()) {
auto uncertainty = impl_->uncertainty.toGpuMat();
cv::cuda::subtract(impl_->wta.min_cost.toGpuMat(), impl_->cost_min_paths.toGpuMat(), uncertainty);
cv::cuda::compare(uncertainty, params.uniqueness, uncertainty, cv::CMP_GT);
impl_->wta.disparity.toGpuMat().setTo(0, uncertainty);
}
else {
auto uncertainty = impl_->uncertainty.toMat();
cv::subtract(impl_->wta.min_cost.toMat(), impl_->cost_min_paths.toMat(), uncertainty);
cv::compare(uncertainty, params.uniqueness, uncertainty, cv::CMP_GT);
impl_->wta.disparity.toGpuMat().setTo(0, uncertainty);
}
median_filter(impl_->wta.disparity, disparity);
if (params.debug) { timer_print("median filter"); }
}
StereoCensusAdaptive::~StereoCensusAdaptive() {
if (impl_) {
delete impl_;
impl_ = nullptr;
}
}
......@@ -92,13 +92,13 @@ void StereoGradientStree::compute(cv::InputArray l, cv::InputArray r, cv::Output
cudaSafeCall(cudaDeviceSynchronize());
//AggregationParameters aggr_params = {impl_->cost_min_paths.data(), params};
StandardSGM<GradientMatchingCostL2::DataType> func1 = {impl_->cost.data(), impl_->cost_min_paths.data(), params.P1, params.P2};
StandardSGM<GradientMatchingCostL2::DataType> func1 = {impl_->cost.data(), impl_->cost_min_paths.data(), int(params.P1), int(params.P2)};
auto &out1 = impl_->aggr1(func1, AggregationDirections::HORIZONTAL);
cudaSafeCall(cudaDeviceSynchronize());
if (params.debug) { timer_print("Aggregation 1"); }
StandardSGM<DisparitySpaceImage<unsigned short>::DataType> func2 = {out1.data(), impl_->cost_min_paths.data(), params.P1, params.P2};
StandardSGM<DisparitySpaceImage<unsigned short>::DataType> func2 = {out1.data(), impl_->cost_min_paths.data(), int(params.P1), int(params.P2)};
auto &out2 = impl_->aggr2(func2, AggregationDirections::VERTICAL);
cudaSafeCall(cudaDeviceSynchronize());
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment