From 113fa4926483d19dbcade34fe7317d162dca9621 Mon Sep 17 00:00:00 2001 From: Nicolas Pope <nicolas.pope@utu.fi> Date: Fri, 17 Apr 2020 09:13:42 +0300 Subject: [PATCH] Libstereo custom SGM algorithms --- CMakeLists.txt | 8 +- components/operators/CMakeLists.txt | 3 +- .../include/ftl/operators/disparity.hpp | 33 +- components/operators/src/depth.cpp | 2 + .../operators/src/disparity/libstereo.cpp | 68 +++ lib/libstereo/CMakeLists.txt | 72 ++++ lib/libstereo/include/stereo.hpp | 164 ++++++++ lib/libstereo/include/stereo_types.hpp | 21 + lib/libstereo/middlebury/CMakeLists.txt | 8 + lib/libstereo/middlebury/main.cpp | 159 +++++++ lib/libstereo/middlebury/middlebury.cpp | 171 ++++++++ lib/libstereo/middlebury/middlebury.hpp | 34 ++ .../src/aggregations/standard_sgm.hpp | 120 ++++++ lib/libstereo/src/array2d.hpp | 136 ++++++ lib/libstereo/src/consistency.hpp | 36 ++ lib/libstereo/src/cost_aggregation.hpp | 120 ++++++ lib/libstereo/src/costs/absolute_diff.hpp | 50 +++ lib/libstereo/src/costs/census.cu | 48 +++ lib/libstereo/src/costs/census.hpp | 70 ++++ lib/libstereo/src/costs/dual.hpp | 51 +++ lib/libstereo/src/costs/gradient.cu | 28 ++ lib/libstereo/src/costs/gradient.hpp | 65 +++ lib/libstereo/src/costs/mutual_information.cu | 157 +++++++ .../src/costs/mutual_information.hpp | 65 +++ lib/libstereo/src/dsbase.hpp | 64 +++ lib/libstereo/src/dsbase_impl.hpp | 5 + lib/libstereo/src/dsi.hpp | 74 ++++ lib/libstereo/src/dsi_impl.hpp | 118 ++++++ lib/libstereo/src/memory.hpp | 54 +++ lib/libstereo/src/stereo_adcensussgm.cu | 147 +++++++ lib/libstereo/src/stereo_adsgm.cu | 138 ++++++ lib/libstereo/src/stereo_censussgm.cu | 140 +++++++ lib/libstereo/src/stereo_gradientstree.cu | 141 +++++++ lib/libstereo/src/stereo_misgm.cu | 152 +++++++ lib/libstereo/src/stereo_sgmp.cpp | 394 ++++++++++++++++++ lib/libstereo/src/util.hpp | 135 ++++++ lib/libstereo/src/util_opencv.hpp | 153 +++++++ lib/libstereo/src/wta.hpp | 199 +++++++++ lib/libstereo/test/CMakeLists.txt | 65 +++ lib/libstereo/test/aggregation_unit.cpp | 276 ++++++++++++ lib/libstereo/test/array2d_unit.cpp | 11 + lib/libstereo/test/dsi_gpu_unit.cu | 11 + lib/libstereo/test/dsi_unit.cpp | 11 + lib/libstereo/test/matching_cost_unit.cpp | 40 ++ lib/libstereo/test/tests.cpp | 3 + lib/libstereo/test/wta_unit.cpp | 72 ++++ 46 files changed, 4083 insertions(+), 9 deletions(-) create mode 100644 components/operators/src/disparity/libstereo.cpp create mode 100644 lib/libstereo/CMakeLists.txt create mode 100644 lib/libstereo/include/stereo.hpp create mode 100644 lib/libstereo/include/stereo_types.hpp create mode 100644 lib/libstereo/middlebury/CMakeLists.txt create mode 100644 lib/libstereo/middlebury/main.cpp create mode 100644 lib/libstereo/middlebury/middlebury.cpp create mode 100644 lib/libstereo/middlebury/middlebury.hpp create mode 100644 lib/libstereo/src/aggregations/standard_sgm.hpp create mode 100644 lib/libstereo/src/array2d.hpp create mode 100644 lib/libstereo/src/consistency.hpp create mode 100644 lib/libstereo/src/cost_aggregation.hpp create mode 100644 lib/libstereo/src/costs/absolute_diff.hpp create mode 100644 lib/libstereo/src/costs/census.cu create mode 100644 lib/libstereo/src/costs/census.hpp create mode 100644 lib/libstereo/src/costs/dual.hpp create mode 100644 lib/libstereo/src/costs/gradient.cu create mode 100644 lib/libstereo/src/costs/gradient.hpp create mode 100644 lib/libstereo/src/costs/mutual_information.cu create mode 100644 lib/libstereo/src/costs/mutual_information.hpp create mode 100644 lib/libstereo/src/dsbase.hpp create mode 100644 lib/libstereo/src/dsbase_impl.hpp create mode 100644 lib/libstereo/src/dsi.hpp create mode 100644 lib/libstereo/src/dsi_impl.hpp create mode 100644 lib/libstereo/src/memory.hpp create mode 100644 lib/libstereo/src/stereo_adcensussgm.cu create mode 100644 lib/libstereo/src/stereo_adsgm.cu create mode 100644 lib/libstereo/src/stereo_censussgm.cu create mode 100644 lib/libstereo/src/stereo_gradientstree.cu create mode 100644 lib/libstereo/src/stereo_misgm.cu create mode 100644 lib/libstereo/src/stereo_sgmp.cpp create mode 100644 lib/libstereo/src/util.hpp create mode 100644 lib/libstereo/src/util_opencv.hpp create mode 100644 lib/libstereo/src/wta.hpp create mode 100644 lib/libstereo/test/CMakeLists.txt create mode 100644 lib/libstereo/test/aggregation_unit.cpp create mode 100644 lib/libstereo/test/array2d_unit.cpp create mode 100644 lib/libstereo/test/dsi_gpu_unit.cu create mode 100644 lib/libstereo/test/dsi_unit.cpp create mode 100644 lib/libstereo/test/matching_cost_unit.cpp create mode 100644 lib/libstereo/test/tests.cpp create mode 100644 lib/libstereo/test/wta_unit.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index d1a93b367..95a37c796 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -115,6 +115,12 @@ endif() # ============================================================================== +add_subdirectory(lib/libstereo) +include_directories(lib/libstereo/include) +set_property(TARGET libstereo PROPERTY FOLDER "dependencies") + +# + if (WITH_FIXSTARS) set(HAVE_LIBSGM true) add_subdirectory(lib/libsgm) @@ -438,4 +444,4 @@ endif() if (WIN32) # TODO(nick) Should do based upon compiler (VS) set_property(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY VS_STARTUP_PROJECT ${VS_STARTUP_PROJECT}) set_property(TARGET ftl-vision PROPERTY VS_DEBUGGER_WORKING_DIRECTORY ${VS_DEBUG_WORKING_DIRECTORY}) -endif() \ No newline at end of file +endif() diff --git a/components/operators/CMakeLists.txt b/components/operators/CMakeLists.txt index becb9f60b..dea9b6df6 100644 --- a/components/operators/CMakeLists.txt +++ b/components/operators/CMakeLists.txt @@ -8,6 +8,7 @@ set(OPERSRC src/normals.cpp src/filling.cpp src/filling.cu + src/disparity/libstereo.cpp src/disparity/disp2depth.cu src/disparity/disparity_to_depth.cpp src/disparity/bilateral_filter.cpp @@ -54,6 +55,6 @@ target_include_directories(ftloperators PUBLIC $<INSTALL_INTERFACE:include> PRIVATE src) -target_link_libraries(ftloperators ftlrender ftlrgbd ftlcommon sgm Eigen3::Eigen Threads::Threads ${OpenCV_LIBS}) +target_link_libraries(ftloperators ftlrender ftlrgbd ftlcommon sgm libstereo Eigen3::Eigen Threads::Threads ${OpenCV_LIBS}) #ADD_SUBDIRECTORY(test) diff --git a/components/operators/include/ftl/operators/disparity.hpp b/components/operators/include/ftl/operators/disparity.hpp index eea830cc4..17cdced1d 100644 --- a/components/operators/include/ftl/operators/disparity.hpp +++ b/components/operators/include/ftl/operators/disparity.hpp @@ -17,11 +17,30 @@ namespace ftl { namespace operators { +class StereoDisparity : public ftl::operators::Operator { +public: + explicit StereoDisparity(ftl::Configurable* cfg); + + ~StereoDisparity(); + inline Operator::Type type() const override { return Operator::Type::OneToOne; } + bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t stream) override; + + bool isMemoryHeavy() const override { return true; } + +private: + bool init(); + + struct Impl; + Impl *impl_; + + cv::cuda::GpuMat disp32f_; +}; + #ifdef HAVE_LIBSGM /* * FixstarsSGM https://github.com/fixstars/libSGM * - * Requires modified version https://gitlab.utu.fi/joseha/libsgm + * Requires modified version in lib/libsgm */ class FixstarsSGM : public ftl::operators::Operator { public: @@ -95,17 +114,17 @@ class DisparityToDepth : public ftl::operators::Operator { * disparity to depth steps. */ class DepthChannel : public ftl::operators::Operator { - public: - explicit DepthChannel(ftl::Configurable *cfg); - ~DepthChannel(); + public: + explicit DepthChannel(ftl::Configurable *cfg); + ~DepthChannel(); inline Operator::Type type() const override { return Operator::Type::ManyToMany; } - bool apply(ftl::rgbd::FrameSet &in, ftl::rgbd::FrameSet &out, cudaStream_t stream) override; + bool apply(ftl::rgbd::FrameSet &in, ftl::rgbd::FrameSet &out, cudaStream_t stream) override; bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t stream) override; - private: - ftl::operators::Graph *pipe_; + private: + ftl::operators::Graph *pipe_; std::vector<cv::cuda::GpuMat> rbuf_; cv::Size depth_size_; diff --git a/components/operators/src/depth.cpp b/components/operators/src/depth.cpp index c3749419b..2c689c2b4 100644 --- a/components/operators/src/depth.cpp +++ b/components/operators/src/depth.cpp @@ -146,6 +146,8 @@ void DepthChannel::_createPipeline(size_t size) { #endif #ifdef HAVE_LIBSGM pipe_->append<ftl::operators::FixstarsSGM>("algorithm"); + #else + pipe_->append<ftl::operators::StereoDisparity>("algorithm"); #endif pipe_->append<ftl::operators::DisparityBilateralFilter>("bilateral_filter"); //pipe_->append<ftl::operators::OpticalFlowTemporalSmoothing>("optflow_filter", Channel::Disparity); diff --git a/components/operators/src/disparity/libstereo.cpp b/components/operators/src/disparity/libstereo.cpp new file mode 100644 index 000000000..22aaac630 --- /dev/null +++ b/components/operators/src/disparity/libstereo.cpp @@ -0,0 +1,68 @@ +#include <loguru.hpp> + +#include "ftl/operators/disparity.hpp" +#include <ftl/operators/cuda/disparity.hpp> + +#include <opencv2/cudaimgproc.hpp> +#include <opencv2/cudaarithm.hpp> + +#include <stereo.hpp> + +using cv::Size; +using cv::cuda::GpuMat; + +using ftl::rgbd::Format; +using ftl::codecs::Channel; +using ftl::rgbd::Frame; +using ftl::rgbd::Source; +using ftl::operators::StereoDisparity; + +struct StereoDisparity::Impl { + StereoCensusSgm sgm; +}; + +StereoDisparity::StereoDisparity(ftl::Configurable* cfg) : + ftl::operators::Operator(cfg), impl_(nullptr) { + + init(); +} + +StereoDisparity::~StereoDisparity() { + if (impl_) { + delete impl_; + impl_ = nullptr; + } +} + +bool StereoDisparity::init() { + if (impl_) { delete impl_; } + impl_ = new Impl(); + impl_->sgm.params.d_min = 0; + impl_->sgm.params.d_max = 256; + return true; +} + +bool StereoDisparity::apply(Frame &in, Frame &out, cudaStream_t stream) { + if (!in.hasChannel(Channel::Left) || !in.hasChannel(Channel::Right)) { + LOG(ERROR) << "Left or Right channel missing for stereo disparity"; + return false; + } + + impl_->sgm.params.P1 = config()->value("P1", 10); + impl_->sgm.params.P2 = config()->value("P2", 60); + + const auto &l = in.get<GpuMat>(Channel::Left); + const auto &r = in.get<GpuMat>(Channel::Right); + disp32f_.create(l.size(), CV_32FC1); + + bool has_estimate = in.hasChannel(Channel::Disparity); + auto &disp = (!has_estimate) ? out.create<GpuMat>(Channel::Disparity, Format<short>(l.size())) : in.get<GpuMat>(Channel::Disparity); + + auto cvstream = cv::cuda::StreamAccessor::wrapStream(stream); + + cvstream.waitForCompletion(); + impl_->sgm.compute(l, r, disp32f_); + + disp32f_.convertTo(disp, CV_16SC1, 16.0, 0.0, cvstream); + return true; +} diff --git a/lib/libstereo/CMakeLists.txt b/lib/libstereo/CMakeLists.txt new file mode 100644 index 000000000..8ebb62f64 --- /dev/null +++ b/lib/libstereo/CMakeLists.txt @@ -0,0 +1,72 @@ +cmake_minimum_required(VERSION 3.10 FATAL_ERROR) + +project(libstereo) + +option(BUILD_MIDDLEBURY "Build Middlebury evaluation" OFF) +option(BUILD_TESTS "Build unit tests" ON) +option(LIBSTEREO_SHARED "Build a shared library" OFF) + +find_package(OpenCV REQUIRED) +find_package(OpenMP REQUIRED) +find_package( Threads REQUIRED ) +#find_package(CUDA REQUIRED) + +enable_language(CUDA) + +set(CMAKE_CXX_CPPCHECK "cppcheck") +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_USE_RELATIVE_PATHS ON) +set(CMAKE_CXX_FLAGS_RELEASE) + +if (CMAKE_COMPILER_IS_GNUCXX) + set(CMAKE_CUDA_FLAGS "--gpu-architecture=compute_61 -std=c++14 -Xcompiler -fPIC -Xcompiler ${OpenMP_CXX_FLAGS}") + set(CMAKE_CUDA_FLAGS_RELEASE "-O3") +endif() + +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") +set(CMAKE_CXX_FLAGS_RELEASE "-O3") +set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}") + +include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) + +if (LIBSTEREO_SHARED) + add_library(libstereo SHARED + src/stereo_gradientstree.cu + src/stereo_misgm.cu + src/stereo_censussgm.cu + src/stereo_sgmp.cpp + src/costs/census.cu + src/costs/gradient.cu + src/costs/mutual_information.cu + ) +set_target_properties(libstereo PROPERTIES PUBLIC_HEADER include/stereo.hpp) + +else() + add_library(libstereo + src/stereo_gradientstree.cu + src/stereo_misgm.cu + src/stereo_censussgm.cu + src/stereo_adcensussgm.cu + src/stereo_adsgm.cu + #src/stereo_sgmp.cpp + src/costs/census.cu + src/costs/gradient.cu + src/costs/mutual_information.cu + ) +endif() + +if (USE_GPU) + target_compile_definitions(libstereo PUBLIC USE_GPU) +endif() + +target_include_directories(libstereo PRIVATE src/ include/) +target_include_directories(libstereo PUBLIC ${OpenCV_INCLUDE_DIRS}) +target_link_libraries(libstereo Threads::Threads ${OpenCV_LIBS} ${CUDA_LIBRARIES}) + +if (BUILD_MIDDLEBURY) + add_subdirectory(middlebury/) +endif() + +if (BUILD_TESTS) + add_subdirectory(test) +endif() diff --git a/lib/libstereo/include/stereo.hpp b/lib/libstereo/include/stereo.hpp new file mode 100644 index 000000000..bd7b38910 --- /dev/null +++ b/lib/libstereo/include/stereo.hpp @@ -0,0 +1,164 @@ +#pragma once + +#include <opencv2/core/mat.hpp> +#include <stereo_types.hpp> + +class StereoADCensusSgm { +public: + StereoADCensusSgm(); + ~StereoADCensusSgm(); + + void compute(cv::InputArray l, cv::InputArray r, cv::OutputArray disparity); + + struct Parameters { + int d_min = 0; + int d_max = 0; + unsigned short P1 = 5; + unsigned short P2 = 25; + float uniqueness = std::numeric_limits<unsigned short>::max(); + int subpixel = 1; // subpixel interpolation method + int paths = AggregationDirections::HORIZONTAL | + AggregationDirections::VERTICAL | + AggregationDirections::DIAGONAL; + bool debug = false; + }; + Parameters params; + +private: + struct Impl; + Impl *impl_; +}; + +class StereoADSgm { +public: + StereoADSgm(); + ~StereoADSgm(); + + void compute(cv::InputArray l, cv::InputArray r, cv::OutputArray disparity); + + struct Parameters { + int d_min = 0; + int d_max = 0; + unsigned short P1 = 5; + unsigned short P2 = 25; + float uniqueness = std::numeric_limits<unsigned short>::max(); + int subpixel = 1; // subpixel interpolation method + int paths = AggregationDirections::HORIZONTAL | + AggregationDirections::VERTICAL | + AggregationDirections::DIAGONAL; + bool debug = false; + }; + Parameters params; + +private: + struct Impl; + Impl *impl_; +}; + +class StereoCensusSgm { +public: + StereoCensusSgm(); + ~StereoCensusSgm(); + + void compute(cv::InputArray l, cv::InputArray r, cv::OutputArray disparity); + + struct Parameters { + int d_min = 0; + int d_max = 0; + unsigned short P1 = 5; + unsigned short P2 = 25; + float uniqueness = std::numeric_limits<unsigned short>::max(); + int subpixel = 1; // subpixel interpolation method + int paths = AggregationDirections::HORIZONTAL | + AggregationDirections::VERTICAL | + AggregationDirections::DIAGONAL; + bool debug = false; + }; + Parameters params; + +private: + struct Impl; + Impl *impl_; +}; + +class StereoMiSgm { +public: + StereoMiSgm(); + ~StereoMiSgm(); + + void compute(cv::InputArray l, cv::InputArray r, cv::OutputArray disparity); + void setPrior(const cv::Mat &disp); + + struct Parameters { + int d_min = 0; + int d_max = 0; + unsigned short P1 = 2; + unsigned short P2 = 8; + float uniqueness = std::numeric_limits<short>::max(); + int subpixel = 1; // subpixel interpolation method + int paths = AggregationDirections::HORIZONTAL | + AggregationDirections::VERTICAL | + AggregationDirections::DIAGONAL; + bool debug = false; + }; + Parameters params; + +private: + struct Impl; + Impl *impl_; +}; + + +/** + * Census + SGM + prior + * +class StereoCensusSgmP { +public: + StereoCensusSgmP(); + ~StereoCensusSgmP(); + + void compute(cv::InputArray l, cv::InputArray r, cv::OutputArray disparity); + void setPrior(const cv::Mat &prior); + + struct Parameters { + int d_min = 0; + int d_max = 0; + int range = 10; + unsigned short P1 = 5; + unsigned short P2 = 25; + float uniqueness = std::numeric_limits<unsigned short>::max(); + int subpixel = 1; // subpixel interpolation method + int paths = AggregationDirections::HORIZONTAL | + AggregationDirections::VERTICAL | + AggregationDirections::DIAGONAL; + }; + Parameters params; + +private: + struct Impl; + Impl *impl_; +};*/ + +class StereoGradientStree { +public: + StereoGradientStree(); + ~StereoGradientStree(); + + void compute(cv::InputArray l, cv::InputArray r, cv::OutputArray disparity); + + struct Parameters { + int d_min = 0; + int d_max = 0; + float P1 = 5; + float P2 = 25; + float P3 = 64; + float uniqueness = std::numeric_limits<unsigned short>::max(); + int subpixel = 1; + bool debug = false; + }; + Parameters params; + +private: + struct Impl; + Impl *impl_; +}; diff --git a/lib/libstereo/include/stereo_types.hpp b/lib/libstereo/include/stereo_types.hpp new file mode 100644 index 000000000..c4fa54de4 --- /dev/null +++ b/lib/libstereo/include/stereo_types.hpp @@ -0,0 +1,21 @@ +#ifndef _FTL_LIBSTEREO_TYPES_HPP_ +#define _FTL_LIBSTEREO_TYPES_HPP_ + +enum AggregationDirections { + LEFTRIGHT = 1, + RIGHTLEFT = 2, + HORIZONTAL = 1+2, + UPDOWN = 4, + DOWNUP = 8, + VERTICAL = 4+8, + TOPLEFTBOTTOMRIGHT = 16, + BOTTOMRIGHTTOPLEFT = 32, + DIAGONAL1 = 16+32, + BOTTOMLEFTTOPRIGHT = 64, + TOPRIGHTBOTTOMLEFT = 128, + DIAGONAL2 = 64+128, + DIAGONAL = DIAGONAL1+DIAGONAL2, + ALL = HORIZONTAL+VERTICAL+DIAGONAL, +}; + +#endif diff --git a/lib/libstereo/middlebury/CMakeLists.txt b/lib/libstereo/middlebury/CMakeLists.txt new file mode 100644 index 000000000..3f5632c35 --- /dev/null +++ b/lib/libstereo/middlebury/CMakeLists.txt @@ -0,0 +1,8 @@ +add_executable (middlebury + main.cpp + middlebury.cpp +) + +target_include_directories(middlebury PRIVATE ../include/) +target_include_directories(middlebury PUBLIC ${OpenCV_INCLUDE_DIRS}) +target_link_libraries(middlebury libstereo pthread dl ${OpenCV_LIBS}) diff --git a/lib/libstereo/middlebury/main.cpp b/lib/libstereo/middlebury/main.cpp new file mode 100644 index 000000000..6821cacef --- /dev/null +++ b/lib/libstereo/middlebury/main.cpp @@ -0,0 +1,159 @@ +#include <opencv2/opencv.hpp> + +#include <vector> +#include <chrono> + +#include "middlebury.hpp" +#include "stereo.hpp" + +/** + * @param disp disparity map + * @param out output parameter + * @param ndisp number of disparities + * + * If ndisp is greater than zero, color mapping is scaled to range [0, ndisp], + * otherwise scaling is automatically set to [0, max(disp)]. + */ +void colorize(const cv::Mat &disp, cv::Mat &out, int ndisp=-1) { + double dmin, dmax; + cv::minMaxLoc(disp, &dmin, &dmax); + cv::Mat dispf, dispc; + disp.convertTo(dispf, CV_32FC1); + dispf.convertTo(dispc, CV_8UC1, (1.0f / (ndisp > 0 ? (float) ndisp : dmax)) * 255.0f); + + //cv::applyColorMap(dispc, out, cv::COLORMAP_TURBO); + cv::applyColorMap(dispc, out, cv::COLORMAP_INFERNO); +} + +//////////////////////////////////////////////////////////////////////////////// + +using cv::Mat; +using std::vector; + +int main(int argc, char* argv[]) { + #if USE_GPU + std::cout << "GPU VERSION\n"; + #else + std::cout << "CPU VERSION\n"; + #endif + + Mat imL; + Mat imR; + Mat gtL; + Mat maskL; + MiddEvalCalib calib; + + if (argc < 2) { + std::cerr << "usage: middlebury [path]\n"; + return 1; + } + + imL = cv::imread(argv[1] + std::string("im0.png")); + imR = cv::imread(argv[1] + std::string("im1.png")); + gtL = read_pfm(argv[1] + std::string("disp0.pfm")); + if (gtL.empty()) { + gtL = read_pfm(argv[1] + std::string("disp0GT.pfm")); + } + //maskL = cv::imread(argv[1] + std::string("mask0nocc.png"), cv::IMREAD_GRAYSCALE); + calib = read_calibration(argv[1] + std::string("calib.txt")); + + maskL.create(imL.size(), CV_8UC1); + maskL.setTo(cv::Scalar(255)); + + int ndisp = calib.vmax - calib.vmin; + + auto stereo = StereoCensusSgm(); + stereo.params.P1 = 7; + stereo.params.P2 = 60; + + stereo.params.d_min = calib.vmin; + stereo.params.d_max = calib.vmax; + stereo.params.subpixel = 1; + stereo.params.debug = true; + //stereo.params.paths = AggregationDirections::ALL; + //stereo.params.uniqueness = 200; + + int i_max = 1; + float t = 4.0f; + + if (imL.empty() || imR.empty() || gtL.empty()) { + std::cerr << "can't load image\n"; + return 1; + } + + if (imL.size() != imR.size()) { + std::cerr << "images must be same size\n"; + return 1; + } + + Mat disp(imL.size(), CV_32FC1, cv::Scalar(0.0f)); + //stereo.setPrior(gtL); + + std::cout << "resolution: " << imL.cols << "x" << imL.rows << ", " << ndisp << " disparities [" << calib.vmin << "," << calib.vmax << "]\n"; + + std::vector<cv::Mat> disp_color; + std::vector<cv::Mat> err_color; + + for (int i = 0; i < i_max; i++) { + auto start = std::chrono::high_resolution_clock::now(); + + cv::cuda::GpuMat gpu_imL(imL); + cv::cuda::GpuMat gpu_imR(imR); + cv::cuda::GpuMat gpu_disp(disp); + stereo.compute(gpu_imL, gpu_imR, gpu_disp); + gpu_disp.download(disp); + + //stereo.compute(imL, imR, disp); + //stereo.setPrior(disp); + auto stop = std::chrono::high_resolution_clock::now(); + std::cout << "disparity complete in " + << std::chrono::duration_cast<std::chrono::milliseconds>(stop-start).count() + << " ms\n"; + + MiddEvalResult res; + + std::cout << "TYPE: " << gtL.type() << std::endl; + + for (const float t : {4.0f, 1.0f, 0.5f, 0.25f}) { + res = evaluate(disp, gtL, maskL, t); + if (i == 0 || i == i_max-1) { + printf("%9.2f %% correct (err < %.2f)\n", 100.0f * res.err_bad, res.threshold); + //printf("%9.2f %% correct (non-occluded, err < %.2f)\n", 100.0f * res.err_bad_nonoccl, res.threshold); + printf("%9.2f px RMSE (all)\n", res.rms_bad); + //printf("%9.2f px RMSE (non-occluded)\n", res.rms_bad_nonoccl); + } + } + + if (i == 0 || i == i_max-1) { + printf("%9.2f %% total\n", 100.0f * res.err_total); + printf("%9.2f %% non-occluded\n", 100.0f * res.err_nonoccl); + } + + Mat &err_color_ = err_color.emplace_back(); + Mat &disp_color_ = disp_color.emplace_back(); + + // visualize error: set missing values at 0 and scale error to [1.0, t+1.0] + // errors > t truncate to t + Mat err(gtL.size(), CV_32FC1); + cv::absdiff(gtL, disp, err); + err.setTo(t, err > t); + err += 1.0f; + err.setTo(0, disp == 0.0); + err.convertTo(err, CV_8UC1, 255.0f/(t+1.0f)); + cv::applyColorMap(err, err_color_, cv::COLORMAP_PLASMA); + + colorize(disp, disp_color_, calib.ndisp); + } + + cv::imshow(std::to_string(0) + ": " + "error (err < " + std::to_string(t) + ")", err_color[0]); + cv::imshow(std::to_string(0) + ": " + "disparity", disp_color[0]); + cv::imshow(std::to_string(i_max-1) + ": " + "error (err < " + std::to_string(t) + ")", err_color[i_max-1]); + cv::imshow(std::to_string(i_max-1) + ": " + "disparity", disp_color[i_max-1]); + + Mat gt_color; + colorize(gtL, gt_color, calib.ndisp); + cv::imshow("gt", gt_color); + cv::waitKey(); + + return 0; +} diff --git a/lib/libstereo/middlebury/middlebury.cpp b/lib/libstereo/middlebury/middlebury.cpp new file mode 100644 index 000000000..a54b4f153 --- /dev/null +++ b/lib/libstereo/middlebury/middlebury.cpp @@ -0,0 +1,171 @@ +#include "middlebury.hpp" + +#include <cmath> +#include <cstdio> +#include <fstream> +#include <string> +#include <iostream> +#include <vector> + +#include <opencv2/core.hpp> + +cv::Mat read_pfm(const std::string &filename) { + cv::Mat im; + FILE * fp; + + fp = fopen(filename.c_str(), "rb"); + char buf[32]; + int fsize; + int width; + int height; + float scale; + + if (fp == NULL) { return im; } + + if (fscanf(fp, "%31s", buf) == 0 || strcmp(buf, "Pf")) { goto cleanup; } + + if (fscanf(fp, "%31s", buf) == 0) { goto cleanup; } + width = atoi(buf); + + if (fscanf(fp, "%31s", buf) == 0) { goto cleanup; } + height = atoi(buf); + + if (fscanf(fp, " %31s", buf) == 0) { goto cleanup; } + scale = atof(buf); + + im.create(height, width, CV_32FC1); + + fseek(fp, 0, SEEK_END); + fseek(fp, ftell(fp)-width*height*sizeof(float), SEEK_SET); + + for (int y = 0; y < height; y++) { + float* im_ptr = im.ptr<float>(height-y-1); + int nread = 0; + do { + nread += fread(im_ptr+nread, sizeof(float), width-nread, fp); + if (ferror(fp)) { goto cleanup; } + } + while (nread != width); + } + + cleanup: + fclose(fp); + return im; +} + +static void parse_camera_parameters(const std::string &v, MiddEvalCalib &calib) { + std::string mat = std::string(v.substr(1, v.size()-2)); + std::string row; + + std::vector<float> values; + + for (int _ = 0; _ < 3; _++) { + size_t pos = mat.find(";"); + row = mat.substr(0, pos); + + std::istringstream sstr(row); + for(std::string val; sstr >> val;) { + values.push_back(atof(val.c_str())); + } + + mat.erase(0, pos+1); + } + + calib.f = values[0]; + calib.cx = values[2]; + calib.cy = values[5]; +} + +MiddEvalCalib read_calibration(const std::string &filename) { + MiddEvalCalib calib; + memset(&calib, 0, sizeof(calib)); + + std::ifstream f(filename); + + for(std::string line; std::getline(f, line);) { + auto m = line.find("="); + if (m == std::string::npos) { continue; } + auto k = line.substr(0, m); + auto v = line.substr(m+1); + + if (k == "baseline") { + calib.baseline = atof(v.c_str()); + } + else if (k == "doffs") { + calib.doffs = atof(v.c_str()); + } + else if (k == "ndisp") { + calib.ndisp = atoi(v.c_str()); + } + else if (k == "vmin") { + calib.vmin = atoi(v.c_str()); + } + else if (k == "vmax") { + calib.vmax = atoi(v.c_str()); + } + else if (k == "width") { + calib.width = atoi(v.c_str()); + } + else if (k == "height") { + calib.height = atoi(v.c_str()); + } + else if (k == "cam0") { + parse_camera_parameters(v, calib); + } + } + + return calib; +} + +MiddEvalResult evaluate(const cv::Mat &disp, const cv::Mat >, const cv::Mat &mask, float threshold) { + MiddEvalResult result {0.0f, 0.0f, 0.0f}; + result.threshold = threshold; + + cv::Mat diff1(disp.size(), CV_32FC1); + cv::Mat diff2; + + cv::absdiff(disp, gt, diff1); + diff1.setTo(0.0f, gt != gt); // NaN + + // ...? where do inf values come from; issue in subpixel interpolation? + // doesn't seem to happen when interpolation is enabled + diff1.setTo(0.0f, gt == INFINITY); + + float n, n_gt, n_bad, err2, err2_bad; + + // errors incl. occluded areas + n = countNonZero(disp); + n_gt = countNonZero(gt); + cv::pow(diff1, 2, diff2); + err2 = cv::sum(diff2)[0]; + + result.err_total = n/n_gt; + result.rms_total = sqrt(err2/n); + + diff2.setTo(0.0f, diff1 > threshold); + n_bad = countNonZero(diff1 <= threshold); + err2_bad = cv::sum(diff2)[0]; + result.err_bad = n_bad/n_gt; + result.rms_bad = sqrt(err2_bad/n); + + // errors ignoring occlusions (mask) + diff1.setTo(0.0f, mask != 255); + cv::pow(diff1, 2, diff2); + + cv::Mat tmp; + disp.copyTo(tmp); + tmp.setTo(0.0f, mask != 255); + n = countNonZero(tmp); + n_gt = countNonZero(mask == 255); + err2 = cv::sum(diff2)[0]; + + result.err_nonoccl = n/n_gt; + result.rms_nonoccl = sqrt(err2/n); + + diff2.setTo(0.0f, diff1 > threshold); + n_bad = countNonZero(diff1 <= threshold) - countNonZero(mask != 255); + err2_bad = cv::sum(diff2)[0]; + result.err_bad_nonoccl = n_bad/n_gt; + result.rms_bad_nonoccl = sqrt(err2_bad/n); + return result; +} diff --git a/lib/libstereo/middlebury/middlebury.hpp b/lib/libstereo/middlebury/middlebury.hpp new file mode 100644 index 000000000..e2c18e02b --- /dev/null +++ b/lib/libstereo/middlebury/middlebury.hpp @@ -0,0 +1,34 @@ +#pragma once + +#include <opencv2/core/mat.hpp> + +struct MiddEvalResult { + float err_total; // all pixels + float rms_total; // + float err_nonoccl; // non-masked pixels + float rms_nonoccl; // + float err_bad; // within threshold disparity from correct value + float rms_bad; // RMS for pixels within threshold disparity from correct value + float err_bad_nonoccl; + float rms_bad_nonoccl; + float threshold; +}; + +struct MiddEvalCalib { + float f; + float cx; + float cy; + float baseline; + float doffs; + int width; + int height; + int ndisp; + int vmin; + int vmax; +}; + +MiddEvalCalib read_calibration(const std::string &filename); + +MiddEvalResult evaluate(const cv::Mat &disp, const cv::Mat >, const cv::Mat &mask, float threshold=1.0f); + +cv::Mat read_pfm(const std::string &filename); diff --git a/lib/libstereo/src/aggregations/standard_sgm.hpp b/lib/libstereo/src/aggregations/standard_sgm.hpp new file mode 100644 index 000000000..e05d06f2e --- /dev/null +++ b/lib/libstereo/src/aggregations/standard_sgm.hpp @@ -0,0 +1,120 @@ +#ifndef _FTL_LIBSTEREO_AGGREGATIONS_STANDARD_HPP_ +#define _FTL_LIBSTEREO_AGGREGATIONS_STANDARD_HPP_ + +#include "../dsi.hpp" + +namespace ftl { +namespace stereo { +namespace aggregations { + +template <typename DSIIN> +struct StandardSGM { + typedef typename DSIIN::Type Type; + typedef typename DSIIN::Type costtype_t; + + // Provided externally + const DSIIN in; + const int P1; + const int P2; + + // Provided internally + typename DisparitySpaceImage<costtype_t>::DataType out; + typename DisparitySpaceImage<costtype_t>::DataType previous; + typename DisparitySpaceImage<costtype_t>::DataType updates; + + struct PathData : BasePathData<costtype_t> { + // Custom path data goes here... + costtype_t previous_cost_min; + costtype_t *previous; + costtype_t *current; + }; + + struct DirectionData { + DisparitySpaceImage<costtype_t> previous; + DisparitySpaceImage<costtype_t> updates; + }; + + /* Initialise buffers for a new path direction. */ + void direction(DirectionData &data, DisparitySpaceImage<costtype_t> &buffer) { + out = buffer.data(); + data.previous.create(out.width+out.height, 1, out.disp_min, out.disp_max); + data.updates.create(out.width+out.height, 1, out.disp_min, out.disp_max); + previous = data.previous.data(); + updates = data.updates.data(); + } + + /* Reset buffers to start a new path */ + __host__ __device__ inline void startPath(ushort2 pixel, ushort thread, ushort stride, PathData &data) { + data.previous = &previous(0,data.pathix,previous.disp_min); + data.current = &updates(0,data.pathix,updates.disp_min); + + for (int d=thread; d<=previous.disp_min; d+=stride) { + data.previous[d] = 0; + } + + // To ensure all threads have finished clearing the buffer + #ifdef __CUDA_ARCH__ + __syncwarp(); + #endif + } + + __host__ __device__ inline void endPath(ushort2 pixel, ushort thread, ushort stride, PathData &data) {} + + /* Main SGM cost aggregation function */ + __host__ __device__ inline costtype_t calculateCost(ushort2 pixel, int d, costtype_t *previous, int size, costtype_t previous_cost_min) { + const costtype_t L_min = + min(previous[d], + min(costtype_t(previous_cost_min + P2), + min(costtype_t(previous[min(d+1,size)] + P1), + costtype_t(previous[max(d-1,0)] + P1)) + ) + ); + + // Note: This clamping to min 0 does make a tiny difference + auto cost = L_min + in(pixel.y,pixel.x,d+in.disp_min); + return (cost > previous_cost_min) ? cost - previous_cost_min : 0; + } + + /* Stride over each disparity and calculate minimum cost */ + __host__ __device__ inline void operator()(ushort2 pixel, ushort thread, ushort stride, PathData &data) { + #ifndef __CUDA_ARCH__ + using std::min; + using std::max; + #endif + + const int d_stop = int(out.disp_max)-int(out.disp_min); + + costtype_t min_cost = 255; + + // For each disparity (striding the threads) + for (int d=thread; d<=d_stop; d+=stride) { + auto c = calculateCost(pixel, d, data.previous, d_stop, data.previous_cost_min); + + out(pixel.y,pixel.x,d+in.disp_min) += c; + data.current[d] = c; + + // Record the smallest disparity cost for this pixel + min_cost = (c < min_cost) ? c : min_cost; + } + + // WARP Aggregate on GPU only (assumes stride = 1 on CPU) + // Min cost from each thread must be combined for overall minimum + // Each thread then obtains thread global minimum + #ifdef __CUDA_ARCH__ + min_cost = warpMin(min_cost); + #endif + + data.previous_cost_min = min_cost; + + // Swap current and previous cost buffers + costtype_t *tmp_ptr = const_cast<costtype_t *>(data.previous); + data.previous = data.current; + data.current = tmp_ptr; + } +}; + +} +} +} + +#endif diff --git a/lib/libstereo/src/array2d.hpp b/lib/libstereo/src/array2d.hpp new file mode 100644 index 000000000..f8385ca2a --- /dev/null +++ b/lib/libstereo/src/array2d.hpp @@ -0,0 +1,136 @@ +#ifndef _FTL_LIBSTEREO_ARRAY2D_HPP_ +#define _FTL_LIBSTEREO_ARRAY2D_HPP_ + +#include "memory.hpp" + +template<typename T> +class Array2D { +public: + Array2D() : width(0), height(0), needs_free_(false) { + data_.data = nullptr; + } + + Array2D(int w, int h) : width(w), height(h), needs_free_(true) { + data_.data = allocateMemory2D<T>(w, h, data_.pitch); + } + + explicit Array2D(cv::Mat &m) : needs_free_(false) { + #ifdef USE_GPU + create(m.cols, m.rows); + cudaSafeCall(cudaMemcpy2D(data_.data, data_.pitch*sizeof(T), m.data, m.step, width*sizeof(T), height, cudaMemcpyHostToDevice)); + #else + needs_free_ = false; + data_.data = (T*)m.data; + data_.pitch = m.step / sizeof(T); + width = m.cols; + height = m.rows; + #endif + } + + explicit Array2D(cv::cuda::GpuMat &m) : needs_free_(false) { + #ifdef USE_GPU + needs_free_ = false; + data_.data = (T*)m.data; + data_.pitch = m.step / sizeof(T); + width = m.cols; + height = m.rows; + #else + create(m.cols, m.rows); + cudaSafeCall(cudaMemcpy2D(data_.data, data_.pitch*sizeof(T), m.data, m.step, width*sizeof(T), height, cudaMemcpyDeviceToHost)); + #endif + } + + ~Array2D() { + free(); + } + + void free() { + if (needs_free_ && data_.data) freeMemory(data_.data); + } + + struct Data { + __host__ __device__ inline T& operator() (const int y, const int x) { + return data[x + y*pitch]; + } + + __host__ __device__ inline const T& operator() (const int y, const int x) const { + return data[x + y*pitch]; + } + + __host__ __device__ inline T *ptr(const int y) { return &data[y*pitch]; } + __host__ __device__ inline const T *ptr(const int y) const { return &data[y*pitch]; } + + uint pitch; + T *data; + }; + + void create(int w, int h) { + if (w == width && h == height) return; + width = w; + height = h; + free(); + needs_free_ = true; + data_.data = allocateMemory2D<T>(w, h, data_.pitch); + } + + inline Data &data() { return data_; } + inline const Data &data() const { return data_; } + + void toMat(cv::Mat &m) { + #ifdef USE_GPU + cv::cuda::GpuMat gm; + toGpuMat(gm); + gm.download(m); + #else + m = cv::Mat(height, width, cv::traits::Type<T>::value, data_.data); + #endif + } + + cv::Mat toMat() { + #ifdef USE_GPU + cv::cuda::GpuMat gm; + toGpuMat(gm); + cv::Mat m; + gm.download(m); + return m; + #else + return cv::Mat(height, width, cv::traits::Type<T>::value, data_.data); + #endif + } + + const cv::Mat toMat() const { + #ifdef USE_GPU + cv::cuda::GpuMat gm(height, width, cv::traits::Type<T>::value, (void*)data_.data, size_t(size_t(data_.pitch)*sizeof(T))); + cv::Mat m; + gm.download(m); + return m; + #else + return cv::Mat(height, width, cv::traits::Type<T>::value, data_.data); + #endif + } + + void toGpuMat(cv::cuda::GpuMat &m) { + #ifdef USE_GPU + m = cv::cuda::GpuMat(height, width, cv::traits::Type<T>::value, (void*)data_.data, size_t(size_t(data_.pitch)*sizeof(T))); + #else + // TODO + #endif + } + + cv::cuda::GpuMat toGpuMat() { + #ifdef USE_GPU + return cv::cuda::GpuMat(height, width, cv::traits::Type<T>::value, (void*)data_.data, size_t(size_t(data_.pitch)*sizeof(T))); + #else + return cv::cuda::GpuMat(height, width, cv::traits::Type<T>::value); + #endif + } + + int width; + int height; + +private: + Data data_; + bool needs_free_; +}; + +#endif diff --git a/lib/libstereo/src/consistency.hpp b/lib/libstereo/src/consistency.hpp new file mode 100644 index 000000000..aa6208908 --- /dev/null +++ b/lib/libstereo/src/consistency.hpp @@ -0,0 +1,36 @@ +#ifndef _FTL_LIBSTEREO_CONSISTENCY_HPP_ +#define _FTL_LIBSTEREO_CONSISTENCY_HPP_ + +#include "util.hpp" +#include "array2d.hpp" + +namespace algorithms { + + /** + * Left/right disparity consistency check. + */ + template <typename T> + struct ConsistencyCheck { + typename Array2D<T>::Data left; + const typename Array2D<T>::Data right; + const float threshold=1.0f; + const float value=0.0f; + + __host__ __device__ void operator()(ushort2 thread, ushort2 stride, ushort2 size) { + for (int y = thread.y; y < size.y; y+=stride.y) { + T* ptr_l = left.ptr(y); + const T* ptr_r = right.ptr(y); + + for (int x = thread.x; x < size.x; x+=stride.x) { + const int d = round(ptr_l[x]); + + if ((x-d) < 0 || abs(ptr_l[x] - ptr_r[x-d]) > threshold) { + ptr_l[x] = value; + } + } + } + } + }; +} + +#endif diff --git a/lib/libstereo/src/cost_aggregation.hpp b/lib/libstereo/src/cost_aggregation.hpp new file mode 100644 index 000000000..d59613a9e --- /dev/null +++ b/lib/libstereo/src/cost_aggregation.hpp @@ -0,0 +1,120 @@ +#pragma once + +#include "dsi.hpp" +#include "util.hpp" + +#include <stereo_types.hpp> + +template <typename T> +struct BasePathData { + short2 direction; + int pathix; +}; + +namespace algorithms { + template <typename FUNCTOR, int DX, int DY> + struct Aggregator { + static_assert(DX != 0 || DY != 0, "DX or DY must be non-zero"); + static_assert(std::is_convertible<typename FUNCTOR::PathData, BasePathData<typename FUNCTOR::Type>>::value, "Path data must derive from BasePathData<T>"); + + FUNCTOR f; + + __cuda__ void operator()(ushort2 thread, ushort2 stride, ushort2 size) { + #ifndef __CUDA_ARCH__ + using std::min; + using std::max; + #endif + + // TODO: Use shared memory? + typename FUNCTOR::PathData data; + data.direction.x = DX; + data.direction.y = DY; + + for (int i = thread.y; i < size.y; i+=stride.y) { + data.pathix = i; + ushort2 pixel; + + if (DY == 0) { + pixel.y = i; + if (DX > 0) { pixel.x = 0; } + else { pixel.x = f.in.width - 1; } + } + else if (DX == 0) { + pixel.x = i; + if (DY > 0) { pixel.y = 0; } + else { pixel.y = f.in.height - 1; } + } + else if (DX == DY) { + if (DX > 0) { + pixel.x = max(0, i - f.in.height); + pixel.y = max(0, f.in.height - i); + } + else { + pixel.x = min(f.in.width - 1, f.in.width + f.in.height - i - 2); + pixel.y = min(f.in.height - 1, i); + } + } + else { + if (DX > 0) { + pixel.x = max(0, i - f.in.height + 1); + pixel.y = min(f.in.height - 1, i); + } + else { + pixel.x = min(i, f.in.width - 1); + pixel.y = max(0, i - f.in.width + 1); + } + } + + f.startPath(pixel, thread.x, stride.x, data); + + while (pixel.x < f.in.width && pixel.y < f.in.height) { + f(pixel, thread.x, stride.x, data); + pixel.x += DX; + pixel.y += DY; + } + + f.endPath(pixel, thread.x, stride.x, data); + } + } + }; +} + +template <typename F> +class PathAggregator { + public: + + void queuePath(int id, F &f) { + F f1 = f; + f1.direction(path_data[id], out); + + switch (id) { + case 0 : parallel1DWarp<algorithms::Aggregator<F, 1, 0>>({f1}, f.in.height, f.in.disparityRange()); break; + case 1 : parallel1DWarp<algorithms::Aggregator<F,-1, 0>>({f1}, f.in.height, f.in.disparityRange()); break; + case 2 : parallel1DWarp<algorithms::Aggregator<F, 0, 1>>({f1}, f.in.width, f.in.disparityRange()); break; + case 3 : parallel1DWarp<algorithms::Aggregator<F, 0,-1>>({f1}, f.in.width, f.in.disparityRange()); break; + case 4 : parallel1DWarp<algorithms::Aggregator<F, 1, 1>>({f1}, f.in.height+f.in.width, f.in.disparityRange()); break; + case 5 : parallel1DWarp<algorithms::Aggregator<F,-1,-1>>({f1}, f.in.height+f.in.width, f.in.disparityRange()); break; + case 6 : parallel1DWarp<algorithms::Aggregator<F, 1,-1>>({f1}, f.in.height+f.in.width, f.in.disparityRange()); break; + case 7 : parallel1DWarp<algorithms::Aggregator<F,-1, 1>>({f1}, f.in.height+f.in.width, f.in.disparityRange()); break; + } + } + + DisparitySpaceImage<typename F::Type> &operator()(F &f, int paths) { + out.create(f.in.width, f.in.height, f.in.disp_min, f.in.disp_max); + out.clear(); + + for (int i=0; i<8; ++i) { + if (paths & (1 << i)) { + queuePath(i, f); + //out.add(buffers[i]); + } + } + + return out; + } + + private: + DisparitySpaceImage<typename F::Type> out; + typename F::DirectionData path_data[8]; + DisparitySpaceImage<typename F::Type> buffers[8]; +}; diff --git a/lib/libstereo/src/costs/absolute_diff.hpp b/lib/libstereo/src/costs/absolute_diff.hpp new file mode 100644 index 000000000..460eb42e5 --- /dev/null +++ b/lib/libstereo/src/costs/absolute_diff.hpp @@ -0,0 +1,50 @@ +#ifndef _FTL_LIBSTEREO_COSTS_ADBT_HPP_ +#define _FTL_LIBSTEREO_COSTS_ADBT_HPP_ + +#include <opencv2/core/core.hpp> +#include "array2d.hpp" +#include "dsbase.hpp" + +#include <cuda_runtime.h> + +namespace impl { + // Basic data class for use on GPU and CPU + struct AbsDiffBT : DSImplBase<unsigned short> { + typedef unsigned short Type; + + AbsDiffBT(ushort w, ushort h, ushort dmin, ushort dmax) : DSImplBase<unsigned short>({w,h,dmin,dmax}) {} + + __host__ __device__ inline unsigned short operator()(const int y, const int x, const int d) const { + if ((x-d) < 0) { return 255; } + int l_val = l(y,x); + int l_val_p = (x > 0) ? l(y,x-1) : l_val; + int l_val_n = (x < width-1) ? l(y,x+1) : l_val; + int r_val = r(y,x-d); + + // Pseudo BT? + return ((r_val > min(l_val_n,l_val) && r_val < max(l_val_n,l_val)) || (r_val > min(l_val_p,l_val) && r_val < max(l_val_p,l_val))) ? 0 : abs(l_val-r_val); + } + + Array2D<uchar>::Data l; + Array2D<uchar>::Data r; + }; +} + +class AbsDiffBT : public DSBase<impl::AbsDiffBT> { +public: + typedef impl::AbsDiffBT DataType; + typedef unsigned short Type; + + AbsDiffBT() : DSBase<DataType>(0, 0, 0, 0) {}; + AbsDiffBT(int width, int height, int disp_min, int disp_max) + : DSBase<DataType>(width, height, disp_min, disp_max) {}; + + void set(const Array2D<uchar>& l, const Array2D<uchar>& r) { + data().l = l.data(); + data().r = r.data(); + } + + static constexpr short COST_MAX = 255; +}; + +#endif diff --git a/lib/libstereo/src/costs/census.cu b/lib/libstereo/src/costs/census.cu new file mode 100644 index 000000000..05d61c3b7 --- /dev/null +++ b/lib/libstereo/src/costs/census.cu @@ -0,0 +1,48 @@ +#include "census.hpp" +#include "../util.hpp" +#include "../util_opencv.hpp" + +#include <opencv2/imgproc.hpp> +#include <opencv2/core.hpp> + +#include <cuda_runtime.h> + +namespace algorithms { + template<int WINX, int WINY> + struct CensusTransform { + static_assert(WINX*WINY <= 64, "Census window is too large"); + + __host__ __device__ inline uint64_t ct_window(const int y, const int x) { + uint64_t ct = 0; + uchar center = im(y, x); + + for (int wy = -WINY/2; wy <= WINY/2; wy++) { + for (int wx = -WINX/2; wx <= WINX/2; wx++) { + const int y_ = y + wy; + const int x_ = x + wx; + ct = ct << 1; + ct |= (center < (im(y_,x_)) ? 1 : 0); + } + } + return ct; + } + + __host__ __device__ void operator()(ushort2 thread, ushort2 stride, ushort2 size) { + //for (int y = WINY/2+1; y < size.y - (WINY/2+1); y++) { + // for (int x = WINX/2+1; x < size.x - (WINX/2+1); x++) { + for (int y = thread.y+WINY/2; y<size.y-WINY/2-1; y+=stride.y) { + for (int x = thread.x+WINX/2; x<size.x-WINX/2-1; x+=stride.x) { + out(y, x) = ct_window(y, x); + } + } + } + + Array2D<uchar>::Data im; + Array2D<uint64_t>::Data out; + }; +} + +void CensusMatchingCost::set(const Array2D<uchar> &l, const Array2D<uchar> &r) { + parallel2D<algorithms::CensusTransform<9,7>>({l.data(), ct_l_.data()}, l.width, l.height); + parallel2D<algorithms::CensusTransform<9,7>>({r.data(), ct_r_.data()}, r.width, r.height); +} diff --git a/lib/libstereo/src/costs/census.hpp b/lib/libstereo/src/costs/census.hpp new file mode 100644 index 000000000..bc20efd31 --- /dev/null +++ b/lib/libstereo/src/costs/census.hpp @@ -0,0 +1,70 @@ +#ifndef _FTL_LIBSTEREO_COSTS_CENSUS_HPP_ +#define _FTL_LIBSTEREO_COSTS_CENSUS_HPP_ + +#include <opencv2/core/core.hpp> +#include "array2d.hpp" +#include "dsbase.hpp" + +#include <cuda_runtime.h> + +namespace impl { + struct CensusMatchingCost : DSImplBase<unsigned short> { + typedef unsigned short Type; + + CensusMatchingCost(ushort w, ushort h, ushort dmin, ushort dmax) : DSImplBase<unsigned short>({w,h,dmin,dmax}) {} + + __host__ __device__ inline unsigned short operator()(const int y, const int x, const int d) const { + if ((x-d) < 0) { return 255; } // TODO: use window size + const uint64_t bits = + ct_l(y, x) ^ ct_r(y, x-d); + #if defined(__CUDA_ARCH__) + return __popcll(bits); + #elif defined(_MSC_VER) + return __popcnt64(bits); + #elif defined(__GNUC__) + return __builtin_popcountl(bits); + #else + static_assert(false, "unsupported compiler (popcount intrinsic)"); + #endif + } + + Array2D<uint64_t>::Data ct_l; + Array2D<uint64_t>::Data ct_r; + }; +} + +class CensusMatchingCost : public DSBase<impl::CensusMatchingCost> { +public: + typedef impl::CensusMatchingCost DataType; + typedef unsigned short Type; + + CensusMatchingCost() : DSBase<DataType>(0, 0, 0, 0) {}; + CensusMatchingCost(int width, int height, int disp_min, int disp_max, int wwidth, int wheight) + : DSBase<DataType>(width, height, disp_min, disp_max), + ct_l_(width, height), ct_r_(width,height), + //cost_max(wwidth * wheight), + ww_(wwidth), wh_(wheight) + { + if (wwidth != 9 || wheight != 7) { + // TODO: window size paramters (hardcoded) in matching_cost.cpp + throw std::exception(); + } + if (wwidth*wheight > 64) { + throw std::exception(); // todo: dynamic size + } + + data().ct_l = ct_l_.data(); + data().ct_r = ct_r_.data(); + } + + void set(const Array2D<uchar>& l, const Array2D<uchar>& r); + static constexpr short COST_MAX = 9*7; // TODO: window size paramters (hardcoded) in matching_cost.cpp + +protected: + Array2D<uint64_t> ct_l_; + Array2D<uint64_t> ct_r_; + int ww_; + int wh_; +}; + +#endif diff --git a/lib/libstereo/src/costs/dual.hpp b/lib/libstereo/src/costs/dual.hpp new file mode 100644 index 000000000..6ee5d15d2 --- /dev/null +++ b/lib/libstereo/src/costs/dual.hpp @@ -0,0 +1,51 @@ +#ifndef _FTL_LIBSTEREO_COSTS_DUAL_HPP_ +#define _FTL_LIBSTEREO_COSTS_DUAL_HPP_ + +#include <opencv2/core/core.hpp> +#include "array2d.hpp" +#include "dsbase.hpp" + +#include <cuda_runtime.h> + +namespace impl { + template <typename A, typename B> + struct DualCosts : DSImplBase<typename A::Type> { + static_assert(std::is_same<typename A::Type, typename B::Type>::value, "Cost types must be the same"); + typedef unsigned short Type; + + DualCosts(ushort w, ushort h, ushort dmin, ushort dmax) : DSImplBase<unsigned short>({w,h,dmin,dmax}), + cost_a(w,h,dmin,dmax), cost_b(w,h,dmin,dmax) {} + + __host__ __device__ inline unsigned short operator()(const int y, const int x, const int d) const { + return (cost_a(y, x, d) + cost_b(y, x, d)); + } + + A cost_a; + B cost_b; + }; +} + +template <typename A, typename B> +class DualCosts : public DSBase<impl::DualCosts<typename A::DataType,typename B::DataType>> { +public: + typedef impl::DualCosts<typename A::DataType,typename B::DataType> DataType; + typedef unsigned short Type; + + //DualCosts() : DSBase<DataType>(0, 0, 0, 0) {}; + DualCosts(int width, int height, int disp_min, int disp_max, A &a, B &b) + : DSBase<DataType>(width, height, disp_min, disp_max), + cost_a(a), cost_b(b) {}; + + void set() { + this->data().cost_a = cost_a.data(); + this->data().cost_b = cost_b.data(); + } + + static constexpr short COST_MAX = A::COST_MAX + B::COST_MAX; + +protected: + A &cost_a; + B &cost_b; +}; + +#endif diff --git a/lib/libstereo/src/costs/gradient.cu b/lib/libstereo/src/costs/gradient.cu new file mode 100644 index 000000000..05aef3f95 --- /dev/null +++ b/lib/libstereo/src/costs/gradient.cu @@ -0,0 +1,28 @@ +#include "gradient.hpp" +#include "../util.hpp" +#include "../util_opencv.hpp" + +#include <opencv2/imgproc.hpp> +#include <opencv2/core.hpp> + +#include <cuda_runtime.h> + +void GradientMatchingCostL2::set(const Array2D<uchar>& l, const Array2D<uchar>& r) { + // todo: size check + #if USE_GPU + cv::Mat m; + cv::Scharr(l.toMat(), m, CV_16S, 1, 0); + l_dx_.toGpuMat().upload(m); + cv::Scharr(l.toMat(), m, CV_16S, 0, 1); + l_dy_.toGpuMat().upload(m); + cv::Scharr(r.toMat(), m, CV_16S, 1, 0); + r_dx_.toGpuMat().upload(m); + cv::Scharr(r.toMat(), m, CV_16S, 0, 1); + l_dy_.toGpuMat().upload(m); + #else + cv::Scharr(l.toMat(), l_dx_.toMat(), CV_16S, 1, 0); + cv::Scharr(l.toMat(), l_dy_.toMat(), CV_16S, 0, 1); + cv::Scharr(r.toMat(), r_dx_.toMat(), CV_16S, 1, 0); + cv::Scharr(r.toMat(), r_dy_.toMat(), CV_16S, 0, 1); + #endif +} diff --git a/lib/libstereo/src/costs/gradient.hpp b/lib/libstereo/src/costs/gradient.hpp new file mode 100644 index 000000000..4e6ea23d8 --- /dev/null +++ b/lib/libstereo/src/costs/gradient.hpp @@ -0,0 +1,65 @@ +#ifndef _FTL_LIBSTEREO_COSTS_GRADIENT_HPP_ +#define _FTL_LIBSTEREO_COSTS_GRADIENT_HPP_ + +#include <opencv2/core/core.hpp> +#include "array2d.hpp" +#include "dsbase.hpp" + +#include <cuda_runtime.h> + +namespace impl { + // Basic data class for use on GPU and CPU + struct GradientMatchingCostL2 : DSImplBase<unsigned short> { + typedef unsigned short Type; + + GradientMatchingCostL2(ushort w, ushort h, ushort dmin, ushort dmax) : DSImplBase<unsigned short>({w,h,dmin,dmax}) {} + + __host__ __device__ inline unsigned short operator()(const int y, const int x, const int d) const { + if ((x-d) < 0) { return 255; } + const short a = l_dx(y,x) - r_dx(y,x-d); + const short b = l_dy(y,x) - r_dy(y,x-d); + #ifdef USE_GPU + return sqrtf(a*a + b*b); + #else + return sqrtf(a*a + b*b); + #endif + } + + Array2D<short>::Data l_dx; + Array2D<short>::Data r_dx; + Array2D<short>::Data l_dy; + Array2D<short>::Data r_dy; + }; +} + +class GradientMatchingCostL2 : public DSBase<impl::GradientMatchingCostL2> { +public: + typedef impl::GradientMatchingCostL2 DataType; + typedef unsigned short Type; + + GradientMatchingCostL2() : DSBase<DataType>(0, 0, 0, 0) {}; + GradientMatchingCostL2(int width, int height, int disp_min, int disp_max) + : DSBase<DataType>(width, height, disp_min, disp_max), + l_dx_(width, height), l_dy_(width, height), r_dx_(width, height), + r_dy_(width, height) + { + // Copy the pointers etc to the data object + data().l_dx = l_dx_.data(); + data().l_dy = l_dy_.data(); + data().r_dx = r_dx_.data(); + data().r_dy = r_dy_.data(); + }; + + void set(const Array2D<uchar>& l, const Array2D<uchar>& r); + + // Maximum value calculated from Scharr kernel weights + static constexpr short COST_MAX = 255*3 + 255*10 + 255*3; + +protected: + Array2D<short> l_dx_; + Array2D<short> r_dx_; + Array2D<short> l_dy_; + Array2D<short> r_dy_; +}; + +#endif \ No newline at end of file diff --git a/lib/libstereo/src/costs/mutual_information.cu b/lib/libstereo/src/costs/mutual_information.cu new file mode 100644 index 000000000..af32f0c53 --- /dev/null +++ b/lib/libstereo/src/costs/mutual_information.cu @@ -0,0 +1,157 @@ +#include "mutual_information.hpp" +#include "../util.hpp" +#include "../util_opencv.hpp" + +#include <opencv2/imgproc.hpp> +#include <opencv2/core.hpp> + +#include <cuda_runtime.h> + +/** + * Mutual Information matching cost. + * + * Hirschmüller, H. (2008). Stereo processing by Semiglobal Matching and Mutual + * Information. IEEE Transactions on Pattern Analysis and Machine Intelligence, + * 30(2), 328–341. https://doi.org/10.1109/TPAMI.2007.1166 + * + * Kim, J., Kolmogorov, V., & Zabih, R. (2003). Visual correspondence using + * energy minimization and mutual information. Proceedings of the IEEE + * International Conference on Computer Vision. + * https://doi.org/10.1109/iccv.2003.1238463 + */ + +/** Calculates intensity distribution. Assumes input in 8 bit grayscale. */ +void calculate_intensity_distribution(const cv::Mat &im1, const cv::Mat &im2, cv::Mat &out, const cv::Mat &mask) { + + if (im1.size() != im2.size()) { throw std::exception(); } + if (im1.type() != im2.type()) { throw std::exception(); } + if (im1.type() != CV_8UC1) { throw std::exception(); } + + if (!mask.empty() && im1.size() != mask.size()) { throw std::exception(); } + if (!mask.empty() && mask.type() != CV_32FC1) { throw std::exception(); } + + const int n = std::numeric_limits<uchar>::max() + 1; + out.create(cv::Size(n, n), CV_32FC1); + out.setTo(0.0f); + + for (int y = 0; y < im1.rows; y++) { + for (int x = 0; x < im1.cols; x++) { + if (!mask.empty() && mask.at<float>(y,x) == 0.0f) { continue; } + + const uchar i1 = im1.at<uchar>(y,x); + const uchar i2 = im2.at<uchar>(y,x); + out.at<float>(i1,i2) += 1.0f; + out.at<float>(i2,i1) += 1.0f; + } + } + + out /= float(im1.total()); +} + +/** Sum over columns (data term h1) **/ +void calculate_h1(const cv::Mat &P, cv::Mat &out) { + out.create(cv::Size(P.cols, 1), CV_32FC1); + for (int x = 0; x < P.cols; x++) { + float &sum = out.at<float>(x); + sum = 0.0; + for (int y = 0; y < P.rows; y++) { sum += P.at<float>(y,x); } + } +} + +/** Sum over rows (data term h2) **/ +void calculate_h2(const cv::Mat &P, cv::Mat &out) { + out.create(cv::Size(P.rows, 1), CV_32FC1); + for (int y = 0; y < P.rows; y++) { + float &sum = out.at<float>(y); + sum = 0.0; + for (int x = 0; x < P.cols; x++) { sum += P.at<float>(y,x); } + } +} + +/** Data term h12 */ +void calculate_h12(const cv::Mat &P, cv::Mat &out, int ksize=7, double sigma=1.0) { + if (P.empty() || P.type() != CV_32FC1) { throw std::exception(); } + + // possible numerical issues? should also be less than 1/(width*height) of + // original image. + static float e = 0.00000001; + cv::Mat tmp; + + P.copyTo(out); + out.setTo(e, P == 0.0f); + + cv::GaussianBlur(out, tmp, cv::Size(ksize,ksize), sigma); + cv::log(tmp, tmp); + tmp *= -1.0f; + + cv::GaussianBlur(tmp, out, cv::Size(ksize,ksize), sigma); +} + +/** Warp (remap) image using disparity map. + * + * @param disparity disparity map (CV_32FC1) + * @param im input image + * @param out result, same type as im + * @param mapx map for x + * @param mapy map for y, re-calculates if not provided (mapy.empty() == true) + * @param interpolation interpolation method, see cv::remap (default linear) + * @param sign disparity sign + */ +void remap_disparity(const cv::Mat &disparity, const cv::Mat &im, cv::Mat &out, + cv::Mat &mapx, cv::Mat &mapy, int interpolation=1, int sign=-1) { + + if (mapy.size() != im.size() || mapy.type() != CV_32FC1) { + mapy.create(disparity.size(), CV_32FC1); + for (int y = 0; y < disparity.rows; y++) { + float *ptr = mapy.ptr<float>(y); + for (int x = 0; x < disparity.cols; x++) { ptr[x] = float(y); } + } + } + + mapx.create(disparity.size(), CV_32FC1); + for (int y = 0; y < disparity.rows; y++) { + float *ptr = mapx.ptr<float>(y); + const float *d = disparity.ptr<float>(y); + for (int x = 0; x < disparity.cols; x++) { + if (d[x] == 0.0f) { + ptr[x] = NAN; + } + else { + ptr[x] = float(x) + d[x]*float(sign); + } + } + } + + cv::remap(im, out, mapx, mapy, interpolation, cv::BORDER_CONSTANT); +} + +void MutualInformationMatchingCost::set(const cv::Mat &l, const cv::Mat &r, const cv::Mat &disparity) { + cv::Mat P; + + if (l.type() != r.type()) { throw std::exception(); } + remap_disparity(disparity, r, r_warp_, mapx_, mapy_); + + mat2gray(l, l_); + mat2gray(r, r_); + cv::cvtColor(r_warp_, r_warp_, cv::COLOR_BGR2GRAY); + + calculate_intensity_distribution(l_.toMat(), r_warp_, P, disparity); + + cv::Mat h12; + cv::Mat h1; + cv::Mat h2; + + calculate_h12(P, h12); + calculate_h1(P, h1); + calculate_h2(P, h2); + + #if USE_GPU + h12_.toGpuMat().upload(h12); + h1_.toGpuMat().upload(h1); + h2_.toGpuMat().upload(h2); + #else + h12.copyTo(h12_.toMat()); + h1.copyTo(h1_.toMat()); + h2.copyTo(h2_.toMat()); + #endif +} diff --git a/lib/libstereo/src/costs/mutual_information.hpp b/lib/libstereo/src/costs/mutual_information.hpp new file mode 100644 index 000000000..722692ce5 --- /dev/null +++ b/lib/libstereo/src/costs/mutual_information.hpp @@ -0,0 +1,65 @@ +#ifndef _FTL_LIBSTEREO_COSTS_MI_HPP_ +#define _FTL_LIBSTEREP_COSTS_MI_HPP_ + +#include <opencv2/core/core.hpp> +#include "array2d.hpp" +#include "dsbase.hpp" + +#include <cuda_runtime.h> + +namespace impl { + struct MutualInformationMatchingCost : DSImplBase<unsigned short> { + typedef unsigned short Type; + + MutualInformationMatchingCost(ushort w, ushort h, ushort dmin, ushort dmax) : DSImplBase<unsigned short>({w,h,dmin,dmax}) {} + + __host__ __device__ inline unsigned short operator()(const int y, const int x, const int d) const { + if ((x-d) < 0) { return 24; } + const int I1 = l(y,x); + const int I2 = r(y,x-d); + const float H1 = h1(0,I1); + const float H2 = h2(0,I2); + const float H12 = h12(I1,I2); + return -(H1+H2-H12); + } + + Array2D<unsigned char>::Data l; + Array2D<unsigned char>::Data r; + Array2D<float>::Data h1; + Array2D<float>::Data h2; + Array2D<float>::Data h12; + }; +} + +class MutualInformationMatchingCost : public DSBase<impl::MutualInformationMatchingCost>{ +public: + typedef impl::MutualInformationMatchingCost DataType; + typedef unsigned short Type; + + MutualInformationMatchingCost(int width, int height, int disp_min, int disp_max) : + DSBase<DataType>(width, height, disp_min, disp_max), + l_(width, height), r_(width, height), + h1_(256, 1), h2_(256, 1), h12_(256, 256) + { + data().l = l_.data(); + data().r = r_.data(); + data().h1 = h1_.data(); + data().h2 = h2_.data(); + data().h12 = h12_.data(); + } + + void set(const cv::Mat &left, const cv::Mat &right, const cv::Mat &disparity); + +protected: + Array2D<unsigned char> l_; + Array2D<unsigned char> r_; + Array2D<float> h1_; + Array2D<float> h2_; + Array2D<float> h12_; + + cv::Mat mapx_; + cv::Mat mapy_; + cv::Mat r_warp_; +}; + +#endif \ No newline at end of file diff --git a/lib/libstereo/src/dsbase.hpp b/lib/libstereo/src/dsbase.hpp new file mode 100644 index 000000000..d5f99241d --- /dev/null +++ b/lib/libstereo/src/dsbase.hpp @@ -0,0 +1,64 @@ +#ifndef _FTL_LIBSTEREO_DSBASE_HPP_ +#define _FTL_LIBSTEREO_DSBASE_HPP_ + +#include <cuda_runtime.h> +#include <type_traits> + +namespace impl { + template <typename T> + struct DSImplBase { + typedef T Type; + + static_assert(std::is_arithmetic<T>::value, "Cost type is not numeric"); + + //DSImplBase(int w, int h, int dmin, int dmax) : width(w), height(h), disp_min(dmin), disp_max(dmax) {} + + uint16_t width; + uint16_t height; + uint16_t disp_min; + uint16_t disp_max; + + static constexpr T COST_MAX = std::numeric_limits<T>::max(); + + __host__ __device__ inline uint16_t disparityRange() const { return disp_max-disp_min+1; } + __host__ __device__ inline uint32_t size() const { return width * disparityRange() * height; } + __host__ __device__ inline uint32_t bytes() const { return size() * sizeof(Type); } + }; +} + +/** + * Base class representing a 3D disparity space structure. This may + * implement the 3D data structure as a stored memory object or through + * dynamic calculation using other data. + */ +template<typename T> +class DSBase { + public: + typedef T DataType; + typedef typename T::Type Type; + +#if __cplusplus >= 201703L + static_assert(std::is_invocable_r<Type,T,int,int,int>::value, "Incorrect disparity space operator"); +#endif + static_assert(std::is_convertible<T, impl::DSImplBase<Type>>::value, "Data type is not derived from DSImplBase"); + + DSBase(int w, int h, int dmin, int dmax) : data_(w,h,dmin,dmax) {} + ~DSBase() {}; + + DataType &data() { return data_; }; + const DataType &data() const { return data_; }; + + int width() const { return data_.width; } + int height() const { return data_.height; } + int numDisparities() const { return data_.disparityRange(); } + int maxDisparity() const { return data_.disp_max; } + int minDisparity() const { return data_.disp_min; } + Type maxCost() const { return DataType::COST_MAX; } + + protected: + DataType data_; +}; + +//#include "dsbase_impl.hpp" + +#endif diff --git a/lib/libstereo/src/dsbase_impl.hpp b/lib/libstereo/src/dsbase_impl.hpp new file mode 100644 index 000000000..853d4d700 --- /dev/null +++ b/lib/libstereo/src/dsbase_impl.hpp @@ -0,0 +1,5 @@ +template <typename T> +DSBase::DSBase(int w, int h, int dmin, int dmax) + : data_{w,h,dmin,dmax} +{ +} diff --git a/lib/libstereo/src/dsi.hpp b/lib/libstereo/src/dsi.hpp new file mode 100644 index 000000000..62b2754c9 --- /dev/null +++ b/lib/libstereo/src/dsi.hpp @@ -0,0 +1,74 @@ +#ifndef _FTL_LIBSTEREO_DSI_HPP_ +#define _FTL_LIBSTEREO_DSI_HPP_ + +#include "dsbase.hpp" +#include "memory.hpp" +#include <opencv2/core/mat.hpp> + +//////////////////////////////////////////////////////////////////////////////// + + + +namespace impl { + // Basic data class for use on GPU and CPU + template <typename T> + struct DSI : DSImplBase<T> { + typedef T Type; + + DSI() : data(nullptr) {} + DSI(ushort w, ushort h, ushort dmin, ushort dmax) : DSImplBase<T>({w,h,dmin,dmax}), data(nullptr) {} + + __host__ __device__ inline T& operator() (const int y, const int x, const int d) { + return data[d-this->disp_min + x*this->disparityRange() + y*this->width*this->disparityRange()]; + } + + __host__ __device__ inline const T& operator() (const int y, const int x, const int d) const { + return data[d-this->disp_min + x*this->disparityRange() + y*this->width*this->disparityRange()]; + } + + uint pitch; + T *data; + }; +} + +template<typename T> +class DisparitySpaceImage : public DSBase<impl::DSI<T>> { +public: + typedef impl::DSI<T> DataType; + typedef T Type; + + DisparitySpaceImage(); + DisparitySpaceImage(int width, int height, int disp_min, int disp_max); + ~DisparitySpaceImage(); + + DisparitySpaceImage(const DisparitySpaceImage<T> &)=delete; + + T operator()(int y, int x, int d) const; + + void create(int w, int h, int dmin, int dmax); + + void clear(); + + void set(const T& v); + + template<typename A> + void add(const A &other, double scale=1.0); + + void mul(double a); + + cv::Mat Mat(); + +private: + //DataType data_; + bool needs_free_; +}; + +#include "dsi_impl.hpp" + +// Common types +typedef DisparitySpaceImage<unsigned char> DSImage8U; +typedef DisparitySpaceImage<unsigned short> DSImage16U; +typedef DisparitySpaceImage<unsigned int> DSImage32U; +typedef DisparitySpaceImage<float> DSImageFloat; + +#endif diff --git a/lib/libstereo/src/dsi_impl.hpp b/lib/libstereo/src/dsi_impl.hpp new file mode 100644 index 000000000..5fcd38147 --- /dev/null +++ b/lib/libstereo/src/dsi_impl.hpp @@ -0,0 +1,118 @@ +#include <thrust/device_ptr.h> +#include <thrust/fill.h> + +template <typename T> +DisparitySpaceImage<T>::DisparitySpaceImage() : DSBase<DisparitySpaceImage<T>::DataType>(0,0,0,0) { + this->data_.data = nullptr; + needs_free_ = false; +} + +template <typename T> +DisparitySpaceImage<T>::DisparitySpaceImage(int width, int height, int disp_min, int disp_max) + : DSBase<DisparitySpaceImage<T>::DataType>(width, height, disp_min, disp_max) +{ + this->data_.data = allocateMemory2D<T>(width*(disp_max-disp_min+1), height, this->data_.pitch); + needs_free_ = true; +} + +template <typename T> +DisparitySpaceImage<T>::~DisparitySpaceImage() +{ + if (needs_free_ && this->data_.data) freeMemory(this->data_.data); +} + +template <typename T> +void DisparitySpaceImage<T>::create(int w, int h, int dmin, int dmax) { + if (this->data_.data && this->data_.width == w && this->data_.height == h) return; + if (needs_free_) freeMemory(this->data_.data); + needs_free_ = true; + this->data_.width = w; + this->data_.height = h; + this->data_.disp_min = dmin; + this->data_.disp_max = dmax; + this->data_.data = allocateMemory2D<T>(w*(dmax-dmin+1), h, this->data_.pitch); +} + +template <typename T> +void DisparitySpaceImage<T>::clear() { +#ifdef USE_GPU + //if (this->width() == 1 || this->height() == 1) { + cudaSafeCall(cudaMemset(this->data_.data, 0, this->data_.pitch*sizeof(T)*this->data_.height)); + //} else { + // cudaSafeCall(cudaMemset2D(this->data_.data, this->data_.pitch*sizeof(T), 0, this->data_.width*this->numDisparities()*sizeof(T), this->data_.height)); + //} +#else + memset(this->data_.data, 0, this->data_.pitch*sizeof(T)*this->data_.height); +#endif +} + +template <typename T> +T DisparitySpaceImage<T>::operator()(int y, int x, int d) const { +#ifdef USE_GPU + T t; + cudaMemcpy(&t, &this->data_(y,x,d), sizeof(T), cudaMemcpyDeviceToHost); + return t; +#else + return this->data_(y,x,d); +#endif +} + +template <typename T> +void DisparitySpaceImage<T>::set(const T& v) { +#ifdef USE_GPU + thrust::device_ptr<T> dev_ptr(this->data_.data); + thrust::fill(dev_ptr, dev_ptr + this->data_.size(), v); +#else + std::fill(this->data_.data, this->data_.data + this->data_.size(), v); +#endif +} + +template <typename T> +template<typename A> +void DisparitySpaceImage<T>::add(const A &other, double scale) { +#ifdef USE_GPU + +#else + #pragma omp parallel for + for (int y = 0; y < this->data_.height; y++) + for (int x = 0; x < this->data_.width; x++) + for (int d = this->data_.disp_min; d <= this->data_.disp_max; d++) {{{ + this->data_(y,x,d) += other(y,x,d)*scale; + }}} +#endif +} + +template <typename T> +void DisparitySpaceImage<T>::mul(double a) { + std::transform(this->data_.begin(), this->data_.end(), this->data_.begin(), [&a](T &elem) { return elem *= a; }); +} + +template <typename T> +cv::Mat DisparitySpaceImage<T>::Mat() { + int cvtype; + + if (std::is_same<char, T>::value) { + cvtype = CV_8SC1; + } + else if (std::is_same<short, T>::value) { + cvtype = CV_16SC1; + } + else if (std::is_same<int, T>::value) { + cvtype = CV_32SC1; + } + else if (std::is_same<float, T>::value) { + cvtype = CV_32FC1; + } + else if (std::is_same<double, T>::value) { + cvtype = CV_64FC1; + } + else { + return cv::Mat(); + } + +#ifdef USE_GPU + +#else + return cv::Mat(this->data_.width*this->data_.height*this->data_.disparityRange(), 1, cvtype, this->data_.data); +#endif +} diff --git a/lib/libstereo/src/memory.hpp b/lib/libstereo/src/memory.hpp new file mode 100644 index 000000000..924b4bb83 --- /dev/null +++ b/lib/libstereo/src/memory.hpp @@ -0,0 +1,54 @@ +#ifndef _FTL_LIBSTEREO_MEMORY_HPP_ +#define _FTL_LIBSTEREO_MEMORY_HPP_ + +#include <cuda_runtime.h> +#include <opencv2/core/mat.hpp> +#include <opencv2/core/cuda.hpp> +#include <opencv2/core/cuda/common.hpp> + +#ifdef USE_GPU +using OpenCVMat = cv::cuda::GpuMat; +#else +using OpenCVMat = cv::Mat; +#endif + +template <typename T> +T *allocateMemory(size_t size) { +#ifdef USE_GPU + T *ptr; + cudaSafeCall(cudaMalloc(&ptr, size*sizeof(T))); + return ptr; +#else + return new T[size]; +#endif +} + +template <typename T> +T *allocateMemory2D(size_t width, size_t height, uint &pitch) { + if (width == 1 || height == 1) { + pitch = width; + return allocateMemory<T>((width > height) ? width: height); + } else { + #ifdef USE_GPU + T *ptr; + size_t ptmp; + cudaSafeCall(cudaMallocPitch(&ptr, &ptmp, width*sizeof(T), height)); + pitch = ptmp/sizeof(T); + return ptr; + #else + pitch = width; //*sizeof(T); + return new T[width*height]; + #endif + } +} + +template <typename T> +void freeMemory(T *ptr) { +#ifdef USE_GPU + cudaSafeCall(cudaFree(ptr)); +#else + delete [] ptr; +#endif +} + +#endif diff --git a/lib/libstereo/src/stereo_adcensussgm.cu b/lib/libstereo/src/stereo_adcensussgm.cu new file mode 100644 index 000000000..9cd9838cf --- /dev/null +++ b/lib/libstereo/src/stereo_adcensussgm.cu @@ -0,0 +1,147 @@ +#include <opencv2/core.hpp> +#include <opencv2/imgproc.hpp> +#include <opencv2/core/cuda/common.hpp> + +#include "stereo.hpp" + +#include "util_opencv.hpp" +#include "costs/absolute_diff.hpp" +#include "costs/census.hpp" +#include "costs/dual.hpp" +#include "dsi.hpp" + +#include "wta.hpp" +#include "cost_aggregation.hpp" +#include "aggregations/standard_sgm.hpp" + +#ifdef __GNUG__ + +#include <chrono> +#include <iostream> + +static std::chrono::time_point<std::chrono::system_clock> start; + +static void timer_set() { + start = std::chrono::high_resolution_clock::now(); +} + +static void timer_print(const std::string &msg, const bool reset=true) { + auto stop = std::chrono::high_resolution_clock::now(); + + char buf[24]; + snprintf(buf, sizeof(buf), "%5i ms ", + (int) std::chrono::duration_cast<std::chrono::milliseconds>(stop-start).count()); + + std::cout << buf << msg << "\n" << std::flush; + if (reset) { timer_set(); } +} + +#else + +static void timer_set() {} +static void timer_print(const std::string &msg, const bool reset=true) {} + +#endif + +using cv::Mat; +using cv::Size; +using ftl::stereo::aggregations::StandardSGM; + +static int ct_windows_w = 9; +static int ct_windows_h = 7; + +struct StereoADCensusSgm::Impl { + DisparitySpaceImage<unsigned short> dsi; + AbsDiffBT ad_cost; + CensusMatchingCost census_cost; + DualCosts<AbsDiffBT,CensusMatchingCost> cost; + Array2D<unsigned short> cost_min; + Array2D<unsigned short> cost_min_paths; + Array2D<unsigned short> uncertainty; + Array2D<float> confidence; + Array2D<float> disparity_r; + Array2D<uchar> l; + Array2D<uchar> r; + + PathAggregator<StandardSGM<typename DualCosts<AbsDiffBT,CensusMatchingCost>::DataType>> aggr; + WinnerTakesAll<DSImage16U,float> wta; + + Impl(int width, int height, int min_disp, int max_disp) : + dsi(width, height, min_disp, max_disp), + ad_cost(width, height, min_disp, max_disp), + census_cost(width, height, min_disp, max_disp, 9, 7), + cost(width, height, min_disp, max_disp, ad_cost, census_cost), + cost_min(width, height), + cost_min_paths(width, height), + uncertainty(width, height), + confidence(width, height), + disparity_r(width, height), l(width, height), r(width, height) {} + +}; + +StereoADCensusSgm::StereoADCensusSgm() : impl_(nullptr) { + impl_ = new Impl(0, 0, 0, 0); +} + +void StereoADCensusSgm::compute(cv::InputArray l, cv::InputArray r, cv::OutputArray disparity) { + cudaSetDevice(0); + + if (l.rows() != impl_->dsi.height() || r.cols() != impl_->dsi.width()) { + delete impl_; impl_ = nullptr; + impl_ = new Impl(l.cols(), l.rows(), params.d_min, params.d_max); + } + + impl_->dsi.clear(); + impl_->uncertainty.toMat().setTo(0); + + mat2gray(l, impl_->l); + mat2gray(r, impl_->r); + + timer_set(); + + // CT + impl_->census_cost.set(impl_->l, impl_->r); + impl_->ad_cost.set(impl_->l, impl_->r); + impl_->cost.set(); + + cudaSafeCall(cudaDeviceSynchronize()); + if (params.debug) { timer_print("census transform"); } + + // cost aggregation + StandardSGM<DualCosts<AbsDiffBT,CensusMatchingCost>::DataType> func = {impl_->cost.data(), params.P1, params.P2}; + auto &out = impl_->aggr(func, params.paths); + + cudaSafeCall(cudaDeviceSynchronize()); + if (params.debug) { timer_print("Aggregation"); } + + impl_->wta(out, 0); + cudaSafeCall(cudaDeviceSynchronize()); + if (params.debug) { timer_print("WTA"); } + if (disparity.isGpuMat()) { + impl_->wta.disparity.toGpuMat(disparity.getGpuMatRef()); + } + else { + cv::Mat &disparity_ = disparity.getMatRef(); + impl_->wta.disparity.toMat(disparity_); + cv::medianBlur(disparity_, disparity_, 3); + } + // confidence estimate + + // Drory, A., Haubold, C., Avidan, S., & Hamprecht, F. A. (2014). + // Semi-global matching: A principled derivation in terms of + // message passing. Lecture Notes in Computer Science (Including Subseries + // Lecture Notes in Artificial Intelligence and Lecture Notes in + // Bioinformatics). https://doi.org/10.1007/978-3-319-11752-2_4 + //cv::Mat uncertainty; + //uncertainty = impl_->cost_min.toMat() - impl_->cost_min_paths.toMat(); + // confidence threshold + // TODO: estimate confidence from uncertainty and plot ROC curve. + //disparity.setTo(0.0f, uncertainty > params.uniqueness); +} + +StereoADCensusSgm::~StereoADCensusSgm() { + if (impl_) { + delete impl_; + impl_ = nullptr; + } +} diff --git a/lib/libstereo/src/stereo_adsgm.cu b/lib/libstereo/src/stereo_adsgm.cu new file mode 100644 index 000000000..a79abcf34 --- /dev/null +++ b/lib/libstereo/src/stereo_adsgm.cu @@ -0,0 +1,138 @@ +#include <opencv2/core.hpp> +#include <opencv2/imgproc.hpp> +#include <opencv2/core/cuda/common.hpp> + +#include "stereo.hpp" + +#include "util_opencv.hpp" +#include "costs/absolute_diff.hpp" +#include "dsi.hpp" + +#include "wta.hpp" +#include "cost_aggregation.hpp" +#include "aggregations/standard_sgm.hpp" + +#ifdef __GNUG__ + +#include <chrono> +#include <iostream> + +static std::chrono::time_point<std::chrono::system_clock> start; + +static void timer_set() { + start = std::chrono::high_resolution_clock::now(); +} + +static void timer_print(const std::string &msg, const bool reset=true) { + auto stop = std::chrono::high_resolution_clock::now(); + + char buf[24]; + snprintf(buf, sizeof(buf), "%5i ms ", + (int) std::chrono::duration_cast<std::chrono::milliseconds>(stop-start).count()); + + std::cout << buf << msg << "\n" << std::flush; + if (reset) { timer_set(); } +} + +#else + +static void timer_set() {} +static void timer_print(const std::string &msg, const bool reset=true) {} + +#endif + +using cv::Mat; +using cv::Size; +using ftl::stereo::aggregations::StandardSGM; + +static int ct_windows_w = 9; +static int ct_windows_h = 7; + +struct StereoADSgm::Impl { + DisparitySpaceImage<unsigned short> dsi; + AbsDiffBT cost; + Array2D<unsigned short> cost_min; + Array2D<unsigned short> cost_min_paths; + Array2D<unsigned short> uncertainty; + Array2D<float> confidence; + Array2D<float> disparity_r; + Array2D<uchar> l; + Array2D<uchar> r; + + PathAggregator<StandardSGM<typename AbsDiffBT::DataType>> aggr; + WinnerTakesAll<DSImage16U,float> wta; + + Impl(int width, int height, int min_disp, int max_disp) : + dsi(width, height, min_disp, max_disp), + cost(width, height, min_disp, max_disp), + cost_min(width, height), + cost_min_paths(width, height), + uncertainty(width, height), + confidence(width, height), + disparity_r(width, height), l(width, height), r(width, height) {} + +}; + +StereoADSgm::StereoADSgm() : impl_(nullptr) { + impl_ = new Impl(0, 0, 0, 0); +} + +void StereoADSgm::compute(cv::InputArray l, cv::InputArray r, cv::OutputArray disparity) { + cudaSetDevice(0); + + if (l.rows() != impl_->dsi.height() || r.cols() != impl_->dsi.width()) { + delete impl_; impl_ = nullptr; + impl_ = new Impl(l.cols(), l.rows(), params.d_min, params.d_max); + } + + impl_->dsi.clear(); + impl_->uncertainty.toMat().setTo(0); + + mat2gray(l, impl_->l); + mat2gray(r, impl_->r); + + timer_set(); + + impl_->cost.set(impl_->l, impl_->r); + + cudaSafeCall(cudaDeviceSynchronize()); + if (params.debug) { timer_print("census transform"); } + + // cost aggregation + StandardSGM<AbsDiffBT::DataType> func = {impl_->cost.data(), params.P1, params.P2}; + auto &out = impl_->aggr(func, params.paths); + + cudaSafeCall(cudaDeviceSynchronize()); + if (params.debug) { timer_print("Aggregation"); } + + impl_->wta(out, 0); + cudaSafeCall(cudaDeviceSynchronize()); + if (params.debug) { timer_print("WTA"); } + if (disparity.isGpuMat()) { + impl_->wta.disparity.toGpuMat(disparity.getGpuMatRef()); + } + else { + cv::Mat &disparity_ = disparity.getMatRef(); + impl_->wta.disparity.toMat(disparity_); + cv::medianBlur(disparity_, disparity_, 3); + } + // confidence estimate + + // Drory, A., Haubold, C., Avidan, S., & Hamprecht, F. A. (2014). + // Semi-global matching: A principled derivation in terms of + // message passing. Lecture Notes in Computer Science (Including Subseries + // Lecture Notes in Artificial Intelligence and Lecture Notes in + // Bioinformatics). https://doi.org/10.1007/978-3-319-11752-2_4 + //cv::Mat uncertainty; + //uncertainty = impl_->cost_min.toMat() - impl_->cost_min_paths.toMat(); + // confidence threshold + // TODO: estimate confidence from uncertainty and plot ROC curve. + //disparity.setTo(0.0f, uncertainty > params.uniqueness); +} + +StereoADSgm::~StereoADSgm() { + if (impl_) { + delete impl_; + impl_ = nullptr; + } +} diff --git a/lib/libstereo/src/stereo_censussgm.cu b/lib/libstereo/src/stereo_censussgm.cu new file mode 100644 index 000000000..7cb8f635e --- /dev/null +++ b/lib/libstereo/src/stereo_censussgm.cu @@ -0,0 +1,140 @@ +#include <opencv2/core.hpp> +#include <opencv2/imgproc.hpp> +#include <opencv2/core/cuda/common.hpp> + +#include "stereo.hpp" + +#include "util_opencv.hpp" +#include "costs/census.hpp" +#include "dsi.hpp" + +#include "wta.hpp" +#include "cost_aggregation.hpp" +#include "aggregations/standard_sgm.hpp" + +#ifdef __GNUG__ + +#include <chrono> +#include <iostream> + +static std::chrono::time_point<std::chrono::system_clock> start; + +static void timer_set() { + start = std::chrono::high_resolution_clock::now(); +} + +static void timer_print(const std::string &msg, const bool reset=true) { + auto stop = std::chrono::high_resolution_clock::now(); + + char buf[24]; + snprintf(buf, sizeof(buf), "%5i ms ", + (int) std::chrono::duration_cast<std::chrono::milliseconds>(stop-start).count()); + + std::cout << buf << msg << "\n" << std::flush; + if (reset) { timer_set(); } +} + +#else + +static void timer_set() {} +static void timer_print(const std::string &msg, const bool reset=true) {} + +#endif + +using cv::Mat; +using cv::Size; +using ftl::stereo::aggregations::StandardSGM; + +static int ct_windows_w = 9; +static int ct_windows_h = 7; + +struct StereoCensusSgm::Impl { + //DisparitySpaceImage<unsigned short> dsi; + CensusMatchingCost cost; + Array2D<unsigned short> cost_min; + Array2D<unsigned short> cost_min_paths; + Array2D<unsigned short> uncertainty; + Array2D<float> confidence; + Array2D<float> disparity_r; + Array2D<uchar> l; + Array2D<uchar> r; + + PathAggregator<StandardSGM<CensusMatchingCost::DataType>> aggr; + WinnerTakesAll<DSImage16U,float> wta; + + Impl(int width, int height, int min_disp, int max_disp) : + //dsi(width, height, min_disp, max_disp), + cost(width, height, min_disp, max_disp, ct_windows_w, ct_windows_h), + cost_min(width, height), + cost_min_paths(width, height), + uncertainty(width, height), + confidence(width, height), + disparity_r(width, height), l(width, height), r(width, height) {} + +}; + +StereoCensusSgm::StereoCensusSgm() : impl_(nullptr) { + impl_ = new Impl(0, 0, 0, 0); +} + +void StereoCensusSgm::compute(cv::InputArray l, cv::InputArray r, cv::OutputArray disparity) { + cudaSetDevice(0); + + if (l.rows() != impl_->cost.height() || r.cols() != impl_->cost.width()) { + delete impl_; impl_ = nullptr; + impl_ = new Impl(l.cols(), l.rows(), params.d_min, params.d_max); + } + + //impl_->dsi.clear(); + impl_->uncertainty.toMat().setTo(0); + + mat2gray(l, impl_->l); + mat2gray(r, impl_->r); + timer_set(); + + // CT + impl_->cost.set(impl_->l, impl_->r); + + cudaSafeCall(cudaDeviceSynchronize()); + if (params.debug) { timer_print("census transform"); } + + // cost aggregation + StandardSGM<CensusMatchingCost::DataType> func = {impl_->cost.data(), params.P1, params.P2}; + auto &out = impl_->aggr(func, params.paths); + + cudaSafeCall(cudaDeviceSynchronize()); + if (params.debug) { timer_print("Aggregation"); } + + impl_->wta(out, 0); + cudaSafeCall(cudaDeviceSynchronize()); + if (params.debug) { timer_print("WTA"); } + + if (disparity.isGpuMat()) { + impl_->wta.disparity.toGpuMat(disparity.getGpuMatRef()); + } + else { + cv::Mat &disparity_ = disparity.getMatRef(); + impl_->wta.disparity.toMat(disparity_); + cv::medianBlur(disparity_, disparity_, 3); + } + + // confidence estimate + + // Drory, A., Haubold, C., Avidan, S., & Hamprecht, F. A. (2014). + // Semi-global matching: A principled derivation in terms of + // message passing. Lecture Notes in Computer Science (Including Subseries + // Lecture Notes in Artificial Intelligence and Lecture Notes in + // Bioinformatics). https://doi.org/10.1007/978-3-319-11752-2_4 + //cv::Mat uncertainty; + //uncertainty = impl_->cost_min.toMat() - impl_->cost_min_paths.toMat(); + // confidence threshold + // TODO: estimate confidence from uncertainty and plot ROC curve. + //disparity.setTo(0.0f, uncertainty > params.uniqueness); +} + +StereoCensusSgm::~StereoCensusSgm() { + if (impl_) { + delete impl_; + impl_ = nullptr; + } +} diff --git a/lib/libstereo/src/stereo_gradientstree.cu b/lib/libstereo/src/stereo_gradientstree.cu new file mode 100644 index 000000000..74ec61ed8 --- /dev/null +++ b/lib/libstereo/src/stereo_gradientstree.cu @@ -0,0 +1,141 @@ +#include <opencv2/core.hpp> +#include <opencv2/imgproc.hpp> +#include <opencv2/core/cuda/common.hpp> + +#include "stereo.hpp" + +#include "util_opencv.hpp" +#include "costs/gradient.hpp" +#include "dsi.hpp" + +#include "wta.hpp" +#include "cost_aggregation.hpp" +#include "aggregations/standard_sgm.hpp" + +#ifdef __GNUG__ + +#include <chrono> +#include <iostream> + +static std::chrono::time_point<std::chrono::system_clock> start; + +static void timer_set() { + start = std::chrono::high_resolution_clock::now(); +} + +static void timer_print(const std::string &msg, const bool reset=true) { + auto stop = std::chrono::high_resolution_clock::now(); + + char buf[24]; + snprintf(buf, sizeof(buf), "%5i ms ", + (int) std::chrono::duration_cast<std::chrono::milliseconds>(stop-start).count()); + + std::cout << buf << msg << "\n" << std::flush; + if (reset) { timer_set(); } +} + +#else + +static void timer_set() {} +static void timer_print(const std::string &msg, const bool reset=true) {} + +#endif + +using cv::Mat; +using cv::Size; +using ftl::stereo::aggregations::StandardSGM; + +static int ct_windows_w = 9; +static int ct_windows_h = 7; + +struct StereoGradientStree::Impl { + GradientMatchingCostL2 cost; + Array2D<unsigned short> cost_min; + Array2D<unsigned short> cost_min_paths; + Array2D<unsigned short> uncertainty; + Array2D<float> confidence; + Array2D<float> disparity_r; + Array2D<uchar> l; + Array2D<uchar> r; + + PathAggregator<StandardSGM<GradientMatchingCostL2::DataType>> aggr1; + PathAggregator<StandardSGM<DisparitySpaceImage<unsigned short>::DataType>> aggr2; + WinnerTakesAll<DSImage16U,float> wta; + + Impl(int width, int height, int min_disp, int max_disp) : + cost(width, height, min_disp, max_disp), + cost_min(width, height), + cost_min_paths(width, height), + uncertainty(width, height), + confidence(width, height), + disparity_r(width, height), l(width, height), r(width, height) {} + +}; + +StereoGradientStree::StereoGradientStree() : impl_(nullptr) { + impl_ = new Impl(0, 0, 0, 0); +} + +void StereoGradientStree::compute(cv::InputArray l, cv::InputArray r, cv::OutputArray disparity) { + cudaSetDevice(0); + + if (l.rows() != impl_->cost.height() || r.cols() != impl_->cost.width()) { + delete impl_; impl_ = nullptr; + impl_ = new Impl(l.cols(), l.rows(), params.d_min, params.d_max); + } + + impl_->uncertainty.toMat().setTo(0); + + mat2gray(l, impl_->l); + mat2gray(r, impl_->r); + + timer_set(); + + impl_->cost.set(impl_->l, impl_->r); + cudaSafeCall(cudaDeviceSynchronize()); + + //AggregationParameters aggr_params = {impl_->cost_min_paths.data(), params}; + StandardSGM<GradientMatchingCostL2::DataType> func1 = {impl_->cost.data(), params.P1, params.P2}; + auto &out1 = impl_->aggr1(func1, AggregationDirections::HORIZONTAL); + + cudaSafeCall(cudaDeviceSynchronize()); + if (params.debug) { timer_print("Aggregation 1"); } + + StandardSGM<DisparitySpaceImage<unsigned short>::DataType> func2 = { out1.data(), params.P1, params.P2}; + auto &out2 = impl_->aggr2(func2, AggregationDirections::VERTICAL); + + cudaSafeCall(cudaDeviceSynchronize()); + if (params.debug) { timer_print("Aggregation 2"); } + + impl_->wta(out2, 0); + cudaSafeCall(cudaDeviceSynchronize()); + if (params.debug) { timer_print("WTA"); } + + if (disparity.isGpuMat()) { + impl_->wta.disparity.toGpuMat(disparity.getGpuMatRef()); + } + else { + cv::Mat &disparity_ = disparity.getMatRef(); + impl_->wta.disparity.toMat(disparity_); + cv::medianBlur(disparity_, disparity_, 3); + } + // confidence estimate + + // Drory, A., Haubold, C., Avidan, S., & Hamprecht, F. A. (2014). + // Semi-global matching: A principled derivation in terms of + // message passing. Lecture Notes in Computer Science (Including Subseries + // Lecture Notes in Artificial Intelligence and Lecture Notes in + // Bioinformatics). https://doi.org/10.1007/978-3-319-11752-2_4 + //cv::Mat uncertainty; + //uncertainty = impl_->cost_min.toMat() - impl_->cost_min_paths.toMat(); + // confidence threshold + // TODO: estimate confidence from uncertainty and plot ROC curve. + //disparity.setTo(0.0f, uncertainty > params.uniqueness); +} + +StereoGradientStree::~StereoGradientStree() { + if (impl_) { + delete impl_; + impl_ = nullptr; + } +} diff --git a/lib/libstereo/src/stereo_misgm.cu b/lib/libstereo/src/stereo_misgm.cu new file mode 100644 index 000000000..aebc09a65 --- /dev/null +++ b/lib/libstereo/src/stereo_misgm.cu @@ -0,0 +1,152 @@ +#include <opencv2/core.hpp> +#include <opencv2/imgproc.hpp> +#include <opencv2/core/cuda/common.hpp> + +#include "stereo.hpp" + +#include "util_opencv.hpp" +#include "costs/mutual_information.hpp" +#include "dsi.hpp" + +#include "wta.hpp" +#include "cost_aggregation.hpp" +#include "aggregations/standard_sgm.hpp" + +#ifdef __GNUG__ + +#include <chrono> +#include <iostream> + +static std::chrono::time_point<std::chrono::system_clock> start; + +static void timer_set() { + start = std::chrono::high_resolution_clock::now(); +} + +static void timer_print(const std::string &msg, const bool reset=true) { + auto stop = std::chrono::high_resolution_clock::now(); + + char buf[24]; + snprintf(buf, sizeof(buf), "%5i ms ", + (int) std::chrono::duration_cast<std::chrono::milliseconds>(stop-start).count()); + + std::cout << buf << msg << "\n" << std::flush; + if (reset) { timer_set(); } +} + +#else + +static void timer_set() {} +static void timer_print(const std::string &msg, const bool reset=true) {} + +#endif + +using cv::Mat; +using cv::Size; +using ftl::stereo::aggregations::StandardSGM; + +struct StereoMiSgm::Impl { + MutualInformationMatchingCost cost; + Array2D<unsigned short> cost_min; + Array2D<unsigned short> cost_min_paths; + Array2D<unsigned short> uncertainty; + Array2D<float> confidence; + Array2D<float> disparity_r; + Array2D<uchar> l; + Array2D<uchar> r; + + Mat prior; // used only to calculate MI + + PathAggregator<StandardSGM<MutualInformationMatchingCost::DataType>> aggr; + WinnerTakesAll<DSImage16U,float> wta; + + Impl(int width, int height, int min_disp, int max_disp) : + cost(width, height, min_disp, max_disp), + cost_min(width, height), + cost_min_paths(width, height), + uncertainty(width, height), + confidence(width, height), + disparity_r(width, height), l(width, height), r(width, height) {} + +}; + +StereoMiSgm::StereoMiSgm() : impl_(nullptr) { + impl_ = new Impl(0, 0, 0, 0); +} + +void StereoMiSgm::setPrior(const cv::Mat &prior) { + prior.copyTo(impl_->prior); +} + + +void StereoMiSgm::compute(cv::InputArray l, cv::InputArray r, cv::OutputArray disparity) { + cudaSetDevice(0); + + if (l.rows() != impl_->cost.height() || r.cols() != impl_->cost.width()) { + delete impl_; impl_ = nullptr; + impl_ = new Impl(l.cols(), l.rows(), params.d_min, params.d_max); + } + + //todo prior + if (impl_->prior.empty() || impl_->prior.size() != l.size()) { + impl_->prior.create(l.rows(), l.cols(), CV_32FC1); + cv::randu(impl_->prior, params.d_min, params.d_max); + } + + impl_->uncertainty.toMat().setTo(0); + + timer_set(); + + // CT + if (l.isMat()) { + impl_->cost.set(l.getMat(), r.getMat(), impl_->prior); + } + else if (l.isGpuMat()) { + Mat l_; + Mat r_; + l.getGpuMat().download(l_); + r.getGpuMat().download(r_); + impl_->cost.set(l_, r_, impl_->prior); + } + if (params.debug) { timer_print("mutual information"); } + + cudaSafeCall(cudaDeviceSynchronize()); + // cost aggregation + //AggregationParameters aggr_params = {impl_->cost_min_paths.data(), params}; + StandardSGM<MutualInformationMatchingCost::DataType> func = {impl_->cost.data(), params.P1, params.P2}; + auto &out = impl_->aggr(func, AggregationDirections::ALL); // params.paths + + cudaSafeCall(cudaDeviceSynchronize()); + if (params.debug) { timer_print("Aggregation"); } + + impl_->wta(out, 0); + cudaSafeCall(cudaDeviceSynchronize()); + if (params.debug) { timer_print("WTA"); } + if (disparity.isGpuMat()) { + impl_->wta.disparity.toGpuMat(disparity.getGpuMatRef()); + } + else { + cv::Mat &disparity_ = disparity.getMatRef(); + impl_->wta.disparity.toMat(disparity_); + cv::medianBlur(disparity_, disparity_, 3); + } + // confidence estimate + + // Drory, A., Haubold, C., Avidan, S., & Hamprecht, F. A. (2014). + // Semi-global matching: A principled derivation in terms of + // message passing. Lecture Notes in Computer Science (Including Subseries + // Lecture Notes in Artificial Intelligence and Lecture Notes in + // Bioinformatics). https://doi.org/10.1007/978-3-319-11752-2_4 + //cv::Mat uncertainty; + //uncertainty = impl_->cost_min.toMat() - impl_->cost_min_paths.toMat(); + // confidence threshold + // TODO: estimate confidence from uncertainty and plot ROC curve. + //disparity.setTo(0.0f, uncertainty > params.uniqueness); +} + +StereoMiSgm::~StereoMiSgm() { + if (impl_) { + delete impl_; + impl_ = nullptr; + } +} diff --git a/lib/libstereo/src/stereo_sgmp.cpp b/lib/libstereo/src/stereo_sgmp.cpp new file mode 100644 index 000000000..22c6082b1 --- /dev/null +++ b/lib/libstereo/src/stereo_sgmp.cpp @@ -0,0 +1,394 @@ +#include <opencv2/core.hpp> +#include <opencv2/imgproc.hpp> + +#include "stereo.hpp" +#include "matching_cost.hpp" +#include "stereo_common.hpp" +#include "dsi.hpp" + +#include "cost_aggregation.hpp" + +#ifdef __GNUG__ + +#include <chrono> +#include <iostream> +/* +static std::chrono::time_point<std::chrono::system_clock> start; + +static void timer_set() { + start = std::chrono::high_resolution_clock::now(); +} + +static void timer_print(const std::string &msg, const bool reset=true) { + auto stop = std::chrono::high_resolution_clock::now(); + + char buf[24]; + snprintf(buf, sizeof(buf), "%5i ms ", + (int) std::chrono::duration_cast<std::chrono::milliseconds>(stop-start).count()); + + std::cout << buf << msg << "\n" << std::flush; + if (reset) { timer_set(); } +} +*/ +static void timer_set() {} +static void timer_print(const std::string &msg, const bool reset=true) {} + +#else + +static void timer_set() {} +static void timer_print(const std::string &msg, const bool reset=true) {} + +#endif + +using cv::Mat; +using cv::Size; + +static int ct_windows_w = 9; +static int ct_windows_h = 7; + +struct StereoCensusSgmP::Impl { + DisparitySpaceImage<unsigned short> dsi; + CensusMatchingCost cost; + Mat cost_min; + Mat cost_min_paths; + Mat uncertainty; + Mat confidence; + Mat disparity_r; + Mat prior_disparity; + Mat l; + Mat r; + + Mat prior; + Mat search; + + Impl(int width, int height, int min_disp, int max_disp) : + dsi(width, height, min_disp, max_disp, ct_windows_w*ct_windows_h), + cost(width, height, min_disp, max_disp, ct_windows_w, ct_windows_h), + cost_min(height, width, CV_16UC1), + cost_min_paths(height, width, CV_16UC1), + uncertainty(height, width, CV_16UC1), + confidence(height, width, CV_32FC1), + disparity_r(height, width, CV_32FC1), + prior_disparity(height, width, CV_32FC1) + {} + + void cvtColor(const cv::Mat &iml, const cv::Mat &imr) { + switch (iml.channels()) { + case 4: + cv::cvtColor(iml, l, cv::COLOR_BGRA2GRAY); + break; + + case 3: + cv::cvtColor(iml, l, cv::COLOR_BGR2GRAY); + break; + + case 1: + l = iml; + break; + + default: + throw std::exception(); + } + + switch (imr.channels()) { + case 4: + cv::cvtColor(imr, r, cv::COLOR_BGRA2GRAY); + break; + + case 3: + cv::cvtColor(imr, r, cv::COLOR_BGR2GRAY); + break; + + case 1: + r = imr; + break; + + default: + throw std::exception(); + } + } +}; + +static void compute_P2(const cv::Mat &prior, cv::Mat &P2) { + +} + +// prior CV32_FC1 +// out CV8_UC2 +// +// TODO: range could be in depth units to get more meningful bounds +static void compute_search_range(const cv::Mat &prior, cv::Mat &out, int dmin, int dmax, int range) { + out.create(prior.size(), CV_8UC2); + out.setTo(0); + + for (int y = 0; y < out.rows; y++) { + for (int x = 0; x < out.cols; x ++) { + int d = round(prior.at<float>(y,x)); + auto &v = out.at<cv::Vec2b>(y,x); + if ((d != 0) && (d >= dmin) && (d <= dmax)) { + v[0] = std::max(dmin, d-range); + v[1] = std::min(dmax, d+range); + } + else { + v[0] = dmin; + v[1] = dmax; + } + } + } +} + + +/** + * Compute 2D offset image. SGM sweeping direction set with dx and dy. + * + * Scharstein, D., Taniai, T., & Sinha, S. N. (2018). Semi-global stereo + * matching with surface orientation priors. Proceedings - 2017 International + * Conference on 3D Vision, 3DV 2017. https://doi.org/10.1109/3DV.2017.00033 + */ +static void compute_offset_image(const cv::Mat &prior, cv::Mat &out, + const int dx, const int dy) { + + if (prior.empty()) { return; } + + out.create(prior.size(), CV_16SC1); + out.setTo(0); + + int y_start; + int y_stop; + int x_start; + int x_stop; + + if (dy < 0) { + y_start = -dy; + y_stop = prior.rows; + } + else { + y_start = 0; + y_stop = prior.rows - dy; + } + + if (dx < 0) { + x_start = -dx; + x_stop = prior.cols; + } + else { + x_start = 0; + x_stop = prior.cols - dx; + } + + for (int y = y_start; y < y_stop; y++) { + const float *ptr_prior = prior.ptr<float>(y); + const float *ptr_prior_r = prior.ptr<float>(y+dy); + short *ptr_out = out.ptr<short>(y); + + for (int x = x_start; x < x_stop; x++) { + // TODO types (assumes signed ptr_out and floating point ptr_prior) + if (ptr_prior[x] != 0.0 && ptr_prior_r[x+dx] != 0.0) { + ptr_out[x] = round(ptr_prior[x] - ptr_prior_r[x+dx]); + } + } + } +} + +struct AggregationParameters { + Mat &prior; // + Mat &search; // search range for each pixel CV_8UC2 + Mat &min_cost; + const StereoCensusSgmP::Parameters ¶ms; +}; + +inline int get_jump(const int y, const int x, const int d, const int dy, const int dx, const cv::Mat &prior) { + if (prior.empty()) { + return 0; + } + else if (dx == 1 && dy == 0) { + return prior.at<short>(y, x); + } + else if (dx == -1 && dy == 0) { + if (x == prior.cols - 1) { return 0; } + return -prior.at<short>(y, x+1); + } + else if (dx == 0 && dy == 1) { + return prior.at<short>(y, x); + } + else if (dx == 0 && dy == -1) { + if (y == prior.rows - 1) { return 0; } + return -prior.at<short>(y+1, x); + } + else if (dx == 1 && dy == 1) { + return prior.at<short>(y, x); + } + else if (dx == -1 && dy == -1) { + if (y == prior.rows - 1 || x == prior.cols - 1) { return 0; } + return -prior.at<short>(y+1, x+1); + } + else if (dx == 1 && dy == -1) { + return prior.at<short>(y, x); + } + else if (dx == -1 && dy == 1) { + if (y == prior.rows - 1 || x == 0) { return 0; } + return -prior.at<short>(y+1, x-1); + } +} + +template<typename T=unsigned short> +inline void aggregate( + AggregationData<CensusMatchingCost, DisparitySpaceImage<T>, T> &data, + AggregationParameters ¶ms) { + + auto &previous_cost_min = data.previous_cost_min; + auto &previous = data.previous; + auto &updates = data.updates; + auto &out = data.out; + const auto &in = data.in; + const auto &x = data.x; + const auto &y = data.y; + const auto &i = data.i; + const T P1 = params.params.P1; + const T P2 = params.params.P2; + + T cost_min = in.cost_max; + + //int d_start = params.search.at<cv::Vec2b>(y,x)[0]; + //int d_stop = params.search.at<cv::Vec2b>(y,x)[1]; + int d_start = in.disp_min; + int d_stop = in.disp_max; + + for (int d = d_start; d <= d_stop; d++) { + const int j = get_jump(y, x, d, data.dy, data.dx, params.prior); + const int d_j = std::max(std::min(d+j, previous.disp_max), previous.disp_min); + + const T L_min = + std::min<T>(previous(0,i,d_j), + std::min<T>(T(previous_cost_min + P2), + std::min<T>(T(previous(0,i,std::min(d_j+1, previous.disp_max)) + P1), + T(previous(0,i,std::max(d_j-1, previous.disp_min)) + P1)) + ) + ); + + T C = in(y,x,d); + T cost_update = L_min + C - previous_cost_min; + + // stop if close to overflow + if (cost_update > (std::numeric_limits<T>::max() - T(in.cost_max))) { throw std::exception(); } + + updates(0,i,d) = cost_update; + cost_min = cost_update * (cost_update < cost_min) + cost_min * (cost_update >= cost_min); + } + + /*const T update_skipped = params.params.P3; + for (int d = out.disp_min; d < d_start; d++) { + previous(0,i,d) = update_skipped; + out(y,x,d) += update_skipped; + }*/ + for (int d = d_start; d <= d_stop; d++) { + previous(0,i,d) = updates(0,i,d); + out(y,x,d) += updates(0,i,d); + } + /*for (int d = d_stop+1; d <= out.disp_max; d++) { + previous(0,i,d) = update_skipped; + out(y,x,d) += update_skipped; + }*/ + + params.min_cost.at<T>(y, x) = cost_min; + previous_cost_min = cost_min; +} + +StereoCensusSgmP::StereoCensusSgmP() : impl_(nullptr) { + impl_ = new Impl(0, 0, 0, 0); +} + +void StereoCensusSgmP::setPrior(const cv::Mat &prior) { + prior.copyTo(impl_->prior_disparity); +} + +void StereoCensusSgmP::compute(const cv::Mat &l, const cv::Mat &r, cv::Mat disparity) { + impl_->dsi.clear(); + impl_->uncertainty.setTo(0); + + if (l.rows != impl_->dsi.height || r.cols != impl_->dsi.width) { + Mat prior = impl_->prior_disparity; + + delete impl_; impl_ = nullptr; + impl_ = new Impl(l.cols, l.rows, params.d_min, params.d_max); + + if (prior.size() == l.size()) { + impl_->prior_disparity = prior; + } + } + + impl_->cvtColor(l, r); + + timer_set(); + + // CT + impl_->cost.setLeft(impl_->l); + impl_->cost.setRight(impl_->r); + + if (params.debug) { timer_print("census transform"); } + + // cost aggregation + AggregationParameters aggr_params = {impl_->prior, impl_->search, impl_->cost_min_paths, params}; + compute_search_range(impl_->prior_disparity, impl_->search, params.d_min, params.d_max, params.range); + + if (params.paths & AggregationDirections::HORIZONTAL) { + compute_offset_image(impl_->prior_disparity, impl_->prior, 1, 0); + aggregate_horizontal_all<unsigned short>( + aggregate<unsigned short>, impl_->cost, impl_->dsi, aggr_params); + if (params.debug) { timer_print("horizontal aggregation"); } + } + + if (params.paths & AggregationDirections::VERTICAL) { + compute_offset_image(impl_->prior_disparity, impl_->prior, 0, 1); + aggregate_vertical_all<unsigned short>( + aggregate<unsigned short>, impl_->cost, impl_->dsi, aggr_params); + if (params.debug) { timer_print("vertical aggregation"); } + } + + if (params.paths & AggregationDirections::DIAGONAL1) { + compute_offset_image(impl_->prior_disparity, impl_->prior, 1, 1); + aggregate_diagonal_upper_all<unsigned short>( + aggregate<unsigned short>, impl_->cost, impl_->dsi, aggr_params); + if (params.debug) { timer_print("upper diagonal aggregation"); } + } + + if (params.paths & AggregationDirections::DIAGONAL2) { + compute_offset_image(impl_->prior_disparity, impl_->prior, 1, -1); + aggregate_diagonal_lower_all<unsigned short>( + aggregate<unsigned short>, impl_->cost, impl_->dsi, aggr_params); + if (params.debug) { timer_print("lower diagonal aggregation"); } + } + + if (!(params.paths & AggregationDirections::ALL)) { + throw std::exception(); + } + + // wta + consistency + wta(impl_->dsi, disparity, impl_->cost_min, params.subpixel); + wta_diagonal(impl_->disparity_r, impl_->dsi); + consistency_check(disparity, impl_->disparity_r); + + // confidence estimate + + // Drory, A., Haubold, C., Avidan, S., & Hamprecht, F. A. (2014). + // Semi-global matching: A principled derivation in terms of + // message passing. Lecture Notes in Computer Science (Including Subseries + // Lecture Notes in Artificial Intelligence and Lecture Notes in + //Bioinformatics). https://doi.org/10.1007/978-3-319-11752-2_4 + impl_->uncertainty = impl_->cost_min - impl_->cost_min_paths; + + // instead of difference, use ratio + cv::divide(impl_->cost_min, impl_->cost_min_paths, impl_->confidence, CV_32FC1); + + // confidence threshold + disparity.setTo(0.0f, impl_->uncertainty > params.uniqueness); + + cv::medianBlur(disparity, disparity, 3); +} + +StereoCensusSgmP::~StereoCensusSgmP() { + if (impl_) { + delete impl_; + impl_ = nullptr; + } +} diff --git a/lib/libstereo/src/util.hpp b/lib/libstereo/src/util.hpp new file mode 100644 index 000000000..4c50f082b --- /dev/null +++ b/lib/libstereo/src/util.hpp @@ -0,0 +1,135 @@ +#ifndef _FTL_LIBSTEREO_UTIL_HPP_ +#define _FTL_LIBSTEREO_UTIL_HPP_ + +#include <cuda_runtime.h> + +#ifdef USE_GPU +#define __cuda__ __host__ __device__ +#else +#define __cuda__ +#endif + +static constexpr int WARP_SIZE = 32; +static constexpr unsigned int FULL_MASK = 0xFFFFFFFF; + +template <typename T> +__device__ inline T warpMin(T e) { + for (int i = WARP_SIZE/2; i > 0; i /= 2) { + const T other = __shfl_xor_sync(FULL_MASK, e, i, WARP_SIZE); + e = min(e, other); + } + return e; +} + +#ifdef USE_GPU + +template <typename FUNCTOR> +__global__ void kernel2D(FUNCTOR f, ushort2 size) { + const ushort2 thread{ushort(threadIdx.x+blockIdx.x*blockDim.x), ushort(threadIdx.y+blockIdx.y*blockDim.y)}; + const ushort2 stride{ushort(blockDim.x * gridDim.x), ushort(blockDim.y * gridDim.y)}; + f(thread, stride, size); +} + +template <typename FUNCTOR> +__global__ void kernel1D(FUNCTOR f, ushort size) { + const ushort thread = threadIdx.x+blockIdx.x*blockDim.x; + const ushort stride = blockDim.x * gridDim.x; + f(thread, stride, size); +} + +#endif + +/** + * Wrapper to initiate a parallel processing job using a given functor. The + * width and height parameters are used to determine the number of threads. + */ +template <typename FUNCTOR> +void parallel1D(const FUNCTOR &f, int size) { +#if __cplusplus >= 201703L + static_assert(std::is_invocable<FUNCTOR,int,int,int>::value, "Parallel1D requires a valid functor: void()(int,int,int)"); +#endif + +#ifdef USE_GPU + cudaSafeCall(cudaGetLastError()); + kernel1D<<<64, 256, 0, 0>>>(f,size); + cudaSafeCall(cudaGetLastError()); + //cudaSafeCall(cudaDeviceSynchronize()); +#else + static constexpr int MAX_THREADS=32; + int stride = MAX_THREADS; + FUNCTOR f2 = f; // Make a copy + #pragma omp parallel for + for (int i=0; i<MAX_THREADS; ++i) { + f2(i, stride, size); + } +#endif +} + + +/** + * Wrapper to initiate a parallel processing job using a given functor. The + * width and height parameters are used to determine the number of threads. + */ +template <typename FUNCTOR> +void parallel1DWarp(const FUNCTOR &f, int size, int size2) { +#if __cplusplus >= 201703L + //static_assert(std::is_invocable<FUNCTOR,int,int,int>::value, "Parallel1D requires a valid functor: void()(int,int,int)"); + // Is this static_assert correct? + static_assert(std::is_invocable<FUNCTOR,ushort2,ushort2,ushort2>::value, "Parallel1D requires a valid functor: void()(ushort2,ushort2,ushort2)"); +#endif + +#ifdef USE_GPU + cudaSafeCall(cudaGetLastError()); + const dim3 gridSize(1, (size+8-1) / 8); + const dim3 blockSize(32, 8); + ushort2 tsize{ushort(size2), ushort(size)}; + kernel2D<<<gridSize, blockSize, 0, 0>>>(f,tsize); + cudaSafeCall(cudaGetLastError()); + //cudaSafeCall(cudaDeviceSynchronize()); +#else + static constexpr int MAX_THREADS=32; + FUNCTOR f2 = f; // Make a copy + #pragma omp parallel for + for (int i=0; i<MAX_THREADS; ++i) { + ushort2 thread{0,ushort(i)}; + ushort2 stride{1, MAX_THREADS}; //(height+MAX_THREADS-1)/MAX_THREADS}; + ushort2 tsize{ushort(size2), ushort(size)}; + f2(thread, stride, tsize); + } +#endif +} + +/** + * Wrapper to initiate a parallel processing job using a given functor. The + * width and height parameters are used to determine the number of threads. + */ +template <typename FUNCTOR> +void parallel2D(const FUNCTOR &f, int width, int height) { + //static_assert(std::is_function<FUNCTOR>::value, "Parallel2D requires a valid functor"); +#if __cplusplus >= 201703L + static_assert(std::is_invocable<FUNCTOR,ushort2,ushort2,ushort2>::value, "Parallel2D requires a valid functor: void()(ushort2,ushort2,ushort2)"); +#endif + assert(width > 0 && width <= 4096); + assert(height > 0 && height <= 4096); + +#ifdef USE_GPU + static constexpr uint T_PER_BLOCK=8; + const dim3 gridSize((width + T_PER_BLOCK - 1)/T_PER_BLOCK, (height + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + ushort2 size{ushort(width), ushort(height)}; + kernel2D<<<gridSize, blockSize, 0, 0>>>(f,size); + cudaSafeCall(cudaGetLastError()); +#else + static constexpr int MAX_THREADS=32; + FUNCTOR f2 = f; // Make a copy + #pragma omp parallel for + for (int i=0; i<MAX_THREADS; ++i) { + ushort2 thread{0,ushort(i)}; + ushort2 stride{1, MAX_THREADS}; //(height+MAX_THREADS-1)/MAX_THREADS}; + ushort2 size{ushort(width), ushort(height)}; + f2(thread, stride, size); + } +#endif +} + +#endif diff --git a/lib/libstereo/src/util_opencv.hpp b/lib/libstereo/src/util_opencv.hpp new file mode 100644 index 000000000..94e9db6fe --- /dev/null +++ b/lib/libstereo/src/util_opencv.hpp @@ -0,0 +1,153 @@ +#pragma once + +#include "array2d.hpp" + +#include <opencv2/core.hpp> +#include <opencv2/imgproc.hpp> + +#if USE_GPU +#include <opencv2/core/cuda.hpp> +#include <opencv2/cudaimgproc.hpp> + +static void mat2gray(const cv::cuda::GpuMat &in, Array2D<unsigned char> &out) { + if (in.depth() != CV_8U) { throw std::exception(); } + if (out.width != in.cols || out.height != in.rows) { throw std::exception(); } + + switch (in.channels()) { + case 4: + cv::cuda::cvtColor(in, out.toGpuMat(), cv::COLOR_BGRA2GRAY); + break; + + case 3: + cv::cuda::cvtColor(in, out.toGpuMat(), cv::COLOR_BGR2GRAY); + break; + + case 1: + in.copyTo(out.toGpuMat()); + break; + + default: + throw std::exception(); + } +} +#endif + +static void mat2gray(const cv::Mat &in, Array2D<unsigned char> &out) { + if (in.depth() != CV_8U) { throw std::exception(); } + if (out.width != in.cols || out.height != in.rows) { throw std::exception(); } + +#ifndef USE_GPU + switch (in.channels()) { + case 4: + cv::cvtColor(in, out.toMat(), cv::COLOR_BGRA2GRAY); + break; + + case 3: + cv::cvtColor(in, out.toMat(), cv::COLOR_BGR2GRAY); + break; + + case 1: + in.copyTo(out.toMat()); + break; + + default: + throw std::exception(); + } +#else + cv::Mat tmp; + switch (in.channels()) { + case 4: + cv::cvtColor(in, tmp, cv::COLOR_BGRA2GRAY); + break; + + case 3: + cv::cvtColor(in, tmp, cv::COLOR_BGR2GRAY); + break; + + case 1: + in.copyTo(tmp); + break; + + default: + throw std::exception(); + } + + out.toGpuMat().upload(tmp); +#endif +} + +static void mat2gray(cv::InputArray in, Array2D<unsigned char> &out) { + if (in.isGpuMat()) { + #if USE_GPU + mat2gray(in.getGpuMat(), out); + #endif + } + else if (in.isMat()) { + mat2gray(in.getMat(), out); + } + else { + throw std::exception(); + } +} + +/* +template<typename T, int channels> +constexpr int cpp2cvtype() { + static_assert(channels > 0 && channels <= 4, "unsupported number of channels"); + + if (std::is_same<T, char>::value) { + switch(channels) { + case 1: return CV_8SC1; + case 2: return CV_8SC2; + case 3: return CV_8SC3; + case 4: return CV_8SC4; + } + } + else if (std::is_same<T, unsigned char>::value) { + switch(channels) { + case 1: return CV_8SC1; + case 2: return CV_8UC2; + case 3: return CV_8UC3; + case 4: return CV_8UC4; + } + } + else if (std::is_same<T, short>::value) { + switch(channels) { + case 1: return CV_16SC1; + case 2: return CV_16SC2; + case 3: return CV_16SC3; + case 4: return CV_16SC4; + } + } + else if (std::is_same<T, unsigned short>::value) { + switch(channels) { + case 1: return CV_16UC1; + case 2: return CV_16UC2; + case 3: return CV_16UC3; + case 4: return CV_16UC4; + } + } + + else if (std::is_same<T, float>::value) { + switch(channels) { + case 1: return CV_32FC1; + case 2: return CV_32FC2; + case 3: return CV_32FC3; + case 4: return CV_32FC4; + } + } + else if (std::is_same<T, double>::value) { + switch(channels) { + case 1: return CV_64FC1; + case 2: return CV_64FC2; + case 3: return CV_64FC3; + case 4: return CV_64FC4; + } + } + else { + // ideally should be compile time error + throw std::logic_error("no matching OpenCV type"); + } + + return -1; +}*/ diff --git a/lib/libstereo/src/wta.hpp b/lib/libstereo/src/wta.hpp new file mode 100644 index 000000000..679638c9e --- /dev/null +++ b/lib/libstereo/src/wta.hpp @@ -0,0 +1,199 @@ +#pragma once +#include "array2d.hpp" +#include "util.hpp" +#include "consistency.hpp" + +#define _USE_MATH_DEFINES +#include <math.h> + +namespace algorithms { + template<typename T> + __cuda__ inline float subpixel_estimate_parabola(const T &dsi, const int x, const int y, const int d) { + if (d == dsi.disp_max || d == dsi.disp_min) { return d; } + + const float val_l = dsi(y,x,d-1); + const float val_r = dsi(y,x,d+1); + const float val = dsi(y,x,d); + const float a = val_r - val_l; + const float b = 2.0f * val - val_l - val_r; + if (b != 0.0f) { return d + 0.5f *a/b; } + else { return d; } + } + template<typename T> + __cuda__ inline float subpixel_estimate_equiangular(const T &dsi, const int x, const int y, const int d) { + #ifndef __CUDA_ARCH__ + using std::max; + #endif + + if (d == dsi.disp_max || d == dsi.disp_min) { return d; } + + const float val_l = dsi(y,x,d-1); + const float val_r = dsi(y,x,d+1); + const float val = dsi(y,x,d); + const float a = val_r - val_l; + const float b = 2.0f * (val - max(val_l, val_r)); + if (b != 0.0f) { return d + a/b; } + else { return d; } + } + + template<typename T> + __cuda__ inline float subpixel_estimate_sortsgm(const T &dsi, const int x, const int y, const int d) { + if (d == dsi.disp_max || d == dsi.disp_min) { return d; } + + /** + * Eq. 20 in + * + * Pantilie, C. D., & Nedevschi, S. (2012). SORT-SGM: Subpixel optimized + * real-time semiglobal matching for intelligent vehicles. IEEE Transactions + * on Vehicular Technology. https://doi.org/10.1109/TVT.2012.2186836 + */ + + const float C_l = dsi(y,x,d-1); + const float C_r = dsi(y,x,d+1); + const float C = dsi(y,x,d); + + const float delta_l = C_l - C; + const float delta_r = C_r - C; + + const float rho = (delta_l <= delta_r) ? delta_l/delta_r : delta_r/delta_l; + const float g = (sin((M_PI/2.0f)*(rho - 1.0f)) + 1.0f)/2.0f; + + if (delta_l <= delta_r) { return float(d) - 0.5f + g; } + else { return float(d) + 0.5f - g; } + } + + template <typename DSI, typename TDisp, int SUBPIXEL> + struct WTA { + const typename DSI::DataType dsi; + typename Array2D<TDisp>::Data disparity; + typename Array2D<typename DSI::Type>::Data min_cost; + + __cuda__ void operator()(ushort2 thread, ushort2 stride, ushort2 size) { + using TCost = typename DSI::Type; + + for (int y = thread.y; y < size.y; y+=stride.y) { + auto* ptr_disparity = disparity.ptr(y); + auto* ptr_min_cost = min_cost.ptr(y); + + for (int x = 0; x < dsi.width; ++x) { + TCost min1 = TCost(-1); //std::numeric_limits<TCost>::max(); + //TCost min2 = TCost(-1); //std::numeric_limits<TCost>::max(); + TDisp d = TDisp(0); + + for (int d_ = thread.x + dsi.disp_min; d_ <= dsi.disp_max; d_+=stride.x) { + const TCost val = dsi(y,x,d_); + + if (val < min1) { + //min2 = min1; + min1 = val; + d = d_; + } + + // next best local minima + /*if ((d_ != dsi.disp_min && d != dsi.disp_max) && + (dsi(y,x,d_-1) > val && val < dsi(y,x,d_+1)) && + (d != d_) && (val < min2)) { + + min2 = val; + }*/ + + } + + #ifdef __CUDA_ARCH__ + TCost mincost = warpMin(min1); + if (min1 != mincost) d = -1; + // FIXME: Possible non-determinism, need to choose + #endif + + /*if (dsi(y, x, dsi.disp_max) < min2) { + min2 = dsi(y, x, dsi.disp_max); + }*/ + + if (d >= 0) { + if (SUBPIXEL == 1) { + d = subpixel_estimate_parabola(dsi, x, y, d); + } + else if (SUBPIXEL == 2) { + d = subpixel_estimate_equiangular(dsi, x, y, d); + } + else if (SUBPIXEL == 3) { + d = subpixel_estimate_sortsgm(dsi, x, y, d); + } + + ptr_disparity[x] = d; + ptr_min_cost[x] = min1; + } + } + } + } + }; + + /** + * Diagonal WTA (right disparity). No subpixel estimation. + */ + template <typename DSI, typename TDisp> + struct WTADiagonal { + const typename DSI::DataType dsi; + typename Array2D<TDisp>::Data disparity; + + __cuda__ void operator()(ushort2 thread, ushort2 stride, ushort2 size) { + using TCost = typename DSI::Type; + #ifndef __CUDA_ARCH__ + using std::min; + #endif + + for (int y = thread.y; y < size.y; y+=stride.y) { + TDisp* ptr_disparity = disparity.ptr(y); + + for (int x = thread.x; x < size.x; x+=stride.x) { + TCost min1 = TCost(-1); //std::numeric_limits<TCost>::max(); + TDisp d = TDisp(0); + + for (int d_ = dsi.disp_min; d_ < min(dsi.disp_max+1, dsi.width-x); d_++) { + const TCost val = dsi(y,x+d_,d_); + + if (val < min1) { + min1 = val; + d = d_; + } + } + + ptr_disparity[x] = d; + } + } + } + }; +} + +template<typename DSI, typename TDisp=float> +struct WinnerTakesAll { + Array2D<TDisp> disparity; + Array2D<TDisp> disparity_right; + Array2D<typename DSI::Type> min_cost; + + Array2D<TDisp> &operator()(const DSI& dsi, const int subpixel=0) { + disparity.create(dsi.width(), dsi.height()); + disparity_right.create(dsi.width(), dsi.height()); + min_cost.create(dsi.width(), dsi.height()); + + if (subpixel == 0) { + algorithms::WTA<DSI,float,0> wta = {dsi.data(), disparity.data(), min_cost.data()}; + parallel1DWarp(wta, dsi.height(), dsi.numDisparities()); + } else if (subpixel == 1) { + algorithms::WTA<DSI,float,1> wta = {dsi.data(), disparity.data(), min_cost.data()}; + parallel1DWarp(wta, dsi.height(), dsi.numDisparities()); + } else if (subpixel == 2) { + algorithms::WTA<DSI,float,2> wta = {dsi.data(), disparity.data(), min_cost.data()}; + parallel1DWarp(wta, dsi.height(), dsi.numDisparities()); + } else if (subpixel == 3) { + algorithms::WTA<DSI,float,3> wta = {dsi.data(), disparity.data(), min_cost.data()}; + parallel1DWarp(wta, dsi.height(), dsi.numDisparities()); + } + + algorithms::WTADiagonal<DSI,float> wtadiag = {dsi.data(), disparity_right.data()}; + parallel2D(wtadiag, dsi.width(), dsi.height()); + + parallel2D<algorithms::ConsistencyCheck<float>>({disparity.data(), disparity_right.data()}, dsi.width(), dsi.height()); + return disparity; + } +}; diff --git a/lib/libstereo/test/CMakeLists.txt b/lib/libstereo/test/CMakeLists.txt new file mode 100644 index 000000000..ca8165583 --- /dev/null +++ b/lib/libstereo/test/CMakeLists.txt @@ -0,0 +1,65 @@ +#add_library(CatchTest OBJECT ./tests.cpp) + +add_executable(dsi_cpu_unit +$<TARGET_OBJECTS:CatchTest> + ./dsi_unit.cpp +) +target_include_directories(dsi_cpu_unit PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../include") +target_include_directories(dsi_cpu_unit PUBLIC ${OpenCV_INCLUDE_DIRS}) +target_link_libraries(dsi_cpu_unit pthread dl ${OpenCV_LIBS}) + +add_test(DSICPUUnitTest dsi_cpu_unit) + +add_executable(dsi_gpu_unit +$<TARGET_OBJECTS:CatchTest> + ./dsi_gpu_unit.cu +) +target_include_directories(dsi_gpu_unit PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../include") +target_include_directories(dsi_gpu_unit PUBLIC ${OpenCV_INCLUDE_DIRS}) +target_compile_definitions(dsi_gpu_unit PUBLIC USE_GPU) +target_link_libraries(dsi_gpu_unit pthread dl ${OpenCV_LIBS}) + +add_test(DSIGPUUnitTest dsi_gpu_unit) + +add_executable(array2d_unit +$<TARGET_OBJECTS:CatchTest> + ./array2d_unit.cpp +) +target_include_directories(array2d_unit PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../include") +target_include_directories(array2d_unit PUBLIC ${OpenCV_INCLUDE_DIRS}) +target_link_libraries(array2d_unit pthread dl ${OpenCV_LIBS}) + +add_test(Array2DUnitTest array2d_unit) + +add_executable(matching_cost_unit +$<TARGET_OBJECTS:CatchTest> + ../src/costs/census.cu + ../src/costs/gradient.cu + ../src/costs/mutual_information.cu + ./matching_cost_unit.cpp +) +target_include_directories(matching_cost_unit PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../include" "${CMAKE_CURRENT_SOURCE_DIR}/../src") +target_include_directories(matching_cost_unit PUBLIC ${OpenCV_INCLUDE_DIRS}) +target_link_libraries(matching_cost_unit pthread dl ${OpenCV_LIBS}) + +add_test(MatchingCostUnitTest matching_cost_unit) + +add_executable(aggregation_unit +$<TARGET_OBJECTS:CatchTest> + ./aggregation_unit.cpp +) +target_include_directories(aggregation_unit PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../include") +target_include_directories(aggregation_unit PUBLIC ${OpenCV_INCLUDE_DIRS}) +target_link_libraries(aggregation_unit pthread dl ${OpenCV_LIBS}) + +add_test(AggregationUnitTest aggregation_unit) + +add_executable(wta_unit +$<TARGET_OBJECTS:CatchTest> + ./wta_unit.cpp +) +target_include_directories(wta_unit PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../include") +target_include_directories(wta_unit PUBLIC ${OpenCV_INCLUDE_DIRS}) +target_link_libraries(wta_unit pthread dl ${OpenCV_LIBS}) + +add_test(WTAUnitTest wta_unit) diff --git a/lib/libstereo/test/aggregation_unit.cpp b/lib/libstereo/test/aggregation_unit.cpp new file mode 100644 index 000000000..b589a3367 --- /dev/null +++ b/lib/libstereo/test/aggregation_unit.cpp @@ -0,0 +1,276 @@ +#include "catch.hpp" +#include "../src/dsbase.hpp" +#include "../src/cost_aggregation.hpp" + +#include <atomic> +#include <iostream> + +template <typename DSIIN> +struct TestAggregateIncrement { + typedef typename DSIIN::Type Type; + typedef typename DSIIN::Type costtype_t; + + const DSIIN in; + typename DisparitySpaceImage<costtype_t>::DataType out; + + struct PathData : BasePathData<costtype_t> { + // No extra data... + }; + + struct DirectionData { + + }; + + void direction(DirectionData &data, DisparitySpaceImage<costtype_t> &buffer) { + out = buffer.data(); + } + + __host__ __device__ inline void startPath(ushort2 pixel, ushort thread, ushort stride, PathData &data) {} + __host__ __device__ inline void endPath(ushort2 pixel, ushort thread, ushort stride, PathData &data) {} + + __host__ __device__ inline void operator()(ushort2 pixel, ushort thread, ushort stride, PathData &data) { + int x_prev = pixel.x - data.direction.x; + int y_prev = pixel.y - data.direction.y; + costtype_t c = 0; + + if (x_prev >= 0 && y_prev >= 0 && x_prev < out.width && y_prev < out.height) { + c = out(y_prev,x_prev,in.disp_min) + 1; + } + + const int d_stop = int(out.disp_max)-int(out.disp_min); + + for (int d=thread; d<=d_stop; d+=stride) { + // for tests, each value updated exactly once (each path separately) + // NOT reliable (threading) + if (out(pixel.y,pixel.x,d+in.disp_min) != 0) { throw std::exception(); } + + out(pixel.y,pixel.x,d+in.disp_min) += c; + } + } + + /* __host__ __device__ void operator()(int x, int y, int i, const int dx, const int dy, PathData &data) { + int x_prev = x - dx; + int y_prev = y - dy; + int c = 0; + + if (x_prev >= 0 && y_prev >= 0 && x_prev < out.width && y_prev < out.height) { + c = out(y_prev,x_prev,in.disp_min) + 1; + } + for (int d=in.disp_min; d <= in.disp_max; d++) { + if (out(y,x,d) != 0) { throw std::exception(); } + out(y,x,d) += c; + } + }*/ +}; + +struct DummyCostImpl : impl::DSImplBase<short> { + typedef short Type; + + DummyCostImpl(ushort w, ushort h, ushort dmin, ushort dmax) : DSImplBase<short>({w,h,dmin,dmax}) {} + + __host__ __device__ inline short operator()(const int y, const int x, const int d) const { + return 0; + } +}; + +class DummyCost : public DSBase<DummyCostImpl> { +public: + typedef DummyCostImpl DataType; + typedef short Type; + + DummyCost() : DSBase<DataType>(0, 0, 0, 0) {}; + DummyCost(int width, int height, int disp_min, int disp_max) + : DSBase<DataType>(width, height, disp_min, disp_max) {} +}; + +static void test_aggregation_increment(const int w, const int h, const int dmin, const int dmax) { + const int size = w*h*(dmax-dmin+1); + + DummyCost dsi(w,h,dmin,dmax); + TestAggregateIncrement<DummyCost::DataType> func = {dsi.data()}; + PathAggregator<TestAggregateIncrement<DummyCost::DataType>> aggr; + + SECTION("Left to right") { + auto &out = aggr(func, AggregationDirections::LEFTRIGHT); + int valid = 0; + + for (int y = 0; y < h; y++) { + for (int x = 0; x < w; x++) { + for (int d = dmin; d <= dmax; d++) { + auto val = out(y,x,d); + valid += (out(y,x,d) == x) ? 1 : 0; + }}} + + REQUIRE(valid == size); + } + + SECTION("Right to left") { + auto &out = aggr(func, AggregationDirections::RIGHTLEFT); + int valid = 0; + + for (int y = 0; y < h; y++) { + for (int x = 0; x < w; x++) { + for (int d = dmin; d <= dmax; d++) { + valid += (out(y,x,d) == w-1-x) ? 1 : 0; + }}} + + REQUIRE(valid == size); + } + + + SECTION("Top to bottom") { + auto &out = aggr(func, AggregationDirections::UPDOWN); + int valid = 0; + + for (int y = 0; y < h; y++) { + for (int x = 0; x < w; x++) { + for (int d = dmin; d <= dmax; d++) { + valid += (out(y,x,d) == y) ? 1 : 0; + }}} + + REQUIRE(valid == size); + } + + SECTION("Bottom to top") { + auto &out = aggr(func, AggregationDirections::DOWNUP); + int valid = 0; + + for (int y = 0; y < h; y++) { + for (int x = 0; x < w; x++) { + for (int d = dmin; d <= dmax; d++) { + valid += (out(y,x,d) == h-y-1) ? 1 : 0; + }}} + + REQUIRE(valid == size); + } + + SECTION("Top left to bottom right") { + auto &out = aggr(func, AggregationDirections::TOPLEFTBOTTOMRIGHT); + int valid = 0; + + for (int y = 0; y < h; y++) { + for (int x = 0; x < w; x++) { + for (int d = dmin; d <= dmax; d++) { + valid += (out(y,x,d) == std::min(x,y)) ? 1 : 0; + }}} + + REQUIRE(valid == size); + } + + SECTION("Bottom right to top left") { + auto &out = aggr(func, AggregationDirections::BOTTOMRIGHTTOPLEFT); + int valid = 0; + + for (int y = 0; y < h; y++) { + for (int x = 0; x < w; x++) { + for (int d = dmin; d <= dmax; d++) { + valid += (out(y,x,d) == std::min(w-1-x,h-1-y)) ? 1 : 0; + }}} + + REQUIRE(valid == size); + } + + SECTION("Bottom left to top right") { + auto &out = aggr(func, AggregationDirections::BOTTOMLEFTTOPRIGHT); + int valid = 0; + + for (int y = 0; y < h; y++) { + for (int x = 0; x < w; x++) { + for (int d = dmin; d <= dmax; d++) { + valid += (out(y,x,d) == std::min(x,h-1-y)) ? 1 : 0; + }}} + + REQUIRE(valid == size); + } + + SECTION("Top right to bottom left") { + auto &out = aggr(func, AggregationDirections::TOPRIGHTBOTTOMLEFT); + int valid = 0; + + for (int y = 0; y < h; y++) { + for (int x = 0; x < w; x++) { + for (int d = dmin; d <= dmax; d++) { + valid += (out(y,x,d) == std::min(w-1-x,y)) ? 1 : 0; + }}} + + REQUIRE(valid == size); + } +} + +TEST_CASE("Increment paths [100x100]", "") { + test_aggregation_increment(100, 100, 10, 20); +} + +TEST_CASE("Increment paths [100x200]", "") { + test_aggregation_increment(100, 200, 10, 20); +} + +TEST_CASE("Increment paths [200x100]", "") { + test_aggregation_increment(200, 100, 10, 20); +} + +/*TEST_CASE("Vertical aggregation", "") { + SECTION("Perform an aggregation") { + CensusMatchingCost dsi(100,100,10,20,9,7); + + cv::Mat left(100,100, CV_8UC1); + cv::Mat right(100,100, CV_8UC1); + left.setTo(cv::Scalar(0)); + right.setTo(cv::Scalar(0)); + dsi.setLeft(left); + dsi.setRight(right); + + DisparitySpaceImage<unsigned short> out(100,100,10,20); + out.set(1); + DisparitySpaceImage<unsigned short> previous(100,1,10,20); + + Aggregate<CensusMatchingCost, unsigned short> aggr = {0,0, out.data(), previous.data(), dsi.data()}; + aggregate_vertical_all(aggr); + + REQUIRE( out.data()(50,50,12) == 1 ); + } +} + +TEST_CASE("Diagonal upper aggregation", "") { + SECTION("Perform an aggregation") { + CensusMatchingCost dsi(100,100,10,20,9,7); + + cv::Mat left(100,100, CV_8UC1); + cv::Mat right(100,100, CV_8UC1); + left.setTo(cv::Scalar(0)); + right.setTo(cv::Scalar(0)); + dsi.setLeft(left); + dsi.setRight(right); + + DisparitySpaceImage<unsigned short> out(100,100,10,20); + out.set(1); + DisparitySpaceImage<unsigned short> previous(100+100,1,10,20); + + Aggregate<CensusMatchingCost, unsigned short> aggr = {0,0, out.data(), previous.data(), dsi.data()}; + aggregate_diagonal_upper_all(aggr); + + REQUIRE( out.data()(50,50,12) == 1 ); + } +} + +TEST_CASE("Diagonal lower aggregation", "") { + SECTION("Perform an aggregation") { + CensusMatchingCost dsi(100,100,10,20,9,7); + + cv::Mat left(100,100, CV_8UC1); + cv::Mat right(100,100, CV_8UC1); + left.setTo(cv::Scalar(0)); + right.setTo(cv::Scalar(0)); + dsi.setLeft(left); + dsi.setRight(right); + + DisparitySpaceImage<unsigned short> out(100,100,10,20); + out.set(1); + DisparitySpaceImage<unsigned short> previous(100+100,1,10,20); + + Aggregate<CensusMatchingCost, unsigned short> aggr = {0,0, out.data(), previous.data(), dsi.data()}; + aggregate_diagonal_lower_all(aggr); + + REQUIRE( out.data()(50,50,12) == 1 ); + } +}*/ diff --git a/lib/libstereo/test/array2d_unit.cpp b/lib/libstereo/test/array2d_unit.cpp new file mode 100644 index 000000000..8cc2c385c --- /dev/null +++ b/lib/libstereo/test/array2d_unit.cpp @@ -0,0 +1,11 @@ +#include "catch.hpp" +#include "../src/array2d.hpp" + +TEST_CASE("Array2D", "") { + SECTION("Construct a float array2d") { + Array2D<float> array(100,100); + + array.data()(5,5) = 677; + REQUIRE( array.data()(5,5) == 677 ); + } +} diff --git a/lib/libstereo/test/dsi_gpu_unit.cu b/lib/libstereo/test/dsi_gpu_unit.cu new file mode 100644 index 000000000..96f42cccb --- /dev/null +++ b/lib/libstereo/test/dsi_gpu_unit.cu @@ -0,0 +1,11 @@ +#include "catch.hpp" +#include "../src/dsi.hpp" + +TEST_CASE("DisparitySpaceImage", "") { + SECTION("Construct a ushort DSI") { + DisparitySpaceImage<unsigned short> dsi(100,100,10,20); + + dsi.set(677); + REQUIRE( dsi(5,5,2) == 677 ); + } +} diff --git a/lib/libstereo/test/dsi_unit.cpp b/lib/libstereo/test/dsi_unit.cpp new file mode 100644 index 000000000..9ea3dac80 --- /dev/null +++ b/lib/libstereo/test/dsi_unit.cpp @@ -0,0 +1,11 @@ +#include "catch.hpp" +#include "../src/dsi.hpp" + +TEST_CASE("DisparitySpaceImage", "") { + SECTION("Construct a ushort DSI") { + DisparitySpaceImage<unsigned short> dsi(100,100,10,20); + + dsi.data()(5,5,2) = 677; + REQUIRE( dsi.data()(5,5,2) == 677 ); + } +} diff --git a/lib/libstereo/test/matching_cost_unit.cpp b/lib/libstereo/test/matching_cost_unit.cpp new file mode 100644 index 000000000..883d584e0 --- /dev/null +++ b/lib/libstereo/test/matching_cost_unit.cpp @@ -0,0 +1,40 @@ +#include "catch.hpp" +#include "costs/census.hpp" +#include "costs/gradient.hpp" +#include "util_opencv.hpp" + +TEST_CASE("Gradient Matching Cost", "") { + SECTION("Construct a Gradient matching cost") { + GradientMatchingCostL2 dsi(100,100,10,20); + + cv::Mat left(100,100, CV_8UC1); + cv::Mat right(100,100, CV_8UC1); + + left.setTo(cv::Scalar(0)); + right.setTo(cv::Scalar(0)); + + Array2D<uchar> l(left); + Array2D<uchar> r(right); + dsi.set(l, r); + + REQUIRE( dsi.data()(5,5,2) == 0 ); + } +} + +TEST_CASE("Census Matching Cost", "") { + SECTION("Construct a Census matching cost") { + CensusMatchingCost dsi(100,100,10,20,9,7); + + cv::Mat left(100,100, CV_8UC1); + cv::Mat right(100,100, CV_8UC1); + + left.setTo(cv::Scalar(0)); + right.setTo(cv::Scalar(0)); + + Array2D<uchar> l(left); + Array2D<uchar> r(right); + dsi.set(l, r); + + REQUIRE( dsi.data()(5,5,2) == 0 ); + } +} diff --git a/lib/libstereo/test/tests.cpp b/lib/libstereo/test/tests.cpp new file mode 100644 index 000000000..178916eab --- /dev/null +++ b/lib/libstereo/test/tests.cpp @@ -0,0 +1,3 @@ +#define CATCH_CONFIG_MAIN +#include "catch.hpp" + diff --git a/lib/libstereo/test/wta_unit.cpp b/lib/libstereo/test/wta_unit.cpp new file mode 100644 index 000000000..ebb845f68 --- /dev/null +++ b/lib/libstereo/test/wta_unit.cpp @@ -0,0 +1,72 @@ +#include "catch.hpp" +#include "../src/dsi.hpp" +#include "../src/wta.hpp" +#include "../src/util.hpp" +#include "../src/consistency.hpp" + +using algorithms::WTA; +using algorithms::WTADiagonal; +using algorithms::ConsistencyCheck; + +TEST_CASE("Winner Takes All", "") { + const int w = 100; + const int h = 100; + const int dmin = 10; + const int dmax = 20; + const int d_true = 12; + + DSImage16U dsi(w, h, dmin, dmax); + Array2D<float> disparity(w, h); + Array2D<unsigned short> min_cost(w, h); + + // d(y,x,d) == 0 iff d == d_true, otherwise 100 + for (int y = 0; y < h; y++) { + for (int x = 0; x < w; x++) { + for (int d = dmin; d <= dmax; d++) { + dsi.data()(y,x,d) = (d == d_true) ? 0 : 100; + } + } + } + + SECTION("WTA from a ushort DSI") { + WTA<DSImage16U, float, 0> wta = {dsi.data(), disparity.data(), min_cost.data()}; + parallel2D(wta, dsi.width(), dsi.height()); + int valid = 0; + for (int y = 0; y < h; y++) { + for (int x = 0; x < w; x++) { + valid += (float(d_true) == disparity.data()(y,x)) ? 1 : 0; + } + } + REQUIRE(valid == w*h); + } + + SECTION("WTA diagonal from a ushort DSI") { + WTADiagonal<DSImage16U,float> wta = {dsi.data(), disparity.data()}; + parallel2D(wta, dsi.width(), dsi.height()); + + int valid = 0; + for (int y = 0; y < h; y++) { + for (int x = 0; x < w; x++) { + valid += (float(d_true) == disparity.data()(y,x)) ? 1 : 0; + } + } + } + + SECTION("WTA with consistency check") { + WTA<DSImage16U, float, 0> wta = {dsi.data(), disparity.data(), min_cost.data()}; + parallel2D(wta, dsi.width(), dsi.height()); + + Array2D<float> disparityr(100,100); + + WTADiagonal<DSImage16U, float> wtadiag = {dsi.data(), disparityr.data()}; + parallel2D(wtadiag, dsi.width(), dsi.height()); + + parallel2D<ConsistencyCheck<float>>({disparity.data(), disparityr.data()}, dsi.width(), dsi.height()); + } +} + +TEST_CASE("WTA Wrapper") { + WinnerTakesAll<DSImage16U, float> wta; + DSImage16U dsi(100,100,10,20); + wta(dsi); +} -- GitLab