diff --git a/lib/libstereo/CMakeLists.txt b/lib/libstereo/CMakeLists.txt index 78f367ad5b0e5d7965bf679c704d43ad4b68cccd..64e19cd84f2ec398fad46ab9c1c64e751bbeab17 100644 --- a/lib/libstereo/CMakeLists.txt +++ b/lib/libstereo/CMakeLists.txt @@ -36,34 +36,72 @@ include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) if (LIBSTEREO_SHARED) add_library(libstereo SHARED src/stereo_gradientstree.cu + src/stereo_gtsgm.cu src/stereo_misgm.cu - src/stereo_misgm2.cu - src/stereo_censussgm.cu - src/stereo_sgmp.cpp + src/algorithms/censussgm.cu + src/algorithms/excensussgm.cu + src/algorithms/stablesgm.cu + src/algorithms/tcensussgm.cu + src/algorithms/hcensussgm.cu + src/algorithms/hwcensussgm.cu + src/algorithms/brefcensussgm.cu + src/algorithms/meancensussgm.cu + src/algorithms/hstablesgm.cu + #src/stereo_hier_census.cu + src/stereo_wcensussgm.cu + src/stereo_census_adaptive.cu + src/stereo_cp_censussgm.cu + src/stereo_adcensussgm.cu + src/stereo_varcensussgm.cu + src/stereo_adsgm.cu + src/stereo_sad.cu + #src/stereo_sgmp.cpp + src/stereo_wadcensus.cu src/costs/census.cu + src/costs/tcensus.cu src/costs/gradient.cu + src/costs/sad.cu + src/costs/stable.cu src/costs/mutual_information.cu src/median_filter.cu src/median_filter_fixstars.cu + src/dsi_tools.cu ) -set_target_properties(libstereo PROPERTIES PUBLIC_HEADER include/stereo.hpp) + set_target_properties(libstereo PROPERTIES PUBLIC_HEADER include/stereo.hpp) else() add_library(libstereo src/stereo_gradientstree.cu src/stereo_gtsgm.cu src/stereo_misgm.cu - src/stereo_misgm2.cu - src/stereo_censussgm.cu + src/algorithms/censussgm.cu + src/algorithms/excensussgm.cu + src/algorithms/stablesgm.cu + src/algorithms/tcensussgm.cu + src/algorithms/hcensussgm.cu + src/algorithms/hwcensussgm.cu + src/algorithms/brefcensussgm.cu + src/algorithms/meancensussgm.cu + src/algorithms/hstablesgm.cu + #src/stereo_hier_census.cu + src/stereo_wcensussgm.cu src/stereo_census_adaptive.cu + src/stereo_cp_censussgm.cu src/stereo_adcensussgm.cu + src/stereo_varcensussgm.cu src/stereo_adsgm.cu + src/stereo_sad.cu #src/stereo_sgmp.cpp + src/stereo_wadcensus.cu src/costs/census.cu + src/costs/tcensus.cu src/costs/gradient.cu + src/costs/sad.cu + src/costs/stable.cu src/costs/mutual_information.cu src/median_filter.cu src/median_filter_fixstars.cu + src/dsi_tools.cu ) endif() diff --git a/lib/libstereo/include/stereo.hpp b/lib/libstereo/include/stereo.hpp index 16e0648e93711f6dc4dfdcece8b6d0109d13dd14..c596986b02bf3243f15a4f00db4b919a2fa062db 100644 --- a/lib/libstereo/include/stereo.hpp +++ b/lib/libstereo/include/stereo.hpp @@ -72,6 +72,365 @@ public: unsigned short P2 = 25; float uniqueness = std::numeric_limits<unsigned short>::max(); int subpixel = 1; // subpixel interpolation method + bool lr_consistency = true; + int paths = AggregationDirections::HORIZONTAL | + AggregationDirections::VERTICAL | + AggregationDirections::DIAGONAL; + bool debug = false; + CensusPattern pattern = CensusPattern::STANDARD; + }; + Parameters params; + +private: + struct Impl; + Impl *impl_; +}; + +class StereoExCensusSgm { +public: + StereoExCensusSgm(); + ~StereoExCensusSgm(); + + void compute(cv::InputArray l, cv::InputArray r, cv::OutputArray disparity); + void setPrior(cv::InputArray disp) {}; + + struct Parameters { + int d_min = 0; + int d_max = 0; + unsigned short P1 = 5; + unsigned short P2 = 25; + float uniqueness = std::numeric_limits<unsigned short>::max(); + int subpixel = 1; // subpixel interpolation method + bool lr_consistency = true; + int paths = 
AggregationDirections::HORIZONTAL |
+			AggregationDirections::VERTICAL |
+			AggregationDirections::DIAGONAL;
+		bool debug = false;
+		/** normalization of variance to range [alpha, beta] */
+		float alpha = 0.2;
+		float beta = 1.0;
+		/** variance window size */
+		int var_window = 5;
+		CensusPattern pattern = CensusPattern::STANDARD;
+	};
+	Parameters params;
+
+private:
+	struct Impl;
+	Impl *impl_;
+};
+
+/**
+ * Mean reference pixel. Doesn't work since detail is lost from the reference.
+ *
+ * @see "Froba, B., Ernst, A. "Face detection with the modified census transform," In: Sixth IEEE International Conference on Automatic Face and Gesture Recognition. IEEE Computer Society Press, Los Alamitos (2004)."
+ * @see "A robust local census-based stereo matching insensitive to illumination changes" (2012)
+ * @see "A stereo matching algorithm based on four-moded census and relative confidence plane fitting" (2015)
+ * @see "Stereo matching based on improved census transform" (2018)
+ */
+class StereoMeanCensusSgm {
+public:
+	StereoMeanCensusSgm();
+	~StereoMeanCensusSgm();
+
+	void compute(cv::InputArray l, cv::InputArray r, cv::OutputArray disparity);
+	void setPrior(cv::InputArray disp) {};
+
+	struct Parameters {
+		int d_min = 0;
+		int d_max = 0;
+		unsigned short P1 = 5;
+		unsigned short P2 = 25;
+		float uniqueness = std::numeric_limits<unsigned short>::max();
+		int subpixel = 1; // subpixel interpolation method
+		bool lr_consistency = true;
+		int paths = AggregationDirections::HORIZONTAL |
+			AggregationDirections::VERTICAL |
+			AggregationDirections::DIAGONAL;
+		bool debug = false;
+	};
+	Parameters params;
+
+private:
+	struct Impl;
+	Impl *impl_;
+};
+
+/**
+ * Bilateral filtering for reference pixel. Doesn't make things better, but
+ * still better than mean value for reference.
+ */
+class StereoBRefCensusSgm {
+public:
+	StereoBRefCensusSgm();
+	~StereoBRefCensusSgm();
+
+	void compute(cv::InputArray l, cv::InputArray r, cv::OutputArray disparity);
+	void setPrior(cv::InputArray disp) {};
+
+	struct Parameters {
+		int d_min = 0;
+		int d_max = 0;
+		unsigned short P1 = 5;
+		unsigned short P2 = 25;
+		float uniqueness = std::numeric_limits<unsigned short>::max();
+		int subpixel = 1; // subpixel interpolation method
+		bool lr_consistency = true;
+		int paths = AggregationDirections::HORIZONTAL |
+			AggregationDirections::VERTICAL |
+			AggregationDirections::DIAGONAL;
+		bool debug = false;
+	};
+	Parameters params;
+
+private:
+	struct Impl;
+	Impl *impl_;
+};
+
+/**
+ * STABLE Binary descriptor. This is a general implementation.
+ *
+ * @see K. Valentín, R. Huber-Mörk, and S. Štolc, "Binary descriptor-based dense
+ * line-scan stereo matching," J. Electron. Imaging, vol. 26, no. 1, 2017.
+ */
+class StereoStableSgm {
+public:
+	StereoStableSgm();
+	~StereoStableSgm();
+
+	void compute(cv::InputArray l, cv::InputArray r, cv::OutputArray disparity);
+	void setPrior(cv::InputArray disp) {};
+
+	struct Parameters {
+		int d_min = 0;
+		int d_max = 0;
+		unsigned short P1 = 5;
+		unsigned short P2 = 25;
+		short wsize = 17;
+		float uniqueness = std::numeric_limits<unsigned short>::max();
+		int subpixel = 1; // subpixel interpolation method
+		bool lr_consistency = true;
+		int paths = AggregationDirections::HORIZONTAL |
+			AggregationDirections::VERTICAL |
+			AggregationDirections::DIAGONAL;
+		bool debug = false;
+	};
+	Parameters params;
+
+private:
+	struct Impl;
+	Impl *impl_;
+};
+
+/**
+ * Three resolutions, fine, medium and coarse, are combined.
In each case the + * reference pixel always comes from the fine resolution. Each resolution is + * variance weighted for that resolution to increase and decrease influence, + * allowing textured high res to dominate when possible. + */ +class StereoHStableSgm { +public: + StereoHStableSgm(); + ~StereoHStableSgm(); + + void compute(cv::InputArray l, cv::InputArray r, cv::OutputArray disparity); + void setPrior(cv::InputArray disp) {}; + + struct Parameters { + int d_min = 0; + int d_max = 0; + unsigned short P1 = 5; + unsigned short P2 = 25; + float uniqueness = std::numeric_limits<unsigned short>::max(); + int subpixel = 1; // subpixel interpolation method + bool lr_consistency = true; + int paths = AggregationDirections::HORIZONTAL | + AggregationDirections::VERTICAL | + AggregationDirections::DIAGONAL; + /** normalization of variance to range [alpha, beta] */ + float alpha = 0.2; + float beta = 1.0; + /** variance window size */ + int var_window = 9; + bool debug = false; + short wsize = 17; + }; + Parameters params; + +private: + struct Impl; + Impl *impl_; +}; + +/** + * Three resolutions, fine, medium and coarse, are combined. In each case the + * reference pixel always comes from the fine resolution. Each resolution is + * variance weighted for that resolution to increase and decrease influence, + * allowing textured high res to dominate when possible. + */ +class StereoHierCensusSgm { +public: + StereoHierCensusSgm(); + ~StereoHierCensusSgm(); + + void compute(cv::InputArray l, cv::InputArray r, cv::OutputArray disparity); + void setPrior(cv::InputArray disp) {}; + + struct Parameters { + int d_min = 0; + int d_max = 0; + unsigned short P1 = 5; + unsigned short P2 = 25; + float uniqueness = std::numeric_limits<unsigned short>::max(); + int subpixel = 1; // subpixel interpolation method + bool lr_consistency = true; + int paths = AggregationDirections::HORIZONTAL | + AggregationDirections::VERTICAL | + AggregationDirections::DIAGONAL; + /** normalization of variance to range [alpha, beta] */ + float alpha = 0.2; + float beta = 1.0; + /** variance window size */ + int var_window = 9; + bool debug = false; + CensusPattern pattern = CensusPattern::STANDARD; + }; + Parameters params; + +private: + struct Impl; + Impl *impl_; +}; + +/** + * Three resolutions, fine, medium and coarse, are combined. In each case the + * reference pixel always comes from the fine resolution. Each resolution is + * variance weighted for that resolution to increase and decrease influence, + * allowing textured high res to dominate when possible. 
+ */ +class StereoHierWeightCensusSgm { +public: + StereoHierWeightCensusSgm(); + ~StereoHierWeightCensusSgm(); + + void compute(cv::InputArray l, cv::InputArray r, cv::OutputArray disparity); + void setPrior(cv::InputArray disp) {}; + + struct Parameters { + int d_min = 0; + int d_max = 0; + unsigned short P1 = 5; + unsigned short P2 = 25; + float uniqueness = std::numeric_limits<unsigned short>::max(); + int subpixel = 1; // subpixel interpolation method + bool lr_consistency = true; + int paths = AggregationDirections::HORIZONTAL | + AggregationDirections::VERTICAL | + AggregationDirections::DIAGONAL; + /** normalization of variance to range [alpha, beta] */ + float alpha = 0.2; + float beta = 1.0; + /** variance window size */ + int var_window = 7; + bool debug = false; + CensusPattern pattern = CensusPattern::STANDARD; + }; + Parameters params; + +private: + struct Impl; + Impl *impl_; +}; + +class StereoCPCensusSgm { +public: + StereoCPCensusSgm(); + ~StereoCPCensusSgm(); + + void compute(cv::InputArray l, cv::InputArray r, cv::OutputArray disparity); + void setPrior(cv::InputArray disp) {}; + + struct Parameters { + int d_min = 0; + int d_max = 0; + unsigned short P1 = 5; + unsigned short P2 = 25; + float uniqueness = std::numeric_limits<unsigned short>::max(); + int subpixel = 1; // subpixel interpolation method + bool lr_consistency = true; + int paths = AggregationDirections::HORIZONTAL | + AggregationDirections::VERTICAL | + AggregationDirections::DIAGONAL; + bool debug = false; + bool filtercosts = true; + /** normalization of variance to range [alpha, beta] */ + float alpha = 0.5; + float beta = 1.0; + /** variance window size */ + int var_window = 7; + CensusPattern pattern = CensusPattern::STANDARD; + }; + Parameters params; + +private: + struct Impl; + Impl *impl_; +}; + +class StereoWCensusSgm { +public: + StereoWCensusSgm(); + ~StereoWCensusSgm(); + + void compute(cv::InputArray l, cv::InputArray r, cv::OutputArray disparity); + void setPrior(cv::InputArray disp) {}; + + struct Parameters { + int d_min = 0; + int d_max = 0; + unsigned short P1 = 5; + unsigned short P2 = 25; + float uniqueness = std::numeric_limits<unsigned short>::max(); + int subpixel = 1; // subpixel interpolation method + bool lr_consistency = true; + int paths = AggregationDirections::HORIZONTAL | + AggregationDirections::VERTICAL | + AggregationDirections::DIAGONAL; + bool debug = false; + }; + Parameters params; + +private: + struct Impl; + Impl *impl_; +}; + +/** + * Ternary census, or 3 moded, where there is a noise threshold and where + * pixels can be identified as no luminance change in addition to above or + * below. 
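+ *
+ * Roughly, each neighbour p is compared to the centre value c with a noise
+ * threshold t (params.t): "below" when p < c - t, "no change" when
+ * |p - c| <= t, and "above" when p > c + t. The exact encoding used here is
+ * in src/costs/tcensus.cu.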
+ * + * @see "TEXTURE-AWARE DENSE IMAGE MATCHING USING TERNARY CENSUS TRANSFORM" (2016) + * @see "Local disparity estimation with three-moded cross census and advanced support weight" (2013) + */ +class StereoTCensusSgm { +public: + StereoTCensusSgm(); + ~StereoTCensusSgm(); + + void compute(cv::InputArray l, cv::InputArray r, cv::OutputArray disparity); + void setPrior(cv::InputArray disp) {}; + + struct Parameters { + int d_min = 0; + int d_max = 0; + uchar t = 8; + unsigned short P1 = 5; + unsigned short P2 = 25; + float uniqueness = std::numeric_limits<unsigned short>::max(); + int subpixel = 1; // subpixel interpolation method + bool lr_consistency = true; int paths = AggregationDirections::HORIZONTAL | AggregationDirections::VERTICAL | AggregationDirections::DIAGONAL; @@ -165,6 +524,76 @@ public: float alpha = 0.2; /** alpha: minimum weight for census cost */ float beta = 0.7; /** 1-beta: minimum weight for MI cost */ + int var_window = 9; + bool debug = false; + }; + Parameters params; + +private: + struct Impl; + Impl *impl_; +}; + +class StereoWADCensus { +public: + StereoWADCensus(); + ~StereoWADCensus(); + + void compute(cv::InputArray l, cv::InputArray r, cv::OutputArray disparity); + void setPrior(cv::InputArray disp) {}; + + struct Parameters { + int d_min = 0; + int d_max = 0; + float P1 = 1; + float P2 = 16; + float l1 = 1.0; + float l2 = 1.0; + int wsize = 31; + float uniqueness = std::numeric_limits<short>::max(); + int subpixel = 1; // subpixel interpolation method + int paths = AggregationDirections::HORIZONTAL | + AggregationDirections::VERTICAL | + AggregationDirections::DIAGONAL; + + float alpha = 0.2; /** alpha: minimum weight for census cost */ + float beta = 0.7; /** 1-beta: minimum weight for MI cost */ + int var_window = 9; + bool debug = false; + }; + Parameters params; + +private: + struct Impl; + Impl *impl_; +}; + +class StereoVarCensus { +public: + StereoVarCensus(); + ~StereoVarCensus(); + + void compute(cv::InputArray l, cv::InputArray r, cv::OutputArray disparity); + void setPrior(cv::InputArray disp) {}; + + struct Parameters { + int d_min = 0; + int d_max = 0; + float P1 = 1; + float P2 = 16; + int wsize = 31; + float uniqueness = std::numeric_limits<short>::max(); + int subpixel = 1; /** subpixel interpolation method */ + bool lr_consistency = true; + int paths = AggregationDirections::HORIZONTAL | + AggregationDirections::VERTICAL | + AggregationDirections::DIAGONAL; + + /** normalization of variance to range [alpha, beta] */ + float alpha = 0.2; + float beta = 1.0; + /** variance window size */ + int var_window = 9; bool debug = false; }; Parameters params; @@ -174,6 +603,29 @@ private: Impl *impl_; }; +class StereoSad { +public: + StereoSad(); + ~StereoSad(); + + void compute(cv::InputArray l, cv::InputArray r, cv::OutputArray disparity); + void setPrior(cv::InputArray disp); + + struct Parameters { + int wsize = 63; + int d_min = 0; + int d_max = 0; + int subpixel = 1; // subpixel interpolation method + bool debug = false; + }; + Parameters params; + +private: + struct Impl; + Impl *impl_; +}; + + /** * SGM aggregation on ground truth cost */ @@ -204,7 +656,6 @@ private: Impl *impl_; }; - /** * Census + SGM + prior * diff --git a/lib/libstereo/include/stereo_types.hpp b/lib/libstereo/include/stereo_types.hpp index fc57ddcf960f647c2bae5d356a29963d37fdde9d..6eb259b9d447432178e7fb731b127ef592053df3 100644 --- a/lib/libstereo/include/stereo_types.hpp +++ b/lib/libstereo/include/stereo_types.hpp @@ -18,4 +18,10 @@ enum AggregationDirections : int { ALL 
= HORIZONTAL+VERTICAL+DIAGONAL, }; +enum class CensusPattern { + STANDARD, + GENERALISED, + STAR +}; + #endif diff --git a/lib/libstereo/middlebury/CMakeLists.txt b/lib/libstereo/middlebury/CMakeLists.txt index 3f5632c35ffb34bb037bde503328ca5cf7996402..a4e88a69fca1aa10b0fd2ac1a51d32033c01e6bf 100644 --- a/lib/libstereo/middlebury/CMakeLists.txt +++ b/lib/libstereo/middlebury/CMakeLists.txt @@ -5,4 +5,4 @@ add_executable (middlebury target_include_directories(middlebury PRIVATE ../include/) target_include_directories(middlebury PUBLIC ${OpenCV_INCLUDE_DIRS}) -target_link_libraries(middlebury libstereo pthread dl ${OpenCV_LIBS}) +target_link_libraries(middlebury libstereo pthread stdc++fs dl ${OpenCV_LIBS}) diff --git a/lib/libstereo/middlebury/algorithms.hpp b/lib/libstereo/middlebury/algorithms.hpp new file mode 100644 index 0000000000000000000000000000000000000000..604f0f57c73529ffa5ae6b88b9a42465f6e51a3f --- /dev/null +++ b/lib/libstereo/middlebury/algorithms.hpp @@ -0,0 +1,358 @@ +#pragma once + +#include <map> + +#include "middlebury.hpp" +#include "stereo.hpp" + +struct Algorithm { + virtual void run(const MiddleburyData &data, cv::Mat &disparity)=0; + float P1 = 4.0f; + float P2 = 80.0f; + bool subpixel = true; + bool lr_consistency = true; +}; + +namespace Impl { + + struct CensusSGM : public Algorithm { + CensusSGM() { P1 = 30.0f; P2 = 132.0f; } // Tuned to total error 2.0 + + virtual void run(const MiddleburyData &data, cv::Mat &disparity) override { + StereoCensusSgm stereo; + stereo.params.P1 = P1; + stereo.params.P2 = P2; + stereo.params.subpixel = subpixel; + stereo.params.lr_consistency = lr_consistency; + + stereo.params.debug = false; + stereo.params.d_min = data.calib.vmin; + stereo.params.d_max = data.calib.vmax; + stereo.compute(data.imL, data.imR, disparity); + } + }; + + struct ECensusSGM : public Algorithm { + ECensusSGM() { P1 = 8.0f; P2 = 40.0f; } // Tuned to total error 2.0 + + virtual void run(const MiddleburyData &data, cv::Mat &disparity) override { + StereoExCensusSgm stereo; + stereo.params.P1 = P1; + stereo.params.P2 = P2; + stereo.params.subpixel = subpixel; + stereo.params.lr_consistency = lr_consistency; + + stereo.params.debug = false; + stereo.params.d_min = data.calib.vmin; + stereo.params.d_max = data.calib.vmax; + stereo.params.alpha = 0.5f; + stereo.params.beta = 1.0f; + stereo.params.var_window = 7; + stereo.compute(data.imL, data.imR, disparity); + } + }; + + struct GCensusSGM : public Algorithm { + GCensusSGM() { P1 = 20.0f; P2 = 110.0f; } // Tuned to total error 2.0 + + virtual void run(const MiddleburyData &data, cv::Mat &disparity) override { + StereoCensusSgm stereo; + stereo.params.P1 = P1; + stereo.params.P2 = P2; + stereo.params.subpixel = subpixel; + stereo.params.lr_consistency = lr_consistency; + + stereo.params.d_min = data.calib.vmin; + stereo.params.d_max = data.calib.vmax; + stereo.params.pattern = CensusPattern::GENERALISED; + stereo.compute(data.imL, data.imR, disparity); + } + }; + + struct MeanCensusSGM : public Algorithm { + MeanCensusSGM() { P1 = 12.0f; P2 = 32.0f; } + + virtual void run(const MiddleburyData &data, cv::Mat &disparity) override { + StereoMeanCensusSgm stereo; + stereo.params.P1 = P1; + stereo.params.P2 = P2; + stereo.params.subpixel = subpixel; + stereo.params.lr_consistency = lr_consistency; + + stereo.params.d_min = data.calib.vmin; + stereo.params.d_max = data.calib.vmax; + stereo.params.debug = false; + stereo.compute(data.imL, data.imR, disparity); + } + }; + + struct StableSGM : public Algorithm { + 
StableSGM() { P1 = 1.0f; P2 = 8.0f; } + + virtual void run(const MiddleburyData &data, cv::Mat &disparity) override { + StereoStableSgm stereo; + stereo.params.P1 = P1; + stereo.params.P2 = P2; + stereo.params.subpixel = subpixel; + stereo.params.lr_consistency = lr_consistency; + stereo.params.wsize = 9; + stereo.params.d_min = data.calib.vmin; + stereo.params.d_max = data.calib.vmax; + stereo.params.debug = false; + stereo.compute(data.imL, data.imR, disparity); + } + }; + + struct HStableSGM : public Algorithm { + HStableSGM() { P1 = 3.0f; P2 = 24.0f; } + + virtual void run(const MiddleburyData &data, cv::Mat &disparity) override { + StereoHStableSgm stereo; + stereo.params.P1 = P1; + stereo.params.P2 = P2; + stereo.params.subpixel = subpixel; + stereo.params.lr_consistency = lr_consistency; + stereo.params.wsize = 7; + stereo.params.d_min = data.calib.vmin; + stereo.params.d_max = data.calib.vmax; + stereo.params.debug = false; + stereo.params.alpha = 0.5f; + stereo.params.beta = 1.0f; + stereo.compute(data.imL, data.imR, disparity); + } + }; + + struct BRefCensusSGM : public Algorithm { + BRefCensusSGM() { P1 = 12.0f; P2 = 32.0f; } + + virtual void run(const MiddleburyData &data, cv::Mat &disparity) override { + StereoBRefCensusSgm stereo; + stereo.params.P1 = P1; + stereo.params.P2 = P2; + stereo.params.subpixel = subpixel; + stereo.params.lr_consistency = lr_consistency; + + stereo.params.d_min = data.calib.vmin; + stereo.params.d_max = data.calib.vmax; + stereo.params.debug = false; + stereo.compute(data.imL, data.imR, disparity); + } + }; + + struct HCensusSGM : public Algorithm { + HCensusSGM() { P1 = 6.0f; P2 = 26.0f; } // Tuned to total error 2.0 + + virtual void run(const MiddleburyData &data, cv::Mat &disparity) override { + StereoHierCensusSgm stereo; + stereo.params.P1 = P1; + stereo.params.P2 = P2; + stereo.params.subpixel = subpixel; + stereo.params.lr_consistency = lr_consistency; + stereo.params.var_window = 5; + stereo.params.d_min = data.calib.vmin; + stereo.params.d_max = data.calib.vmax; + stereo.params.debug = false; + stereo.params.alpha = 0.5f; + stereo.params.beta = 1.0f; + + stereo.compute(data.imL, data.imR, disparity); + + /*cv::cuda::GpuMat gdisp; + cv::cuda::GpuMat gdisp2; + cv::cuda::GpuMat gl; + gl.upload(data.imL); + gdisp.create(gl.size(), CV_32F); + stereo.compute(data.imL, data.imR, gdisp); + + gdisp.convertTo(gdisp2, CV_16S); + auto blf = cv::cuda::createDisparityBilateralFilter(stereo.params.d_max-stereo.params.d_min+1,3,10); + blf->apply(gdisp2, gl, gdisp2); + gdisp2.convertTo(gdisp, CV_32F); + gdisp.download(disparity);*/ + } + }; + + struct HWCensusSGM : public Algorithm { + HWCensusSGM() { P1 = 8.0f; P2 = 38.0f; } // Tuned to total error 2.0 + + virtual void run(const MiddleburyData &data, cv::Mat &disparity) override { + StereoHierWeightCensusSgm stereo; + stereo.params.P1 = P1; + stereo.params.P2 = P2; + stereo.params.subpixel = subpixel; + stereo.params.lr_consistency = lr_consistency; + stereo.params.var_window = 5; + stereo.params.d_min = data.calib.vmin; + stereo.params.d_max = data.calib.vmax; + stereo.params.debug = false; + stereo.params.alpha = 0.5f; + stereo.params.beta = 1.0f; + + stereo.compute(data.imL, data.imR, disparity); + } + }; + + struct HGCensusSGM : public Algorithm { + HGCensusSGM() { P1 = 36.0f; P2 = 96.0f; } + + virtual void run(const MiddleburyData &data, cv::Mat &disparity) override { + StereoHierCensusSgm stereo; + stereo.params.P1 = P1; + stereo.params.P2 = P2; + stereo.params.subpixel = subpixel; + 
stereo.params.lr_consistency = lr_consistency; + + stereo.params.d_min = data.calib.vmin; + stereo.params.d_max = data.calib.vmax; + stereo.params.debug = false; + stereo.params.alpha = 0.5f; + stereo.params.beta = 1.0f; + stereo.params.pattern = CensusPattern::GENERALISED; + stereo.compute(data.imL, data.imR, disparity); + } + }; + + struct CPCensusSGM : public Algorithm { + CPCensusSGM() { P1 = 4.0f; P2 = 60.0f; } + + virtual void run(const MiddleburyData &data, cv::Mat &disparity) override { + StereoCPCensusSgm stereo; + stereo.params.P1 = P1; + stereo.params.P2 = P2; + stereo.params.subpixel = subpixel; + stereo.params.lr_consistency = lr_consistency; + + stereo.params.d_min = data.calib.vmin; + stereo.params.d_max = data.calib.vmax; + stereo.params.debug = false; + stereo.params.debug = false; + stereo.params.uniqueness = 20; + stereo.params.filtercosts = false; + stereo.params.alpha = 0.2f; + stereo.params.beta = 1.0f; + stereo.params.var_window = 7; + for (int i=0; i<10; ++i) { + if (i == 10-1) { + stereo.params.uniqueness = std::numeric_limits<unsigned short>::max(); + stereo.params.filtercosts = false; + } + stereo.compute(data.imL, data.imR, disparity); + } + } + }; + + struct TCensusSGM : public Algorithm { + TCensusSGM() { P1 = 24.0f; P2 = 64.0f; } + + virtual void run(const MiddleburyData &data, cv::Mat &disparity) override { + StereoTCensusSgm stereo; + stereo.params.P1 = P1; + stereo.params.P2 = P2; + stereo.params.subpixel = subpixel; + stereo.params.lr_consistency = lr_consistency; + + stereo.params.d_min = data.calib.vmin; + stereo.params.d_max = data.calib.vmax; + stereo.params.debug = false; + stereo.compute(data.imL, data.imR, disparity); + } + }; + + struct WCensusSGM : public Algorithm { + WCensusSGM() { P1 = 12.0f; P2 = 32.0f; } + + virtual void run(const MiddleburyData &data, cv::Mat &disparity) override { + StereoWCensusSgm stereo; + stereo.params.P1 = P1; + stereo.params.P2 = P2; + stereo.params.subpixel = subpixel; + stereo.params.lr_consistency = lr_consistency; + + stereo.params.debug = false; + stereo.params.d_min = data.calib.vmin; + stereo.params.d_max = data.calib.vmax; + stereo.compute(data.imL, data.imR, disparity); + } + }; + + struct MiSGM : public Algorithm { + MiSGM() { P1 = 4.0f; P2 = 18.0f; } + + virtual void run(const MiddleburyData &data, cv::Mat &disparity) override { + StereoMiSgm stereo; + stereo.params.P1 = P1; + stereo.params.P2 = P2; + stereo.params.subpixel = subpixel; + //stereo.params.lr_consistency = lr_consistency; + + stereo.params.debug = false; + stereo.params.d_min = data.calib.vmin; + stereo.params.d_max = data.calib.vmax; + stereo.compute(data.imL, data.imR, disparity); + stereo.setPrior(disparity); + stereo.compute(data.imL, data.imR, disparity); + } + }; + + struct WADCensusSGM : public Algorithm { + WADCensusSGM() { P1 = 0.2f; P2 = 0.5f; } + + virtual void run(const MiddleburyData &data, cv::Mat &disparity) override { + StereoWADCensus stereo; + stereo.params.P1 = P1; + stereo.params.P2 = P2; + stereo.params.subpixel = subpixel; + //stereo.params.lr_consistency = lr_consistency; + + stereo.params.l1 = 1.0; + stereo.params.l2 = 0.0; + stereo.params.alpha = 0.4; + stereo.params.beta = 1.0; + stereo.params.wsize = 9; + stereo.params.var_window = 15; + stereo.params.d_min = data.calib.vmin; + stereo.params.d_max = data.calib.vmax; + stereo.params.debug = false; + stereo.compute(data.imL, data.imR, disparity); + } + }; + + struct VarCensusSGM : public Algorithm { + VarCensusSGM() { P1 = 12.0f; P2 = 52.0f; } // Tuned to total error 
2.0 + + virtual void run(const MiddleburyData &data, cv::Mat &disparity) override { + StereoVarCensus stereo; + stereo.params.P1 = P1; + stereo.params.P2 = P2; + stereo.params.subpixel = subpixel; + stereo.params.lr_consistency = lr_consistency; + + stereo.params.alpha = 0.33; + stereo.params.beta = 1.0; + stereo.params.var_window = 27; + stereo.params.debug = false; + stereo.params.d_min = data.calib.vmin; + stereo.params.d_max = data.calib.vmax; + stereo.compute(data.imL, data.imR, disparity); + } + }; + +} + +static const std::map<std::string, Algorithm*> algorithms = { + { "censussgm", new Impl::CensusSGM() }, + { "mcensussgm", new Impl::MeanCensusSGM() }, + { "gcensussgm", new Impl::GCensusSGM() }, + { "ecensussgm", new Impl::ECensusSGM() }, + { "stablesgm", new Impl::StableSGM() }, + { "hstablesgm", new Impl::HStableSGM() }, + { "brefcensus", new Impl::BRefCensusSGM() }, + { "hgcensussgm", new Impl::HGCensusSGM() }, + { "hcensussgm", new Impl::HCensusSGM() }, + { "hwcensussgm", new Impl::HWCensusSGM() }, + { "cpcensussgm", new Impl::CPCensusSGM() }, + { "tcensussgm", new Impl::TCensusSGM() }, + { "wcensussgm", new Impl::WCensusSGM() }, + { "misgm", new Impl::MiSGM() }, + { "varcensus", new Impl::VarCensusSGM() }, +}; diff --git a/lib/libstereo/middlebury/main.cpp b/lib/libstereo/middlebury/main.cpp index d1e0529ccbcaa1825b994a7ab06af99795a3278c..75e68a99c0e5c851d5ce1f816c71f7b8994edaaa 100644 --- a/lib/libstereo/middlebury/main.cpp +++ b/lib/libstereo/middlebury/main.cpp @@ -1,132 +1,27 @@ +#include <unistd.h> + #include <opencv2/opencv.hpp> #include <vector> -#include <map> +#include <utility> #include <chrono> #include "middlebury.hpp" -#include "stereo.hpp" - -#include "../../components/common/cpp/include/ftl/config.h" - -/** - * @param disp disparity map - * @param out output parameter - * @param ndisp number of disparities - * - * If ndisp is greater than zero, color mapping is scaled to range [0, ndisp], - * otherwise scaling is automatically set to [0, max(disp)]. - */ -void colorize(const cv::Mat &disp, cv::Mat &out, int ndisp=-1) { - double dmin, dmax; - cv::minMaxLoc(disp, &dmin, &dmax); - cv::Mat dispf, dispc; - disp.convertTo(dispf, CV_32FC1); - dispf.convertTo(dispc, CV_8UC1, (1.0f / (ndisp > 0 ? 
(float) ndisp : dmax)) * 255.0f); - - #if OPENCV_VERSION >= 40102 - cv::applyColorMap(dispc, out, cv::COLORMAP_TURBO); - #else - cv::applyColorMap(dispc, out, cv::COLORMAP_INFERNO); - #endif -} +#include "algorithms.hpp" -//////////////////////////////////////////////////////////////////////////////// +#include <cuda_runtime.h> -struct MiddleburyData { - const std::string name; - const cv::Mat imL; - const cv::Mat imR; - const cv::Mat gtL; - const cv::Mat maskL; - const MiddEvalCalib calib; -}; +#include "../../components/common/cpp/include/ftl/config.h" struct Result { std::vector<MiddEvalResult> results; cv::Mat disp; }; -static void run_censussgm(MiddleburyData &data, cv::Mat &disparity) { - auto stereo = StereoCensusSgm(); - stereo.params.P1 = 4; - stereo.params.P2 = 18; - - stereo.params.d_min = data.calib.vmin; - stereo.params.d_max = data.calib.vmax; - stereo.params.subpixel = 1; - stereo.params.debug = false; - stereo.compute(data.imL, data.imR, disparity); -} - -static void run_misgm(MiddleburyData &data, cv::Mat &disparity) { - auto stereo = StereoMiSgm(); - stereo.params.P1 = 4; - stereo.params.P2 = 18; - - stereo.params.d_min = data.calib.vmin; - stereo.params.d_max = data.calib.vmax; - stereo.params.subpixel = 1; - stereo.params.debug = false; - stereo.compute(data.imL, data.imR, disparity); - stereo.setPrior(disparity); - stereo.compute(data.imL, data.imR, disparity); -} - - -static void run_gtsgm(MiddleburyData &data, cv::Mat &disparity) { - auto stereo = StereoGtSgm(); - stereo.params.P1 = 0.1; - stereo.params.P2 = 1.0; - - stereo.params.d_min = data.calib.vmin; - stereo.params.d_max = data.calib.vmax; - stereo.params.subpixel = 1; - stereo.params.debug = true; - stereo.setPrior(data.gtL); - stereo.compute(data.imL, data.imR, disparity); -} - -static const std::map<std::string, std::function<void(MiddleburyData&, cv::Mat&)>> algorithms = { - { "censussgm", run_censussgm }, - { "misgm", run_misgm }, -}; - -//////////////////////////////////////////////////////////////////////////////// - -MiddleburyData load_input(const std::string &path) { - cv::Mat imL; - cv::Mat imR; - cv::Mat gtL; - cv::Mat maskL; - - imL = cv::imread(path + std::string("im0.png")); - imR = cv::imread(path + std::string("im1.png")); - gtL = read_pfm(path + std::string("disp0.pfm")); - if (gtL.empty()) { - gtL = read_pfm(path + std::string("disp0GT.pfm")); - } - //maskL = cv::imread(argv[1] + std::string("mask0nocc.png"), cv::IMREAD_GRAYSCALE); - auto calib = read_calibration(path + std::string("calib.txt")); - - maskL.create(imL.size(), CV_8UC1); - maskL.setTo(cv::Scalar(255)); - - if (imL.empty() || imR.empty() || gtL.empty() || maskL.empty()) { - throw std::exception(); - } - - std::string name; - name = path.substr(0, path.size()-1); - name = name.substr(name.find_last_of("/") + 1); - - return {name, imL, imR, gtL, maskL, calib}; -} - -Result run_one(MiddleburyData &input, std::function<void(MiddleburyData&, cv::Mat&)> f) { +Result run_one(MiddleburyData &input, Algorithm *stereo) { Result result; - f(input, result.disp); - for (const float t : {4.0f, 1.0f, 0.5f, 0.25f}) { + stereo->run(input, result.disp); + for (const float t : {4.0f, 2.0f, 1.0f, 0.5f}) { result.results.push_back(evaluate(result.disp, input.gtL, input.maskL, t)); } return result; @@ -139,7 +34,7 @@ Result run_one(MiddleburyData &input, std::string name) { std::map<std::string, Result> run_all(MiddleburyData &input) { std::map<std::string, Result> results; - for (auto const& [name, f] : algorithms) { + for (auto &[name, f] : 
algorithms) { results[name] = run_one(input, f); } @@ -148,15 +43,38 @@ std::map<std::string, Result> run_all(MiddleburyData &input) { //////////////////////////////////////////////////////////////////////////////// +/** + * @param disp disparity map + * @param out output parameter + * @param ndisp number of disparities + * + * If ndisp is greater than zero, color mapping is scaled to range [0, ndisp], + * otherwise scaling is automatically set to [0, max(disp)]. + */ +void colorize(const cv::Mat &disp, cv::Mat &out, int ndisp=-1) { + double dmin, dmax; + cv::minMaxLoc(disp, &dmin, &dmax); + cv::Mat dispf, dispc; + disp.convertTo(dispf, CV_32FC1); + dispf.convertTo(dispc, CV_8UC1, (1.0f / (ndisp > 0 ? (float) ndisp : dmax)) * 255.0f); + + #if OPENCV_VERSION >= 40102 + cv::applyColorMap(dispc, out, cv::COLORMAP_TURBO); + #else + cv::applyColorMap(dispc, out, cv::COLORMAP_INFERNO); + #endif +} + cv::Mat visualize_error(const cv::Mat &disp, const cv::Mat >, float t) { cv::Mat err(gt.size(), CV_32FC1); cv::Mat err_color(gt.size(), CV_8UC1); + float bad_t = 0.75; cv::absdiff(gt, disp, err); err.setTo(t, err > t); - err += 1.0f; + err += bad_t; err.setTo(0, disp == 0.0); - err.convertTo(err, CV_8UC1, 255.0f/(t+1.0f)); + err.convertTo(err, CV_8UC1, 255.0f/(t+bad_t)); cv::applyColorMap(err, err_color, cv::COLORMAP_PLASMA); return err_color; @@ -167,8 +85,12 @@ void show_result(std::string name, const MiddleburyData &data, const Result &res cv::Mat disp_color(data.gtL.size(), CV_8UC1); colorize(result.disp, disp_color, data.calib.ndisp); - cv::imshow(data.name + " (disparity) - " + name, disp_color); - cv::imshow(data.name + " (error) - " + name, err_color); + cv::Mat tmp; + float scale = 1280.0f / float(disp_color.cols); + cv::resize(disp_color, tmp, cv::Size(disp_color.cols*scale, disp_color.rows*scale)); + cv::imshow(data.name + " (disparity) - " + name, tmp); + cv::resize(err_color, tmp, cv::Size(err_color.cols*scale, err_color.rows*scale)); + cv::imshow(data.name + " (error) - " + name, tmp); printf("\n%s: %s\n", name.c_str(), data.name.c_str()); int nt = result.results.size(); @@ -193,20 +115,139 @@ void show_result(std::string name, const MiddleburyData &data, const Result &res printf("\n"); printf("%-8s", "RMSE"); - for (auto res : result.results) { printf("%8.2fpx ", res.rms_good); } + for (auto res : result.results) { printf("%8.2fpx ", sqrtf(res.mse_good)); } printf("\n"); + + printf("%s: %2.2fpx, %s: %2.2fpx\n", "avgerr", result.results[0].avgerr, + "RMSE (total)", sqrtf(result.results[0].mse_total)); } -void show_results( const MiddleburyData &data, const std::map<std::string, Result> &results) { - for (auto const& [name, result] : results) { - show_result(name, data, result); - } +void save_result(std::string name, const MiddleburyData &data, const Result &result) { + cv::Mat err_color = visualize_error(result.disp, data.gtL, 4.0); + cv::Mat disp_color(data.gtL.size(), CV_8UC1); + colorize(result.disp, disp_color, data.calib.ndisp); + cv::imwrite(data.name + "_" + name + "_disp.png", disp_color); + cv::imwrite(data.name + "_" + name + "_err.png", err_color); } //////////////////////////////////////////////////////////////////////////////// -using cv::Mat; -using std::vector; +/** + * Calculate disparities and errors for all Middlebury pairs in paths using + * Algorithms in algorithms (if not specified, uses all available algorithms). + * Results visualized and error values printed to stdout. Visualization can be + * saved. 
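+ *
+ * @param paths      dataset directories (one directory per image pair)
+ * @param algorithms names of the algorithms to run; expected to be keys of
+ *                   the algorithms map in algorithms.hpp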
+ */ +void main_default(const std::vector<std::string> &paths, + const std::vector<std::string> &algorithms) { + + std::vector<std::pair<MiddleburyData, std::map<std::string, Result>>> results; + + int devices=0; + cudaGetDeviceCount(&devices); + cudaSetDevice(devices-1); + + for (const auto &dir : paths) { + auto input = load_input(dir); + std::map<std::string, Result> result; + + if (algorithms.size() == 0) { + result = run_all(input); + } + else { + for (const auto &algorithm : algorithms) { + result[algorithm] = run_one(input, algorithm); + } + } + + results.push_back(std::make_pair(input, result)); + } + + for (const auto &[input, data] : results) { + for (const auto &[name, result] : data) { + show_result(name, input, result); + } + } + + printf("\nPress S to save (visualization to working directory), ESC to exit\n"); + + while(uchar k = cv::waitKey()) { + if (k == 's') { + for (const auto &[input, data] : results) { + for (const auto &[name, result] : data) { + save_result(name, input, result); + } + } + std::cout << "Saved\n"; + } + else if (k == 27) { + return; + } + } +} + +/** + * Perform grid search for P1 and P2 with P2 >= P1 minimizing total (R)MSE for + * whole dataset. Should be used with masked ground truth. + * (occluded areas not useful) + */ +void main_gridsearch_p1p2(const std::vector<std::string> &paths, + const std::vector<std::string> &algorithms_str, + float P1_min=0, float P1_max=128, float P2_max=256, float step=1.0) { + + // Expects path to dataset directory. TODO: If empty, try loading paths + // separately (assume each path points to directory containing one dataset + // image pair) + auto dataset = load_directory(paths.at(0)); + + for (auto s : algorithms_str) { + Algorithm* stereo = algorithms.at(s); + // density aftel l/r cheeck will affect MSE, mask should be used to + // ignore occlusions + stereo->lr_consistency = false; + float err_min = INFINITY; + float best_P1 = 0.0f; + float best_P2 = 0.0f; + + // first axis: P1, second axis P2 + std::vector<std::vector<float>> search_results(P1_max/step); + for (auto &row : search_results) { + row = std::vector<float>(P2_max/step, 0.0f); + } + + for (float P1 = P1_min; P1 < P1_max; P1 += step) { + for (float P2 = P1; P2 < P2_max; P2 += step) { // constraint: P2 >= P1 + + stereo->P1 = P1; + stereo->P2 = P2; + float mse = 0.0; + float n = 0.0; + + for (auto &input : dataset) { + auto res = run_one(input, stereo); + auto &res2 = res.results[2]; + + float n_update = res2.n - res2.invalid; + n += n_update; + mse += res2.mse_total*n_update; + } + mse /= n; + search_results.at((P1 - P1_min)/step) + .at((P2 - P1_min)/step) = mse; + + if (mse < err_min) { + //printf("RMSE: %.4f (P1: %.0f, P2: %.0f)\n", sqrtf(mse), P1, P2); + err_min = mse; + best_P1 = P1; + best_P2 = P2; + } + } + } + + printf("Best paramters for %s: P1=%.0f, P2=%0.f\n", s.c_str(), best_P1, best_P2); + // TODO: save full results in csv (heatmap plot?) + } +} int main(int argc, char* argv[]) { #if USE_GPU @@ -215,28 +256,55 @@ int main(int argc, char* argv[]) { std::cout << "CPU VERSION\n"; #endif - if (argc < 2) { - std::cerr << "usage: path [algorithm]\n"; - return 1; + std::vector<std::string> paths; + std::vector<std::string> algorithms; + char test_type = 'd'; + int opt; + + while ((opt = getopt(argc, argv, "d:t:a:")) != -1) { + switch (opt) { + case 'a': + algorithms.push_back(optarg); + break; + + case 'd': + paths.push_back(optarg); + break; + + case 't': + test_type = optarg[0]; + break; + + default: /* '?' 
*/ + printf("Usage: %s [-d directory] [-t test type]\n", argv[0]); + exit(EXIT_FAILURE); + } } - if (argc == 2) { - auto input = load_input(argv[1]); - auto results = run_all(input); - show_results(input, results); - while(cv::waitKey() != 27); - return 0; - } - else if (argc == 3) { - auto input = load_input(argv[1]); - auto result = run_one(input, argv[2]); - show_result(argv[2], input, result); - while(cv::waitKey() != 27); - return 0; + if (paths.size() == 0) { + printf("At least one directory required."); + exit(EXIT_FAILURE); } - // todo: save results + switch (test_type) { + case 'd': + /* Default: calculate disparities with given algorithms (or all if + * not specified) and visualize results */ + main_default(paths, algorithms); + exit(EXIT_SUCCESS); + break; + + case 'g': + /* grid search P1 and P2 for given algorithms + *(parse additional options using getops again?) */ + main_gridsearch_p1p2(paths, algorithms); + exit(EXIT_SUCCESS); + break; + + default: + printf("Unknown test type %c", test_type); + exit(EXIT_FAILURE); + } - std::cerr << "error"; - return 1; + return 0; } diff --git a/lib/libstereo/middlebury/middlebury.cpp b/lib/libstereo/middlebury/middlebury.cpp index e9c607c57030d76cea584c3954935ff2d8431d51..34d2892409f984d78052de944d3aaf1cd0a61b16 100644 --- a/lib/libstereo/middlebury/middlebury.cpp +++ b/lib/libstereo/middlebury/middlebury.cpp @@ -8,6 +8,10 @@ #include <vector> #include <opencv2/core.hpp> +#include <opencv2/imgcodecs.hpp> + +#include <experimental/filesystem> +namespace fs = std::experimental::filesystem; cv::Mat read_pfm(const std::string &filename) { cv::Mat im; @@ -114,11 +118,65 @@ MiddEvalCalib read_calibration(const std::string &filename) { } } + if (calib.vmax == 0) { + calib.vmax = calib.ndisp; + } + return calib; } +MiddleburyData load_input(const fs::path &path) { + cv::Mat imL; + cv::Mat imR; + cv::Mat gtL; + cv::Mat maskL; + + imL = cv::imread(path/"im0.png"); + imR = cv::imread(path/"im1.png"); + gtL = read_pfm(path/"disp0.pfm"); + if (gtL.empty()) { + gtL = read_pfm(path/std::string("disp0GT.pfm")); + } + if (gtL.empty()) { + gtL.create(imL.size(), CV_32FC1); + gtL.setTo(cv::Scalar(0.0f)); + } + + maskL = cv::imread(path/std::string("mask0nocc.png"), cv::IMREAD_GRAYSCALE); + if (maskL.empty()) { + maskL.create(imL.size(), CV_8UC1); + maskL.setTo(cv::Scalar(255)); + } + + auto calib = read_calibration(path/std::string("calib.txt")); + if (imL.empty() || imR.empty() || gtL.empty() || maskL.empty()) { + throw std::exception(); + } + + std::string name = path.filename() == "." ? + path.parent_path().filename().c_str() : + path.filename().c_str(); + + return {name, imL, imR, gtL, maskL, calib}; +} + +MiddleburyData load_input(const std::string &path) { + return load_input(fs::path(path)); +} + +std::vector<MiddleburyData> load_directory(const std::string &path) { + std::vector<MiddleburyData> res; + for(auto& p: fs::directory_iterator(path)) { + try { + res.push_back(load_input(p.path())); + } + catch (...) 
{} + } + return res; +} + MiddEvalResult evaluate(const cv::Mat &disp, const cv::Mat >disp, const cv::Mat &mask, float threshold) { - MiddEvalResult result {0, 0, 0, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}; + MiddEvalResult result {0, 0, 0, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}; if (gtdisp.type() != CV_32FC1) { throw std::exception(); } if (disp.type() != CV_32FC1) { throw std::exception(); } if (!mask.empty() && mask.type() != CV_8UC1) { throw std::exception(); } @@ -134,9 +192,9 @@ MiddEvalResult evaluate(const cv::Mat &disp, const cv::Mat >disp, const cv::Ma int good = 0; int &invalid = result.invalid; - float &rms_good = result.rms_good; - float &rms_bad = result.rms_bad; - + float &mse_good = result.mse_good; + float &mse_bad = result.mse_bad; + float &mse_total = result.mse_total; float serr = 0; // evaldisp.cpp from middlebury SDK @@ -161,12 +219,14 @@ MiddEvalResult evaluate(const cv::Mat &disp, const cv::Mat >disp, const cv::Ma n++; if (valid) { serr += err; + mse_total += err*err; + if (err > threshold) { bad++; - rms_bad += err*err; + mse_bad += err*err; } else { - rms_good += err*err; + mse_good += err*err; good++; } } else {// invalid (i.e. hole in sparse disp map) @@ -176,12 +236,13 @@ MiddEvalResult evaluate(const cv::Mat &disp, const cv::Mat >disp, const cv::Ma } } - rms_bad = sqrtf(rms_bad/float(bad)); - rms_good = sqrtf(rms_good/float(good)); + mse_bad = mse_bad/float(bad); + mse_good = mse_good/float(good); + mse_total = mse_total/float(n - invalid); + result.avgerr = serr/float(n - invalid); result.err_bad = float(bad)/float(n); result.err_invalid = float(invalid)/float(n); result.err_total = float(bad+invalid)/float(n); - float avgErr = serr / float(n - invalid); return result; } diff --git a/lib/libstereo/middlebury/middlebury.hpp b/lib/libstereo/middlebury/middlebury.hpp index 3355860eec91bc4f6f25bf0ae9e6d46156a983da..0816fa0f5ba15fc97abc6a64077b873db6f3033e 100644 --- a/lib/libstereo/middlebury/middlebury.hpp +++ b/lib/libstereo/middlebury/middlebury.hpp @@ -9,8 +9,10 @@ struct MiddEvalResult { float err_bad; float err_invalid; float err_total; - float rms_bad; - float rms_good; + float mse_bad; + float mse_good; + float mse_total; + float avgerr; float threshold; }; @@ -33,6 +35,25 @@ MiddEvalResult evaluate(const cv::Mat &disp, const cv::Mat >, const cv::Mat &m cv::Mat read_pfm(const std::string &filename); +struct MiddleburyData { + const std::string name; + const cv::Mat imL; + const cv::Mat imR; + const cv::Mat gtL; + const cv::Mat maskL; + const MiddEvalCalib calib; +}; + +/** Load one middlebury dataset image + * @param path path to image directory + */ +MiddleburyData load_input(const std::string &path); + +/** Load all middlebury dataset images + * @param path path to middlebury datase directory + */ +std::vector<MiddleburyData> load_directory(const std::string &path); + /** * Add gaussian noise to image. 
* diff --git a/lib/libstereo/src/aggregations/adaptive_penalty.hpp b/lib/libstereo/src/aggregations/adaptive_penalty.hpp index a978d8e36604cbb081a6997a824a257de9b4d2d0..2be27490c0818f3a0db477321a9849a0d7b0afcd 100644 --- a/lib/libstereo/src/aggregations/adaptive_penalty.hpp +++ b/lib/libstereo/src/aggregations/adaptive_penalty.hpp @@ -17,13 +17,13 @@ struct AdaptivePenaltySGM { const DSIIN in; typename Array2D<costtype_t>::Data min_cost_all; - const int P1; + const costtype_t P1; // Provided internally typename DisparitySpaceImage<costtype_t>::DataType out; typename DisparitySpaceImage<costtype_t>::DataType previous; typename DisparitySpaceImage<costtype_t>::DataType updates; - typename Array2D<uint8_t>::Data penalties; + typename Array2D<costtype_t>::Data penalties; struct PathData : BasePathData<costtype_t> { // Custom path data goes here... @@ -35,7 +35,7 @@ struct AdaptivePenaltySGM { struct DirectionData { DisparitySpaceImage<costtype_t> previous; DisparitySpaceImage<costtype_t> updates; - Array2D<uint8_t> penalties; + Array2D<costtype_t> penalties; }; /* Initialise buffers for a new path direction. */ diff --git a/lib/libstereo/src/aggregations/cp_sgm.hpp b/lib/libstereo/src/aggregations/cp_sgm.hpp new file mode 100644 index 0000000000000000000000000000000000000000..15efbad15bbaf9e41c5a435ea892a97828afa4da --- /dev/null +++ b/lib/libstereo/src/aggregations/cp_sgm.hpp @@ -0,0 +1,128 @@ +#ifndef _FTL_LIBSTEREO_AGGREGATIONS_CONFPRIOR_HPP_ +#define _FTL_LIBSTEREO_AGGREGATIONS_CONFPRIOR_HPP_ + +#include "../dsi.hpp" +#include "../array2d.hpp" + +namespace ftl { +namespace stereo { +namespace aggregations { + +template <typename DSIIN> +struct ConfidencePriorSGM { + typedef typename DSIIN::Type Type; + typedef typename DSIIN::Type costtype_t; + + // Provided externally + const DSIIN in; + typename Array2D<costtype_t>::Data min_cost_all; + typename Array2D<uchar>::Data conf_prior; + + const costtype_t P1; + const costtype_t P2; + + // Provided internally + typename DisparitySpaceImage<costtype_t>::DataType out; + typename DisparitySpaceImage<costtype_t>::DataType previous; + typename DisparitySpaceImage<costtype_t>::DataType updates; + + struct PathData : BasePathData<costtype_t> { + // Custom path data goes here... + costtype_t previous_cost_min; + costtype_t *previous; + costtype_t *current; + }; + + struct DirectionData { + DisparitySpaceImage<costtype_t> previous; + DisparitySpaceImage<costtype_t> updates; + }; + + /* Initialise buffers for a new path direction. 
*/ + void direction(DirectionData &data, DisparitySpaceImage<costtype_t> &buffer) { + out = buffer.data(); + data.previous.create(out.width+out.height, 1, out.disp_min, out.disp_max); + data.updates.create(out.width+out.height, 1, out.disp_min, out.disp_max); + previous = data.previous.data(); + updates = data.updates.data(); + } + + /* Reset buffers to start a new path */ + __host__ __device__ inline void startPath(ushort2 pixel, ushort thread, ushort stride, PathData &data) { + data.previous = &previous(0,data.pathix,previous.disp_min); + data.current = &updates(0,data.pathix,updates.disp_min); + + for (int d=thread; d<=previous.disp_min; d+=stride) { + data.previous[d] = 0; + } + + // To ensure all threads have finished clearing the buffer + #ifdef __CUDA_ARCH__ + __syncwarp(); + #endif + } + + __host__ __device__ inline void endPath(ushort2 pixel, ushort thread, ushort stride, PathData &data) {} + + /* Main SGM cost aggregation function */ + __host__ __device__ inline costtype_t calculateCost(ushort2 pixel, int d, costtype_t *previous, int size, costtype_t previous_cost_min) { + const costtype_t L_min = + min(previous[d], + min(costtype_t(previous_cost_min + P2), + min(costtype_t(previous[min(d+1,size)] + P1), + costtype_t(previous[max(d-1,0)] + P1)) + ) + ); + + // Note: This clamping to min 0 does make a tiny difference + auto cost = L_min + in(pixel.y,pixel.x,d+in.disp_min); + return (cost > previous_cost_min) ? cost - previous_cost_min : 0; + } + + /* Stride over each disparity and calculate minimum cost */ + __host__ __device__ inline void operator()(ushort2 pixel, ushort thread, ushort stride, PathData &data) { + #ifndef __CUDA_ARCH__ + using std::min; + using std::max; + #endif + + const int d_stop = int(out.disp_max)-int(out.disp_min); + + costtype_t min_cost = 255; + + // For each disparity (striding the threads) + uchar p = conf_prior(pixel.y, pixel.x); + for (int d=thread; d<=d_stop; d+=stride) { + auto c = (p == 0) ? calculateCost(pixel, d, data.previous, d_stop, data.previous_cost_min) : (p == d+in.disp_min) ? 0 : P2; + + out(pixel.y,pixel.x,d+in.disp_min) += c; + data.current[d] = c; + + // Record the smallest disparity cost for this pixel + min_cost = (c < min_cost) ? c : min_cost; + } + + // WARP Aggregate on GPU only (assumes stride = 1 on CPU) + // Min cost from each thread must be combined for overall minimum + // Each thread then obtains thread global minimum + #ifdef __CUDA_ARCH__ + min_cost = warpMin(min_cost); + #else + // add assert + #endif + + data.previous_cost_min = min_cost; + min_cost_all(pixel.y,pixel.x) += min_cost; // atomic? 
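+		// (Assumed not to need an atomic here as long as each aggregation
+		// direction updates min_cost_all separately; concurrently running
+		// directions would require atomicAdd.)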
+ + // Swap current and previous cost buffers + costtype_t *tmp_ptr = const_cast<costtype_t *>(data.previous); + data.previous = data.current; + data.current = tmp_ptr; + } +}; + +} +} +} + +#endif diff --git a/lib/libstereo/src/aggregations/standard_sgm.hpp b/lib/libstereo/src/aggregations/standard_sgm.hpp index 2830351bcf327efda973a854029f63cec5c59bb4..32b41abcc2baab03a92905d3b87173e9fb2677cc 100644 --- a/lib/libstereo/src/aggregations/standard_sgm.hpp +++ b/lib/libstereo/src/aggregations/standard_sgm.hpp @@ -17,8 +17,8 @@ struct StandardSGM { const DSIIN in; typename Array2D<costtype_t>::Data min_cost_all; - const int P1; - const int P2; + const costtype_t P1; + const costtype_t P2; // Provided internally typename DisparitySpaceImage<costtype_t>::DataType out; diff --git a/lib/libstereo/src/algorithms/brefcensussgm.cu b/lib/libstereo/src/algorithms/brefcensussgm.cu new file mode 100644 index 0000000000000000000000000000000000000000..eea64c294e16957ccfa4b5e25c4256d89812fbf8 --- /dev/null +++ b/lib/libstereo/src/algorithms/brefcensussgm.cu @@ -0,0 +1,48 @@ +#include "stereo.hpp" +#include "stereosgm.hpp" +#include "../costs/census.hpp" +#include <opencv2/cudaimgproc.hpp> +#include <opencv2/highgui.hpp> + +struct StereoBRefCensusSgm::Impl : public StereoSgm<CensusMatchingCost, StereoBRefCensusSgm::Parameters> { + Array2D<uchar> l; + Array2D<uchar> r; + Array2D<uchar> bl; + Array2D<uchar> br; + + Impl(StereoBRefCensusSgm::Parameters ¶ms, int width, int height, int dmin, int dmax) : + StereoSgm(params, width, height, dmin, dmax), l(width, height), r(width, height), + bl(width, height), br(width, height) {} +}; + +StereoBRefCensusSgm::StereoBRefCensusSgm() : impl_(nullptr) { + impl_ = new Impl(params, 0, 0, 0, 0); +} + +void StereoBRefCensusSgm::compute(cv::InputArray l, cv::InputArray r, cv::OutputArray disparity) { + + //cudaSetDevice(0); + + if (l.rows() != impl_->cost.height() || r.cols() != impl_->cost.width()) { + delete impl_; impl_ = nullptr; + impl_ = new Impl(params, l.cols(), l.rows(), params.d_min, params.d_max); + } + + mat2gray(l, impl_->l); + mat2gray(r, impl_->r); + + cv::cuda::bilateralFilter(impl_->l.toGpuMat(), impl_->bl.toGpuMat(), 5, 50, 100); + cv::cuda::bilateralFilter(impl_->r.toGpuMat(), impl_->br.toGpuMat(), 5, 50, 100); + + impl_->cost.set(impl_->bl, impl_->br, impl_->l, impl_->r); + + cudaSafeCall(cudaDeviceSynchronize()); + impl_->compute(disparity); +} + +StereoBRefCensusSgm::~StereoBRefCensusSgm() { + if (impl_) { + delete impl_; + impl_ = nullptr; + } +} diff --git a/lib/libstereo/src/algorithms/censussgm.cu b/lib/libstereo/src/algorithms/censussgm.cu new file mode 100644 index 0000000000000000000000000000000000000000..3c15e5275f2eae48a07f2f7758c77a7fdb333c97 --- /dev/null +++ b/lib/libstereo/src/algorithms/censussgm.cu @@ -0,0 +1,40 @@ +#include "stereo.hpp" +#include "stereosgm.hpp" +#include "../costs/census.hpp" + +struct StereoCensusSgm::Impl : public StereoSgm<CensusMatchingCost, StereoCensusSgm::Parameters> { + Array2D<uchar> l; + Array2D<uchar> r; + + Impl(StereoCensusSgm::Parameters ¶ms, int width, int height, int dmin, int dmax) : + StereoSgm(params, width, height, dmin, dmax), l(width, height), r(width, height) {} +}; + +StereoCensusSgm::StereoCensusSgm() : impl_(nullptr) { + impl_ = new Impl(params, 0, 0, 0, 0); +} + +void StereoCensusSgm::compute(cv::InputArray l, cv::InputArray r, cv::OutputArray disparity) { + + //cudaSetDevice(0); + + if (l.rows() != impl_->cost.height() || r.cols() != impl_->cost.width()) { + delete impl_; impl_ = nullptr; + 
impl_ = new Impl(params, l.cols(), l.rows(), params.d_min, params.d_max); + } + + mat2gray(l, impl_->l); + mat2gray(r, impl_->r); + impl_->cost.setPattern(params.pattern); + impl_->cost.set(impl_->l, impl_->r); + + cudaSafeCall(cudaDeviceSynchronize()); + impl_->compute(disparity); +} + +StereoCensusSgm::~StereoCensusSgm() { + if (impl_) { + delete impl_; + impl_ = nullptr; + } +} diff --git a/lib/libstereo/src/algorithms/excensussgm.cu b/lib/libstereo/src/algorithms/excensussgm.cu new file mode 100644 index 0000000000000000000000000000000000000000..4ea34333e0358145fca859c06fd7e41048004174 --- /dev/null +++ b/lib/libstereo/src/algorithms/excensussgm.cu @@ -0,0 +1,101 @@ +#include "stereo.hpp" +#include "stereosgm.hpp" +#include "../costs/census.hpp" +#include "costs/scale.hpp" + +#include <opencv2/cudafilters.hpp> + +static void variance_mask(cv::InputArray in, cv::OutputArray out, int wsize=3) { + if (in.isGpuMat() && out.isGpuMat()) { + cv::cuda::GpuMat im; + cv::cuda::GpuMat im2; + cv::cuda::GpuMat mean; + cv::cuda::GpuMat mean2; + + mean.create(in.size(), CV_32FC1); + mean2.create(in.size(), CV_32FC1); + im2.create(in.size(), CV_32FC1); + + if (in.type() != CV_32FC1) { + in.getGpuMat().convertTo(im, CV_32FC1); + } + else { + im = in.getGpuMat(); + } + + cv::cuda::multiply(im, im, im2); + auto filter = cv::cuda::createBoxFilter(CV_32FC1, CV_32FC1, cv::Size(wsize,wsize)); + filter->apply(im, mean); // E[X] + filter->apply(im2, mean2); // E[X^2] + cv::cuda::multiply(mean, mean, mean); // (E[X])^2 + + // NOTE: floating point accuracy in subtraction + // (cv::cuda::createBoxFilter only supports 8 bit integer types) + cv::cuda::subtract(mean2, mean, out.getGpuMatRef()); // E[X^2] - (E[X])^2 + } + else { throw std::exception(); /* todo CPU version */ } +} + + +typedef unsigned short CostType; +typedef WeightedCost<ExpandingCensusMatchingCost, CostType> MatchingCost; + + +struct StereoExCensusSgm::Impl : public StereoSgm<MatchingCost, StereoExCensusSgm::Parameters> { + Array2D<uchar> l; + Array2D<uchar> r; + Array2D<float> variance; + Array2D<float> variance_r; + ExpandingCensusMatchingCost excensus; + + Impl(StereoExCensusSgm::Parameters ¶ms, int width, int height, int dmin, int dmax) : + StereoSgm(params, width, height, dmin, dmax), l(width, height), r(width, height), + variance(width,height), variance_r(width,height), + excensus(width, height, dmin, dmax) { + cost.setCost(excensus); + cost.setWeights(variance, variance_r); + } +}; + +StereoExCensusSgm::StereoExCensusSgm() : impl_(nullptr) { + impl_ = new Impl(params, 0, 0, 0, 0); +} + +void StereoExCensusSgm::compute(cv::InputArray l, cv::InputArray r, cv::OutputArray disparity) { + + //cudaSetDevice(0); + + if (l.rows() != impl_->cost.height() || r.cols() != impl_->cost.width()) { + delete impl_; impl_ = nullptr; + impl_ = new Impl(params, l.cols(), l.rows(), params.d_min, params.d_max); + } + + mat2gray(l, impl_->l); + mat2gray(r, impl_->r); + + cv::cuda::GpuMat var_l = impl_->variance.toGpuMat(); + variance_mask(impl_->l.toGpuMat(), var_l, params.var_window); + cv::cuda::GpuMat var_r = impl_->variance_r.toGpuMat(); + variance_mask(impl_->r.toGpuMat(), var_r, params.var_window); + + cv::cuda::normalize(var_l, var_l, params.alpha, params.beta, cv::NORM_MINMAX, -1); + cv::cuda::normalize(var_r, var_r, params.alpha, params.beta, cv::NORM_MINMAX, -1); + + impl_->excensus.setPattern(params.pattern); + impl_->excensus.set(impl_->l, impl_->r); + impl_->cost.set(); + + cudaSafeCall(cudaDeviceSynchronize()); + impl_->compute(disparity); + + 
Array2D<ExpandingCensusMatchingCost::Type> dsitmp_dev(l.cols(), l.rows()); + dsi_slice(impl_->cost, impl_->wta.disparity, dsitmp_dev); + show_dsi_slice(dsitmp_dev.toGpuMat()); +} + +StereoExCensusSgm::~StereoExCensusSgm() { + if (impl_) { + delete impl_; + impl_ = nullptr; + } +} diff --git a/lib/libstereo/src/algorithms/hcensussgm.cu b/lib/libstereo/src/algorithms/hcensussgm.cu new file mode 100644 index 0000000000000000000000000000000000000000..7cd4c00d799f339a1bdbcadf216661b95ff6b663 --- /dev/null +++ b/lib/libstereo/src/algorithms/hcensussgm.cu @@ -0,0 +1,131 @@ +#include "stereo.hpp" +#include "stereosgm.hpp" +#include "../costs/census.hpp" +#include "../costs/dual.hpp" +#include <opencv2/cudawarping.hpp> +#include <opencv2/cudafilters.hpp> +#include <opencv2/highgui.hpp> + +typedef MultiCostsWeighted<MiniCensusMatchingCost,3> MatchingCost; + +static void variance_mask(cv::InputArray in, cv::OutputArray out, int wsize=3) { + if (in.isGpuMat() && out.isGpuMat()) { + cv::cuda::GpuMat im; + cv::cuda::GpuMat im2; + cv::cuda::GpuMat mean; + cv::cuda::GpuMat mean2; + + mean.create(in.size(), CV_32FC1); + mean2.create(in.size(), CV_32FC1); + im2.create(in.size(), CV_32FC1); + + if (in.type() != CV_32FC1) { + in.getGpuMat().convertTo(im, CV_32FC1); + } + else { + im = in.getGpuMat(); + } + + cv::cuda::multiply(im, im, im2); + auto filter = cv::cuda::createBoxFilter(CV_32FC1, CV_32FC1, cv::Size(wsize,wsize)); + filter->apply(im, mean); // E[X] + filter->apply(im2, mean2); // E[X^2] + cv::cuda::multiply(mean, mean, mean); // (E[X])^2 + + // NOTE: floating point accuracy in subtraction + // (cv::cuda::createBoxFilter only supports 8 bit integer types) + cv::cuda::subtract(mean2, mean, out.getGpuMatRef()); // E[X^2] - (E[X])^2 + } + else { throw std::exception(); /* todo CPU version */ } +} + +struct StereoHierCensusSgm::Impl : public StereoSgm<MatchingCost, StereoHierCensusSgm::Parameters> { + MiniCensusMatchingCost cost_fine; + MiniCensusMatchingCost cost_medium; + MiniCensusMatchingCost cost_coarse; + Array2D<uchar> l; + Array2D<uchar> r; + Array2D<float> var_fine; + Array2D<float> var_medium; + Array2D<float> var_coarse; + + Impl(StereoHierCensusSgm::Parameters ¶ms, int width, int height, int dmin, int dmax) : + StereoSgm(params, width, height, dmin, dmax), + cost_fine(width, height, dmin, dmax), + cost_medium(width, height, dmin, dmax), + cost_coarse(width, height, dmin, dmax), + l(width, height), r(width, height), + var_fine(width, height), + var_medium(width, height), + var_coarse(width, height) { + cost.add(0, cost_fine, var_fine); + cost.add(1, cost_medium, var_medium); + cost.add(2, cost_coarse, var_coarse); + } +}; + +StereoHierCensusSgm::StereoHierCensusSgm() : impl_(nullptr) { + impl_ = new Impl(params, 0, 0, 0, 0); +} + +void StereoHierCensusSgm::compute(cv::InputArray l, cv::InputArray r, cv::OutputArray disparity) { + + //cudaSetDevice(0); + + if (l.rows() != impl_->cost.height() || r.cols() != impl_->cost.width()) { + delete impl_; impl_ = nullptr; + impl_ = new Impl(params, l.cols(), l.rows(), params.d_min, params.d_max); + } + + mat2gray(l, impl_->l); + mat2gray(r, impl_->r); + timer_set(); + + static constexpr int DOWNSCALE_MEDIUM = 4; + static constexpr int DOWNSCALE_COARSE = 6; + + Array2D<uchar> medium_l(l.cols()/DOWNSCALE_MEDIUM, l.rows()/DOWNSCALE_MEDIUM); + Array2D<uchar> medium_r(r.cols()/DOWNSCALE_MEDIUM, r.rows()/DOWNSCALE_MEDIUM); + Array2D<uchar> coarse_l(l.cols()/DOWNSCALE_COARSE, l.rows()/DOWNSCALE_COARSE); + Array2D<uchar> coarse_r(r.cols()/DOWNSCALE_COARSE, 
r.rows()/DOWNSCALE_COARSE); + cv::cuda::resize(impl_->l.toGpuMat(), medium_l.toGpuMat(), cv::Size(medium_l.width, medium_r.height)); + cv::cuda::resize(impl_->r.toGpuMat(), medium_r.toGpuMat(), cv::Size(medium_r.width, medium_r.height)); + cv::cuda::resize(impl_->l.toGpuMat(), coarse_l.toGpuMat(), cv::Size(coarse_l.width, coarse_l.height)); + cv::cuda::resize(impl_->r.toGpuMat(), coarse_r.toGpuMat(), cv::Size(coarse_r.width, coarse_r.height)); + + cv::cuda::GpuMat var_fine = impl_->var_fine.toGpuMat(); + variance_mask(impl_->l.toGpuMat(), var_fine, params.var_window); + cv::cuda::normalize(var_fine, var_fine, params.alpha, params.beta, cv::NORM_MINMAX, -1); + + cv::cuda::GpuMat var_medium; // = impl_->var_medium.toGpuMat(); + variance_mask(medium_l.toGpuMat(), var_medium, params.var_window); + cv::cuda::normalize(var_medium, var_medium, params.alpha, params.beta, cv::NORM_MINMAX, -1); + cv::cuda::resize(var_medium, impl_->var_medium.toGpuMat(), cv::Size(l.cols(), l.rows())); + + cv::cuda::GpuMat var_coarse; // = impl_->var_coarse.toGpuMat(); + variance_mask(coarse_l.toGpuMat(), var_coarse, params.var_window); + cv::cuda::normalize(var_coarse, var_coarse, params.alpha, params.beta, cv::NORM_MINMAX, -1); + cv::cuda::resize(var_coarse, impl_->var_coarse.toGpuMat(), cv::Size(l.cols(), l.rows())); + + cv::Mat tmp; + impl_->var_coarse.toGpuMat().download(tmp); + cv::imshow("Var", tmp); + + // CT + impl_->cost_fine.set(impl_->l, impl_->r); + impl_->cost_medium.set(impl_->l, impl_->r, medium_l, medium_r); + impl_->cost_coarse.set(impl_->l, impl_->r, coarse_l, coarse_r); + impl_->cost.set(); + impl_->compute(disparity); + + Array2D<ExpandingCensusMatchingCost::Type> dsitmp_dev(l.cols(), l.rows()); + dsi_slice(impl_->cost, impl_->wta.disparity, dsitmp_dev); + show_dsi_slice(dsitmp_dev.toGpuMat()); +} + +StereoHierCensusSgm::~StereoHierCensusSgm() { + if (impl_) { + delete impl_; + impl_ = nullptr; + } +} diff --git a/lib/libstereo/src/algorithms/hstablesgm.cu b/lib/libstereo/src/algorithms/hstablesgm.cu new file mode 100644 index 0000000000000000000000000000000000000000..f7e9732ea8e331e558ad33333e799f4507ed3808 --- /dev/null +++ b/lib/libstereo/src/algorithms/hstablesgm.cu @@ -0,0 +1,129 @@ +#include "stereo.hpp" +#include "stereosgm.hpp" +#include "../costs/stable.hpp" +#include "../costs/dual.hpp" +#include <opencv2/cudawarping.hpp> +#include <opencv2/cudafilters.hpp> +#include <opencv2/highgui.hpp> + +typedef MultiCostsWeighted<StableMatchingCost,3> MatchingCost; + +static void variance_mask(cv::InputArray in, cv::OutputArray out, int wsize=3) { + if (in.isGpuMat() && out.isGpuMat()) { + cv::cuda::GpuMat im; + cv::cuda::GpuMat im2; + cv::cuda::GpuMat mean; + cv::cuda::GpuMat mean2; + + mean.create(in.size(), CV_32FC1); + mean2.create(in.size(), CV_32FC1); + im2.create(in.size(), CV_32FC1); + + if (in.type() != CV_32FC1) { + in.getGpuMat().convertTo(im, CV_32FC1); + } + else { + im = in.getGpuMat(); + } + + cv::cuda::multiply(im, im, im2); + auto filter = cv::cuda::createBoxFilter(CV_32FC1, CV_32FC1, cv::Size(wsize,wsize)); + filter->apply(im, mean); // E[X] + filter->apply(im2, mean2); // E[X^2] + cv::cuda::multiply(mean, mean, mean); // (E[X])^2 + + // NOTE: floating point accuracy in subtraction + // (cv::cuda::createBoxFilter only supports 8 bit integer types) + cv::cuda::subtract(mean2, mean, out.getGpuMatRef()); // E[X^2] - (E[X])^2 + } + else { throw std::exception(); /* todo CPU version */ } +} + +struct StereoHStableSgm::Impl : public StereoSgm<MatchingCost, StereoHStableSgm::Parameters> 
{ + StableMatchingCost cost_fine; + StableMatchingCost cost_medium; + StableMatchingCost cost_coarse; + Array2D<uchar> l; + Array2D<uchar> r; + Array2D<float> var_fine; + Array2D<float> var_medium; + Array2D<float> var_coarse; + + Impl(StereoHStableSgm::Parameters ¶ms, int width, int height, int dmin, int dmax) : + StereoSgm(params, width, height, dmin, dmax), + cost_fine(width, height, dmin, dmax), + cost_medium(width, height, dmin, dmax), + cost_coarse(width, height, dmin, dmax), + l(width, height), r(width, height), + var_fine(width, height), + var_medium(width, height), + var_coarse(width, height) { + cost.add(0, cost_fine, var_fine); + cost.add(1, cost_medium, var_medium); + cost.add(2, cost_coarse, var_coarse); + } +}; + +StereoHStableSgm::StereoHStableSgm() : impl_(nullptr) { + impl_ = new Impl(params, 0, 0, 0, 0); +} + +void StereoHStableSgm::compute(cv::InputArray l, cv::InputArray r, cv::OutputArray disparity) { + + //cudaSetDevice(0); + + if (l.rows() != impl_->cost.height() || r.cols() != impl_->cost.width()) { + delete impl_; impl_ = nullptr; + impl_ = new Impl(params, l.cols(), l.rows(), params.d_min, params.d_max); + } + + mat2gray(l, impl_->l); + mat2gray(r, impl_->r); + timer_set(); + + static constexpr int DOWNSCALE_MEDIUM = 4; + static constexpr int DOWNSCALE_COARSE = 6; + + Array2D<uchar> medium_l(l.cols()/DOWNSCALE_MEDIUM, l.rows()/DOWNSCALE_MEDIUM); + Array2D<uchar> medium_r(r.cols()/DOWNSCALE_MEDIUM, r.rows()/DOWNSCALE_MEDIUM); + Array2D<uchar> coarse_l(l.cols()/DOWNSCALE_COARSE, l.rows()/DOWNSCALE_COARSE); + Array2D<uchar> coarse_r(r.cols()/DOWNSCALE_COARSE, r.rows()/DOWNSCALE_COARSE); + cv::cuda::resize(impl_->l.toGpuMat(), medium_l.toGpuMat(), cv::Size(medium_l.width, medium_r.height)); + cv::cuda::resize(impl_->r.toGpuMat(), medium_r.toGpuMat(), cv::Size(medium_r.width, medium_r.height)); + cv::cuda::resize(impl_->l.toGpuMat(), coarse_l.toGpuMat(), cv::Size(coarse_l.width, coarse_l.height)); + cv::cuda::resize(impl_->r.toGpuMat(), coarse_r.toGpuMat(), cv::Size(coarse_r.width, coarse_r.height)); + + cv::cuda::GpuMat var_fine = impl_->var_fine.toGpuMat(); + variance_mask(impl_->l.toGpuMat(), var_fine, params.var_window); + cv::cuda::normalize(var_fine, var_fine, params.alpha, params.beta, cv::NORM_MINMAX, -1); + + cv::cuda::GpuMat var_medium; // = impl_->var_medium.toGpuMat(); + variance_mask(medium_l.toGpuMat(), var_medium, params.var_window); + cv::cuda::normalize(var_medium, var_medium, params.alpha, params.beta, cv::NORM_MINMAX, -1); + cv::cuda::resize(var_medium, impl_->var_medium.toGpuMat(), cv::Size(l.cols(), l.rows())); + + cv::cuda::GpuMat var_coarse; // = impl_->var_coarse.toGpuMat(); + variance_mask(coarse_l.toGpuMat(), var_coarse, params.var_window); + cv::cuda::normalize(var_coarse, var_coarse, params.alpha, params.beta, cv::NORM_MINMAX, -1); + cv::cuda::resize(var_coarse, impl_->var_coarse.toGpuMat(), cv::Size(l.cols(), l.rows())); + + cv::Mat tmp; + impl_->var_coarse.toGpuMat().download(tmp); + cv::imshow("Var", tmp); + + impl_->cost_fine.generateFilterMask(params.wsize, 16); + impl_->cost_medium.setFilter(impl_->cost_fine.getFilter()); + impl_->cost_coarse.setFilter(impl_->cost_fine.getFilter()); + impl_->cost_fine.set(impl_->l, impl_->r); + impl_->cost_medium.set(medium_l, medium_r, l.cols(), l.rows()); + impl_->cost_coarse.set(coarse_l, coarse_r, l.cols(), l.rows()); + impl_->cost.set(); + impl_->compute(disparity); +} + +StereoHStableSgm::~StereoHStableSgm() { + if (impl_) { + delete impl_; + impl_ = nullptr; + } +} diff --git 
a/lib/libstereo/src/algorithms/hwcensussgm.cu b/lib/libstereo/src/algorithms/hwcensussgm.cu new file mode 100644 index 0000000000000000000000000000000000000000..e1a42e86092e34301eadcc7637d7fd8e07f886a0 --- /dev/null +++ b/lib/libstereo/src/algorithms/hwcensussgm.cu @@ -0,0 +1,131 @@ +#include "stereo.hpp" +#include "stereosgm.hpp" +#include "../costs/census.hpp" +#include "../costs/dual.hpp" +#include <opencv2/cudawarping.hpp> +#include <opencv2/cudafilters.hpp> +#include <opencv2/highgui.hpp> + +typedef MultiCostsWeighted<WeightedCensusMatchingCost,3> MatchingCost; + +static void variance_mask(cv::InputArray in, cv::OutputArray out, int wsize=3) { + if (in.isGpuMat() && out.isGpuMat()) { + cv::cuda::GpuMat im; + cv::cuda::GpuMat im2; + cv::cuda::GpuMat mean; + cv::cuda::GpuMat mean2; + + mean.create(in.size(), CV_32FC1); + mean2.create(in.size(), CV_32FC1); + im2.create(in.size(), CV_32FC1); + + if (in.type() != CV_32FC1) { + in.getGpuMat().convertTo(im, CV_32FC1); + } + else { + im = in.getGpuMat(); + } + + cv::cuda::multiply(im, im, im2); + auto filter = cv::cuda::createBoxFilter(CV_32FC1, CV_32FC1, cv::Size(wsize,wsize)); + filter->apply(im, mean); // E[X] + filter->apply(im2, mean2); // E[X^2] + cv::cuda::multiply(mean, mean, mean); // (E[X])^2 + + // NOTE: floating point accuracy in subtraction + // (cv::cuda::createBoxFilter only supports 8 bit integer types) + cv::cuda::subtract(mean2, mean, out.getGpuMatRef()); // E[X^2] - (E[X])^2 + } + else { throw std::exception(); /* todo CPU version */ } +} + +struct StereoHierWeightCensusSgm::Impl : public StereoSgm<MatchingCost, StereoHierWeightCensusSgm::Parameters> { + WeightedCensusMatchingCost cost_fine; + WeightedCensusMatchingCost cost_medium; + WeightedCensusMatchingCost cost_coarse; + Array2D<uchar> l; + Array2D<uchar> r; + Array2D<float> var_fine; + Array2D<float> var_medium; + Array2D<float> var_coarse; + + Impl(StereoHierWeightCensusSgm::Parameters ¶ms, int width, int height, int dmin, int dmax) : + StereoSgm(params, width, height, dmin, dmax), + cost_fine(width, height, dmin, dmax), + cost_medium(width, height, dmin, dmax), + cost_coarse(width, height, dmin, dmax), + l(width, height), r(width, height), + var_fine(width, height), + var_medium(width, height), + var_coarse(width, height) { + cost.add(0, cost_fine, var_fine); + cost.add(1, cost_medium, var_medium); + cost.add(2, cost_coarse, var_coarse); + } +}; + +StereoHierWeightCensusSgm::StereoHierWeightCensusSgm() : impl_(nullptr) { + impl_ = new Impl(params, 0, 0, 0, 0); +} + +void StereoHierWeightCensusSgm::compute(cv::InputArray l, cv::InputArray r, cv::OutputArray disparity) { + + //cudaSetDevice(0); + + if (l.rows() != impl_->cost.height() || r.cols() != impl_->cost.width()) { + delete impl_; impl_ = nullptr; + impl_ = new Impl(params, l.cols(), l.rows(), params.d_min, params.d_max); + } + + mat2gray(l, impl_->l); + mat2gray(r, impl_->r); + timer_set(); + + static constexpr int DOWNSCALE_MEDIUM = 4; + static constexpr int DOWNSCALE_COARSE = 6; + + Array2D<uchar> medium_l(l.cols()/DOWNSCALE_MEDIUM, l.rows()/DOWNSCALE_MEDIUM); + Array2D<uchar> medium_r(r.cols()/DOWNSCALE_MEDIUM, r.rows()/DOWNSCALE_MEDIUM); + Array2D<uchar> coarse_l(l.cols()/DOWNSCALE_COARSE, l.rows()/DOWNSCALE_COARSE); + Array2D<uchar> coarse_r(r.cols()/DOWNSCALE_COARSE, r.rows()/DOWNSCALE_COARSE); + cv::cuda::resize(impl_->l.toGpuMat(), medium_l.toGpuMat(), cv::Size(medium_l.width, medium_r.height)); + cv::cuda::resize(impl_->r.toGpuMat(), medium_r.toGpuMat(), cv::Size(medium_r.width, medium_r.height)); + 
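+	// NOTE: this builds a three-level pyramid: fine (full resolution), medium
+	// (1/4, DOWNSCALE_MEDIUM) and coarse (1/6, DOWNSCALE_COARSE); e.g. a
+	// hypothetical 1280x720 input gives 320x180 and 213x120 levels. The three
+	// census costs are later blended per pixel by MultiCostsWeighted as
+	//   cost = w0*c0 + (1-w0)*w1*c1 + (1-w0)*(1-w1)*w2*c2
+	// with w0..w2 the fine/medium/coarse variance maps normalised to
+	// [alpha, beta], so low-texture regions rely more on the coarser levels.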
cv::cuda::resize(impl_->l.toGpuMat(), coarse_l.toGpuMat(), cv::Size(coarse_l.width, coarse_l.height)); + cv::cuda::resize(impl_->r.toGpuMat(), coarse_r.toGpuMat(), cv::Size(coarse_r.width, coarse_r.height)); + + cv::cuda::GpuMat var_fine = impl_->var_fine.toGpuMat(); + variance_mask(impl_->l.toGpuMat(), var_fine, params.var_window); + cv::cuda::normalize(var_fine, var_fine, params.alpha, params.beta, cv::NORM_MINMAX, -1); + + cv::cuda::GpuMat var_medium; // = impl_->var_medium.toGpuMat(); + variance_mask(medium_l.toGpuMat(), var_medium, params.var_window); + cv::cuda::normalize(var_medium, var_medium, params.alpha, params.beta, cv::NORM_MINMAX, -1); + cv::cuda::resize(var_medium, impl_->var_medium.toGpuMat(), cv::Size(l.cols(), l.rows())); + + cv::cuda::GpuMat var_coarse; // = impl_->var_coarse.toGpuMat(); + variance_mask(coarse_l.toGpuMat(), var_coarse, params.var_window); + cv::cuda::normalize(var_coarse, var_coarse, params.alpha, params.beta, cv::NORM_MINMAX, -1); + cv::cuda::resize(var_coarse, impl_->var_coarse.toGpuMat(), cv::Size(l.cols(), l.rows())); + + cv::Mat tmp; + impl_->var_coarse.toGpuMat().download(tmp); + cv::imshow("Var", tmp); + + // CT + impl_->cost_fine.set(impl_->l, impl_->r); + impl_->cost_medium.set(impl_->l, impl_->r, medium_l, medium_r); + impl_->cost_coarse.set(impl_->l, impl_->r, coarse_l, coarse_r); + impl_->cost.set(); + impl_->compute(disparity); + + Array2D<ExpandingCensusMatchingCost::Type> dsitmp_dev(l.cols(), l.rows()); + dsi_slice(impl_->cost, impl_->wta.disparity, dsitmp_dev); + show_dsi_slice(dsitmp_dev.toGpuMat()); +} + +StereoHierWeightCensusSgm::~StereoHierWeightCensusSgm() { + if (impl_) { + delete impl_; + impl_ = nullptr; + } +} diff --git a/lib/libstereo/src/algorithms/meancensussgm.cu b/lib/libstereo/src/algorithms/meancensussgm.cu new file mode 100644 index 0000000000000000000000000000000000000000..7058ba79f8f9432a61e8ac1e009cedd8d36ab52c --- /dev/null +++ b/lib/libstereo/src/algorithms/meancensussgm.cu @@ -0,0 +1,48 @@ +#include "stereo.hpp" +#include "stereosgm.hpp" +#include "../costs/census.hpp" +#include <opencv2/cudafilters.hpp> + +struct StereoMeanCensusSgm::Impl : public StereoSgm<CensusMatchingCost, StereoMeanCensusSgm::Parameters> { + Array2D<uchar> l; + Array2D<uchar> r; + Array2D<uchar> ml; + Array2D<uchar> mr; + + Impl(StereoMeanCensusSgm::Parameters ¶ms, int width, int height, int dmin, int dmax) : + StereoSgm(params, width, height, dmin, dmax), l(width, height), r(width, height), + ml(width, height), mr(width, height) {} +}; + +StereoMeanCensusSgm::StereoMeanCensusSgm() : impl_(nullptr) { + impl_ = new Impl(params, 0, 0, 0, 0); +} + +void StereoMeanCensusSgm::compute(cv::InputArray l, cv::InputArray r, cv::OutputArray disparity) { + + //cudaSetDevice(0); + + if (l.rows() != impl_->cost.height() || r.cols() != impl_->cost.width()) { + delete impl_; impl_ = nullptr; + impl_ = new Impl(params, l.cols(), l.rows(), params.d_min, params.d_max); + } + + mat2gray(l, impl_->l); + mat2gray(r, impl_->r); + + auto filter = cv::cuda::createBoxFilter(CV_8UC1, CV_8UC1, cv::Size(5,5)); + filter->apply(impl_->l.toGpuMat(), impl_->ml.toGpuMat()); // E[X] + filter->apply(impl_->r.toGpuMat(), impl_->mr.toGpuMat()); // E[X] + + impl_->cost.set(impl_->ml, impl_->mr); + + cudaSafeCall(cudaDeviceSynchronize()); + impl_->compute(disparity); +} + +StereoMeanCensusSgm::~StereoMeanCensusSgm() { + if (impl_) { + delete impl_; + impl_ = nullptr; + } +} diff --git a/lib/libstereo/src/algorithms/stablesgm.cu b/lib/libstereo/src/algorithms/stablesgm.cu new file 
mode 100644 index 0000000000000000000000000000000000000000..efaf20a289005f51c80711af4f6735efb2e8196c --- /dev/null +++ b/lib/libstereo/src/algorithms/stablesgm.cu @@ -0,0 +1,40 @@ +#include "stereo.hpp" +#include "stereosgm.hpp" +#include "../costs/stable.hpp" + +struct StereoStableSgm::Impl : public StereoSgm<StableMatchingCost, StereoStableSgm::Parameters> { + Array2D<uchar> l; + Array2D<uchar> r; + + Impl(StereoStableSgm::Parameters ¶ms, int width, int height, int dmin, int dmax) : + StereoSgm(params, width, height, dmin, dmax), l(width, height), r(width, height) {} +}; + +StereoStableSgm::StereoStableSgm() : impl_(nullptr) { + impl_ = new Impl(params, 0, 0, 0, 0); +} + +void StereoStableSgm::compute(cv::InputArray l, cv::InputArray r, cv::OutputArray disparity) { + + //cudaSetDevice(0); + + if (l.rows() != impl_->cost.height() || r.cols() != impl_->cost.width()) { + delete impl_; impl_ = nullptr; + impl_ = new Impl(params, l.cols(), l.rows(), params.d_min, params.d_max); + } + + mat2gray(l, impl_->l); + mat2gray(r, impl_->r); + impl_->cost.generateFilterMask(params.wsize, 16); // hardcoded in implementation + impl_->cost.set(impl_->l, impl_->r); + + cudaSafeCall(cudaDeviceSynchronize()); + impl_->compute(disparity); +} + +StereoStableSgm::~StereoStableSgm() { + if (impl_) { + delete impl_; + impl_ = nullptr; + } +} diff --git a/lib/libstereo/src/algorithms/stereosgm.hpp b/lib/libstereo/src/algorithms/stereosgm.hpp new file mode 100644 index 0000000000000000000000000000000000000000..2a6369df049e51f1136320810cd62abe34c5e1d2 --- /dev/null +++ b/lib/libstereo/src/algorithms/stereosgm.hpp @@ -0,0 +1,102 @@ +#pragma once + +#include <opencv2/core.hpp> +#include <opencv2/imgproc.hpp> + +#include <opencv2/core/cuda/common.hpp> +#include <opencv2/cudaarithm.hpp> +#include <opencv2/cudastereo.hpp> + +#include "stereo.hpp" + +#include "util_opencv.hpp" +#include "dsi.hpp" + +#include "wta.hpp" +#include "cost_aggregation.hpp" +#include "aggregations/standard_sgm.hpp" + +#include "median_filter.hpp" +#include "dsi_tools.hpp" + +#ifdef __GNUG__ + +#include <chrono> +#include <iostream> + +static std::chrono::time_point<std::chrono::system_clock> start; + +static void timer_set() { + start = std::chrono::high_resolution_clock::now(); +} + +static void timer_print(const std::string &msg, const bool reset=true) { + auto stop = std::chrono::high_resolution_clock::now(); + + char buf[24]; + snprintf(buf, sizeof(buf), "%5i ms ", + (int) std::chrono::duration_cast<std::chrono::milliseconds>(stop-start).count()); + + std::cout << buf << msg << "\n" << std::flush; + if (reset) { timer_set(); } +} + +#else + +static void timer_set() {} +static void timer_print(const std::string &msg, const bool reset=true) {} + +#endif + +template<typename MatchingCostT, typename ParamsT> +struct StereoSgm { + ParamsT ¶ms; + MatchingCostT cost; + Array2D<typename MatchingCostT::Type> cost_min_paths; + Array2D<typename MatchingCostT::Type> uncertainty; + + PathAggregator<ftl::stereo::aggregations::StandardSGM<typename MatchingCostT::DataType>> aggr; + WinnerTakesAll<DisparitySpaceImage<typename MatchingCostT::Type>, float> wta; + + StereoSgm(ParamsT ¶ms, int width, int height, int min_disp, int max_disp) : + params(params), + cost(width, height, min_disp, max_disp), + cost_min_paths(width, height), + uncertainty(width, height) + {} + + void compute(cv::OutputArray disparity) { + + // cost aggregation + ftl::stereo::aggregations::StandardSGM<typename MatchingCostT::DataType> func = {cost.data(), cost_min_paths.data(), 
params.P1, params.P2}; + auto &out = aggr(func, params.paths); + + cudaSafeCall(cudaDeviceSynchronize()); + if (params.debug) { timer_print("Aggregation"); } + + wta(out, params.subpixel, params.lr_consistency); + cudaSafeCall(cudaDeviceSynchronize()); + if (params.debug) { timer_print("WTA"); } + + // Drory, A., Haubold, C., Avidan, S., & Hamprecht, F. A. (2014). + // Semi-global matching: A principled derivation in terms of + // message passing. Lecture Notes in Computer Science (Including Subseries + // Lecture Notes in Artificial Intelligence and Lecture Notes in + // Bioinformatics). https://doi.org/10.1007/978-3-319-11752-2_4 + + #if USE_GPU + cv::cuda::GpuMat uncertainty_gpumat = uncertainty.toGpuMat(); + cv::cuda::subtract(wta.min_cost.toGpuMat(), cost_min_paths.toGpuMat(), uncertainty_gpumat); + cv::cuda::compare(uncertainty_gpumat, params.uniqueness, uncertainty_gpumat, cv::CMP_GT); + wta.disparity.toGpuMat().setTo(0, uncertainty_gpumat); + #else + cv::Mat uncertainty_mat = uncertainty.toMat(); + cv::subtract(wta.min_cost.toMat(), cost_min_paths.toMat(), uncertainty_mat); + cv::compare(uncertainty_mat, params.uniqueness, uncertainty, cv::CMP_GT); + wta.disparity.toMat().setTo(0, uncertainty_mat); + #endif + + median_filter(wta.disparity, disparity); + if (params.debug) { timer_print("median filter"); } + } +}; diff --git a/lib/libstereo/src/algorithms/tcensussgm.cu b/lib/libstereo/src/algorithms/tcensussgm.cu new file mode 100644 index 0000000000000000000000000000000000000000..6a449937a13936b0a3d7f141cf36ac5ef44e4ad1 --- /dev/null +++ b/lib/libstereo/src/algorithms/tcensussgm.cu @@ -0,0 +1,40 @@ +#include "stereo.hpp" +#include "stereosgm.hpp" +#include "../costs/tcensus.hpp" + +struct StereoTCensusSgm::Impl : public StereoSgm<TCensusMatchingCost, StereoTCensusSgm::Parameters> { + Array2D<uchar> l; + Array2D<uchar> r; + + Impl(StereoTCensusSgm::Parameters ¶ms, int width, int height, int dmin, int dmax) : + StereoSgm(params, width, height, dmin, dmax), l(width, height), r(width, height) {} +}; + +StereoTCensusSgm::StereoTCensusSgm() : impl_(nullptr) { + impl_ = new Impl(params, 0, 0, 0, 0); +} + +void StereoTCensusSgm::compute(cv::InputArray l, cv::InputArray r, cv::OutputArray disparity) { + + //cudaSetDevice(0); + + if (l.rows() != impl_->cost.height() || r.cols() != impl_->cost.width()) { + delete impl_; impl_ = nullptr; + impl_ = new Impl(params, l.cols(), l.rows(), params.d_min, params.d_max); + } + + mat2gray(l, impl_->l); + mat2gray(r, impl_->r); + timer_set(); + + impl_->cost.setT(params.t); + impl_->cost.set(impl_->l, impl_->r); + impl_->compute(disparity); +} + +StereoTCensusSgm::~StereoTCensusSgm() { + if (impl_) { + delete impl_; + impl_ = nullptr; + } +} diff --git a/lib/libstereo/src/costs/census.cu b/lib/libstereo/src/costs/census.cu index 3930e7333e4376ec3f8cb724066ddc2e1aef6ea2..f56cedf608d3eb87ccbdf078f563dccd1f85d688 100644 --- a/lib/libstereo/src/costs/census.cu +++ b/lib/libstereo/src/costs/census.cu @@ -7,44 +7,260 @@ #include <cuda_runtime.h> +template <typename T> +__host__ __device__ inline T square(T v) { return v*v; } + namespace algorithms { + /** Census transform. Bits stored in row major order. 
*/ + template<int WINX, int WINY> + struct CensusTransformRowMajor { + __host__ __device__ inline void window(const int y, const int x, uint64_t* __restrict__ out) { + short center = im(y, x); + uint8_t i = 0; // bit counter for *out + + for (int wy = -WINY/2; wy <= WINY/2; wy++) { + for (int wx = -WINX/2; wx <= WINX/2; wx++) { + const int y_ = y + wy; + const int x_ = x + wx; + + // zero if first value, otherwise shift to left + if (i % 64 == 0) { *out = 0; } + else { *out = (*out << 1); } + *out |= (center < (im(y_,x_)) ? 1 : 0); + + i += 1; + // if all bits set, continue to next element + if (i % 64 == 0) { out++; } + } + } + } + + __host__ __device__ void operator()(ushort2 thread, ushort2 stride, ushort2 size) { + for (int y = thread.y+WINY/2; y<size.y-WINY/2-1; y+=stride.y) { + for (int x = thread.x+WINX/2; x<size.x-WINX/2-1; x+=stride.x) { + window(y, x, &(out(y, x*WSTEP))); + } + } + } + + Array2D<uchar>::Data im; + Array2D<uint64_t>::Data out; + + // number of uint64_t values for each window + static constexpr int WSTEP = (WINX*WINY - 1)/(sizeof(uint64_t)*8) + 1; + }; + + /** Census transform. Bits stored in row major order. This variant uses a + * nested window that expands with distance from center, where the + * absolute max difference to center is used from that window. */ template<int WINX, int WINY> - struct CensusTransform { - static_assert(WINX*WINY <= 64, "Census window is too large"); + struct ExCensusTransformRowMajor { + __host__ __device__ inline short subwindow(const int x, const int y, int wx, int wy, short center, ushort2 size) { + short value = center; - __host__ __device__ inline uint64_t ct_window(const int y, const int x) { - uint64_t ct = 0; - uchar center = im(y, x); + int nwx = 3*wx; //int(float(wx)*scale); + int nwy = 3*wy; //int(float(wy)*scale); + //int wsize = max(0,max(abs(wx),abs(wy))-2); + //if (x == 100 && y == 100) printf("Ex shape: %d, %d\n",nwx,nwy); + for (int dy=-max(0,abs(wy)-2); dy<=max(0,abs(wy)-2); ++dy) { + for (int dx=-max(0,abs(wx)-2); dx<=max(0,abs(wx)-2); ++dx) { + auto v = im(min(size.y, max(0,y+nwy+dy)), min(size.x, max(0,x+nwx+dx))); + if (abs(center-v) > abs(center-value)) value = v; + } + } + + return value; + } + + __host__ __device__ inline void window(const int y, const int x, uint64_t* __restrict__ out, ushort2 size) { + short center = im(y, x); + uint8_t i = 0; // bit counter for *out for (int wy = -WINY/2; wy <= WINY/2; wy++) { for (int wx = -WINX/2; wx <= WINX/2; wx++) { const int y_ = y + wy; const int x_ = x + wx; - ct = ct << 1; - ct |= (center < (im(y_,x_)) ? 1 : 0); + + // zero if first value, otherwise shift to left + if (i % 64 == 0) { *out = 0; } + else { *out = (*out << 1); } + *out |= (center < (subwindow(x,y,wx,wy,center,size)) ? 1 : 0); + + i += 1; + // if all bits set, continue to next element + if (i % 64 == 0) { out++; } + } + } + } + + __host__ __device__ void operator()(ushort2 thread, ushort2 stride, ushort2 size) { + for (int y = thread.y+WINY/2; y<size.y-WINY/2-1; y+=stride.y) { + for (int x = thread.x+WINX/2; x<size.x-WINX/2-1; x+=stride.x) { + window(y, x, &(out(y, x*WSTEP)), size); + } + } + } + + Array2D<uchar>::Data im; + Array2D<uint64_t>::Data out; + float scale; + + // number of uint64_t values for each window + static constexpr int WSTEP = (WINX*WINY - 1)/(sizeof(uint64_t)*8) + 1; + }; + + /* W. S. Fife and J. K. Archibald, + "Improved Census Transforms for Resource-Optimized Stereo Vision," + in IEEE Transactions on Circuits and Systems for Video Technology, + vol. 23, no. 1, pp. 60-73, Jan. 
2013. */ + template<int WINX, int WINY> + struct GCensusTransformRowMajor { + __host__ __device__ inline void window(const int y, const int x, uint64_t* __restrict__ out) { + //short center = im(y, x); + uint8_t i = 0; // bit counter for *out + + for (int wy = -WINY/2; wy <= WINY/2; wy++) { + for (int wx = -WINX/2; wx <= WINX/2; wx++) { + //const int y_ = y + wy; + //const int x_ = x + wx; + + // zero if first value, otherwise shift to left + if (i % 64 == 0) { *out = 0; } + else { *out = (*out << 1); } + *out |= (im(y-wy,x-wx) < (im(y+wy,x+wx)) ? 1 : 0); + + i += 1; + // if all bits set, continue to next element + if (i % 64 == 0) { out++; } } } - return ct; } __host__ __device__ void operator()(ushort2 thread, ushort2 stride, ushort2 size) { - //for (int y = WINY/2+1; y < size.y - (WINY/2+1); y++) { - // for (int x = WINX/2+1; x < size.x - (WINX/2+1); x++) { for (int y = thread.y+WINY/2; y<size.y-WINY/2-1; y+=stride.y) { for (int x = thread.x+WINX/2; x<size.x-WINX/2-1; x+=stride.x) { - out(y, x) = ct_window(y, x); + window(y, x, &(out(y, x*WSTEP))); + } + } + } + + Array2D<uchar>::Data im; + Array2D<uint64_t>::Data out; + + // number of uint64_t values for each window + static constexpr int WSTEP = (WINX*WINY - 1)/(sizeof(uint64_t)*8) + 1; + }; + + /** Census transform. Bits stored in row major order. */ + template<int WINX, int WINY> + struct HCensusTransformRowMajor { + __host__ __device__ inline void window(short center, const int y, const int x, uint64_t* __restrict__ out) { + uint8_t i = 0; // bit counter for *out + + for (int wy = -WINY/2; wy <= WINY/2; wy++) { + for (int wx = -WINX/2; wx <= WINX/2; wx++) { + const uint y_ = min(uint(y + wy), hsize.y-1); + const uint x_ = min(uint(x + wx), hsize.x-1); + + // zero if first value, otherwise shift to left + if (i % 64 == 0) { *out = 0; } + else { *out = (*out << 1); } + *out |= (center < (neigh(y_,x_)) ? 1 : 0); + + i += 1; + // if all bits set, continue to next element + if (i % 64 == 0) { out++; } + } + } + } + + __host__ __device__ void operator()(ushort2 thread, ushort2 stride, ushort2 size) { + for (int y = thread.y; y<size.y; y+=stride.y) { + for (int x = thread.x; x<size.x; x+=stride.x) { + short center = im(y, x); + float ny = float(y) / float(size.y) * float(hsize.y); + float nx = float(x) / float(size.x) * float(hsize.x); + window(center, ny, nx, &(out(y, x*WSTEP))); } } } Array2D<uchar>::Data im; + Array2D<uchar>::Data neigh; Array2D<uint64_t>::Data out; + ushort2 hsize; + + // number of uint64_t values for each window + static constexpr int WSTEP = (WINX*WINY - 1)/(sizeof(uint64_t)*8) + 1; + }; + + /** Census transform. Bits stored in row major order. + @see GCensusTransform above. */ + template<int WINX, int WINY> + struct HGCensusTransformRowMajor { + __host__ __device__ inline void window(short2 center, ushort2 size, const int y, const int x, uint64_t* __restrict__ out) { + uint8_t i = 0; // bit counter for *out + + for (int wy = -WINY/2; wy <= WINY/2; wy++) { + for (int wx = -WINX/2; wx <= WINX/2; wx++) { + const uint ny = min(uint(y + wy), hsize.y-1); + const uint nx = min(uint(x + wx), hsize.x-1); + const uint cy = min(uint(center.y - wy), size.y-1); + const uint cx = min(uint(center.x - wx), size.x-1); + + // zero if first value, otherwise shift to left + if (i % 64 == 0) { *out = 0; } + else { *out = (*out << 1); } + *out |= (im(cy,cx) < (neigh(ny,nx)) ? 
1 : 0); + + i += 1; + // if all bits set, continue to next element + if (i % 64 == 0) { out++; } + } + } + } + + __host__ __device__ void operator()(ushort2 thread, ushort2 stride, ushort2 size) { + for (int y = thread.y; y<size.y; y+=stride.y) { + for (int x = thread.x; x<size.x; x+=stride.x) { + //short center = im(y, x); + float ny = float(y) / float(size.y) * float(hsize.y); + float nx = float(x) / float(size.x) * float(hsize.x); + window({short(x),short(y)}, size, ny, nx, &(out(y, x*WSTEP))); + } + } + } + + Array2D<uchar>::Data im; + Array2D<uchar>::Data neigh; + Array2D<uint64_t>::Data out; + ushort2 hsize; + + // number of uint64_t values for each window + static constexpr int WSTEP = (WINX*WINY - 1)/(sizeof(uint64_t)*8) + 1; }; } void CensusMatchingCost::set(const Array2D<uchar> &l, const Array2D<uchar> &r) { - parallel2D<algorithms::CensusTransform<9,7>>({l.data(), ct_l_.data()}, l.width, l.height); - parallel2D<algorithms::CensusTransform<9,7>>({r.data(), ct_r_.data()}, r.width, r.height); + if (pattern_ == CensusPattern::STANDARD) { + parallel2D<algorithms::CensusTransformRowMajor<9,7>>({l.data(), ct_l_.data()}, l.width, l.height); + parallel2D<algorithms::CensusTransformRowMajor<9,7>>({r.data(), ct_r_.data()}, r.width, r.height); + } else if (pattern_ == CensusPattern::GENERALISED) { + parallel2D<algorithms::GCensusTransformRowMajor<9,7>>({l.data(), ct_l_.data()}, l.width, l.height); + parallel2D<algorithms::GCensusTransformRowMajor<9,7>>({r.data(), ct_r_.data()}, r.width, r.height); + } else { + // TODO: + } +} + +void CensusMatchingCost::set(const Array2D<uchar> &l, const Array2D<uchar> &r, const Array2D<uchar> &hl, const Array2D<uchar> &hr) { + if (pattern_ == CensusPattern::STANDARD) { + parallel2D<algorithms::HCensusTransformRowMajor<9,7>>({l.data(), hl.data(), ct_l_.data(), {ushort(hl.width), ushort(hl.height)}}, l.width, l.height); + parallel2D<algorithms::HCensusTransformRowMajor<9,7>>({r.data(), hr.data(), ct_r_.data(), {ushort(hr.width), ushort(hr.height)}}, r.width, r.height); + } else if (pattern_ == CensusPattern::GENERALISED) { + parallel2D<algorithms::HGCensusTransformRowMajor<9,7>>({l.data(), hl.data(), ct_l_.data(), {ushort(hl.width), ushort(hl.height)}}, l.width, l.height); + parallel2D<algorithms::HGCensusTransformRowMajor<9,7>>({r.data(), hr.data(), ct_r_.data(), {ushort(hr.width), ushort(hr.height)}}, r.width, r.height); + } } void CensusMatchingCost::set(cv::InputArray l, cv::InputArray r) { @@ -67,3 +283,119 @@ void CensusMatchingCost::set(cv::InputArray l, cv::InputArray r) { throw std::exception(); } } + +//////////////////////////////////////////////////////////////////////////////// + +void ExpandingCensusMatchingCost::set(const Array2D<uchar> &l, const Array2D<uchar> &r) { + parallel2D<algorithms::ExCensusTransformRowMajor<9,9>>({l.data(), ct_l_.data(),1.5f}, l.width, l.height); + parallel2D<algorithms::ExCensusTransformRowMajor<9,9>>({r.data(), ct_r_.data(),1.5f}, r.width, r.height); +} + +/*void ExpandingCensusMatchingCost::set(const Array2D<uchar> &l, const Array2D<uchar> &r, const Array2D<uchar> &hl, const Array2D<uchar> &hr) { + parallel2D<algorithms::HCensusTransformRowMajor<9,7>>({l.data(), hl.data(), ct_l_.data(), {ushort(hl.width), ushort(hl.height)}}, l.width, l.height); + parallel2D<algorithms::HCensusTransformRowMajor<9,7>>({r.data(), hr.data(), ct_r_.data(), {ushort(hr.width), ushort(hr.height)}}, r.width, r.height); +}*/ + +void ExpandingCensusMatchingCost::set(cv::InputArray l, cv::InputArray r) { + if (l.type() != CV_8UC1 || r.type() 
!= CV_8UC1) { throw std::exception(); } + if (l.rows() != r.rows() || l.cols() != r.cols() || l.rows() != height() || l.cols() != width()) { + throw std::exception(); + } + + if (l.isGpuMat() && r.isGpuMat()) { + auto ml = l.getGpuMat(); + auto mr = r.getGpuMat(); + set(Array2D<uchar>(ml), Array2D<uchar>(mr)); + } + else if (l.isMat() && r.isMat()) { + auto ml = l.getMat(); + auto mr = r.getMat(); + set(Array2D<uchar>(ml), Array2D<uchar>(mr)); + } + else { + throw std::exception(); + } +} + +//////////////////////////////////////////////////////////////////////////////// + +void MiniCensusMatchingCost::set(const Array2D<uchar> &l, const Array2D<uchar> &r) { + if (pattern_ == CensusPattern::STANDARD) { + parallel2D<algorithms::CensusTransformRowMajor<5,3>>({l.data(), ct_l_.data()}, l.width, l.height); + parallel2D<algorithms::CensusTransformRowMajor<5,3>>({r.data(), ct_r_.data()}, r.width, r.height); + } else if (pattern_ == CensusPattern::GENERALISED) { + parallel2D<algorithms::GCensusTransformRowMajor<5,3>>({l.data(), ct_l_.data()}, l.width, l.height); + parallel2D<algorithms::GCensusTransformRowMajor<5,3>>({r.data(), ct_r_.data()}, r.width, r.height); + } else { + // TODO: + } +} + +void MiniCensusMatchingCost::set(const Array2D<uchar> &l, const Array2D<uchar> &r, const Array2D<uchar> &hl, const Array2D<uchar> &hr) { + if (pattern_ == CensusPattern::STANDARD) { + parallel2D<algorithms::HCensusTransformRowMajor<5,3>>({l.data(), hl.data(), ct_l_.data(), {ushort(hl.width), ushort(hl.height)}}, l.width, l.height); + parallel2D<algorithms::HCensusTransformRowMajor<5,3>>({r.data(), hr.data(), ct_r_.data(), {ushort(hr.width), ushort(hr.height)}}, r.width, r.height); + } else if (pattern_ == CensusPattern::GENERALISED) { + parallel2D<algorithms::HGCensusTransformRowMajor<5,3>>({l.data(), hl.data(), ct_l_.data(), {ushort(hl.width), ushort(hl.height)}}, l.width, l.height); + parallel2D<algorithms::HGCensusTransformRowMajor<5,3>>({r.data(), hr.data(), ct_r_.data(), {ushort(hr.width), ushort(hr.height)}}, r.width, r.height); + } +} + +void MiniCensusMatchingCost::set(cv::InputArray l, cv::InputArray r) { + if (l.type() != CV_8UC1 || r.type() != CV_8UC1) { throw std::exception(); } + if (l.rows() != r.rows() || l.cols() != r.cols() || l.rows() != height() || l.cols() != width()) { + throw std::exception(); + } + + if (l.isGpuMat() && r.isGpuMat()) { + auto ml = l.getGpuMat(); + auto mr = r.getGpuMat(); + set(Array2D<uchar>(ml), Array2D<uchar>(mr)); + } + else if (l.isMat() && r.isMat()) { + auto ml = l.getMat(); + auto mr = r.getMat(); + set(Array2D<uchar>(ml), Array2D<uchar>(mr)); + } + else { + throw std::exception(); + } +} + +//////////////////////////////////////////////////////////////////////////////// + +void WeightedCensusMatchingCost::set(const Array2D<uchar> &l, const Array2D<uchar> &r) { + parallel2D<algorithms::CensusTransformRowMajor<7,7>>({l.data(), ct_l_.data()}, l.width, l.height); + parallel2D<algorithms::CensusTransformRowMajor<7,7>>({r.data(), ct_r_.data()}, r.width, r.height); +} + +void WeightedCensusMatchingCost::set(const Array2D<uchar> &l, const Array2D<uchar> &r, const Array2D<uchar> &hl, const Array2D<uchar> &hr) { + //if (pattern_ == CensusPattern::STANDARD) { + parallel2D<algorithms::HCensusTransformRowMajor<7,7>>({l.data(), hl.data(), ct_l_.data(), {ushort(hl.width), ushort(hl.height)}}, l.width, l.height); + parallel2D<algorithms::HCensusTransformRowMajor<7,7>>({r.data(), hr.data(), ct_r_.data(), {ushort(hr.width), ushort(hr.height)}}, r.width, r.height); + //} else if 
(pattern_ == CensusPattern::GENERALISED) { + // parallel2D<algorithms::HGCensusTransformRowMajor<7,7>>({l.data(), hl.data(), ct_l_.data(), {ushort(hl.width), ushort(hl.height)}}, l.width, l.height); + // parallel2D<algorithms::HGCensusTransformRowMajor<7,7>>({r.data(), hr.data(), ct_r_.data(), {ushort(hr.width), ushort(hr.height)}}, r.width, r.height); + //} +} + +void WeightedCensusMatchingCost::set(cv::InputArray l, cv::InputArray r) { + if (l.type() != CV_8UC1 || r.type() != CV_8UC1) { throw std::exception(); } + if (l.rows() != r.rows() || l.cols() != r.cols() || l.rows() != height() || l.cols() != width()) { + throw std::exception(); + } + + if (l.isGpuMat() && r.isGpuMat()) { + auto ml = l.getGpuMat(); + auto mr = r.getGpuMat(); + set(Array2D<uchar>(ml), Array2D<uchar>(mr)); + } + else if (l.isMat() && r.isMat()) { + auto ml = l.getMat(); + auto mr = r.getMat(); + set(Array2D<uchar>(ml), Array2D<uchar>(mr)); + } + else { + throw std::exception(); + } +} diff --git a/lib/libstereo/src/costs/census.hpp b/lib/libstereo/src/costs/census.hpp index fc7be10d3bff600c8656419f933a9b1d63628382..4600b64a26674b5a23a9d77b65b210e618eabd63 100644 --- a/lib/libstereo/src/costs/census.hpp +++ b/lib/libstereo/src/costs/census.hpp @@ -4,70 +4,296 @@ #include <opencv2/core/core.hpp> #include "array2d.hpp" #include "dsbase.hpp" +#include <stereo_types.hpp> #include <cuda_runtime.h> - namespace impl { - struct CensusMatchingCost : DSImplBase<unsigned short> { + __host__ __device__ static inline uint64_t popcount(const uint64_t bits) { + #if defined(__CUDA_ARCH__) + return __popcll(bits); + #elif defined(_MSC_VER) + return __popcnt64(bits); + #elif defined(__GNUC__) + return __builtin_popcountl(bits); + #else + static_assert(false, "unsupported compiler (popcount intrinsic)"); + #endif + } + + /** + * Hamming cost, template parameter number of bits + */ + template<int SIZE> + struct HammingCost : DSImplBase<unsigned short> { + typedef unsigned short Type; + + HammingCost(ushort w, ushort h, ushort dmin, ushort dmax) : DSImplBase<Type>({w,h,dmin,dmax}) {} + HammingCost() : DSImplBase<Type>({0,0,0,0}) {} + + __host__ __device__ inline Type operator()(const int y, const int x, const int d) const { + if ((x-d) < 0) { return COST_MAX; } + unsigned short c = 0; + + #pragma unroll + for (int i = 0; i < WSTEP; i++) { + c+= popcount(l(y, x*WSTEP+i) ^ r(y, (x-d)*WSTEP+i)); + } + return c; + } + + // number of uint64_t values for each window + static constexpr int WSTEP = (SIZE - 1)/(sizeof(uint64_t)*8) + 1; + static constexpr Type COST_MAX = SIZE; + + Array2D<uint64_t>::Data l; + Array2D<uint64_t>::Data r; + }; + + template<uint8_t WW, uint8_t WH, int BPP=1> + using CensusMatchingCost = HammingCost<WW*WH*BPP>; + + template<uint8_t R, uint8_t NBINS> + struct WeightedCensusMatchingCost : DSImplBase<unsigned short> { + static_assert(R % 2 == 1, "R must be odd"); + typedef unsigned short Type; + + WeightedCensusMatchingCost() : DSImplBase<Type>({0,0,0,0}) {} + WeightedCensusMatchingCost(ushort w, ushort h, ushort dmin, ushort dmax) : + DSImplBase<unsigned short>({w,h,dmin,dmax}) { + + const float wbin = float(R)/(2.0f*float(NBINS)); // bin width + int step = 0; + int i = 0; + + for (int wy = -R/2; wy <= R/2; wy++) { + for (int wx = -R/2; wx <= R/2; wx++) { + const int bin = floor(sqrtf(wx*wx + wy*wy)/wbin); + masks[step][bin] |= uint64_t(1) << i; + + i += 1; + if (i % 64 == 0) { step++; i = 0; } + }} + + float weight = 1.0; + int bin = 0; + do { + weights[bin] = weight; + weight *= W; + bin++; + } while(bin != NBINS); + 
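+			// NOTE: masks[] groups the census bits into NBINS concentric rings
+			// (bin = floor(sqrt(wx*wx + wy*wy)/wbin)) and weights[] decays
+			// geometrically as W^bin. For the <7,3> instantiation used by the
+			// WeightedCensusMatchingCost wrapper with the default W = 0.75 this
+			// gives weights {1.0, 0.75, 0.5625}, so bits near the centre pixel
+			// contribute more to the weighted Hamming sum than bits at the
+			// window edge.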
} + + WeightedCensusMatchingCost<R,NBINS> &operator=(const WeightedCensusMatchingCost<R,NBINS> &c) { + ct_l = c.ct_l; + ct_r = c.ct_r; + W = c.W; + memcpy(masks, c.masks, sizeof(masks)); + memcpy(weights, c.weights, sizeof(weights)); + return *this; + } + + __host__ __device__ inline unsigned short operator()(const int y, const int x, const int d) const { + if ((x-d) < 0) { return COST_MAX; } + float c = 0; + + #pragma unroll + for (int i = 0; i < WSTEP; i++) { + # pragma unroll + for (int bin = 0; bin < NBINS; bin++) { + uint64_t bits = ct_l(y, x*WSTEP+i) ^ ct_r(y, (x-d)*WSTEP+i); + bits &= masks[i][bin]; + c += float(popcount(bits)) * weights[bin]; + } + } + + return round(c); + } + + static constexpr int BPP = 1; + // number of uint64_t values for each window + static constexpr int WSTEP = (BPP*R*R - 1)/(sizeof(uint64_t)*8) + 1; + static constexpr Type COST_MAX = R*R*BPP; + + uint64_t masks[WSTEP][NBINS] = {0}; + float weights[NBINS] = {0.0f}; + float W = 0.75f; + + Array2D<uint64_t>::Data ct_l; + Array2D<uint64_t>::Data ct_r; + }; + + template<uint8_t WW, uint8_t WH, int BPP=1, bool RMASK=true> + struct CensusMaskMatchingCost : DSImplBase<unsigned short> { typedef unsigned short Type; - CensusMatchingCost(ushort w, ushort h, ushort dmin, ushort dmax) : DSImplBase<unsigned short>({w,h,dmin,dmax}) {} + CensusMaskMatchingCost(ushort w, ushort h, ushort dmin, ushort dmax) : DSImplBase<unsigned short>({w,h,dmin,dmax}) {} __host__ __device__ inline unsigned short operator()(const int y, const int x, const int d) const { - if ((x-d) < 0) { return 255; } // TODO: use window size - const uint64_t bits = - ct_l(y, x) ^ ct_r(y, x-d); - #if defined(__CUDA_ARCH__) - return __popcll(bits); - #elif defined(_MSC_VER) - return __popcnt64(bits); - #elif defined(__GNUC__) - return __builtin_popcountl(bits); - #else - static_assert(false, "unsupported compiler (popcount intrinsic)"); - #endif + if ((x-d) < 0) { return COST_MAX; } + unsigned short c = 0; + + #pragma unroll + for (int i = 0; i < WSTEP; i++) { + uint64_t mask = mask_l(y, x*WSTEP+i); + if (RMASK) { + /* intersection of l and r masks */ + mask &= mask_r(y, x*WSTEP+i); + } + + const uint64_t bits = + (ct_l(y, x*WSTEP+i) ^ ct_r(y, (x-d)*WSTEP+i)) & mask; + + c += popcount(bits); + } + return c; } - static constexpr unsigned short COST_MAX = 64; + // number of uint64_t values for each window + static constexpr int WSTEP = (BPP*WW*WH - 1)/(sizeof(uint64_t)*8) + 1; + static constexpr Type COST_MAX = WW*WH*BPP; Array2D<uint64_t>::Data ct_l; Array2D<uint64_t>::Data ct_r; + Array2D<uint64_t>::Data mask_l; + Array2D<uint64_t>::Data mask_r; }; } -class CensusMatchingCost : public DSBase<impl::CensusMatchingCost> { +class CensusMatchingCost : public DSBase<impl::CensusMatchingCost<9,7,1>> { public: - typedef impl::CensusMatchingCost DataType; + typedef impl::CensusMatchingCost<9,7,1> DataType; typedef unsigned short Type; CensusMatchingCost() : DSBase<DataType>(0, 0, 0, 0) {}; CensusMatchingCost(int width, int height, int disp_min, int disp_max) : DSBase<DataType>(width, height, disp_min, disp_max), - ct_l_(width, height), ct_r_(width,height), - //cost_max(wwidth * wheight), - ww_(9), wh_(7) + ct_l_(width*data().WSTEP, height), ct_r_(width*data().WSTEP,height) { - if (ww_ != 9 || wh_ != 7) { - // TODO: window size paramters (hardcoded) in matching_cost.cpp - throw std::exception(); - } - if (ww_*wh_ > 64) { - throw std::exception(); // todo: dynamic size - } + data().l = ct_l_.data(); + data().r = ct_r_.data(); + } + + inline void 
setPattern(CensusPattern p) { pattern_ = p; } + + void set(cv::InputArray l, cv::InputArray r); + void set(const Array2D<uchar>& l, const Array2D<uchar>& r); + void set(const Array2D<uchar> &l, const Array2D<uchar> &r, const Array2D<uchar> &hl, const Array2D<uchar> &hr); + static constexpr Type COST_MAX = DataType::COST_MAX; + +protected: + Array2D<uint64_t> ct_l_; + Array2D<uint64_t> ct_r_; + CensusPattern pattern_ = CensusPattern::STANDARD; +}; + +class MiniCensusMatchingCost : public DSBase<impl::CensusMatchingCost<5,3,1>> { +public: + typedef impl::CensusMatchingCost<5,3,1> DataType; + typedef unsigned short Type; + MiniCensusMatchingCost() : DSBase<DataType>(0, 0, 0, 0) {}; + MiniCensusMatchingCost(int width, int height, int disp_min, int disp_max) + : DSBase<DataType>(width, height, disp_min, disp_max), + ct_l_(width*data().WSTEP, height), ct_r_(width*data().WSTEP,height) + { + data().l = ct_l_.data(); + data().r = ct_r_.data(); + } + + inline void setPattern(CensusPattern p) { pattern_ = p; } + + void set(cv::InputArray l, cv::InputArray r); + void set(const Array2D<uchar>& l, const Array2D<uchar>& r); + void set(const Array2D<uchar> &l, const Array2D<uchar> &r, const Array2D<uchar> &hl, const Array2D<uchar> &hr); + static constexpr Type COST_MAX = DataType::COST_MAX; + +protected: + Array2D<uint64_t> ct_l_; + Array2D<uint64_t> ct_r_; + CensusPattern pattern_ = CensusPattern::STANDARD; +}; + +class ExpandingCensusMatchingCost : public DSBase<impl::WeightedCensusMatchingCost<9,3>> { +public: + typedef impl::WeightedCensusMatchingCost<9,3> DataType; + typedef unsigned short Type; + + ExpandingCensusMatchingCost() : DSBase<DataType>(0, 0, 0, 0) {}; + ExpandingCensusMatchingCost(int width, int height, int disp_min, int disp_max) + : DSBase<DataType>(width, height, disp_min, disp_max), + ct_l_(width*data().WSTEP, height), ct_r_(width*data().WSTEP,height) + { + data().ct_l = ct_l_.data(); + data().ct_r = ct_r_.data(); + } + + inline void setPattern(CensusPattern p) { pattern_ = p; } + + void set(cv::InputArray l, cv::InputArray r); + void set(const Array2D<uchar>& l, const Array2D<uchar>& r); + //void set(const Array2D<uchar> &l, const Array2D<uchar> &r, const Array2D<uchar> &hl, const Array2D<uchar> &hr); + static constexpr Type COST_MAX = DataType::COST_MAX; + +protected: + Array2D<uint64_t> ct_l_; + Array2D<uint64_t> ct_r_; + CensusPattern pattern_ = CensusPattern::STANDARD; +}; + +class WeightedCensusMatchingCost : public DSBase<impl::WeightedCensusMatchingCost<7, 3>> { +public: + typedef impl::WeightedCensusMatchingCost<7, 3> DataType; + typedef unsigned short Type; + + WeightedCensusMatchingCost() : DSBase<DataType>(0, 0, 0, 0) {}; + WeightedCensusMatchingCost(int width, int height, int disp_min, int disp_max) + : DSBase<DataType>(width, height, disp_min, disp_max), + ct_l_(width*data().WSTEP, height), ct_r_(width*data().WSTEP,height) + { data().ct_l = ct_l_.data(); data().ct_r = ct_r_.data(); } void set(cv::InputArray l, cv::InputArray r); void set(const Array2D<uchar>& l, const Array2D<uchar>& r); - static constexpr short COST_MAX = 9*7; // TODO: window size paramters (hardcoded) in matching_cost.cpp + void set(const Array2D<uchar> &l, const Array2D<uchar> &r, const Array2D<uchar> &hl, const Array2D<uchar> &hr); + static constexpr Type COST_MAX = DataType::COST_MAX; + +protected: + Array2D<uint64_t> ct_l_; + Array2D<uint64_t> ct_r_; +}; + +class CensusMaskMatchingCost : public DSBase<impl::CensusMaskMatchingCost<9,7,1>> { +public: + typedef impl::CensusMaskMatchingCost<9,7,1> 
DataType; + typedef unsigned short Type; + + CensusMaskMatchingCost() : DSBase<DataType>(0, 0, 0, 0) {}; + CensusMaskMatchingCost(int width, int height, int disp_min, int disp_max) + : DSBase<DataType>(width, height, disp_min, disp_max), + ct_l_(width*data().WSTEP, height), ct_r_(width*data().WSTEP,height), + mask_l_(width*data().WSTEP, height), mask_r_(width*data().WSTEP,height) + { + data().ct_l = ct_l_.data(); + data().ct_r = ct_r_.data(); + data().mask_l = mask_l_.data(); + data().mask_r = mask_r_.data(); + } + + void set(cv::InputArray l, cv::InputArray r); + void set(const Array2D<uint64_t>& l, const Array2D<uint64_t>& r); + + void setMask(cv::InputArray l, cv::InputArray r); + void setMask(const Array2D<uchar>& l, const Array2D<uchar>& r); + + static constexpr Type COST_MAX = DataType::COST_MAX; protected: Array2D<uint64_t> ct_l_; Array2D<uint64_t> ct_r_; - int ww_; - int wh_; + Array2D<uint64_t> mask_l_; + Array2D<uint64_t> mask_r_; }; #endif diff --git a/lib/libstereo/src/costs/dual.hpp b/lib/libstereo/src/costs/dual.hpp index 04d0831d2f8f40fcc6ddab2c8457154d6ae060c8..1e0a4774d5318a82742c5691d66aa10525025cae 100644 --- a/lib/libstereo/src/costs/dual.hpp +++ b/lib/libstereo/src/costs/dual.hpp @@ -11,29 +11,72 @@ namespace impl { template <typename A, typename B> struct DualCosts : DSImplBase<typename A::Type> { static_assert(std::is_same<typename A::Type, typename B::Type>::value, "Cost types must be the same"); - typedef unsigned short Type; + typedef typename A::Type Type; - DualCosts(ushort w, ushort h, ushort dmin, ushort dmax) : DSImplBase<unsigned short>({w,h,dmin,dmax}), + DualCosts(ushort w, ushort h, ushort dmin, ushort dmax) : DSImplBase<Type>({w,h,dmin,dmax}), cost_a(w,h,dmin,dmax), cost_b(w,h,dmin,dmax) {} - __host__ __device__ inline unsigned short operator()(const int y, const int x, const int d) const { + __host__ __device__ inline Type operator()(const int y, const int x, const int d) const { return cost_a(y,x,d) + cost_b(y,x,d); } A cost_a; B cost_b; + static constexpr Type COST_MAX = A::COST_MAX + B::COST_MAX; + }; + + template <typename A, int N> + struct MultiCosts : DSImplBase<typename A::Type> { + typedef typename A::Type Type; + + MultiCosts(ushort w, ushort h, ushort dmin, ushort dmax) : DSImplBase<Type>({w,h,dmin,dmax}) {} + + __host__ __device__ inline Type operator()(const int y, const int x, const int d) const { + Type cost = 0; + #pragma unroll + for (int n=0; n<N; ++n) { + cost += costs[n](y,x,d); + } + return cost; + } + + A costs[N]; + static constexpr Type COST_MAX = A::COST_MAX * N; + }; + + template <typename A, int N> + struct MultiCostsWeighted : DSImplBase<typename A::Type> { + typedef typename A::Type Type; + + MultiCostsWeighted(ushort w, ushort h, ushort dmin, ushort dmax) : DSImplBase<Type>({w,h,dmin,dmax}) {} + + __host__ __device__ inline Type operator()(const int y, const int x, const int d) const { + float cost = 0; + float pw = 1.0f; + #pragma unroll + for (int n=0; n<N; ++n) { + float w = weights[n](y,x); + cost += pw*w*float(costs[n](y,x,d)); + pw *= 1.0f-w; + } + return round(cost); + } + + A costs[N]; + Array2D<float>::Data weights[N]; + static constexpr Type COST_MAX = A::COST_MAX; }; template <typename A, typename B> struct DualCostsWeighted : DSImplBase<typename A::Type> { static_assert(std::is_same<typename A::Type, typename B::Type>::value, "Cost types must be the same"); - typedef unsigned short Type; + typedef typename A::Type Type; - DualCostsWeighted(ushort w, ushort h, ushort dmin, ushort dmax) : DSImplBase<unsigned 
short>({w,h,dmin,dmax}), + DualCostsWeighted(ushort w, ushort h, ushort dmin, ushort dmax) : DSImplBase<Type>({w,h,dmin,dmax}), cost_a(w,h,dmin,dmax), cost_b(w,h,dmin,dmax) {} - __host__ __device__ inline unsigned short operator()(const int y, const int x, const int d) const { - const float w = weights_l(y,x); + __host__ __device__ inline Type operator()(const int y, const int x, const int d) const { + const float w = max(weights_l(y,x), weights_r(y,x)); return cost_a(y,x,d)*w + cost_b(y,x,d)*(1.0f - w); } @@ -41,7 +84,9 @@ namespace impl { B cost_b; Array2D<float>::Data weights_l; Array2D<float>::Data weights_r; + static constexpr Type COST_MAX = A::COST_MAX + B::COST_MAX; }; + } @@ -49,7 +94,7 @@ template <typename A, typename B> class DualCosts : public DSBase<impl::DualCosts<typename A::DataType,typename B::DataType>> { public: typedef impl::DualCosts<typename A::DataType,typename B::DataType> DataType; - typedef unsigned short Type; + typedef typename A::DataType::Type Type; DualCosts(int width, int height, int disp_min, int disp_max, A &a, B &b) : DSBase<DataType>(width, height, disp_min, disp_max), @@ -60,13 +105,68 @@ public: this->data().cost_b = cost_b.data(); } - static constexpr short COST_MAX = A::COST_MAX + B::COST_MAX; + static constexpr Type COST_MAX = A::COST_MAX + B::COST_MAX; protected: A &cost_a; B &cost_b; }; +template <typename A, int N> +class MultiCosts : public DSBase<impl::MultiCosts<typename A::DataType,N>> { +public: + typedef impl::MultiCosts<typename A::DataType,N> DataType; + typedef typename A::DataType::Type Type; + + MultiCosts(int width, int height, int disp_min, int disp_max) + : DSBase<DataType>(width, height, disp_min, disp_max) {}; + + void add(int n, A &c) { + if (n >= N || n < 0) throw std::exception(); + costs[n] = &c; + } + + void set() { + for (int n=0; n<N; ++n) { + this->data().costs[n] = costs[n]->data(); + } + } + + static constexpr Type COST_MAX = A::COST_MAX * N; + +protected: + A *costs[N]; +}; + +template <typename A, int N> +class MultiCostsWeighted : public DSBase<impl::MultiCostsWeighted<typename A::DataType,N>> { +public: + typedef impl::MultiCostsWeighted<typename A::DataType,N> DataType; + typedef typename A::DataType::Type Type; + + MultiCostsWeighted(int width, int height, int disp_min, int disp_max) + : DSBase<DataType>(width, height, disp_min, disp_max) {}; + + void add(int n, A &c, Array2D<float> &w) { + if (n >= N || n < 0) throw std::exception(); + costs[n] = &c; + weights[n] = &w; + } + + void set() { + for (int n=0; n<N; ++n) { + this->data().costs[n] = costs[n]->data(); + this->data().weights[n] = weights[n]->data(); + } + } + + static constexpr Type COST_MAX = A::COST_MAX; + +protected: + A *costs[N]; + Array2D<float> *weights[N]; +}; + /** * Combine two cost functions with weight. Weight in Array2D<float> same size * as input. 
Weight values assumed in range [0,1] and cost is calculated as @@ -77,7 +177,7 @@ template <typename A, typename B> class DualCostsWeighted : public DSBase<impl::DualCostsWeighted<typename A::DataType,typename B::DataType>> { public: typedef impl::DualCostsWeighted<typename A::DataType,typename B::DataType> DataType; - typedef unsigned short Type; + typedef typename A::DataType::Type Type; DualCostsWeighted(int width, int height, int disp_min, int disp_max, A &a, B &b, Array2D<float> &weightsl, Array2D<float> &weightsr) : DSBase<DataType>(width, height, disp_min, disp_max), @@ -90,7 +190,7 @@ public: this->data().weights_r = weights_r.data(); } - static constexpr short COST_MAX = A::DataType::COST_MAX + B::DataType::COST_MAX; + static constexpr Type COST_MAX = A::DataType::COST_MAX + B::DataType::COST_MAX; protected: A &cost_a; diff --git a/lib/libstereo/src/costs/sad.cu b/lib/libstereo/src/costs/sad.cu new file mode 100644 index 0000000000000000000000000000000000000000..41f1e556423f9e9ea5dc32015d44b7b6be40e543 --- /dev/null +++ b/lib/libstereo/src/costs/sad.cu @@ -0,0 +1,58 @@ +#include "sad.hpp" + +#include "../util.hpp" +#include "../util_opencv.hpp" + +#include <opencv2/imgproc.hpp> +#include <opencv2/core.hpp> + +void calculate_ii(const cv::Mat &in, cv::Mat &out) { + out.create(in.size(), CV_32SC1); + if (in.type() != CV_8UC3) { throw std::exception(); } + + for (int y = 0; y < in.rows; y++) { + int sum = 0; + for (int x = 0; x < in.cols; x++) { + auto rgb = in.at<cv::Vec3b>(y,x); + sum += (rgb[0] + rgb[1] + rgb[2]); + if (y == 0) { + out.at<int32_t>(y,x) = sum; + } + else { + out.at<int32_t>(y,x) = sum + out.at<int32_t>(y-1,x); + } + } + } +} + +void SADMatchingCost::set(cv::InputArray l, cv::InputArray r) { + cv::Mat iil; + cv::Mat iir; + cv::Mat iml; + cv::Mat imr; + + if (l.isGpuMat() && r.isGpuMat()) { + l.getGpuMat().download(iml); + r.getGpuMat().download(imr); + } + else if (l.isMat() && r.isMat()) { + iml = l.getMat(); + imr = r.getMat(); + } + calculate_ii(iml, iil); + calculate_ii(imr, iir); + + l_.create(l.cols(), l.rows()); + r_.create(r.cols(), r.rows()); + + #if USE_GPU + l_.toGpuMat().upload(iil); + r_.toGpuMat().upload(iir); + #else + iil.copyTo(l_.toMat()); + iir.copyTo(r_.toMat()); + #endif + + data().l = l_.data(); + data().r = r_.data(); +} diff --git a/lib/libstereo/src/costs/sad.hpp b/lib/libstereo/src/costs/sad.hpp new file mode 100644 index 0000000000000000000000000000000000000000..ace645a7ce3e456950885a1d808ac1882ca4a36d --- /dev/null +++ b/lib/libstereo/src/costs/sad.hpp @@ -0,0 +1,69 @@ +/** + * Sum of absolute differences matching cost implemented using integral images. 
+ */ + +#ifndef _FTL_LIBSTEREO_COSTS_SAD_HPP_ +#define _FTL_LIBSTEREO_COSTS_SAD_HPP_ + +#include <opencv2/core/core.hpp> +#include "array2d.hpp" +#include "dsbase.hpp" + +#include <cuda_runtime.h> + +namespace impl { + struct SADMatchingCost : DSImplBase<float> { + typedef float Type; + + SADMatchingCost(ushort w, ushort h, ushort dmin, ushort dmax) : DSImplBase<float>({w,h,dmin,dmax}), ww(0), wh(0) {} + + __host__ __device__ inline float operator()(const int y, const int x, const int d) const { + if ((x-d-ww-1) < 0) { return COST_MAX; } + if ((x+ww) >= width) { return COST_MAX; } + if ((y-wh-1) < 0) { return COST_MAX; } + if ((y+wh) >= height) { return COST_MAX; } + + int wl = l(y+wh,x+ww) + l(y-wh-1,x-ww-1) + - l(y+wh, x-ww-1) - l(y-wh-1, x+ww); + + int wr = r(y+wh,x-d+ww) + r(y-wh-1,x-d-ww-1) + - r(y+wh, x-d-ww-1) - r(y-wh-1, x-d+ww); + + return abs(wl - wr)/float((2*ww+1)*(2*wh+1)); + } + + static constexpr Type COST_MAX = 255*3; + + Array2D<int32_t>::Data l; + Array2D<int32_t>::Data r; + uchar ww; + uchar wh; + }; +} + +class SADMatchingCost : public DSBase<impl::SADMatchingCost> { +public: + typedef impl::SADMatchingCost DataType; + typedef float Type; + + SADMatchingCost() : DSBase<DataType>(0, 0, 0, 0) {}; + SADMatchingCost(int width, int height, int disp_min, int disp_max) + : DSBase<DataType>(width, height, disp_min, disp_max) { + setWindow(0, 0); + } + + void set(cv::InputArray l, cv::InputArray r); + + void setWindow(int w, int h) { + data().ww = w/2; + data().wh = h/2; + } + + static constexpr Type COST_MAX = DataType::COST_MAX; + +protected: + Array2D<int32_t> l_; + Array2D<int32_t> r_; +}; + +#endif diff --git a/lib/libstereo/src/costs/scale.hpp b/lib/libstereo/src/costs/scale.hpp new file mode 100644 index 0000000000000000000000000000000000000000..8ae4c44940ccd042ec9e228ec297e881c93f12d9 --- /dev/null +++ b/lib/libstereo/src/costs/scale.hpp @@ -0,0 +1,157 @@ +#ifndef _FTL_LIBSTEREO_COSTS_EXP_HPP_ +#define _FTL_LIBSTEREO_COSTS_EXP_HPP_ + +#include "array2d.hpp" +#include "dsbase.hpp" + +#include <cuda_runtime.h> + +namespace impl { + template <typename A> + struct ExpCost : DSImplBase<float> { + typedef float Type; + + ExpCost(ushort w, ushort h, ushort dmin, ushort dmax) : DSImplBase<float>({w,h,dmin,dmax}), + cost_a(w,h,dmin,dmax) {} + + __host__ __device__ inline float operator()(const int y, const int x, const int d) const { + return 1.0f-expf(-cost_a(y,x,d)*l); + } + + A cost_a; + float l=1.0; + static constexpr Type COST_MAX = A::COST_MAX; + }; + + template <typename A> + struct LinearCost : DSImplBase<float> { + typedef float Type; + + LinearCost(ushort w, ushort h, ushort dmin, ushort dmax) : DSImplBase<float>({w,h,dmin,dmax}), + cost_a(w,h,dmin,dmax) {} + + __host__ __device__ inline float operator()(const int y, const int x, const int d) const { + return cost_a(y,x,d)*s*a; + } + + A cost_a; + float a=1.0; + float s=1.0; + + static constexpr Type COST_MAX = A::COST_MAX; + }; + + template <typename A, typename T> + struct WeightedCost : DSImplBase<T> { + typedef T Type; + + WeightedCost(ushort w, ushort h, ushort dmin, ushort dmax) : DSImplBase<Type>({w,h,dmin,dmax}), + cost(w,h,dmin,dmax) {} + + __host__ __device__ inline Type operator()(const int y, const int x, const int d) const { + const float w = max(weights_l(y,x), weights_r(y,x)); + return round(cost(y,x,d)*w); + } + + A cost; + Array2D<float>::Data weights_l; + Array2D<float>::Data weights_r; + static constexpr Type COST_MAX = A::COST_MAX; + }; +} + +template <typename A> +class ExpCost : public 
DSBase<impl::ExpCost<typename A::DataType>> { +public: + typedef impl::ExpCost<typename A::DataType> DataType; + typedef float Type; + + ExpCost(int width, int height, int disp_min, int disp_max, A &a, float l) + : DSBase<DataType>(width, height, disp_min, disp_max), + cost_a(a) {}; + + void set() { + this->data().cost_a = cost_a.data(); + } + + void set(float l) { + this->data().cost_a = cost_a.data(); + this->data().l = l; + } + + static constexpr float COST_MAX = A::COST_MAX; + +protected: + A &cost_a; +}; + +template <typename A> +class LinearCost : public DSBase<impl::LinearCost<typename A::DataType>> { +public: + typedef impl::LinearCost<typename A::DataType> DataType; + typedef float Type; + + LinearCost(int width, int height, int disp_min, int disp_max, A &a, float l) + : DSBase<DataType>(width, height, disp_min, disp_max), + cost_a(a) { + + this->data().s = l; + this->data().a = 1.0f/COST_MAX; + } + + void set() { + this->data().cost_a = cost_a.data(); + } + + void set(float s) { + this->data().cost_a = cost_a.data(); + this->data().s = s; + } + + static const typename A::Type COST_MAX = A::COST_MAX; + +protected: + A &cost_a; +}; + + +template <typename A, typename T=unsigned short> +class WeightedCost : public DSBase<impl::WeightedCost<typename A::DataType, T>> { +public: + typedef impl::WeightedCost<typename A::DataType, T> DataType; + typedef T Type; + + WeightedCost(int width, int height, int disp_min, int disp_max, A &a, Array2D<float> &wl, Array2D<float> &wr) + : DSBase<DataType>(width, height, disp_min, disp_max), + cost(&a), weights_l(&wl), weights_r(&wr) { + + } + + WeightedCost(int width, int height, int disp_min, int disp_max) + : DSBase<DataType>(width, height, disp_min, disp_max), + cost(nullptr), weights_l(nullptr), weights_r(nullptr) { + + } + + void setCost(A &c) { cost = &c; } + + void setWeights(Array2D<float> &wl, Array2D<float> &wr) { + weights_l = &wl; + weights_r = &wr; + } + + void set() { + this->data().cost = cost->data(); + this->data().weights_l = weights_l->data(); + this->data().weights_r = weights_r->data(); + } + + static const T COST_MAX = A::COST_MAX; + +protected: + Array2D<float> *weights_l; + Array2D<float> *weights_r; + A *cost; +}; + +#endif diff --git a/lib/libstereo/src/costs/stable.cu b/lib/libstereo/src/costs/stable.cu new file mode 100644 index 0000000000000000000000000000000000000000..25111afb3b2004b555925de6fecb0ecda0cf517c --- /dev/null +++ b/lib/libstereo/src/costs/stable.cu @@ -0,0 +1,130 @@ +#include "stable.hpp" +#include "census.hpp" +#include "../util.hpp" +#include "../util_opencv.hpp" + +#include <opencv2/imgproc.hpp> +#include <opencv2/core.hpp> + +#include <random> +#include <chrono> + +#include <cuda_runtime.h> + +namespace algorithms { + template<int NBITS> + struct Stable { + __host__ __device__ inline void window(const int y, const int x, uint64_t* __restrict__ out) { + int32_t accumulator[NBITS] = {0}; + uint16_t i = 0; + for (int wy = -WINY/2; wy <= WINY/2; wy++) { + for (int wx = -WINX/2; wx <= WINX/2; wx++) { + const int16_t value = im(min(height,max(0,int(float(y)*scaleY) + wy)), min(width,max(0,int(float(x)*scaleX) + wx))); + const int16_t filter = filter_mask(0, i++); + const int16_t sign = filter > 0 ? 1 : -1; + // NOTE: indexing starts from 1 + const int16_t index = int(abs(filter)) - 1; + accumulator[index] += sign*value; + } + } + + for (i = 0; i < NBITS;) { + // zero if first value, otherwise shift to left + if (i % 64 == 0) { *out = 0; } + else { *out = (*out << 1); } + *out |= ((accumulator[i] > 0) ?
1 : 0); + + i += 1; + // if all bits set, continue to next element + if (i % 64 == 0) { out++; } + } + } + + __host__ __device__ void operator()(ushort2 thread, ushort2 stride, ushort2 size) { + for (int y = thread.y+WINY/2; y<size.y-WINY/2-1; y+=stride.y) { + for (int x = thread.x+WINX/2; x<size.x-WINX/2-1; x+=stride.x) { + window(y, x, &(out(y, x*WSTEP))); + } + } + } + + const Array2D<uchar>::Data im; + const Array2D<int16_t>::Data filter_mask; + Array2D<uint64_t>::Data out; + + const int WINX; + const int WINY; + float scaleX; + float scaleY; + const int width; + const int height; + + // number of uint64_t values for each window + const int WSTEP = (NBITS - 1)/(sizeof(uint64_t)*8) + 1; + }; + +} + +#include <iostream> +void StableMatchingCost::generateFilterMask(const int wsize, const int bits) { + if (bits > 127) { + // TODO: hardcoded in HammingCost template parameters + throw std::exception(); + } + cv::Mat mask(cv::Size(wsize*wsize, 1), CV_16SC1, cv::Scalar(0)); + if (!mask.isContinuous()) { throw std::exception(); } + + unsigned seed = std::chrono::system_clock::now().time_since_epoch().count(); + std::default_random_engine generator(seed); + std::uniform_int_distribution<int16_t> distribution(1, bits*2); + + for (int i = 0; i < mask.total(); i++) { + // index from 1 to bits (instead of 0 to bits-1) value truncated if + // outside window + int16_t val = distribution(generator); + if (val <= bits) { + mask.at<int16_t>(i) = ((i + val) > mask.total()) ? bits - 1 : val; + } + else { + val = -(val - bits); + mask.at<int16_t>(i) = ((i + val) < 0) ? 0 : val; + } + } + + wsize_ = wsize; + filter_mask_.create(wsize*wsize, 1); + filter_mask_.toGpuMat().upload(mask); +} + +void StableMatchingCost::set(const Array2D<uchar> &l, const Array2D<uchar> &r) { + parallel2D<algorithms::Stable<16>>({l.data(), filter_mask_.data(), stable_l_.data(), wsize_, wsize_, 1.0f, 1.0f, l.width, l.height}, l.width, l.height); + parallel2D<algorithms::Stable<16>>({r.data(), filter_mask_.data(), stable_r_.data(), wsize_, wsize_, 1.0f, 1.0f, r.width, r.height}, r.width, r.height); +} + +void StableMatchingCost::set(const Array2D<uchar> &l, const Array2D<uchar> &r, size_t w, size_t h) { + float scaleX = float(l.width) / float(w); + float scaleY = float(l.height) / float(h); + parallel2D<algorithms::Stable<16>>({l.data(), filter_mask_.data(), stable_l_.data(), wsize_, wsize_, scaleX, scaleY, l.width, l.height}, w, h); + parallel2D<algorithms::Stable<16>>({r.data(), filter_mask_.data(), stable_r_.data(), wsize_, wsize_, scaleX, scaleY, r.width, r.height}, w, h); +} + +void StableMatchingCost::set(cv::InputArray l, cv::InputArray r) { + if (l.type() != CV_8UC1 || r.type() != CV_8UC1) { throw std::exception(); } + if (l.rows() != r.rows() || l.cols() != r.cols() || l.rows() != height() || l.cols() != width()) { + throw std::exception(); + } + + if (l.isGpuMat() && r.isGpuMat()) { + auto ml = l.getGpuMat(); + auto mr = r.getGpuMat(); + set(Array2D<uchar>(ml), Array2D<uchar>(mr)); + } + else if (l.isMat() && r.isMat()) { + auto ml = l.getMat(); + auto mr = r.getMat(); + set(Array2D<uchar>(ml), Array2D<uchar>(mr)); + } + else { + throw std::exception(); + } +} diff --git a/lib/libstereo/src/costs/stable.hpp b/lib/libstereo/src/costs/stable.hpp new file mode 100644 index 0000000000000000000000000000000000000000..7da102753505e668ec2e6dccd08af15cf92d8c24 --- /dev/null +++ b/lib/libstereo/src/costs/stable.hpp @@ -0,0 +1,41 @@ +#ifndef _FTL_LIBSTEREO_COSTS_STABLE_HPP_ +#define _FTL_LIBSTEREO_COSTS_STABLE_HPP_ + +#include 
"census.hpp" + +/** + * STABLE descriptor matching cost + */ +class StableMatchingCost : public DSBase<impl::HammingCost<16>> { +public: + typedef impl::HammingCost<16> DataType; + typedef DataType::Type Type; + + StableMatchingCost() : DSBase<DataType>(0, 0, 0, 0) {}; + StableMatchingCost(int width, int height, int disp_min, int disp_max) + : DSBase<DataType>(width, height, disp_min, disp_max), + wsize_(0), filter_mask_(0, 0), + stable_l_(width*data().WSTEP, height), + stable_r_(width*data().WSTEP, height) + { + data().l = stable_l_.data(); + data().r = stable_r_.data(); + } + + void generateFilterMask(const int wsize, const int bits); + void set(cv::InputArray l, cv::InputArray r); + void set(const Array2D<uchar>& l, const Array2D<uchar>& r); + void set(const Array2D<uchar>& l, const Array2D<uchar>& r, size_t w, size_t h); + static constexpr Type COST_MAX = DataType::COST_MAX; + + Array2D<int16_t> &getFilter() { return filter_mask_; } + void setFilter(const Array2D<int16_t> &f) { filter_mask_ = f; } + +protected: + int wsize_; + Array2D<int16_t> filter_mask_; + Array2D<uint64_t> stable_l_; + Array2D<uint64_t> stable_r_; +}; + +#endif diff --git a/lib/libstereo/src/costs/tcensus.cu b/lib/libstereo/src/costs/tcensus.cu new file mode 100644 index 0000000000000000000000000000000000000000..450ff8047e6f6e8e5b177249992c738d8f8fc25a --- /dev/null +++ b/lib/libstereo/src/costs/tcensus.cu @@ -0,0 +1,84 @@ +#include "tcensus.hpp" +#include "../util.hpp" +#include "../util_opencv.hpp" + +#include <opencv2/imgproc.hpp> +#include <opencv2/core.hpp> + +#include <cuda_runtime.h> + +namespace algorithms { + template<int WINX, int WINY> + struct TCensusTransform { + + __host__ __device__ inline void window(const int y, const int x, uint64_t* __restrict__ out) { + + static_assert(BPP == 2, "2 bits per pixel expected"); + short center = im(y, x); + uint8_t i = 0; // bit counter for *out + + for (int wy = -WINY/2; wy <= WINY/2; wy++) { + for (int wx = -WINX/2; wx <= WINX/2; wx++) { + const int y_ = y + wy; + const int x_ = x + wx; + + // If fist value, set zero. Otherwise shift left by BPP + if (i % 64 == 0) { *out = 0; } + else { *out = (*out << BPP); } + + if (center+t > im(y_,x_)) { *out |= 1; /* 01 */ } + else if (center-t < im(y_,x_)) { *out |= 2; /* 10 */ } + else { /* 00 */ } + + i += BPP; + // if all bits set, continue to next element + if (i % 64 == 0) { out++; } + } + } + } + + __host__ __device__ void operator()(ushort2 thread, ushort2 stride, ushort2 size) { + for (int y = thread.y+WINY/2; y<size.y-WINY/2-1; y+=stride.y) { + for (int x = thread.x+WINX/2; x<size.x-WINX/2-1; x+=stride.x) { + window(y, x, &(out(y, x*WSTEP))); + } + } + } + + Array2D<uchar>::Data im; + Array2D<uint64_t>::Data out; + short t; // intensity threshold + + // Bits per pixel (for each census feature). Must be 1 or power of 2 for + // window() to be correct (for tri-census)! 
+ static constexpr int BPP = 2; + // number of uint64_t values for each window + static constexpr int WSTEP = (BPP*WINX*WINY-1)/(sizeof(uint64_t)*8) + 1; + }; +} + +void TCensusMatchingCost::set(const Array2D<uchar> &l, const Array2D<uchar> &r) { + parallel2D<algorithms::TCensusTransform<9,7>>({l.data(), ct_l_.data(), t_}, l.width, l.height); + parallel2D<algorithms::TCensusTransform<9,7>>({r.data(), ct_r_.data(), t_}, r.width, r.height); +} + +void TCensusMatchingCost::set(cv::InputArray l, cv::InputArray r) { + if (l.type() != CV_8UC1 || r.type() != CV_8UC1) { throw std::exception(); } + if (l.rows() != r.rows() || l.cols() != r.cols() || l.rows() != height() || l.cols() != width()) { + throw std::exception(); + } + + if (l.isGpuMat() && r.isGpuMat()) { + auto ml = l.getGpuMat(); + auto mr = r.getGpuMat(); + set(Array2D<uchar>(ml), Array2D<uchar>(mr)); + } + else if (l.isMat() && r.isMat()) { + auto ml = l.getMat(); + auto mr = r.getMat(); + set(Array2D<uchar>(ml), Array2D<uchar>(mr)); + } + else { + throw std::exception(); + } +} diff --git a/lib/libstereo/src/costs/tcensus.hpp b/lib/libstereo/src/costs/tcensus.hpp new file mode 100644 index 0000000000000000000000000000000000000000..74000c06b86adcad616199c8f300efc5048a9b8b --- /dev/null +++ b/lib/libstereo/src/costs/tcensus.hpp @@ -0,0 +1,37 @@ +#ifndef _FTL_LIBSTEREO_COSTS_TCENSUS_HPP_ +#define _FTL_LIBSTEREO_COSTS_TCENSUS_HPP_ + +#include <opencv2/core/core.hpp> +#include "array2d.hpp" +#include "dsbase.hpp" +#include "census.hpp" + +#include <cuda_runtime.h> + +class TCensusMatchingCost : public DSBase<impl::CensusMatchingCost<9,7,2>> { +public: + typedef impl::CensusMatchingCost<9,7,2> DataType; + typedef unsigned short Type; + + TCensusMatchingCost() : DSBase<DataType>(0, 0, 0, 0) {}; + TCensusMatchingCost(int width, int height, int disp_min, int disp_max) + : DSBase<DataType>(width, height, disp_min, disp_max), + ct_l_(width*data().WSTEP, height), ct_r_(width*data().WSTEP,height), t_(0) + { + data().l = ct_l_.data(); + data().r = ct_r_.data(); + } + + void set(cv::InputArray l, cv::InputArray r); + void set(const Array2D<uchar>& l, const Array2D<uchar>& r); + void setT(uchar t) { t_ = t; } + + static constexpr Type COST_MAX = DataType::COST_MAX; + +protected: + Array2D<uint64_t> ct_l_; + Array2D<uint64_t> ct_r_; + uchar t_; +}; + +#endif diff --git a/lib/libstereo/src/dsi_tools.cu b/lib/libstereo/src/dsi_tools.cu new file mode 100644 index 0000000000000000000000000000000000000000..19c9388fb1425c606c1593a9efafee85ebd19e06 --- /dev/null +++ b/lib/libstereo/src/dsi_tools.cu @@ -0,0 +1,32 @@ +#include <opencv2/core/cuda/common.hpp> +#include "dsi_tools.hpp" +#include <opencv2/highgui.hpp> +#include <opencv2/imgproc.hpp> + +static void colorize(const cv::Mat &disp, cv::Mat &out, int ndisp=-1) { + double dmin, dmax; + cv::minMaxLoc(disp, &dmin, &dmax); + cv::Mat dispf, dispc; + disp.convertTo(dispf, CV_32FC1); + dispf.convertTo(dispc, CV_8UC1, (1.0f / (ndisp > 0 ? 
(float) ndisp : dmax)) * 255.0f); + + #if OPENCV_VERSION >= 40102 + cv::applyColorMap(dispc, out, cv::COLORMAP_TURBO); + #else + cv::applyColorMap(dispc, out, cv::COLORMAP_INFERNO); + #endif +} + +void show_dsi_slice(cv::InputArray in) { + cv::Mat dsitmp; + + if (in.isGpuMat()) { + in.getGpuMat().download(dsitmp); + } else { + dsitmp = in.getMat(); + } + float scale = 1280.0f / float(dsitmp.cols); + cv::resize(dsitmp, dsitmp, cv::Size(dsitmp.cols*scale, dsitmp.rows*scale)); + colorize(dsitmp,dsitmp); + cv::imshow("DSI Slice", dsitmp); +} diff --git a/lib/libstereo/src/dsi_tools.hpp b/lib/libstereo/src/dsi_tools.hpp new file mode 100644 index 0000000000000000000000000000000000000000..be27e02c43d3aabcc0f3fbe5435ccf4f9eb91d74 --- /dev/null +++ b/lib/libstereo/src/dsi_tools.hpp @@ -0,0 +1,39 @@ +#ifndef _FTL_LIBSTEREO_DSI_TOOLS_HPP_ +#define _FTL_LIBSTEREO_DSI_TOOLS_HPP_ + +#include "util.hpp" +#include "array2d.hpp" + +namespace algorithms { + template <typename DSI> + struct SliceDisparity { + typename DSI::DataType in; + typename Array2D<float>::Data disparity; + typename Array2D<typename DSI::Type>::Data out; + + __cuda__ void operator()(ushort2 thread, ushort2 stride, ushort2 size) { + for (int y=thread.y; y<size.y; y+=stride.y) { + for (int x=thread.x; x<size.x; x+=stride.x) { + auto d = disparity(y,x); + out(y,x) = (d >= in.disp_min && d <= in.disp_max) ? in(y,x,d) : 255; + } + } + } + }; +} + +template <typename DSI> +void dsi_slice(const DSI &in, int d, Array2D<typename DSI::Type> &out) { + +} + +/* Extract cost image from DSI using the disparity map as index. */ +template <typename DSI> +void dsi_slice(const DSI &in, Array2D<float> &disp, Array2D<typename DSI::Type> &out) { + algorithms::SliceDisparity<DSI> sd = {in.data(), disp.data(), out.data()}; + parallel2D(sd, in.width(), in.height()); +} + +void show_dsi_slice(cv::InputArray in); + +#endif \ No newline at end of file diff --git a/lib/libstereo/src/stereo_census_adaptive.cu b/lib/libstereo/src/stereo_census_adaptive.cu index d0c6f105c4578d17275a90503b47b2477063bbd1..79db2fe9299c2c67375c1557b74cf3fd03f63319 100644 --- a/lib/libstereo/src/stereo_census_adaptive.cu +++ b/lib/libstereo/src/stereo_census_adaptive.cu @@ -57,8 +57,8 @@ struct StereoCensusAdaptive::Impl { Array2D<unsigned short> uncertainty; Array2D<float> disparity_r; Array2D<uchar> l; - Array2D<uchar> r; - Array2D<uchar> penalty; + Array2D<uchar> r; + Array2D<unsigned short> penalty; PathAggregator<AdaptivePenaltySGM<CensusMatchingCost::DataType>> aggr; WinnerTakesAll<DSImage16U,float> wta; @@ -67,8 +67,8 @@ struct StereoCensusAdaptive::Impl { cost(width, height, min_disp, max_disp), cost_min_paths(width, height), uncertainty(width, height), - disparity_r(width, height), l(width, height), r(width, height), - penalty(width, height) {} + disparity_r(width, height), l(width, height), r(width, height), + penalty(width, height) {} }; @@ -92,17 +92,17 @@ void StereoCensusAdaptive::compute(cv::InputArray l, cv::InputArray r, cv::Outpu impl_->cost.set(impl_->l, impl_->r); cudaSafeCall(cudaDeviceSynchronize()); - if (params.debug) { timer_print("census transform"); } - - impl_->penalty.toGpuMat().setTo(params.P1*4); - auto canny = cv::cuda::createCannyEdgeDetector(30,120); - cv::cuda::GpuMat edges; - canny->detect(impl_->l.toGpuMat(), edges); - impl_->penalty.toGpuMat().setTo(int(float(params.P1)*1.5f), edges); - - cv::Mat penalties; - impl_->penalty.toGpuMat().download(penalties); - cv::imshow("Penalties", penalties); + if (params.debug) { timer_print("census transform"); } 
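	// Build a per-pixel penalty map for the adaptive SGM step: start from a
	// uniform penalty of 4*P1, then lower it to 1.5*P1 on edges of the left
	// image found by Canny, so smoothness is relaxed across likely depth
	// discontinuities. The download and imshow only visualise the map.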
+ + impl_->penalty.toGpuMat().setTo(params.P1*4); + auto canny = cv::cuda::createCannyEdgeDetector(30,120); + cv::cuda::GpuMat edges; + canny->detect(impl_->l.toGpuMat(), edges); + impl_->penalty.toGpuMat().setTo(int(float(params.P1)*1.5f), edges); + + cv::Mat penalties; + impl_->penalty.toGpuMat().download(penalties); + cv::imshow("Penalties", penalties); // cost aggregation AdaptivePenaltySGM<CensusMatchingCost::DataType> func = {impl_->cost.data(), impl_->cost_min_paths.data(), params.P1}; diff --git a/lib/libstereo/src/stereo_cp_censussgm.cu b/lib/libstereo/src/stereo_cp_censussgm.cu new file mode 100644 index 0000000000000000000000000000000000000000..bf63e6b3802c2d0997a642036eb6abc37ea68a8c --- /dev/null +++ b/lib/libstereo/src/stereo_cp_censussgm.cu @@ -0,0 +1,149 @@ +#include <opencv2/core.hpp> +#include <opencv2/imgproc.hpp> + +#include <opencv2/core/cuda/common.hpp> +#include <opencv2/cudaarithm.hpp> +#include <opencv2/highgui.hpp> + +#include "stereo.hpp" + +#include "util_opencv.hpp" +#include "costs/census.hpp" +#include "dsi.hpp" + +#include "wta.hpp" +#include "cost_aggregation.hpp" +#include "aggregations/cp_sgm.hpp" + +#include "median_filter.hpp" +#include "dsi_tools.hpp" + +#ifdef __GNUG__ + +#include <chrono> +#include <iostream> + +static std::chrono::time_point<std::chrono::system_clock> start; + +static void timer_set() { + start = std::chrono::high_resolution_clock::now(); +} + +static void timer_print(const std::string &msg, const bool reset=true) { + auto stop = std::chrono::high_resolution_clock::now(); + + char buf[24]; + snprintf(buf, sizeof(buf), "%5i ms ", + (int) std::chrono::duration_cast<std::chrono::milliseconds>(stop-start).count()); + + std::cout << buf << msg << "\n" << std::flush; + if (reset) { timer_set(); } +} + +#else + +static void timer_set() {} +static void timer_print(const std::string &msg, const bool reset=true) {} + +#endif + +using cv::Mat; +using cv::Size; +using ftl::stereo::aggregations::ConfidencePriorSGM; + +typedef CensusMatchingCost MatchingCost; + +struct StereoCPCensusSgm::Impl { + MatchingCost cost; + Array2D<MatchingCost::Type> cost_min_paths; + Array2D<MatchingCost::Type> uncertainty; + Array2D<uchar> l; + Array2D<uchar> r; + Array2D<uchar> conf_prior; + Array2D<float> disparity; + + PathAggregator<ConfidencePriorSGM<MatchingCost::DataType>> aggr; + WinnerTakesAll<DisparitySpaceImage<MatchingCost::Type>,float> wta; + + Impl(int width, int height, int min_disp, int max_disp) : + cost(width, height, min_disp, max_disp), + cost_min_paths(width, height), + uncertainty(width, height), + l(width, height), r(width, height), + conf_prior(width, height), + disparity(width, height) + {} + +}; + +StereoCPCensusSgm::StereoCPCensusSgm() : impl_(nullptr) { + impl_ = new Impl(0, 0, 0, 0); +} + +void StereoCPCensusSgm::compute(cv::InputArray l, cv::InputArray r, cv::OutputArray disparity) { + cudaSetDevice(0); + + if (l.rows() != impl_->cost.height() || r.cols() != impl_->cost.width()) { + delete impl_; impl_ = nullptr; + impl_ = new Impl(l.cols(), l.rows(), params.d_min, params.d_max); + impl_->conf_prior.toGpuMat().setTo(0); + } + + mat2gray(l, impl_->l); + mat2gray(r, impl_->r); + timer_set(); + + // CT + impl_->cost.set(impl_->l, impl_->r); + + cudaSafeCall(cudaDeviceSynchronize()); + if (params.debug) { timer_print("census transform"); } + + // cost aggregation + ConfidencePriorSGM<MatchingCost::DataType> func = {impl_->cost.data(), impl_->cost_min_paths.data(), impl_->conf_prior.data(), params.P1, params.P2}; + auto &out = 
impl_->aggr(func, params.paths); + + cudaSafeCall(cudaDeviceSynchronize()); + if (params.debug) { timer_print("Aggregation"); } + + impl_->wta(out, params.subpixel, params.lr_consistency); + cudaSafeCall(cudaDeviceSynchronize()); + if (params.debug) { timer_print("WTA"); } + + // Drory, A., Haubold, C., Avidan, S., & Hamprecht, F. A. (2014). + // Semi-global matching: A principled derivation in terms of + // message passing. Lecture Notes in Computer Science (Including Subseries + // Lecture Notes in Artificial Intelligence and Lecture Notes in + // Bioinformatics). https://doi.org/10.1007/978-3-319-11752-2_4 + + Array2D<MatchingCost::Type> dsitmp_dev(l.cols(), l.rows()); + + #if USE_GPU + auto uncertainty = impl_->uncertainty.toGpuMat(); + cv::cuda::subtract(impl_->wta.min_cost.toGpuMat(), impl_->cost_min_paths.toGpuMat(), uncertainty); + cv::cuda::compare(uncertainty, params.uniqueness, uncertainty, cv::CMP_GT); + impl_->wta.disparity.toGpuMat().setTo(0, uncertainty); + dsi_slice(out, impl_->wta.disparity, dsitmp_dev); + cv::cuda::compare(dsitmp_dev.toGpuMat(), float(params.P2)+2.0f*(float(MatchingCost::COST_MAX)*0.5f), uncertainty, cv::CMP_GT); + if (params.filtercosts) impl_->wta.disparity.toGpuMat().setTo(0, uncertainty); + #else + auto uncertainty = impl_->uncertainty.toMat(); + cv::subtract(impl_->wta.min_cost.toMat(), impl_->cost_min_paths.toMat(), uncertainty); + cv::compare(uncertainty, params.uniqueness, uncertainty, cv::CMP_GT); + impl_->wta.disparity.toMat().setTo(0, uncertainty); + #endif + + median_filter(impl_->wta.disparity, impl_->disparity.toGpuMat()); + impl_->wta.disparity.toGpuMat().convertTo(impl_->conf_prior.toGpuMat(), CV_8UC1); + impl_->disparity.toGpuMat().download(disparity); + if (params.debug) { timer_print("median filter"); } + + show_dsi_slice(dsitmp_dev.toGpuMat()); +} + +StereoCPCensusSgm::~StereoCPCensusSgm() { + if (impl_) { + delete impl_; + impl_ = nullptr; + } +} diff --git a/lib/libstereo/src/stereo_sad.cu b/lib/libstereo/src/stereo_sad.cu new file mode 100644 index 0000000000000000000000000000000000000000..ef390db8f6048d9e0b1f07efdad250f2931091b5 --- /dev/null +++ b/lib/libstereo/src/stereo_sad.cu @@ -0,0 +1,93 @@ +#include <opencv2/core.hpp> +#include <opencv2/imgproc.hpp> + +#include <opencv2/core/cuda/common.hpp> +#include <opencv2/cudaarithm.hpp> + +#include "stereo.hpp" + +#include "util_opencv.hpp" +#include "costs/sad.hpp" +#include "dsi.hpp" + +#include "wta.hpp" + +#include "median_filter.hpp" + +#ifdef __GNUG__ + +#include <chrono> +#include <iostream> + +static std::chrono::time_point<std::chrono::system_clock> start; + +static void timer_set() { + start = std::chrono::high_resolution_clock::now(); +} + +static void timer_print(const std::string &msg, const bool reset=true) { + auto stop = std::chrono::high_resolution_clock::now(); + + char buf[24]; + snprintf(buf, sizeof(buf), "%5i ms ", + (int) std::chrono::duration_cast<std::chrono::milliseconds>(stop-start).count()); + + std::cout << buf << msg << "\n" << std::flush; + if (reset) { timer_set(); } +} + +#else + +static void timer_set() {} +static void timer_print(const std::string &msg, const bool reset=true) {} + +#endif + +using cv::Mat; +using cv::Size; + +struct StereoSad::Impl { + SADMatchingCost cost; + Array2D<float> disparity_r; + + WinnerTakesAll<SADMatchingCost, float> wta; + + Impl(int width, int height, int min_disp, int max_disp) : + cost(width, height, min_disp, max_disp), + disparity_r(width, height) {} + +}; + +StereoSad::StereoSad() : impl_(nullptr) { + impl_ = new 
Impl(0, 0, 0, 0); +} + +void StereoSad::compute(cv::InputArray l, cv::InputArray r, cv::OutputArray disparity) { + cudaSetDevice(0); + + if (l.rows() != impl_->cost.height() || r.cols() != impl_->cost.width()) { + delete impl_; impl_ = nullptr; + impl_ = new Impl(l.cols(), l.rows(), params.d_min, params.d_max); + } + + timer_set(); + + impl_->cost.setWindow(params.wsize, params.wsize); + impl_->cost.set(l, r); + + cudaSafeCall(cudaDeviceSynchronize()); + + impl_->wta(impl_->cost, params.subpixel); + cudaSafeCall(cudaDeviceSynchronize()); + if (params.debug) { timer_print("WTA"); } + + median_filter(impl_->wta.disparity, disparity); + if (params.debug) { timer_print("median filter"); } +} + +StereoSad::~StereoSad() { + if (impl_) { + delete impl_; + impl_ = nullptr; + } +} diff --git a/lib/libstereo/src/stereo_varcensussgm.cu b/lib/libstereo/src/stereo_varcensussgm.cu new file mode 100644 index 0000000000000000000000000000000000000000..0bea1b7a0c4dc7f08f81c0927d71f4c6c20d6f7f --- /dev/null +++ b/lib/libstereo/src/stereo_varcensussgm.cu @@ -0,0 +1,179 @@ +#include <opencv2/core.hpp> +#include <opencv2/imgproc.hpp> +#include <opencv2/core/cuda/common.hpp> +#include <opencv2/cudaarithm.hpp> +#include <opencv2/cudafilters.hpp> + +#include "stereo.hpp" + +#include "util_opencv.hpp" +#include "dsi.hpp" + +#include "wta.hpp" +#include "cost_aggregation.hpp" +#include "aggregations/standard_sgm.hpp" +#include "aggregations/adaptive_penalty.hpp" +#include "median_filter.hpp" +#include "dsi_tools.hpp" + +#include "costs/census.hpp" +#include "costs/scale.hpp" +#include <chrono> +#include <iostream> + +static void timer_set() {} +static void timer_print(const std::string &msg, const bool reset=true) {} + +using cv::Mat; +using cv::Size; +using ftl::stereo::aggregations::AdaptivePenaltySGM; + +static void variance_mask(cv::InputArray in, cv::OutputArray out, int wsize=3) { + if (in.isGpuMat() && out.isGpuMat()) { + cv::cuda::GpuMat im; + cv::cuda::GpuMat im2; + cv::cuda::GpuMat mean; + cv::cuda::GpuMat mean2; + + mean.create(in.size(), CV_32FC1); + mean2.create(in.size(), CV_32FC1); + im2.create(in.size(), CV_32FC1); + + if (in.type() != CV_32FC1) { + in.getGpuMat().convertTo(im, CV_32FC1); + } + else { + im = in.getGpuMat(); + } + + cv::cuda::multiply(im, im, im2); + auto filter = cv::cuda::createBoxFilter(CV_32FC1, CV_32FC1, cv::Size(wsize,wsize)); + filter->apply(im, mean); // E[X] + filter->apply(im2, mean2); // E[X^2] + cv::cuda::multiply(mean, mean, mean); // (E[X])^2 + + // NOTE: floating point accuracy in subtraction + // (cv::cuda::createBoxFilter only supports 8 bit integer types) + cv::cuda::subtract(mean2, mean, out.getGpuMatRef()); // E[X^2] - (E[X])^2 + } + else { throw std::exception(); /* todo CPU version */ } +} + +typedef unsigned short CostType; +typedef WeightedCost<CensusMatchingCost, CostType> MatchingCost; + +struct StereoVarCensus::Impl { + CensusMatchingCost census; + Array2D<float> variance; + Array2D<float> variance_r; + MatchingCost cost; + + Array2D<CostType> penalty; + Array2D<CostType> cost_min; + Array2D<CostType> cost_min_paths; + Array2D<CostType> uncertainty; + Array2D<float> confidence; + + Array2D<uchar> l; + Array2D<uchar> r; + + Mat prior; // used only to calculate MI + + PathAggregator<AdaptivePenaltySGM<MatchingCost::DataType>> aggr; + + WinnerTakesAll<DSImage16U,float> wta; + + Impl(int width, int height, int min_disp, int max_disp) : + census(width, height, min_disp, max_disp), + variance(width, height), + variance_r(width, height), + cost(width, height, 
min_disp, max_disp, census, variance, variance_r), + + penalty(width, height), + cost_min(width, height), + cost_min_paths(width, height), + uncertainty(width, height), + confidence(width, height), + l(width, height), r(width, height) {} +}; + +StereoVarCensus::StereoVarCensus() : impl_(nullptr) { + impl_ = new Impl(0, 0, 0, 0); +} + +void StereoVarCensus::compute(cv::InputArray l, cv::InputArray r, cv::OutputArray disparity) { + cudaSetDevice(0); + + if (l.rows() != impl_->cost.height() || r.cols() != impl_->cost.width()) { + delete impl_; impl_ = nullptr; + impl_ = new Impl(l.cols(), l.rows(), params.d_min, params.d_max); + } + + mat2gray(l, impl_->l); + mat2gray(r, impl_->r); + + timer_set(); + + cv::cuda::GpuMat var_l = impl_->variance.toGpuMat(); + variance_mask(impl_->l.toGpuMat(), var_l, params.var_window); + cv::cuda::GpuMat var_r = impl_->variance_r.toGpuMat(); + variance_mask(impl_->r.toGpuMat(), var_r, params.var_window); + + cv::cuda::normalize(var_l, var_l, params.alpha, params.beta, cv::NORM_MINMAX, -1); + cv::cuda::normalize(var_r, var_r, params.alpha, params.beta, cv::NORM_MINMAX, -1); + + impl_->census.set(impl_->l, impl_->r); + impl_->cost.set(); + + if (params.debug) { timer_print("Matching cost"); } + + cudaSafeCall(cudaDeviceSynchronize()); + + auto penalty = impl_->penalty.toGpuMat(); + penalty.setTo(params.P2); + + AdaptivePenaltySGM<MatchingCost::DataType> func = {impl_->cost.data(), impl_->cost_min_paths.data(), short(params.P1)}; + + impl_->aggr.getDirectionData(AggregationDirections::LEFTRIGHT).penalties = impl_->penalty; + impl_->aggr.getDirectionData(AggregationDirections::RIGHTLEFT).penalties = impl_->penalty; + impl_->aggr.getDirectionData(AggregationDirections::UPDOWN).penalties = impl_->penalty; + impl_->aggr.getDirectionData(AggregationDirections::DOWNUP).penalties = impl_->penalty; + impl_->aggr.getDirectionData(AggregationDirections::TOPLEFTBOTTOMRIGHT).penalties = impl_->penalty; + impl_->aggr.getDirectionData(AggregationDirections::BOTTOMRIGHTTOPLEFT).penalties = impl_->penalty; + impl_->aggr.getDirectionData(AggregationDirections::BOTTOMLEFTTOPRIGHT).penalties = impl_->penalty; + impl_->aggr.getDirectionData(AggregationDirections::TOPRIGHTBOTTOMLEFT).penalties = impl_->penalty; + auto &out = impl_->aggr(func, params.paths); + + cudaSafeCall(cudaDeviceSynchronize()); + if (params.debug) { timer_print("Aggregation"); } + + impl_->wta(out, params.subpixel, params.lr_consistency); + cudaSafeCall(cudaDeviceSynchronize()); + if (params.debug) { timer_print("WTA"); } + + #if USE_GPU + auto uncertainty = impl_->uncertainty.toGpuMat(); + cv::cuda::subtract(impl_->wta.min_cost.toGpuMat(), impl_->cost_min_paths.toGpuMat(), uncertainty); + cv::cuda::compare(uncertainty, params.uniqueness, uncertainty, cv::CMP_GT); + impl_->wta.disparity.toGpuMat().setTo(0, uncertainty); + #else + auto uncertainty = impl_->uncertainty.toMat(); + cv::subtract(impl_->wta.min_cost.toMat(), impl_->cost_min_paths.toMat(), uncertainty); + cv::compare(uncertainty, params.uniqueness, uncertainty, cv::CMP_GT); + impl_->wta.disparity.toMat().setTo(0, uncertainty); + #endif + + median_filter(impl_->wta.disparity, disparity); + if (params.debug) { timer_print("median filter"); } + + Array2D<MatchingCost::Type> dsitmp_dev(l.cols(), l.rows()); + dsi_slice(out, impl_->wta.disparity, dsitmp_dev); + show_dsi_slice(dsitmp_dev.toGpuMat()); +} + +StereoVarCensus::~StereoVarCensus() { + if (impl_) { + delete impl_; + impl_ = nullptr; + } +} diff --git a/lib/libstereo/src/stereo_misgm2.cu 
b/lib/libstereo/src/stereo_wadcensus.cu similarity index 61% rename from lib/libstereo/src/stereo_misgm2.cu rename to lib/libstereo/src/stereo_wadcensus.cu index f2fba6f81107bc1359af30a22ab0271954b83cee..4787894f9d960a8dbdebfa81af64736832736885 100644 --- a/lib/libstereo/src/stereo_misgm2.cu +++ b/lib/libstereo/src/stereo_wadcensus.cu @@ -7,7 +7,6 @@ #include "stereo.hpp" #include "util_opencv.hpp" -#include "costs/mutual_information.hpp" #include "dsi.hpp" #include "wta.hpp" @@ -16,38 +15,17 @@ #include "aggregations/adaptive_penalty.hpp" #include "median_filter.hpp" -#include "costs/dual.hpp" #include "costs/census.hpp" - -#ifdef __GNUG__ +#include "costs/sad.hpp" +#include "costs/dual.hpp" +#include "costs/scale.hpp" #include <chrono> #include <iostream> -static std::chrono::time_point<std::chrono::system_clock> start; - -static void timer_set() { - start = std::chrono::high_resolution_clock::now(); -} - -static void timer_print(const std::string &msg, const bool reset=true) { - auto stop = std::chrono::high_resolution_clock::now(); - - char buf[24]; - snprintf(buf, sizeof(buf), "%5i ms ", - (int) std::chrono::duration_cast<std::chrono::milliseconds>(stop-start).count()); - - std::cout << buf << msg << "\n" << std::flush; - if (reset) { timer_set(); } -} - -#else - static void timer_set() {} static void timer_print(const std::string &msg, const bool reset=true) {} -#endif - using cv::Mat; using cv::Size; using ftl::stereo::aggregations::AdaptivePenaltySGM; @@ -83,17 +61,21 @@ static void variance_mask(cv::InputArray in, cv::OutputArray out, int wsize=3) { else { throw std::exception(); /* todo CPU version */ } } -struct StereoMiSgm2::Impl { +typedef DualCostsWeighted<LinearCost<CensusMatchingCost>, ExpCost<SADMatchingCost>> MatchingCost; + +struct StereoWADCensus::Impl { CensusMatchingCost census; - MutualInformationMatchingCost mi; + LinearCost<CensusMatchingCost> exp_census; + SADMatchingCost sad; + ExpCost<SADMatchingCost> exp_sad; Array2D<float> variance; Array2D<float> variance_r; - DualCostsWeighted<CensusMatchingCost, MutualInformationMatchingCost> cost; + MatchingCost cost; - Array2D<unsigned char> penalty; - Array2D<unsigned short> cost_min; - Array2D<unsigned short> cost_min_paths; - Array2D<unsigned short> uncertainty; + Array2D<float> penalty; + Array2D<float> cost_min; + Array2D<float> cost_min_paths; + Array2D<float> uncertainty; Array2D<float> confidence; Array2D<float> disparity_r; Array2D<uchar> l; @@ -101,15 +83,19 @@ struct StereoMiSgm2::Impl { Mat prior; // used only to calculate MI - PathAggregator<AdaptivePenaltySGM<DualCostsWeighted<CensusMatchingCost, MutualInformationMatchingCost>::DataType>> aggr; - WinnerTakesAll<DSImage16U,float> wta; + PathAggregator<AdaptivePenaltySGM<MatchingCost::DataType>> aggr; + + WinnerTakesAll<DSImageFloat,float> wta; Impl(int width, int height, int min_disp, int max_disp) : census(width, height, min_disp, max_disp), - mi(width, height, min_disp, max_disp), + exp_census(width, height, min_disp, max_disp, census, 1.0), + sad(width, height, min_disp, max_disp), + exp_sad(width, height, min_disp, max_disp, sad, 1.0), + variance(width, height), variance_r(width, height), - cost(width, height, min_disp, max_disp, census, mi, variance, variance_r), + cost(width, height, min_disp, max_disp, exp_census, exp_sad, variance, variance_r), penalty(width, height), cost_min(width, height), cost_min_paths(width, height), @@ -119,25 +105,11 @@ struct StereoMiSgm2::Impl { l(width, height), r(width, height) {} }; -StereoMiSgm2::StereoMiSgm2() : 
impl_(nullptr) { +StereoWADCensus::StereoWADCensus() : impl_(nullptr) { impl_ = new Impl(0, 0, 0, 0); } -void StereoMiSgm2::setPrior(cv::InputArray prior) { - if (prior.rows() != impl_->cost.height() || prior.cols() != impl_->cost.width()) { - return; - } - if (prior.isGpuMat()) { - prior.getGpuMat().download(impl_->prior); - } - else { - prior.getMat().copyTo(impl_->prior); - } -} - -#include <opencv2/highgui.hpp> - -void StereoMiSgm2::compute(cv::InputArray l, cv::InputArray r, cv::OutputArray disparity) { +void StereoWADCensus::compute(cv::InputArray l, cv::InputArray r, cv::OutputArray disparity) { cudaSetDevice(0); if (l.rows() != impl_->cost.height() || r.cols() != impl_->cost.width()) { @@ -145,37 +117,25 @@ void StereoMiSgm2::compute(cv::InputArray l, cv::InputArray r, cv::OutputArray d impl_ = new Impl(l.cols(), l.rows(), params.d_min, params.d_max); } - if (impl_->prior.empty() || impl_->prior.size() != l.size()) { - // if prior disparity is missing, use random values from uniform - // distribution - impl_->prior.create(l.rows(), l.cols(), CV_32FC1); - cv::randu(impl_->prior, params.d_min, params.d_max); - } - mat2gray(l, impl_->l); mat2gray(r, impl_->r); timer_set(); cv::cuda::GpuMat var_l = impl_->variance.toGpuMat(); - variance_mask(impl_->l.toGpuMat(), var_l, 17); + variance_mask(impl_->l.toGpuMat(), var_l, params.var_window); cv::cuda::GpuMat var_r = impl_->variance_r.toGpuMat(); - variance_mask(impl_->r.toGpuMat(), var_r, 17); + variance_mask(impl_->r.toGpuMat(), var_r, params.var_window); cv::cuda::normalize(var_l, var_l, params.alpha, params.beta, cv::NORM_MINMAX, -1); cv::cuda::normalize(var_r, var_r, params.alpha, params.beta, cv::NORM_MINMAX, -1); - if (l.isMat()) { - impl_->mi.set(l.getMat(), r.getMat(), impl_->prior); - } - else if (l.isGpuMat()) { - Mat l_; - Mat r_; - l.getGpuMat().download(l_); - r.getGpuMat().download(r_); - impl_->mi.set(l_, r_, impl_->prior); - } - impl_->census.set( impl_->l, impl_->r); + impl_->census.set(impl_->l, impl_->r); + impl_->exp_census.set(params.l1); + + impl_->sad.setWindow(params.wsize, params.wsize); + impl_->sad.set(l, r); + impl_->exp_sad.set(params.l2); impl_->cost.set(); if (params.debug) { timer_print("Matching cost"); } @@ -184,13 +144,8 @@ void StereoMiSgm2::compute(cv::InputArray l, cv::InputArray r, cv::OutputArray d auto penalty = impl_->penalty.toGpuMat(); penalty.setTo(params.P2); - /*cv::cuda::GpuMat mask(penalty.size(), CV_8UC1); - cv::cuda::compare(var_l, 0.25, mask, cv::CMP_LT); - penalty.setTo(params.P2*2, mask); - cv::cuda::compare(var_l, 0.75, mask, cv::CMP_GT); - penalty.setTo(params.P1, mask);*/ + AdaptivePenaltySGM<MatchingCost::DataType> func = {impl_->cost.data(), impl_->cost_min_paths.data(), params.P1}; - AdaptivePenaltySGM<DualCostsWeighted<CensusMatchingCost, MutualInformationMatchingCost>::DataType> func = {impl_->cost.data(), impl_->cost_min_paths.data(), params.P1}; impl_->aggr.getDirectionData(AggregationDirections::LEFTRIGHT).penalties = impl_->penalty; impl_->aggr.getDirectionData(AggregationDirections::RIGHTLEFT).penalties = impl_->penalty; impl_->aggr.getDirectionData(AggregationDirections::UPDOWN).penalties = impl_->penalty; @@ -212,7 +167,7 @@ void StereoMiSgm2::compute(cv::InputArray l, cv::InputArray r, cv::OutputArray d if (params.debug) { timer_print("median filter"); } } -StereoMiSgm2::~StereoMiSgm2() { +StereoWADCensus::~StereoWADCensus() { if (impl_) { delete impl_; impl_ = nullptr; diff --git a/lib/libstereo/src/stereo_censussgm.cu b/lib/libstereo/src/stereo_wcensussgm.cu similarity 
index 62% rename from lib/libstereo/src/stereo_censussgm.cu rename to lib/libstereo/src/stereo_wcensussgm.cu index c74782d3c511dfd91f7fdfe4bcd29083befa0367..3ea46621f5c7029614d7a7e47f3a2f5c4a4c1ae7 100644 --- a/lib/libstereo/src/stereo_censussgm.cu +++ b/lib/libstereo/src/stereo_wcensussgm.cu @@ -15,6 +15,7 @@ #include "aggregations/standard_sgm.hpp" #include "median_filter.hpp" +#include "dsi_tools.hpp" #ifdef __GNUG__ @@ -49,31 +50,32 @@ using cv::Mat; using cv::Size; using ftl::stereo::aggregations::StandardSGM; -struct StereoCensusSgm::Impl { - //DisparitySpaceImage<unsigned short> dsi; - CensusMatchingCost cost; - Array2D<unsigned short> cost_min_paths; - Array2D<unsigned short> uncertainty; - Array2D<float> disparity_r; +typedef WeightedCensusMatchingCost MatchingCost; + +struct StereoWCensusSgm::Impl { + MatchingCost cost; + Array2D<MatchingCost::Type> cost_min_paths; + Array2D<MatchingCost::Type> uncertainty; Array2D<uchar> l; Array2D<uchar> r; - PathAggregator<StandardSGM<CensusMatchingCost::DataType>> aggr; - WinnerTakesAll<DSImage16U,float> wta; + PathAggregator<StandardSGM<MatchingCost::DataType>> aggr; + WinnerTakesAll<DisparitySpaceImage<MatchingCost::Type>,float> wta; Impl(int width, int height, int min_disp, int max_disp) : cost(width, height, min_disp, max_disp), cost_min_paths(width, height), uncertainty(width, height), - disparity_r(width, height), l(width, height), r(width, height) {} + l(width, height), r(width, height) + {} }; -StereoCensusSgm::StereoCensusSgm() : impl_(nullptr) { +StereoWCensusSgm::StereoWCensusSgm() : impl_(nullptr) { impl_ = new Impl(0, 0, 0, 0); } -void StereoCensusSgm::compute(cv::InputArray l, cv::InputArray r, cv::OutputArray disparity) { +void StereoWCensusSgm::compute(cv::InputArray l, cv::InputArray r, cv::OutputArray disparity) { cudaSetDevice(0); if (l.rows() != impl_->cost.height() || r.cols() != impl_->cost.width()) { @@ -92,13 +94,13 @@ void StereoCensusSgm::compute(cv::InputArray l, cv::InputArray r, cv::OutputArra if (params.debug) { timer_print("census transform"); } // cost aggregation - StandardSGM<CensusMatchingCost::DataType> func = {impl_->cost.data(), impl_->cost_min_paths.data(), params.P1, params.P2}; + StandardSGM<MatchingCost::DataType> func = {impl_->cost.data(), impl_->cost_min_paths.data(), params.P1, params.P2}; auto &out = impl_->aggr(func, params.paths); cudaSafeCall(cudaDeviceSynchronize()); if (params.debug) { timer_print("Aggregation"); } - impl_->wta(out, 0); + impl_->wta(out, params.subpixel, params.lr_consistency); cudaSafeCall(cudaDeviceSynchronize()); if (params.debug) { timer_print("WTA"); } @@ -108,24 +110,27 @@ void StereoCensusSgm::compute(cv::InputArray l, cv::InputArray r, cv::OutputArra // Lecture Notes in Artificial Intelligence and Lecture Notes in // Bioinformatics). 
https://doi.org/10.1007/978-3-319-11752-2_4 - if (disparity.isGpuMat()) { - auto uncertainty = impl_->uncertainty.toGpuMat(); - cv::cuda::subtract(impl_->wta.min_cost.toGpuMat(), impl_->cost_min_paths.toGpuMat(), uncertainty); - cv::cuda::compare(uncertainty, params.uniqueness, uncertainty, cv::CMP_GT); - impl_->wta.disparity.toGpuMat().setTo(0, uncertainty); - } - else { - auto uncertainty = impl_->uncertainty.toMat(); - cv::subtract(impl_->wta.min_cost.toMat(), impl_->cost_min_paths.toMat(), uncertainty); - cv::compare(uncertainty, params.uniqueness, uncertainty, cv::CMP_GT); - impl_->wta.disparity.toMat().setTo(0, uncertainty); - } + #if USE_GPU + auto uncertainty = impl_->uncertainty.toGpuMat(); + cv::cuda::subtract(impl_->wta.min_cost.toGpuMat(), impl_->cost_min_paths.toGpuMat(), uncertainty); + cv::cuda::compare(uncertainty, params.uniqueness, uncertainty, cv::CMP_GT); + impl_->wta.disparity.toGpuMat().setTo(0, uncertainty); + #else + auto uncertainty = impl_->uncertainty.toMat(); + cv::subtract(impl_->wta.min_cost.toMat(), impl_->cost_min_paths.toMat(), uncertainty); + cv::compare(uncertainty, params.uniqueness, uncertainty, cv::CMP_GT); + impl_->wta.disparity.toMat().setTo(0, uncertainty); + #endif median_filter(impl_->wta.disparity, disparity); if (params.debug) { timer_print("median filter"); } + + Array2D<MatchingCost::Type> dsitmp_dev(l.cols(), l.rows()); + dsi_slice(out, impl_->wta.disparity, dsitmp_dev); + show_dsi_slice(dsitmp_dev.toGpuMat()); } -StereoCensusSgm::~StereoCensusSgm() { +StereoWCensusSgm::~StereoWCensusSgm() { if (impl_) { delete impl_; impl_ = nullptr; diff --git a/lib/libstereo/src/wta.hpp b/lib/libstereo/src/wta.hpp index b23325ebdba34c06aa7e12e12625cf1d6835158b..bfb51edcf21195704af6c587be811ae791f68014 100644 --- a/lib/libstereo/src/wta.hpp +++ b/lib/libstereo/src/wta.hpp @@ -171,7 +171,7 @@ struct WinnerTakesAll { Array2D<TDisp> disparity_right; Array2D<typename DSI::Type> min_cost; - Array2D<TDisp> &operator()(const DSI& dsi, const int subpixel=0) { + Array2D<TDisp> &operator()(const DSI& dsi, const int subpixel=0, const bool lr_consistency=true) { disparity.create(dsi.width(), dsi.height()); disparity_right.create(dsi.width(), dsi.height()); min_cost.create(dsi.width(), dsi.height()); @@ -193,7 +193,9 @@ struct WinnerTakesAll { algorithms::WTADiagonal<DSI,float> wtadiag = {dsi.data(), disparity_right.data()}; parallel2D(wtadiag, dsi.width(), dsi.height()); - parallel2D<algorithms::ConsistencyCheck<float>>({disparity.data(), disparity_right.data()}, dsi.width(), dsi.height()); + if (lr_consistency) { + parallel2D<algorithms::ConsistencyCheck<float>>({disparity.data(), disparity_right.data()}, dsi.width(), dsi.height()); + } return disparity; } };
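// A minimal CPU-side sketch of the confidence filter the pipelines above apply
// after winner-takes-all; the function name and signature are illustrative
// only, while cv::subtract / cv::compare / setTo mirror the calls used in the
// non-GPU (#else) branches of the compute() methods.
#include <opencv2/core.hpp>

static void filter_by_uniqueness(const cv::Mat &min_cost,       // WTA minimum cost per pixel
                                 const cv::Mat &cost_min_paths, // minimum cost accumulated over the SGM paths
                                 cv::Mat &disparity,            // disparity map, masked in place
                                 double uniqueness) {           // params.uniqueness threshold
	cv::Mat uncertainty;
	cv::subtract(min_cost, cost_min_paths, uncertainty);           // uncertainty measure (Drory et al. 2014)
	cv::compare(uncertainty, uniqueness, uncertainty, cv::CMP_GT); // 255 where the match is ambiguous
	disparity.setTo(0, uncertainty);                               // reject those pixels
}

// Usage note: the GPU branches follow the same pattern with cv::cuda::subtract
// and cv::cuda::compare, and the preceding WTA call now also takes the new
// flag, e.g. impl_->wta(out, params.subpixel, params.lr_consistency);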