From 887aa933094504e1e6012fb6b406dc9c555b3775 Mon Sep 17 00:00:00 2001 From: Sebastian Hahta <joseha@utu.fi> Date: Tue, 21 Apr 2020 18:28:06 +0300 Subject: [PATCH] more useful middlebury testing --- lib/libstereo/include/stereo.hpp | 6 +- lib/libstereo/middlebury/main.cpp | 253 +++++++++++------- lib/libstereo/src/costs/mutual_information.cu | 12 +- .../src/costs/mutual_information.hpp | 18 +- lib/libstereo/src/stereo_censussgm.cu | 2 +- lib/libstereo/src/stereo_misgm.cu | 11 +- lib/libstereo/src/stereo_misgm2.cu | 37 ++- 7 files changed, 206 insertions(+), 133 deletions(-) diff --git a/lib/libstereo/include/stereo.hpp b/lib/libstereo/include/stereo.hpp index d210d3688..2ccab32d6 100644 --- a/lib/libstereo/include/stereo.hpp +++ b/lib/libstereo/include/stereo.hpp @@ -155,8 +155,8 @@ public: struct Parameters { int d_min = 0; int d_max = 0; - unsigned short P1 = 2; - unsigned short P2 = 8; + unsigned short P1 = 1; + unsigned short P2 = 16; float uniqueness = std::numeric_limits<short>::max(); int subpixel = 1; // subpixel interpolation method int paths = AggregationDirections::HORIZONTAL | @@ -164,7 +164,7 @@ public: AggregationDirections::DIAGONAL; float alpha = 0.2; /** alpha: minimum weight for census cost */ - float beta = 0.8; /** 1-beta: minimum weight for MI cost */ + float beta = 0.7; /** 1-beta: minimum weight for MI cost */ bool debug = false; }; Parameters params; diff --git a/lib/libstereo/middlebury/main.cpp b/lib/libstereo/middlebury/main.cpp index 1c9409cb5..60dfee546 100644 --- a/lib/libstereo/middlebury/main.cpp +++ b/lib/libstereo/middlebury/main.cpp @@ -1,6 +1,7 @@ #include <opencv2/opencv.hpp> #include <vector> +#include <map> #include <chrono> #include "middlebury.hpp" @@ -32,130 +33,176 @@ void colorize(const cv::Mat &disp, cv::Mat &out, int ndisp=-1) { //////////////////////////////////////////////////////////////////////////////// -using cv::Mat; -using std::vector; +struct MiddleburyData { + const std::string name; + const cv::Mat imL; + const cv::Mat imR; + const cv::Mat gtL; + const cv::Mat maskL; + const MiddEvalCalib calib; +}; + +struct Result { + std::vector<MiddEvalResult> results; + cv::Mat disp; +}; + +static void run_censussgm(MiddleburyData &data, cv::Mat &disparity) { + auto stereo = StereoCensusSgm(); + stereo.params.P1 = 4; + stereo.params.P2 = 18; + + stereo.params.d_min = data.calib.vmin; + stereo.params.d_max = data.calib.vmax; + stereo.params.subpixel = 1; + stereo.params.debug = false; + stereo.compute(data.imL, data.imR, disparity); +} -int main(int argc, char* argv[]) { - #if USE_GPU - std::cout << "GPU VERSION\n"; - #else - std::cout << "CPU VERSION\n"; - #endif +static void run_misgm(MiddleburyData &data, cv::Mat &disparity) { + auto stereo = StereoMiSgm(); + stereo.params.P1 = 4; + stereo.params.P2 = 18; - Mat imL; - Mat imR; - Mat gtL; - Mat maskL; - MiddEvalCalib calib; + stereo.params.d_min = data.calib.vmin; + stereo.params.d_max = data.calib.vmax; + stereo.params.subpixel = 1; + stereo.params.debug = false; + stereo.compute(data.imL, data.imR, disparity); + stereo.setPrior(disparity); + stereo.compute(data.imL, data.imR, disparity); +} - if (argc < 2) { - std::cerr << "usage: middlebury [path]\n"; - return 1; - } +static const std::map<std::string, std::function<void(MiddleburyData&, cv::Mat&)>> algorithms = { + { "censussgm", run_censussgm }, + { "misgm", run_misgm } +}; - imL = cv::imread(argv[1] + std::string("im0.png")); - imR = cv::imread(argv[1] + std::string("im1.png")); - gtL = read_pfm(argv[1] + std::string("disp0.pfm")); +//////////////////////////////////////////////////////////////////////////////// + +MiddleburyData load_input(const std::string &path) { + cv::Mat imL; + cv::Mat imR; + cv::Mat gtL; + cv::Mat maskL; + + imL = cv::imread(path + std::string("im0.png")); + imR = cv::imread(path + std::string("im1.png")); + gtL = read_pfm(path + std::string("disp0.pfm")); if (gtL.empty()) { - gtL = read_pfm(argv[1] + std::string("disp0GT.pfm")); + gtL = read_pfm(path + std::string("disp0GT.pfm")); } //maskL = cv::imread(argv[1] + std::string("mask0nocc.png"), cv::IMREAD_GRAYSCALE); - calib = read_calibration(argv[1] + std::string("calib.txt")); + auto calib = read_calibration(path + std::string("calib.txt")); maskL.create(imL.size(), CV_8UC1); maskL.setTo(cv::Scalar(255)); - int ndisp = calib.vmax - calib.vmin; + if (imL.empty() || imR.empty() || gtL.empty() || maskL.empty()) { + throw std::exception(); + } - auto stereo = StereoCensusAdaptive(); - stereo.params.P1 = 8; - //stereo.params.P2 = 25; + std::string name; + name = path.substr(0, path.size()-1); + name = name.substr(name.find_last_of("/") + 1); - stereo.params.d_min = calib.vmin; - stereo.params.d_max = calib.vmax; - stereo.params.subpixel = 1; - stereo.params.debug = true; - //stereo.params.paths = AggregationDirections::ALL; - stereo.params.uniqueness = 80; + return {name, imL, imR, gtL, maskL, calib}; +} + +Result run_one(MiddleburyData &input, std::function<void(MiddleburyData&, cv::Mat&)> f) { + Result result; + f(input, result.disp); + for (const float t : {4.0f, 1.0f, 0.5f, 0.25f}) { + result.results.push_back(evaluate(result.disp, input.gtL, input.maskL, t)); + } + return result; +} - int i_max = 1; - float t = 4.0f; +Result run_one(MiddleburyData &input, std::string name) { + return run_one(input, algorithms.at(name)); +} - if (imL.empty() || imR.empty() || gtL.empty()) { - std::cerr << "can't load image\n"; - return 1; +std::map<std::string, Result> run_all(MiddleburyData &input) { + std::map<std::string, Result> results; + + for (auto const& [name, f] : algorithms) { + results[name] = run_one(input, f); } - if (imL.size() != imR.size()) { - std::cerr << "images must be same size\n"; - return 1; + return results; +} + +//////////////////////////////////////////////////////////////////////////////// + +cv::Mat visualize_error(const cv::Mat &disp, const cv::Mat >, float t) { + cv::Mat err(gt.size(), CV_32FC1); + cv::Mat err_color(gt.size(), CV_8UC1); + + cv::absdiff(gt, disp, err); + err.setTo(t, err > t); + err += 1.0f; + err.setTo(0, disp == 0.0); + err.convertTo(err, CV_8UC1, 255.0f/(t+1.0f)); + cv::applyColorMap(err, err_color, cv::COLORMAP_PLASMA); + + return err_color; +} + +void show_result(std::string name, const MiddleburyData &data, const Result &result) { + cv::Mat err_color = visualize_error(result.disp, data.gtL, 4.0); + cv::Mat disp_color(data.gtL.size(), CV_8UC1); + + colorize(result.disp, disp_color, data.calib.ndisp); + cv::imshow(data.name + " (disparity) - " + name, disp_color); + cv::imshow(data.name + " (error) - " + name, err_color); + + printf("\n%s: %s\n", name.c_str(), data.name.c_str()); + for (auto res : result.results) { + printf("%9.2f %% correct (err < %.2f)\n", 100.0f * res.err_bad_nonoccl, res.threshold); + printf("%9.2f px RMSE\n", res.rms_bad_nonoccl); } +} - Mat disp(imL.size(), CV_32FC1, cv::Scalar(0.0f)); - std::cout << "resolution: " << imL.cols << "x" << imL.rows << ", " << ndisp << " disparities [" << calib.vmin << "," << calib.vmax << "]\n"; - - std::vector<cv::Mat> disp_color; - std::vector<cv::Mat> err_color; - - for (int i = 0; i < i_max; i++) { - auto start = std::chrono::high_resolution_clock::now(); - - cv::cuda::GpuMat gpu_imL(imL); - cv::cuda::GpuMat gpu_imR(imR); - cv::cuda::GpuMat gpu_disp(disp); - stereo.compute(gpu_imL, gpu_imR, gpu_disp); - gpu_disp.download(disp); - stereo.setPrior(disp); - - auto stop = std::chrono::high_resolution_clock::now(); - std::cout << "disparity complete in " - << std::chrono::duration_cast<std::chrono::milliseconds>(stop-start).count() - << " ms\n"; - - MiddEvalResult res; - - std::cout << "TYPE: " << gtL.type() << std::endl; - - for (const float t : {4.0f, 1.0f, 0.5f, 0.25f}) { - res = evaluate(disp, gtL, maskL, t); - if (i == 0 || i == i_max-1) { - printf("%9.2f %% correct (err < %.2f)\n", 100.0f * res.err_bad, res.threshold); - //printf("%9.2f %% correct (non-occluded, err < %.2f)\n", 100.0f * res.err_bad_nonoccl, res.threshold); - printf("%9.2f px RMSE (all)\n", res.rms_bad); - //printf("%9.2f px RMSE (non-occluded)\n", res.rms_bad_nonoccl); - } - } - - if (i == 0 || i == i_max-1) { - printf("%9.2f %% total\n", 100.0f * res.err_total); - printf("%9.2f %% non-occluded\n", 100.0f * res.err_nonoccl); - } - - Mat &err_color_ = err_color.emplace_back(); - Mat &disp_color_ = disp_color.emplace_back(); - - // visualize error: set missing values at 0 and scale error to [1.0, t+1.0] - // errors > t truncate to t - Mat err(gtL.size(), CV_32FC1); - cv::absdiff(gtL, disp, err); - err.setTo(t, err > t); - err += 1.0f; - err.setTo(0, disp == 0.0); - err.convertTo(err, CV_8UC1, 255.0f/(t+1.0f)); - cv::applyColorMap(err, err_color_, cv::COLORMAP_PLASMA); - - colorize(disp, disp_color_, calib.ndisp); +void show_results( const MiddleburyData &data, const std::map<std::string, Result> &results) { + for (auto const& [name, result] : results) { + show_result(name, data, result); } +} - cv::imshow(std::to_string(0) + ": " + "error (err < " + std::to_string(t) + ")", err_color[0]); - cv::imshow(std::to_string(0) + ": " + "disparity", disp_color[0]); - cv::imshow(std::to_string(i_max-1) + ": " + "error (err < " + std::to_string(t) + ")", err_color[i_max-1]); - cv::imshow(std::to_string(i_max-1) + ": " + "disparity", disp_color[i_max-1]); +//////////////////////////////////////////////////////////////////////////////// + +using cv::Mat; +using std::vector; + +int main(int argc, char* argv[]) { + #if USE_GPU + std::cout << "GPU VERSION\n"; + #else + std::cout << "CPU VERSION\n"; + #endif + + if (argc < 2) { + std::cerr << "usage: path [algorithm]\n"; + return 1; + } + + if (argc == 2) { + auto input = load_input(argv[1]); + auto results = run_all(input); + show_results(input, results); + while(cv::waitKey() != 27); + return 0; + } + else if (argc == 3) { + auto input = load_input(argv[1]); + auto result = run_one(input, argv[2]); + show_result(argv[2], input, result); + while(cv::waitKey() != 27); + return 0; + } - Mat gt_color; - colorize(gtL, gt_color, calib.ndisp); - cv::imshow("gt", gt_color); - while(cv::waitKey() != 27); + // todo: save results - return 0; + std::cerr << "error"; + return 1; } diff --git a/lib/libstereo/src/costs/mutual_information.cu b/lib/libstereo/src/costs/mutual_information.cu index af32f0c53..85771f207 100644 --- a/lib/libstereo/src/costs/mutual_information.cu +++ b/lib/libstereo/src/costs/mutual_information.cu @@ -74,7 +74,7 @@ void calculate_h12(const cv::Mat &P, cv::Mat &out, int ksize=7, double sigma=1.0 // possible numerical issues? should also be less than 1/(width*height) of // original image. - static float e = 0.00000001; + const float e = 0.00000001; cv::Mat tmp; P.copyTo(out); @@ -154,4 +154,14 @@ void MutualInformationMatchingCost::set(const cv::Mat &l, const cv::Mat &r, cons h1.copyTo(h1_.toMat()); h2.copyTo(h2_.toMat()); #endif + + data().normalize = 0; + double min, max; + cv::minMaxLoc(h12, &min, &max); + data().normalize += max; + cv::minMaxLoc(h1, &min, &max); + data().normalize -= min; + cv::minMaxLoc(h2, &min, &max); + data().normalize -= min; + data().normalize = 1.0f/data().normalize; } diff --git a/lib/libstereo/src/costs/mutual_information.hpp b/lib/libstereo/src/costs/mutual_information.hpp index 05f526f0a..3b86c6541 100644 --- a/lib/libstereo/src/costs/mutual_information.hpp +++ b/lib/libstereo/src/costs/mutual_information.hpp @@ -8,13 +8,14 @@ #include <cuda_runtime.h> namespace impl { - struct MutualInformationMatchingCost : DSImplBase<unsigned short> { + template<typename T> + struct MutualInformationMatchingCost : DSImplBase<T> { typedef unsigned short Type; - MutualInformationMatchingCost(ushort w, ushort h, ushort dmin, ushort dmax) : DSImplBase<unsigned short>({w,h,dmin,dmax}) {} + MutualInformationMatchingCost(ushort w, ushort h, ushort dmin, ushort dmax) : DSImplBase<T>({w,h,dmin,dmax}) {} - __host__ __device__ inline unsigned short operator()(const int y, const int x, const int d) const { - if ((x-d) < 0) { return 24; } + __host__ __device__ inline T operator()(const int y, const int x, const int d) const { + if ((x-d) < 0) { return 0; } const int I1 = l(y,x); const int I2 = r(y,x-d); const float H1 = h1(0,I1); @@ -23,19 +24,20 @@ namespace impl { return -(H1+H2-H12); } - static constexpr unsigned short COST_MAX = 255; + static constexpr T COST_MAX = 255; Array2D<unsigned char>::Data l; Array2D<unsigned char>::Data r; Array2D<float>::Data h1; Array2D<float>::Data h2; Array2D<float>::Data h12; + float normalize = 1.0f; }; } -class MutualInformationMatchingCost : public DSBase<impl::MutualInformationMatchingCost>{ +class MutualInformationMatchingCost : public DSBase<impl::MutualInformationMatchingCost<unsigned short>>{ public: - typedef impl::MutualInformationMatchingCost DataType; + typedef impl::MutualInformationMatchingCost<unsigned short> DataType; typedef unsigned short Type; MutualInformationMatchingCost(int width, int height, int disp_min, int disp_max) : @@ -64,4 +66,4 @@ protected: cv::Mat r_warp_; }; -#endif \ No newline at end of file +#endif diff --git a/lib/libstereo/src/stereo_censussgm.cu b/lib/libstereo/src/stereo_censussgm.cu index d707e7846..c74782d3c 100644 --- a/lib/libstereo/src/stereo_censussgm.cu +++ b/lib/libstereo/src/stereo_censussgm.cu @@ -118,7 +118,7 @@ void StereoCensusSgm::compute(cv::InputArray l, cv::InputArray r, cv::OutputArra auto uncertainty = impl_->uncertainty.toMat(); cv::subtract(impl_->wta.min_cost.toMat(), impl_->cost_min_paths.toMat(), uncertainty); cv::compare(uncertainty, params.uniqueness, uncertainty, cv::CMP_GT); - impl_->wta.disparity.toGpuMat().setTo(0, uncertainty); + impl_->wta.disparity.toMat().setTo(0, uncertainty); } median_filter(impl_->wta.disparity, disparity); diff --git a/lib/libstereo/src/stereo_misgm.cu b/lib/libstereo/src/stereo_misgm.cu index ccc6005d9..18981318b 100644 --- a/lib/libstereo/src/stereo_misgm.cu +++ b/lib/libstereo/src/stereo_misgm.cu @@ -11,6 +11,7 @@ #include "wta.hpp" #include "cost_aggregation.hpp" #include "aggregations/standard_sgm.hpp" +#include "median_filter.hpp" #ifdef __GNUG__ @@ -131,14 +132,8 @@ void StereoMiSgm::compute(cv::InputArray l, cv::InputArray r, cv::OutputArray di impl_->wta(out, 0); cudaSafeCall(cudaDeviceSynchronize()); if (params.debug) { timer_print("WTA"); } - if (disparity.isGpuMat()) { - impl_->wta.disparity.toGpuMat(disparity.getGpuMatRef()); - } - else { - cv::Mat &disparity_ = disparity.getMatRef(); - impl_->wta.disparity.toMat(disparity_); - cv::medianBlur(disparity_, disparity_, 3); - } + + median_filter(impl_->wta.disparity, disparity); // confidence estimate // Drory, A., Haubold, C., Avidan, S., & Hamprecht, F. A. (2014). diff --git a/lib/libstereo/src/stereo_misgm2.cu b/lib/libstereo/src/stereo_misgm2.cu index e23ab955b..f2fba6f81 100644 --- a/lib/libstereo/src/stereo_misgm2.cu +++ b/lib/libstereo/src/stereo_misgm2.cu @@ -13,6 +13,7 @@ #include "wta.hpp" #include "cost_aggregation.hpp" #include "aggregations/standard_sgm.hpp" +#include "aggregations/adaptive_penalty.hpp" #include "median_filter.hpp" #include "costs/dual.hpp" @@ -49,7 +50,7 @@ static void timer_print(const std::string &msg, const bool reset=true) {} using cv::Mat; using cv::Size; -using ftl::stereo::aggregations::StandardSGM; +using ftl::stereo::aggregations::AdaptivePenaltySGM; static void variance_mask(cv::InputArray in, cv::OutputArray out, int wsize=3) { if (in.isGpuMat()) { @@ -68,8 +69,7 @@ static void variance_mask(cv::InputArray in, cv::OutputArray out, int wsize=3) { else { im = in.getGpuMat(); } - std::cout << im.type() << "\n"; - std::cout << mean.type() << "\n"; + cv::cuda::multiply(im, im, im2); auto filter = cv::cuda::createBoxFilter(CV_32FC1, CV_32FC1, cv::Size(wsize,wsize)); filter->apply(im, mean); // E[X] @@ -90,6 +90,7 @@ struct StereoMiSgm2::Impl { Array2D<float> variance_r; DualCostsWeighted<CensusMatchingCost, MutualInformationMatchingCost> cost; + Array2D<unsigned char> penalty; Array2D<unsigned short> cost_min; Array2D<unsigned short> cost_min_paths; Array2D<unsigned short> uncertainty; @@ -100,7 +101,7 @@ struct StereoMiSgm2::Impl { Mat prior; // used only to calculate MI - PathAggregator<StandardSGM<DualCostsWeighted<CensusMatchingCost, MutualInformationMatchingCost>::DataType>> aggr; + PathAggregator<AdaptivePenaltySGM<DualCostsWeighted<CensusMatchingCost, MutualInformationMatchingCost>::DataType>> aggr; WinnerTakesAll<DSImage16U,float> wta; Impl(int width, int height, int min_disp, int max_disp) : @@ -109,6 +110,7 @@ struct StereoMiSgm2::Impl { variance(width, height), variance_r(width, height), cost(width, height, min_disp, max_disp, census, mi, variance, variance_r), + penalty(width, height), cost_min(width, height), cost_min_paths(width, height), uncertainty(width, height), @@ -156,9 +158,9 @@ void StereoMiSgm2::compute(cv::InputArray l, cv::InputArray r, cv::OutputArray d timer_set(); cv::cuda::GpuMat var_l = impl_->variance.toGpuMat(); - variance_mask(impl_->l.toGpuMat(), var_l, 9); + variance_mask(impl_->l.toGpuMat(), var_l, 17); cv::cuda::GpuMat var_r = impl_->variance_r.toGpuMat(); - variance_mask(impl_->r.toGpuMat(), var_r, 9); + variance_mask(impl_->r.toGpuMat(), var_r, 17); cv::cuda::normalize(var_l, var_l, params.alpha, params.beta, cv::NORM_MINMAX, -1); cv::cuda::normalize(var_r, var_r, params.alpha, params.beta, cv::NORM_MINMAX, -1); @@ -179,13 +181,30 @@ void StereoMiSgm2::compute(cv::InputArray l, cv::InputArray r, cv::OutputArray d cudaSafeCall(cudaDeviceSynchronize()); - StandardSGM<DualCostsWeighted<CensusMatchingCost, MutualInformationMatchingCost>::DataType> func = {impl_->cost.data(), impl_->cost_min_paths.data(), params.P1, params.P2}; - auto &out = impl_->aggr(func, AggregationDirections::ALL); // params.paths + auto penalty = impl_->penalty.toGpuMat(); + penalty.setTo(params.P2); + + /*cv::cuda::GpuMat mask(penalty.size(), CV_8UC1); + cv::cuda::compare(var_l, 0.25, mask, cv::CMP_LT); + penalty.setTo(params.P2*2, mask); + cv::cuda::compare(var_l, 0.75, mask, cv::CMP_GT); + penalty.setTo(params.P1, mask);*/ + + AdaptivePenaltySGM<DualCostsWeighted<CensusMatchingCost, MutualInformationMatchingCost>::DataType> func = {impl_->cost.data(), impl_->cost_min_paths.data(), params.P1}; + impl_->aggr.getDirectionData(AggregationDirections::LEFTRIGHT).penalties = impl_->penalty; + impl_->aggr.getDirectionData(AggregationDirections::RIGHTLEFT).penalties = impl_->penalty; + impl_->aggr.getDirectionData(AggregationDirections::UPDOWN).penalties = impl_->penalty; + impl_->aggr.getDirectionData(AggregationDirections::DOWNUP).penalties = impl_->penalty; + impl_->aggr.getDirectionData(AggregationDirections::TOPLEFTBOTTOMRIGHT).penalties = impl_->penalty; + impl_->aggr.getDirectionData(AggregationDirections::BOTTOMRIGHTTOPLEFT).penalties = impl_->penalty; + impl_->aggr.getDirectionData(AggregationDirections::BOTTOMLEFTTOPRIGHT).penalties = impl_->penalty; + impl_->aggr.getDirectionData(AggregationDirections::TOPRIGHTBOTTOMLEFT).penalties = impl_->penalty; + auto &out = impl_->aggr(func, params.paths); cudaSafeCall(cudaDeviceSynchronize()); if (params.debug) { timer_print("Aggregation"); } - impl_->wta(out, 0); + impl_->wta(out, params.subpixel); cudaSafeCall(cudaDeviceSynchronize()); if (params.debug) { timer_print("WTA"); } -- GitLab