From 3e456c08df459ab9cd79a48278b094bc577f0e6f Mon Sep 17 00:00:00 2001
From: Nicolas Pope <nwpope@utu.fi>
Date: Thu, 7 May 2020 11:14:34 +0300
Subject: [PATCH] Add weighted hcensus and ecensus

---
 lib/libstereo/CMakeLists.txt                |   2 +
 lib/libstereo/include/stereo.hpp            |  40 ++++++
 lib/libstereo/middlebury/algorithms.hpp     |  21 ++++
 lib/libstereo/src/algorithms/hwcensussgm.cu | 131 ++++++++++++++++++++
 lib/libstereo/src/costs/census.cu           |  18 ++-
 lib/libstereo/src/costs/census.hpp          |  25 ++--
 6 files changed, 226 insertions(+), 11 deletions(-)
 create mode 100644 lib/libstereo/src/algorithms/hwcensussgm.cu

diff --git a/lib/libstereo/CMakeLists.txt b/lib/libstereo/CMakeLists.txt
index f1708e4f1..64e19cd84 100644
--- a/lib/libstereo/CMakeLists.txt
+++ b/lib/libstereo/CMakeLists.txt
@@ -43,6 +43,7 @@ if (LIBSTEREO_SHARED)
                 src/algorithms/stablesgm.cu
                 src/algorithms/tcensussgm.cu
                 src/algorithms/hcensussgm.cu
+                src/algorithms/hwcensussgm.cu
                 src/algorithms/brefcensussgm.cu
                 src/algorithms/meancensussgm.cu
                 src/algorithms/hstablesgm.cu
@@ -78,6 +79,7 @@ else()
                 src/algorithms/stablesgm.cu
                 src/algorithms/tcensussgm.cu
                 src/algorithms/hcensussgm.cu
+                src/algorithms/hwcensussgm.cu
                 src/algorithms/brefcensussgm.cu
                 src/algorithms/meancensussgm.cu
                 src/algorithms/hstablesgm.cu
diff --git a/lib/libstereo/include/stereo.hpp b/lib/libstereo/include/stereo.hpp
index 2dac90d5a..8820a1dce 100644
--- a/lib/libstereo/include/stereo.hpp
+++ b/lib/libstereo/include/stereo.hpp
@@ -303,6 +303,46 @@ private:
 	Impl *impl_;
 };
 
+/**
+ * Three resolutions, fine, medium and coarse, are combined. In each case the
+ * reference pixel always comes from the fine resolution. Each resolution is
+ * variance weighted for that resolution to increase and decrease influence,
+ * allowing textured high res to dominate when possible.
+ */
+class StereoHierWeightCensusSgm {
+public:
+	StereoHierWeightCensusSgm();
+	~StereoHierWeightCensusSgm();
+
+	void compute(cv::InputArray l, cv::InputArray r, cv::OutputArray disparity);
+	void setPrior(cv::InputArray disp) {};
+
+	struct Parameters {
+		int d_min = 0;
+		int d_max = 0;
+		unsigned short P1 = 5;
+		unsigned short P2 = 25;
+		float uniqueness = std::numeric_limits<unsigned short>::max();
+		int subpixel = 1; // subpixel interpolation method
+		bool lr_consistency = true;
+		int paths = AggregationDirections::HORIZONTAL |
+					AggregationDirections::VERTICAL |
+					AggregationDirections::DIAGONAL;
+		/** normalization of variance to range [alpha, beta] */
+		float alpha = 0.2;
+		float beta = 1.0;
+		/** variance window size */
+		int var_window = 9;
+		bool debug = false;
+		CensusPattern pattern = CensusPattern::STANDARD;
+	};
+	Parameters params;
+
+private:
+	struct Impl;
+	Impl *impl_;
+};
+
 class StereoCPCensusSgm {
 public:
 	StereoCPCensusSgm();
diff --git a/lib/libstereo/middlebury/algorithms.hpp b/lib/libstereo/middlebury/algorithms.hpp
index e5cc07c38..6c936fdba 100644
--- a/lib/libstereo/middlebury/algorithms.hpp
+++ b/lib/libstereo/middlebury/algorithms.hpp
@@ -172,6 +172,26 @@ namespace Impl {
 		}
 	};
 
+	struct HWCensusSGM : public Algorithm {
+		HWCensusSGM() { P1 = 6.0f; P2 = 60.0f; }
+
+		virtual void run(const MiddleburyData &data, cv::Mat &disparity) override {
+			StereoHierWeightCensusSgm stereo;
+			stereo.params.P1 = P1;
+			stereo.params.P2 = P2;
+			stereo.params.subpixel = subpixel;
+			stereo.params.lr_consistency = lr_consistency;
+			stereo.params.var_window = 5;
+			stereo.params.d_min = data.calib.vmin;
+			stereo.params.d_max = data.calib.vmax;
+			stereo.params.debug = false;
+			stereo.params.alpha = 0.5f;
+			stereo.params.beta = 1.0f;
+
+			stereo.compute(data.imL, data.imR, disparity);
+		}
+	};
+
 	struct HGCensusSGM : public Algorithm {
 		HGCensusSGM() { P1 = 36.0f; P2 = 96.0f; }
 
@@ -326,6 +346,7 @@ static const std::map<std::string, Algorithm*> algorithms = {
 	{ "brefcensus", new Impl::BRefCensusSGM() },
 	{ "hgcensussgm", new Impl::HGCensusSGM() },
 	{ "hcensussgm", new Impl::HCensusSGM() },
+	{ "hwcensussgm", new Impl::HWCensusSGM() },
 	{ "cpcensussgm",  new Impl::CPCensusSGM() },
 	{ "tcensussgm",  new Impl::TCensusSGM() },
 	{ "wcensussgm",  new Impl::WCensusSGM() },
diff --git a/lib/libstereo/src/algorithms/hwcensussgm.cu b/lib/libstereo/src/algorithms/hwcensussgm.cu
new file mode 100644
index 000000000..e1a42e860
--- /dev/null
+++ b/lib/libstereo/src/algorithms/hwcensussgm.cu
@@ -0,0 +1,131 @@
+#include "stereo.hpp"
+#include "stereosgm.hpp"
+#include "../costs/census.hpp"
+#include "../costs/dual.hpp"
+#include <opencv2/cudawarping.hpp>
+#include <opencv2/cudafilters.hpp>
+#include <opencv2/highgui.hpp>
+
+typedef MultiCostsWeighted<WeightedCensusMatchingCost,3> MatchingCost;
+
+static void variance_mask(cv::InputArray in, cv::OutputArray out, int wsize=3) {
+	if (in.isGpuMat() && out.isGpuMat()) {
+		cv::cuda::GpuMat im;
+		cv::cuda::GpuMat im2;
+		cv::cuda::GpuMat mean;
+		cv::cuda::GpuMat mean2;
+
+		mean.create(in.size(), CV_32FC1);
+		mean2.create(in.size(), CV_32FC1);
+		im2.create(in.size(), CV_32FC1);
+
+		if (in.type() != CV_32FC1) {
+			in.getGpuMat().convertTo(im, CV_32FC1);
+		}
+		else {
+			im = in.getGpuMat();
+		}
+
+		cv::cuda::multiply(im, im, im2);
+		auto filter = cv::cuda::createBoxFilter(CV_32FC1, CV_32FC1, cv::Size(wsize,wsize));
+		filter->apply(im, mean);   // E[X]
+		filter->apply(im2, mean2); // E[X^2]
+		cv::cuda::multiply(mean, mean, mean); // (E[X])^2
+
+		// NOTE: floating point accuracy in subtraction
+		// (cv::cuda::createBoxFilter only supports 8 bit integer types)
+		cv::cuda::subtract(mean2, mean, out.getGpuMatRef()); // E[X^2] - (E[X])^2
+	}
+	else { throw std::exception(); /* todo CPU version */ }
+}
+
+struct StereoHierWeightCensusSgm::Impl : public StereoSgm<MatchingCost, StereoHierWeightCensusSgm::Parameters> {
+    WeightedCensusMatchingCost cost_fine;
+    WeightedCensusMatchingCost cost_medium;
+    WeightedCensusMatchingCost cost_coarse;
+	Array2D<uchar> l;
+    Array2D<uchar> r;
+    Array2D<float> var_fine;
+    Array2D<float> var_medium;
+    Array2D<float> var_coarse;
+
+	Impl(StereoHierWeightCensusSgm::Parameters &params, int width, int height, int dmin, int dmax) :
+        StereoSgm(params, width, height, dmin, dmax),
+        cost_fine(width, height, dmin, dmax),
+        cost_medium(width, height, dmin, dmax),
+        cost_coarse(width, height, dmin, dmax),
+        l(width, height), r(width, height),
+        var_fine(width, height),
+        var_medium(width, height),
+        var_coarse(width, height) {
+            cost.add(0, cost_fine, var_fine);
+            cost.add(1, cost_medium, var_medium);
+            cost.add(2, cost_coarse, var_coarse);
+        }
+};
+
+StereoHierWeightCensusSgm::StereoHierWeightCensusSgm() : impl_(nullptr) {
+	impl_ = new Impl(params, 0, 0, 0, 0);
+}
+
+void StereoHierWeightCensusSgm::compute(cv::InputArray l, cv::InputArray r, cv::OutputArray disparity) {
+
+	//cudaSetDevice(0);
+
+	if (l.rows() != impl_->cost.height() || r.cols() != impl_->cost.width()) {
+		delete impl_; impl_ = nullptr;
+		impl_ = new Impl(params, l.cols(), l.rows(), params.d_min, params.d_max);
+	}
+
+	mat2gray(l, impl_->l);
+	mat2gray(r, impl_->r);
+    timer_set();
+
+    static constexpr int DOWNSCALE_MEDIUM = 4;
+    static constexpr int DOWNSCALE_COARSE = 6;
+    
+    Array2D<uchar> medium_l(l.cols()/DOWNSCALE_MEDIUM, l.rows()/DOWNSCALE_MEDIUM);
+    Array2D<uchar> medium_r(r.cols()/DOWNSCALE_MEDIUM, r.rows()/DOWNSCALE_MEDIUM);
+    Array2D<uchar> coarse_l(l.cols()/DOWNSCALE_COARSE, l.rows()/DOWNSCALE_COARSE);
+    Array2D<uchar> coarse_r(r.cols()/DOWNSCALE_COARSE, r.rows()/DOWNSCALE_COARSE);
+    cv::cuda::resize(impl_->l.toGpuMat(), medium_l.toGpuMat(), cv::Size(medium_l.width, medium_r.height));
+    cv::cuda::resize(impl_->r.toGpuMat(), medium_r.toGpuMat(), cv::Size(medium_r.width, medium_r.height));
+    cv::cuda::resize(impl_->l.toGpuMat(), coarse_l.toGpuMat(), cv::Size(coarse_l.width, coarse_l.height));
+    cv::cuda::resize(impl_->r.toGpuMat(), coarse_r.toGpuMat(), cv::Size(coarse_r.width, coarse_r.height));
+
+    cv::cuda::GpuMat var_fine = impl_->var_fine.toGpuMat();
+    variance_mask(impl_->l.toGpuMat(), var_fine, params.var_window);
+    cv::cuda::normalize(var_fine, var_fine, params.alpha, params.beta, cv::NORM_MINMAX, -1);
+
+    cv::cuda::GpuMat var_medium; // = impl_->var_medium.toGpuMat();
+    variance_mask(medium_l.toGpuMat(), var_medium, params.var_window);
+    cv::cuda::normalize(var_medium, var_medium, params.alpha, params.beta, cv::NORM_MINMAX, -1);
+    cv::cuda::resize(var_medium, impl_->var_medium.toGpuMat(), cv::Size(l.cols(), l.rows()));
+
+    cv::cuda::GpuMat var_coarse; // = impl_->var_coarse.toGpuMat();
+    variance_mask(coarse_l.toGpuMat(), var_coarse, params.var_window);
+    cv::cuda::normalize(var_coarse, var_coarse, params.alpha, params.beta, cv::NORM_MINMAX, -1);
+    cv::cuda::resize(var_coarse, impl_->var_coarse.toGpuMat(), cv::Size(l.cols(), l.rows()));
+
+    cv::Mat tmp;
+    impl_->var_coarse.toGpuMat().download(tmp);
+    cv::imshow("Var", tmp);
+
+	// CT
+    impl_->cost_fine.set(impl_->l, impl_->r);
+    impl_->cost_medium.set(impl_->l, impl_->r, medium_l, medium_r);
+    impl_->cost_coarse.set(impl_->l, impl_->r, coarse_l, coarse_r);
+    impl_->cost.set();
+    impl_->compute(disparity);
+    
+    Array2D<ExpandingCensusMatchingCost::Type> dsitmp_dev(l.cols(), l.rows());
+	dsi_slice(impl_->cost, impl_->wta.disparity, dsitmp_dev);
+	show_dsi_slice(dsitmp_dev.toGpuMat());
+}
+
+StereoHierWeightCensusSgm::~StereoHierWeightCensusSgm() {
+	if (impl_) {
+		delete impl_;
+		impl_ = nullptr;
+	}
+}
diff --git a/lib/libstereo/src/costs/census.cu b/lib/libstereo/src/costs/census.cu
index 9760f4db2..af00c90ba 100644
--- a/lib/libstereo/src/costs/census.cu
+++ b/lib/libstereo/src/costs/census.cu
@@ -287,8 +287,8 @@ void CensusMatchingCost::set(cv::InputArray l, cv::InputArray r) {
 ////////////////////////////////////////////////////////////////////////////////
 
 void ExpandingCensusMatchingCost::set(const Array2D<uchar> &l, const Array2D<uchar> &r) {
-	parallel2D<algorithms::ExCensusTransformRowMajor<5,3>>({l.data(), ct_l_.data(),1.5f}, l.width, l.height);
-	parallel2D<algorithms::ExCensusTransformRowMajor<5,3>>({r.data(), ct_r_.data(),1.5f}, r.width, r.height);
+	parallel2D<algorithms::ExCensusTransformRowMajor<7,7>>({l.data(), ct_l_.data(),1.5f}, l.width, l.height);
+	parallel2D<algorithms::ExCensusTransformRowMajor<7,7>>({r.data(), ct_r_.data(),1.5f}, r.width, r.height);
 }
 
 /*void ExpandingCensusMatchingCost::set(const Array2D<uchar> &l, const Array2D<uchar> &r, const Array2D<uchar> &hl, const Array2D<uchar> &hr) {
@@ -365,8 +365,18 @@ void MiniCensusMatchingCost::set(cv::InputArray l, cv::InputArray r) {
 ////////////////////////////////////////////////////////////////////////////////
 
 void WeightedCensusMatchingCost::set(const Array2D<uchar> &l, const Array2D<uchar> &r) {
-	parallel2D<algorithms::CensusTransformRowMajor<11,11>>({l.data(), ct_l_.data()}, l.width, l.height);
-	parallel2D<algorithms::CensusTransformRowMajor<11,11>>({r.data(), ct_r_.data()}, r.width, r.height);
+	parallel2D<algorithms::CensusTransformRowMajor<7,7>>({l.data(), ct_l_.data()}, l.width, l.height);
+	parallel2D<algorithms::CensusTransformRowMajor<7,7>>({r.data(), ct_r_.data()}, r.width, r.height);
+}
+
+void WeightedCensusMatchingCost::set(const Array2D<uchar> &l, const Array2D<uchar> &r, const Array2D<uchar> &hl, const Array2D<uchar> &hr) {
+	//if (pattern_ == CensusPattern::STANDARD) {
+		parallel2D<algorithms::HCensusTransformRowMajor<7,7>>({l.data(), hl.data(), ct_l_.data(), {ushort(hl.width), ushort(hl.height)}}, l.width, l.height);
+		parallel2D<algorithms::HCensusTransformRowMajor<7,7>>({r.data(), hr.data(), ct_r_.data(), {ushort(hr.width), ushort(hr.height)}}, r.width, r.height);
+	//} else if (pattern_ == CensusPattern::GENERALISED) {
+	//	parallel2D<algorithms::HGCensusTransformRowMajor<7,7>>({l.data(), hl.data(), ct_l_.data(), {ushort(hl.width), ushort(hl.height)}}, l.width, l.height);
+	//	parallel2D<algorithms::HGCensusTransformRowMajor<7,7>>({r.data(), hr.data(), ct_r_.data(), {ushort(hr.width), ushort(hr.height)}}, r.width, r.height);
+	//}
 }
 
 void WeightedCensusMatchingCost::set(cv::InputArray l, cv::InputArray r) {
diff --git a/lib/libstereo/src/costs/census.hpp b/lib/libstereo/src/costs/census.hpp
index b28ece7ec..bd3ef442f 100644
--- a/lib/libstereo/src/costs/census.hpp
+++ b/lib/libstereo/src/costs/census.hpp
@@ -57,6 +57,7 @@ namespace impl {
 		static_assert(R % 2 == 1, "R must be odd");
 		typedef unsigned short Type;
 
+		WeightedCensusMatchingCost() : DSImplBase<Type>({0,0,0,0}) {}
 		WeightedCensusMatchingCost(ushort w, ushort h, ushort dmin, ushort dmax) :
 				DSImplBase<unsigned short>({w,h,dmin,dmax}) {
 
@@ -82,6 +83,15 @@ namespace impl {
 			} while(bin != NBINS);
 		}
 
+		WeightedCensusMatchingCost<R,NBINS> &operator=(const WeightedCensusMatchingCost<R,NBINS> &c) {
+			ct_l = c.ct_l;
+			ct_r = c.ct_r;
+			W = c.W;
+			memcpy(masks, c.masks, sizeof(masks));
+			memcpy(weights, c.weights, sizeof(weights));
+			return *this;
+		}
+
 		__host__ __device__ inline unsigned short operator()(const int y, const int x, const int d) const {
 			if ((x-d) < 0) { return COST_MAX; }
 			float c = 0;
@@ -106,7 +116,7 @@ namespace impl {
 
 		uint64_t masks[WSTEP][NBINS] = {0};
 		float weights[NBINS] = {0.0f};
-		const float W = 0.75f;
+		float W = 0.75f;
 
 		Array2D<uint64_t>::Data ct_l;
 		Array2D<uint64_t>::Data ct_r;
@@ -203,9 +213,9 @@ protected:
 	CensusPattern pattern_ = CensusPattern::STANDARD;
 };
 
-class ExpandingCensusMatchingCost : public DSBase<impl::CensusMatchingCost<5,3,1>> {
+class ExpandingCensusMatchingCost : public DSBase<impl::WeightedCensusMatchingCost<7,3>> {
 public:
-	typedef impl::CensusMatchingCost<5,3,1> DataType;
+	typedef impl::WeightedCensusMatchingCost<7,3> DataType;
 	typedef unsigned short Type;
 
 	ExpandingCensusMatchingCost() : DSBase<DataType>(0, 0, 0, 0) {};
@@ -213,8 +223,8 @@ public:
 		: DSBase<DataType>(width, height, disp_min, disp_max),
 			ct_l_(width*data().WSTEP, height), ct_r_(width*data().WSTEP,height)
 		{
-			data().l = ct_l_.data();
-			data().r = ct_r_.data();
+			data().ct_l = ct_l_.data();
+			data().ct_r = ct_r_.data();
 		}
 
 	inline void setPattern(CensusPattern p) { pattern_ = p; }
@@ -230,9 +240,9 @@ protected:
 	CensusPattern pattern_ = CensusPattern::STANDARD;
 };
 
-class WeightedCensusMatchingCost : public DSBase<impl::WeightedCensusMatchingCost<11, 5>> {
+class WeightedCensusMatchingCost : public DSBase<impl::WeightedCensusMatchingCost<7, 3>> {
 public:
-	typedef impl::WeightedCensusMatchingCost<11, 5> DataType;
+	typedef impl::WeightedCensusMatchingCost<7, 3> DataType;
 	typedef unsigned short Type;
 
 	WeightedCensusMatchingCost() : DSBase<DataType>(0, 0, 0, 0) {};
@@ -246,6 +256,7 @@ public:
 
 	void set(cv::InputArray l, cv::InputArray r);
 	void set(const Array2D<uchar>& l, const Array2D<uchar>& r);
+	void set(const Array2D<uchar> &l, const Array2D<uchar> &r, const Array2D<uchar> &hl, const Array2D<uchar> &hr);
 	static constexpr Type COST_MAX = DataType::COST_MAX;
 
 protected:
-- 
GitLab