diff --git a/lib/libstereo/include/stereo.hpp b/lib/libstereo/include/stereo.hpp
index 0159991a60f1fc1b6869ba739c75bf35cb211d8b..2dac90d5aa9b2cd2ed6c5e23a723f232f9f56993 100644
--- a/lib/libstereo/include/stereo.hpp
+++ b/lib/libstereo/include/stereo.hpp
@@ -106,6 +106,11 @@ public:
 					AggregationDirections::VERTICAL |
 					AggregationDirections::DIAGONAL;
 		bool debug = false;
+		/** local variance maps are min-max normalized to the range [alpha, beta] */
+		float alpha = 0.2;
+		float beta = 1.0;
+		/** side length of the square box window used to compute the local variance */
+		int var_window = 5;
 		CensusPattern pattern = CensusPattern::STANDARD;
 	};
 	Parameters params;
diff --git a/lib/libstereo/middlebury/algorithms.hpp b/lib/libstereo/middlebury/algorithms.hpp
index 2b73615507ed92a866d7d4b6d8839934657e89ae..e5cc07c383889b4099e85a4cb53822e12b4b3aa5 100644
--- a/lib/libstereo/middlebury/algorithms.hpp
+++ b/lib/libstereo/middlebury/algorithms.hpp
@@ -33,7 +33,7 @@ namespace Impl {
 	};
 
 	struct ECensusSGM : public Algorithm {
-		ECensusSGM() { P1 = 4.0f; P2 = 80.0f; }
+		ECensusSGM() { P1 = 4.0f; P2 = 60.0f; }
 
 		virtual void run(const MiddleburyData &data, cv::Mat &disparity) override {
 			StereoExCensusSgm stereo;
@@ -45,6 +45,9 @@ namespace Impl {
 			stereo.params.debug = false;
 			stereo.params.d_min = data.calib.vmin;
 			stereo.params.d_max = data.calib.vmax;
+			stereo.params.alpha = 0.5f;
+			stereo.params.beta = 1.0f;
+			stereo.params.var_window = 9;
 			stereo.compute(data.imL, data.imR, disparity);
 		}
 	};
@@ -151,7 +154,21 @@ namespace Impl {
 			stereo.params.debug = false;
 			stereo.params.alpha = 0.5f;
 			stereo.params.beta = 1.0f;
+
 			stereo.compute(data.imL, data.imR, disparity);
+
+			/*cv::cuda::GpuMat gdisp;
+			cv::cuda::GpuMat gdisp2;
+			cv::cuda::GpuMat gl;
+			gl.upload(data.imL);
+			gdisp.create(gl.size(), CV_32F);
+			stereo.compute(data.imL, data.imR, gdisp);
+
+			gdisp.convertTo(gdisp2, CV_16S);
+			auto blf = cv::cuda::createDisparityBilateralFilter(stereo.params.d_max-stereo.params.d_min+1,3,10);
+			blf->apply(gdisp2, gl, gdisp2);
+			gdisp2.convertTo(gdisp, CV_32F);
+			gdisp.download(disparity);*/
 		}
 	};
 
diff --git a/lib/libstereo/src/algorithms/excensussgm.cu b/lib/libstereo/src/algorithms/excensussgm.cu
index 78e03617be1f21b884cd3a6906d34899dcdd15d1..4ea34333e0358145fca859c06fd7e41048004174 100644
--- a/lib/libstereo/src/algorithms/excensussgm.cu
+++ b/lib/libstereo/src/algorithms/excensussgm.cu
@@ -1,13 +1,60 @@
 #include "stereo.hpp"
 #include "stereosgm.hpp"
 #include "../costs/census.hpp"
+#include "costs/scale.hpp"
 
-struct StereoExCensusSgm::Impl : public StereoSgm<ExpandingCensusMatchingCost, StereoExCensusSgm::Parameters> {
+#include <opencv2/cudafilters.hpp>
+
+/**
+ * Local variance of `in` over a wsize x wsize box window: E[X^2] - (E[X])^2.
+ * GPU only: raises cv::Exception (StsNotImplemented) unless `in` and `out` are both GpuMat.
+ */
+static void variance_mask(cv::InputArray in, cv::OutputArray out, int wsize=3) {
+	if (!in.isGpuMat() || !out.isGpuMat()) {
+		/* todo CPU version */
+		CV_Error(cv::Error::StsNotImplemented, "variance_mask: only GpuMat input and output are supported");
+	}
+
+	// All arithmetic below is done in 32 bit float; convert only when needed.
+	cv::cuda::GpuMat im;
+	if (in.type() != CV_32FC1) { in.getGpuMat().convertTo(im, CV_32FC1); }
+	else { im = in.getGpuMat(); }
+
+	cv::cuda::GpuMat im2(in.size(), CV_32FC1);
+	cv::cuda::GpuMat mean(in.size(), CV_32FC1);
+	cv::cuda::GpuMat mean2(in.size(), CV_32FC1);
+
+	cv::cuda::multiply(im, im, im2); // X^2
+	auto filter = cv::cuda::createBoxFilter(CV_32FC1, CV_32FC1, cv::Size(wsize,wsize));
+	filter->apply(im, mean);   // E[X]
+	filter->apply(im2, mean2); // E[X^2]
+	cv::cuda::multiply(mean, mean, mean); // (E[X])^2, squared in place
+
+	// NOTE: the float subtraction below loses accuracy to cancellation when both
+	// terms are close. (Presumably float is used because, among integer types,
+	// cv::cuda::createBoxFilter only supports 8 bit ones.)
+	cv::cuda::subtract(mean2, mean, out.getGpuMatRef()); // E[X^2] - (E[X])^2
+}
+
+
+using CostType = unsigned short; // cost value type handed to WeightedCost
+using MatchingCost = WeightedCost<ExpandingCensusMatchingCost, CostType>; // expanding census cost wrapped by WeightedCost
+
+
+struct StereoExCensusSgm::Impl : public StereoSgm<MatchingCost, StereoExCensusSgm::Parameters> {
 	Array2D<uchar> l;
-	Array2D<uchar> r;
+    Array2D<uchar> r;
+    Array2D<float> variance;   // weight array bound to the cost as the first (wl) argument of setWeights()
+    Array2D<float> variance_r; // weight array bound to the cost as the second (wr) argument of setWeights()
+    ExpandingCensusMatchingCost excensus; // census cost object bound to `cost` through setCost()
 
 	Impl(StereoExCensusSgm::Parameters &params, int width, int height, int dmin, int dmax) :
-		StereoSgm(params, width, height, dmin, dmax), l(width, height), r(width, height) {}
+        StereoSgm(params, width, height, dmin, dmax), l(width, height), r(width, height),
+        variance(width,height), variance_r(width,height),
+        excensus(width, height, dmin, dmax) {
+            cost.setCost(excensus); // `cost` is not declared here; presumably the MatchingCost member inherited from StereoSgm (TODO confirm)
+            cost.setWeights(variance, variance_r); // binds the two weight arrays declared above
+        }
 };
 
 StereoExCensusSgm::StereoExCensusSgm() : impl_(nullptr) {
@@ -24,9 +71,19 @@ void StereoExCensusSgm::compute(cv::InputArray l, cv::InputArray r, cv::OutputAr
 	}
 
 	mat2gray(l, impl_->l);
-	mat2gray(r, impl_->r);
-	impl_->cost.setPattern(params.pattern);
-	impl_->cost.set(impl_->l, impl_->r);
+    mat2gray(r, impl_->r);
+    
+    cv::cuda::GpuMat var_l = impl_->variance.toGpuMat();
+	variance_mask(impl_->l.toGpuMat(), var_l, params.var_window);
+	cv::cuda::GpuMat var_r = impl_->variance_r.toGpuMat();
+    variance_mask(impl_->r.toGpuMat(), var_r, params.var_window);
+    
+    cv::cuda::normalize(var_l, var_l, params.alpha, params.beta, cv::NORM_MINMAX, -1);
+	cv::cuda::normalize(var_r, var_r, params.alpha, params.beta, cv::NORM_MINMAX, -1);
+
+	impl_->excensus.setPattern(params.pattern);
+    impl_->excensus.set(impl_->l, impl_->r);
+    impl_->cost.set();
 
 	cudaSafeCall(cudaDeviceSynchronize());
     impl_->compute(disparity);
diff --git a/lib/libstereo/src/algorithms/stereosgm.hpp b/lib/libstereo/src/algorithms/stereosgm.hpp
index 114bc501de437a4007165ba7e4a694d52e09a94e..2a6369df049e51f1136320810cd62abe34c5e1d2 100644
--- a/lib/libstereo/src/algorithms/stereosgm.hpp
+++ b/lib/libstereo/src/algorithms/stereosgm.hpp
@@ -5,6 +5,7 @@
 
 #include <opencv2/core/cuda/common.hpp>
 #include <opencv2/cudaarithm.hpp>
+#include <opencv2/cudastereo.hpp>
 
 #include "stereo.hpp"
 
diff --git a/lib/libstereo/src/costs/scale.hpp b/lib/libstereo/src/costs/scale.hpp
index 1369e8379db74b254bca0bdfb0aa06bec223ddaa..8ae4c44940ccd042ec9e228ec297e881c93f12d9 100644
--- a/lib/libstereo/src/costs/scale.hpp
+++ b/lib/libstereo/src/costs/scale.hpp
@@ -123,22 +123,35 @@ public:
 
 	WeightedCost(int width, int height, int disp_min, int disp_max, A &a, Array2D<float> &wl, Array2D<float> &wr)
 		: DSBase<DataType>(width, height, disp_min, disp_max),
-			cost(a), weights_l(wl), weights_r(wr) {
+			weights_l(&wl), weights_r(&wr), cost(&a) { // member declaration order; only the addresses of a, wl, wr are stored
 
 	}
 
+	/** Creates an unbound cost: setCost() and setWeights() must be called before set(). */
+	WeightedCost(int width, int height, int disp_min, int disp_max)
+		: DSBase<DataType>(width, height, disp_min, disp_max),
+			weights_l(nullptr), weights_r(nullptr), cost(nullptr) { // member declaration order
+	}
+
+	void setCost(A &c) { cost = &c; } // rebinds the wrapped cost; only the address of c is stored, no copy
+
+	void setWeights(Array2D<float> &wl, Array2D<float> &wr) { // rebinds both weight arrays; only their addresses are stored
+		weights_l = &wl;
+		weights_r = &wr;
+	}
+
 	void set() {
-		this->data().cost = cost.data();
-		this->data().weights_l = weights_l.data();
-		this->data().weights_r = weights_r.data();
+		this->data().cost = cost->data(); // NOTE: no null checks; a cost and both weight arrays must be bound before set() is called
+		this->data().weights_l = weights_l->data();
+		this->data().weights_r = weights_r->data();
 	}
 
 	static const T COST_MAX = A::COST_MAX;
 
 protected:
-	Array2D<float> &weights_l;
-	Array2D<float> &weights_r;
-	A &cost;
+	Array2D<float> *weights_l; // nullptr after the 4-argument constructor until setWeights() is called
+	Array2D<float> *weights_r; // nullptr after the 4-argument constructor until setWeights() is called
+	A *cost; // nullptr after the 4-argument constructor until setCost() is called
 };
 
 #endif