diff --git a/lib/libstereo/CMakeLists.txt b/lib/libstereo/CMakeLists.txt
index d3922b3c96793ed3816e3304ceed5400d20a0359..4031f68d9c2c3c3d4656e7c574ccbbe854394c4a 100644
--- a/lib/libstereo/CMakeLists.txt
+++ b/lib/libstereo/CMakeLists.txt
@@ -41,7 +41,8 @@ if (LIBSTEREO_SHARED)
                 src/algorithms/censussgm.cu
                 src/algorithms/stablesgm.cu
                 src/algorithms/tcensussgm.cu
-                src/stereo_hier_census.cu
+                src/algorithms/hcensussgm.cu
+                #src/stereo_hier_census.cu
                 src/stereo_wcensussgm.cu
                 src/stereo_census_adaptive.cu
                 src/stereo_cp_censussgm.cu
@@ -71,7 +72,8 @@ else()
                 src/algorithms/censussgm.cu
                 src/algorithms/stablesgm.cu
                 src/algorithms/tcensussgm.cu
-                src/stereo_hier_census.cu
+                src/algorithms/hcensussgm.cu
+                #src/stereo_hier_census.cu
                 src/stereo_wcensussgm.cu
                 src/stereo_census_adaptive.cu
                 src/stereo_cp_censussgm.cu
diff --git a/lib/libstereo/middlebury/main.cpp b/lib/libstereo/middlebury/main.cpp
index 90523b531c93da1eed645c35f02ab060cb41df2a..caceb4e5237ef509c258d596c1d2f51dad0110a7 100644
--- a/lib/libstereo/middlebury/main.cpp
+++ b/lib/libstereo/middlebury/main.cpp
@@ -48,7 +48,7 @@ static void run_hcensussgm(MiddleburyData &data, cv::Mat &disparity) {
 	stereo.params.d_min = data.calib.vmin;
 	stereo.params.d_max = data.calib.vmax;
 	stereo.params.subpixel = 1;
-	stereo.params.lr_consistency = false;
+	stereo.params.lr_consistency = true;
 	stereo.params.debug = false;
 	stereo.compute(data.imL, data.imR, disparity);
 }
diff --git a/lib/libstereo/src/algorithms/hcensussgm.cu b/lib/libstereo/src/algorithms/hcensussgm.cu
new file mode 100644
index 0000000000000000000000000000000000000000..f16be84e7859bd0c207d5f8a1896bd672b65d88a
--- /dev/null
+++ b/lib/libstereo/src/algorithms/hcensussgm.cu
@@ -0,0 +1,70 @@
+#include "stereo.hpp"
+#include "stereosgm.hpp"
+#include "../costs/census.hpp"
+#include "../costs/dual.hpp"
+#include <opencv2/cudawarping.hpp>
+
+using MatchingCost = MultiCosts<CensusMatchingCost, 3>;  // combined cost built from 3 census costs
+
+// Private implementation of StereoHierCensusSgm: a StereoSgm pipeline whose matching cost is a
+// MultiCosts combination of three census costs (fine, medium, coarse).
+struct StereoHierCensusSgm::Impl : public StereoSgm<MatchingCost, StereoHierCensusSgm::Parameters> {
+	// Per-level census costs; the constructor registers them with the inherited `cost`.
+	CensusMatchingCost cost_fine, cost_medium, cost_coarse;
+	// Left/right input images as converted by mat2gray() in compute().
+	Array2D<uchar> l, r;
+	Impl(StereoHierCensusSgm::Parameters &params, int width, int height, int dmin, int dmax)
+		: StereoSgm(params, width, height, dmin, dmax),
+		  cost_fine(width, height, dmin, dmax),
+		  cost_medium(width, height, dmin, dmax),
+		  cost_coarse(width, height, dmin, dmax),
+		  l(width, height), r(width, height) {
+		cost.add(0, cost_fine);
+		cost.add(1, cost_medium);
+		cost.add(2, cost_coarse);
+	}
+};
+
+StereoHierCensusSgm::StereoHierCensusSgm() : impl_(nullptr) {
+	impl_ = new Impl(params, 0, 0, 0, 0);  // zero-sized placeholder; compute() replaces it when the input size differs
+}
+
+// Computes the disparity map of the stereo pair (l, r) into `disparity`. Census costs are set up
+// from the full-size, 1/2-size and 1/4-size images, combined, and passed to the inherited compute().
+void StereoHierCensusSgm::compute(cv::InputArray l, cv::InputArray r, cv::OutputArray disparity) {
+	cudaSetDevice(0);  // NOTE(review): return code is not checked
+	// Rebuild the implementation when the left image no longer matches the current cost size.
+	// Both dimensions come from `l`, which also sizes the new Impl (was: l.rows() vs r.cols()).
+	if (l.rows() != impl_->cost.height() || l.cols() != impl_->cost.width()) {
+		delete impl_; impl_ = nullptr;
+		impl_ = new Impl(params, l.cols(), l.rows(), params.d_min, params.d_max);
+	}
+	mat2gray(l, impl_->l);
+	mat2gray(r, impl_->r);
+	timer_set();
+	// Temporary downscaled copies of both images (integer division of the input size).
+	static constexpr int DOWNSCALE_MEDIUM = 2;
+	static constexpr int DOWNSCALE_COARSE = 4;
+	Array2D<uchar> medium_l(l.cols()/DOWNSCALE_MEDIUM, l.rows()/DOWNSCALE_MEDIUM);
+	Array2D<uchar> medium_r(r.cols()/DOWNSCALE_MEDIUM, r.rows()/DOWNSCALE_MEDIUM);
+	Array2D<uchar> coarse_l(l.cols()/DOWNSCALE_COARSE, l.rows()/DOWNSCALE_COARSE);
+	Array2D<uchar> coarse_r(r.cols()/DOWNSCALE_COARSE, r.rows()/DOWNSCALE_COARSE);
+	// Every resize targets its own destination's size (medium_l used to take medium_r.height).
+	cv::cuda::resize(impl_->l.toGpuMat(), medium_l.toGpuMat(), cv::Size(medium_l.width, medium_l.height));
+	cv::cuda::resize(impl_->r.toGpuMat(), medium_r.toGpuMat(), cv::Size(medium_r.width, medium_r.height));
+	cv::cuda::resize(impl_->l.toGpuMat(), coarse_l.toGpuMat(), cv::Size(coarse_l.width, coarse_l.height));
+	cv::cuda::resize(impl_->r.toGpuMat(), coarse_r.toGpuMat(), cv::Size(coarse_r.width, coarse_r.height));
+	// CT: set each census cost's inputs (medium/coarse also get their downscaled pair), then the combined cost.
+	impl_->cost_fine.set(impl_->l, impl_->r);
+	impl_->cost_medium.set(impl_->l, impl_->r, medium_l, medium_r);
+	impl_->cost_coarse.set(impl_->l, impl_->r, coarse_l, coarse_r);
+	impl_->cost.set();
+	impl_->compute(disparity);
+}
+
+// Releases the implementation object owned through impl_.
+// `delete` on a null pointer is a no-op, so no explicit null check is needed.
+StereoHierCensusSgm::~StereoHierCensusSgm() {
+	delete impl_;
+	impl_ = nullptr;
+}
diff --git a/lib/libstereo/src/stereo_hier_census.cu b/lib/libstereo/src/stereo_hier_census.cu
deleted file mode 100644
index 983ee357ff16958097770880c10201a48576073a..0000000000000000000000000000000000000000
--- a/lib/libstereo/src/stereo_hier_census.cu
+++ /dev/null
@@ -1,164 +0,0 @@
-#include <opencv2/core.hpp>
-#include <opencv2/imgproc.hpp>
-#include <opencv2/cudawarping.hpp>
-#include <opencv2/core/cuda/common.hpp>
-#include <opencv2/cudaarithm.hpp>
-
-#include "stereo.hpp"
-
-#include "util_opencv.hpp"
-#include "costs/census.hpp"
-#include "costs/dual.hpp"
-#include "dsi.hpp"
-
-#include "wta.hpp"
-#include "cost_aggregation.hpp"
-#include "aggregations/standard_sgm.hpp"
-
-#include "median_filter.hpp"
-#include "dsi_tools.hpp"
-
-#ifdef __GNUG__
-
-#include <chrono>
-#include <iostream>
-
-static std::chrono::time_point<std::chrono::system_clock> start;
-
-static void timer_set() {
-		start = std::chrono::high_resolution_clock::now();
-}
-
-static void timer_print(const std::string &msg, const bool reset=true) {
-	auto stop = std::chrono::high_resolution_clock::now();
-
-	char buf[24];
-	snprintf(buf, sizeof(buf), "%5i ms  ",
-				(int) std::chrono::duration_cast<std::chrono::milliseconds>(stop-start).count());
-
-	std::cout << buf <<  msg << "\n" << std::flush;
-	if (reset) { timer_set(); }
-}
-
-#else
-
-static void timer_set() {}
-static void timer_print(const std::string &msg, const bool reset=true) {}
-
-#endif
-
-using cv::Mat;
-using cv::Size;
-using ftl::stereo::aggregations::StandardSGM;
-
-typedef MultiCosts<CensusMatchingCost,3> MatchingCost;
-
-struct StereoHierCensusSgm::Impl {
-    CensusMatchingCost cost_fine;
-    CensusMatchingCost cost_medium;
-    CensusMatchingCost cost_coarse;
-    MatchingCost cost;
-	Array2D<MatchingCost::Type> cost_min_paths;
-	Array2D<MatchingCost::Type> uncertainty;
-	Array2D<uchar> l;
-	Array2D<uchar> r;
-
-	PathAggregator<StandardSGM<MatchingCost::DataType>> aggr;
-	WinnerTakesAll<DisparitySpaceImage<MatchingCost::Type>,float> wta;
-
-    Impl(int width, int height, int min_disp, int max_disp) :
-        cost_fine(width, height, min_disp, max_disp),
-        cost_medium(width, height, min_disp, max_disp),
-        cost_coarse(width, height, min_disp, max_disp),
-		cost(width, height, min_disp, max_disp),
-		cost_min_paths(width, height),
-		uncertainty(width, height),
-		l(width, height), r(width, height)
-		{
-            cost.add(0, cost_fine);
-            cost.add(1, cost_medium);
-            cost.add(2, cost_coarse);
-        }
-
-};
-
-StereoHierCensusSgm::StereoHierCensusSgm() : impl_(nullptr) {
-	impl_ = new Impl(0, 0, 0, 0);
-}
-
-void StereoHierCensusSgm::compute(cv::InputArray l, cv::InputArray r, cv::OutputArray disparity) {
-	cudaSetDevice(0);
-
-	if (l.rows() != impl_->cost.height() || r.cols() != impl_->cost.width()) {
-		delete impl_; impl_ = nullptr;
-		impl_ = new Impl(l.cols(), l.rows(), params.d_min, params.d_max);
-	}
-
-	mat2gray(l, impl_->l);
-	mat2gray(r, impl_->r);
-    timer_set();
-
-    static constexpr int DOWNSCALE_MEDIUM = 2;
-    static constexpr int DOWNSCALE_COARSE = 4;
-    
-    Array2D<uchar> medium_l(l.cols()/DOWNSCALE_MEDIUM, l.rows()/DOWNSCALE_MEDIUM);
-    Array2D<uchar> medium_r(r.cols()/DOWNSCALE_MEDIUM, r.rows()/DOWNSCALE_MEDIUM);
-    Array2D<uchar> coarse_l(l.cols()/DOWNSCALE_COARSE, l.rows()/DOWNSCALE_COARSE);
-    Array2D<uchar> coarse_r(r.cols()/DOWNSCALE_COARSE, r.rows()/DOWNSCALE_COARSE);
-    cv::cuda::resize(impl_->l.toGpuMat(), medium_l.toGpuMat(), cv::Size(medium_l.width, medium_r.height));
-    cv::cuda::resize(impl_->r.toGpuMat(), medium_r.toGpuMat(), cv::Size(medium_r.width, medium_r.height));
-    cv::cuda::resize(impl_->l.toGpuMat(), coarse_l.toGpuMat(), cv::Size(coarse_l.width, coarse_l.height));
-    cv::cuda::resize(impl_->r.toGpuMat(), coarse_r.toGpuMat(), cv::Size(coarse_r.width, coarse_r.height));
-
-	// CT
-    impl_->cost_fine.set(impl_->l, impl_->r);
-    impl_->cost_medium.set(impl_->l, impl_->r, medium_l, medium_r);
-    impl_->cost_coarse.set(impl_->l, impl_->r, coarse_l, coarse_r);
-    impl_->cost.set();
-
-	cudaSafeCall(cudaDeviceSynchronize());
-	if (params.debug) { timer_print("census transform"); }
-
-	// cost aggregation
-	StandardSGM<MatchingCost::DataType> func = {impl_->cost.data(), impl_->cost_min_paths.data(), params.P1, params.P2};
-	auto &out = impl_->aggr(func, params.paths);
-
-	cudaSafeCall(cudaDeviceSynchronize());
-	if (params.debug) { timer_print("Aggregation"); }
-
-	impl_->wta(out, params.subpixel, params.lr_consistency);
-	cudaSafeCall(cudaDeviceSynchronize());
-	if (params.debug) { timer_print("WTA"); }
-
-	// Drory, A., Haubold, C., Avidan, S., & Hamprecht, F. A. (2014).
-	// Semi-global matching: A principled derivation in terms of
-	// message passing. Lecture Notes in Computer Science (Including Subseries
-	// Lecture Notes in Artificial Intelligence and Lecture Notes in
-	// Bioinformatics). https://doi.org/10.1007/978-3-319-11752-2_4
-
-	#if USE_GPU
-	auto uncertainty = impl_->uncertainty.toGpuMat();
-	cv::cuda::subtract(impl_->wta.min_cost.toGpuMat(), impl_->cost_min_paths.toGpuMat(), uncertainty);
-	cv::cuda::compare(uncertainty, params.uniqueness, uncertainty, cv::CMP_GT);
-	impl_->wta.disparity.toGpuMat().setTo(0, uncertainty);
-	#else
-	auto uncertainty = impl_->uncertainty.toMat();
-	cv::subtract(impl_->wta.min_cost.toMat(), impl_->cost_min_paths.toMat(), uncertainty);
-	cv::compare(uncertainty, params.uniqueness, uncertainty, cv::CMP_GT);
-	impl_->wta.disparity.toMat().setTo(0, uncertainty);
-	#endif
-
-	median_filter(impl_->wta.disparity, disparity);
-	if (params.debug) { timer_print("median filter"); }
-
-	Array2D<MatchingCost::Type> dsitmp_dev(l.cols(), l.rows());
-	dsi_slice(out, impl_->wta.disparity, dsitmp_dev);
-	show_dsi_slice(dsitmp_dev.toGpuMat());
-}
-
-StereoHierCensusSgm::~StereoHierCensusSgm() {
-	if (impl_) {
-		delete impl_;
-		impl_ = nullptr;
-	}
-}