diff --git a/cv-node/CMakeLists.txt b/cv-node/CMakeLists.txt
index 9e93a7257c6038a9d5b25eb5a0b0e9b719c1a1cb..9c8b3d96a0a6a71461fd070de6830f7554085137 100644
--- a/cv-node/CMakeLists.txt
+++ b/cv-node/CMakeLists.txt
@@ -51,7 +51,7 @@ add_definitions(-DFTL_DATA_ROOT=${FTL_DATA_ROOT})
 
 set(CMAKE_CXX_FLAGS "-pthread -std=c++17 -Wall")
 set(CMAKE_CXX_FLAGS_DEBUG "-D_DEBUG -pg -Wall")
-set(CMAKE_CXX_FLAGS_RELEASE "-O3")
+set(CMAKE_CXX_FLAGS_RELEASE "-O3 -msse3 -mfpmath=sse") # NOTE(review): -msse3/-mfpmath=sse are x86 options; confirm no non-x86 release builds are needed
 
 SET(CMAKE_USE_RELATIVE_PATHS ON)
 
diff --git a/cv-node/src/rtcensus.cpp b/cv-node/src/rtcensus.cpp
index 12b9be59440fdefbf5cf5bd11a9bc77ab5244bf2..4afcfbd201acbeca5bb584d529542f28c31d4962 100644
--- a/cv-node/src/rtcensus.cpp
+++ b/cv-node/src/rtcensus.cpp
@@ -2,6 +2,7 @@
 #include <vector>
 #include <tuple>
 #include <bitset>
+#include <cmath>
 #include <glog/logging.h>
 
 using ftl::RTCensus;
@@ -69,6 +70,7 @@ static vector<uint16_t> dsi_ca(vector<uint64_t> &census_R, vector<uint64_t> &cen
 		const auto d_ = d * sign;
 		for (size_t v=2; v<h-2; v++) {
 		for (size_t u=2; u<w-2; u++) {
+			const size_t ix = d+v*w*ds+u*ds;
 			for (int n=-2; n<=2; n++) {
 			const auto u_ = u + n;
 			if (u_+d_ < 0 || u_+d_ >= w) continue;
@@ -76,7 +78,7 @@ static vector<uint16_t> dsi_ca(vector<uint64_t> &census_R, vector<uint64_t> &cen
 				const auto v_ = (v + m)*w;
 				auto r = census_R[u_+v_];
 				auto l = census_L[v_+(u_+d_)];
-				result[d+v*w*ds+u*ds] += hamming(r,l);
+				result[ix] += bitset<64>(r^l).count(); //hamming(r,l);
 			}
 			}
 			
@@ -133,6 +135,26 @@ static cv::Mat d_sub(vector<uint16_t> &dsi, size_t w, size_t h, size_t ds) {
 	return result;
 }
 
+// Left/right check: where |a-b| <= 1 stores |(a+b)/2| (a = truncated left disparity, b = right disparity at column u-a), else 0.
+static cv::Mat consistency(cv::Mat &d_sub_r, cv::Mat &d_sub_l) {
+	const int w = d_sub_r.cols;
+	const int h = d_sub_r.rows;
+	Mat result = Mat::zeros(Size(w,h), CV_64FC1);  // pixels that are skipped or rejected stay 0
+	// NOTE(review): at<double> assumes both inputs are CV_64FC1 and the same size -- confirm against d_sub.
+	for (int v=0; v<h; v++) {
+	for (int u=0; u<w; u++) {
+		const int a = static_cast<int>(d_sub_l.at<double>(v,u));  // left disparity, truncated to whole pixels
+		// Signed index on purpose: with size_t the old "u-a < 0" test could never fire and u-a wrapped out of range.
+		const int ur = u - a;
+		if (ur < 0 || ur >= w) continue;
+		const double b = d_sub_r.at<double>(v,ur);
+		// Keep the pixel only when both maps agree to within one disparity level.
+		if (std::abs(a-b) <= 1.0) result.at<double>(v,u) = std::abs((a+b)/2);
+	}
+	}
+	return result;
+}
+
 void RTCensus::disparity(cv::Mat &l, cv::Mat &r, cv::Mat &disp, size_t num_disp, float gamma, float tau) {
 	size_t d_min = 0;
 	size_t d_max = num_disp;
@@ -149,8 +171,8 @@ void RTCensus::disparity(cv::Mat &l, cv::Mat &r, cv::Mat &disp, size_t num_disp,
 	auto disp_L = d_sub(dsi_ca_L, l.cols, l.rows, d_max-d_min);
 	LOG(INFO) << "Disp done";
 
-	disp = disp_L;
-	//disp = consistency(disp_R, disp_L);
+	//disp = disp_L;
+	disp = consistency(disp_R, disp_L);
 
 	// TODO confidence and texture filtering
 }