diff --git a/cv-node/CMakeLists.txt b/cv-node/CMakeLists.txt
index 2b2d24cd36476860793585e05deb82d42a2ba884..dab94af564545322531eb4935bb736220e454a19 100644
--- a/cv-node/CMakeLists.txt
+++ b/cv-node/CMakeLists.txt
@@ -31,6 +31,7 @@ set(CMAKE_CUDA_FLAGS "-Xcompiler -Wall")
 set(CMAKE_CUDA_FLAGS_DEBUG "-g -DDEBUG -D_DEBUG -Wall")
 set(CMAKE_CUDA_FLAGS_RELEASE "")
 add_definitions(-DHAVE_CUDA)
+include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
 endif (CUDA_FOUND)
 
 # Need to include staged files and libs
diff --git a/cv-node/include/ftl/algorithms/rtcensus.hpp b/cv-node/include/ftl/algorithms/rtcensus.hpp
index 2981b8d2e1a6aac2456142acfd2d0fd82eebacfd..aed1b122b86a69673bec711855c399d2d4594ebb 100644
--- a/cv-node/include/ftl/algorithms/rtcensus.hpp
+++ b/cv-node/include/ftl/algorithms/rtcensus.hpp
@@ -27,11 +27,13 @@ class RTCensus : public ftl::Disparity {
 	float gamma_;
 	float tau_;
 	bool use_cuda_;
+	bool alternate_;
 	
 	#if defined HAVE_CUDA
 	cv::cuda::GpuMat disp_;
 	cv::cuda::GpuMat filtered_;
 	cv::cuda::GpuMat left_;
+	cv::cuda::GpuMat left2_;
 	cv::cuda::GpuMat right_;
 	#endif
 	
diff --git a/cv-node/src/algorithms/rtcensus.cpp b/cv-node/src/algorithms/rtcensus.cpp
index 47f1acc96039996ab2e1e78925bdc28af475bb0a..a2c323ea32239fc93157e05f255e4ea33a4c4ca7 100644
--- a/cv-node/src/algorithms/rtcensus.cpp
+++ b/cv-node/src/algorithms/rtcensus.cpp
@@ -183,7 +183,8 @@ RTCensus::RTCensus(nlohmann::json &config)
 	:	Disparity(config),
 		gamma_(0.0f),
 		tau_(0.0f),
-		use_cuda_(config.value("use_cuda",true)) {}
+		use_cuda_(config.value("use_cuda",true)),
+		alternate_(false) {}
 
 /*
  * Choose the implementation and perform disparity calculation.
@@ -231,25 +232,31 @@ void RTCensus::computeCPU(const cv::Mat &l, const cv::Mat &r, cv::Mat &disp) {
 using namespace cv::cuda;
 using namespace cv;
 
+#include <vector_types.h>
+
 namespace ftl { namespace gpu {
-void rtcensus_call(const PtrStepSzb &l, const PtrStepSzb &r, const PtrStepSz<float> &disp, size_t num_disp, const int &s=0);
+void rtcensus_call(const PtrStepSz<uchar4> &l, const PtrStepSz<uchar4> &r, const PtrStepSz<float> &disp, size_t num_disp, const int &s=0);
 }}
 
 void RTCensus::computeCUDA(const cv::Mat &l, const cv::Mat &r, cv::Mat &disp) {
 	// Initialise gpu memory here because we need image size
 	if (disp_.empty()) disp_ = cuda::GpuMat(l.size(), CV_32FC1);
-	if (left_.empty()) left_ = cuda::GpuMat(l.size(), CV_8U);
-	if (right_.empty()) right_ = cuda::GpuMat(l.size(), CV_8U);
+	if (left_.empty()) left_ = cuda::GpuMat(l.size(), CV_8UC4);
+	if (left2_.empty()) left2_ = cuda::GpuMat(l.size(), CV_8UC4);
+	if (right_.empty()) right_ = cuda::GpuMat(l.size(), CV_8UC4);
 	
 	// Send images to GPU
-	left_.upload(l);
+	if (alternate_) left_.upload(l);
+	else left2_.upload(l);
 	right_.upload(r);
 
 	auto start = std::chrono::high_resolution_clock::now();
-	ftl::gpu::rtcensus_call(left_, right_, disp_, max_disp_);
+	ftl::gpu::rtcensus_call((alternate_)?left_:left2_, right_, disp_, max_disp_);
 	std::chrono::duration<double> elapsed = std::chrono::high_resolution_clock::now() - start;
 	LOG(INFO) << "CUDA census in " << elapsed.count() << "s";
 	
+	alternate_ = !alternate_;
+	
 	// Read disparity from GPU
 	disp_.download(disp);
 }
diff --git a/cv-node/src/algorithms/rtcensus.cu b/cv-node/src/algorithms/rtcensus.cu
index d008e06e61d4a41a59e6223ed1a43a6fa8c08c9c..583a823876bf089d832a2263921011ec339b56d1 100644
--- a/cv-node/src/algorithms/rtcensus.cu
+++ b/cv-node/src/algorithms/rtcensus.cu
@@ -75,13 +75,13 @@ cudaTextureObject_t makeTexture2D(void *ptr, int pitch, int width, int height) {
 __device__ uint64_t sparse_census(cudaTextureObject_t tex, int u, int v) {
 	uint64_t r = 0;
 
-	unsigned char t = tex2D<unsigned char>(tex, u,v);
+	unsigned char t = tex2D<uchar4>(tex, u,v).z;
 
 	for (int m=-7; m<=7; m+=2) {
 		//auto start_ix = (v + m)*w + u;
 		for (int n=-7; n<=7; n+=2) {
 			r <<= 1;
-			r |= XHI(t, tex2D<unsigned char>(tex, u+n, v+m));
+			r |= XHI(t, tex2D<uchar4>(tex, u+n, v+m).z);
 		}
 	}
 
@@ -141,7 +141,7 @@ __global__ void disp_kernel(float *disp_l, float *disp_r,
 		size_t ds) {	
 	//extern __shared__ uint64_t cache[];
 
-	const int gamma = 100;
+	const int gamma = 20;
 	
 	int u = (blockIdx.x * BLOCK_W) + threadIdx.x + RADIUS2;
 	int v_start = (blockIdx.y * ROWSperTHREAD) + RADIUS2;
@@ -275,7 +275,7 @@ __global__ void consistency_kernel(cudaTextureObject_t d_sub_l,
 
 		//disp(v,u) = a; //abs((a+b)/2);
 		
-		if (abs(a-b) <= 1.0) disp[v*pitch+u] = abs((a+b)/2); // was 1.0
+		if (abs(a-b) <= 1.0) disp[v*pitch+u] = (u < 300) ? b : a;//abs((a+b)/2); // was 1.0
 		else disp[v*pitch+u] = NAN;
 	//}
 	}
@@ -284,8 +284,8 @@ __global__ void consistency_kernel(cudaTextureObject_t d_sub_l,
 
 #define FILTER_WINDOW 11
 #define FILTER_WINDOW_R	5
-#define FILTER_SIM_THRESH 20
-#define FILTER_DISP_THRESH 10.0f
+#define FILTER_SIM_THRESH 5
+#define FILTER_DISP_THRESH 50.0f
 
 __global__ void filter_kernel(cudaTextureObject_t t, cudaTextureObject_t d,
 		cudaTextureObject_t prevD,
@@ -293,44 +293,51 @@ __global__ void filter_kernel(cudaTextureObject_t t, cudaTextureObject_t d,
 	size_t u = (blockIdx.x * BLOCK_W) + threadIdx.x + RADIUS;
 	size_t v = blockIdx.y + RADIUS;
 
-	if (u+num_disp > f.cols) {
+	/*if (u+num_disp > f.cols) {
 		f(v,u) = NAN;
 		return;
-	}
+	}*/
 
 	float disp = tex2D<float>(d,u,v);
-	/*if (!isnan(disp)) {
-		f(v,u) = disp;
-		return;
-	}*/
-	//if (isnan(disp)) disp = 100000.0f; //tex2D<float>(prev, u, v);
 
 	cudaTextureObject_t nTex = (prevT) ? prevT : t;
 	cudaTextureObject_t nDisp = (prevD) ? prevD : d;
 
 	float pdisp = tex2D<float>(nDisp,u,v);
 	if (isnan(pdisp)) pdisp = disp;
-	//if (isnan(disp)) disp = pdisp;
-	int pixel = tex2D<unsigned char>(t, u, v);
-	int ppixel = tex2D<unsigned char>(nTex, u, v);
+	if (isnan(disp)) disp = pdisp;
+	int pixel = tex2D<uchar4>(t, u, v).x;
+	int ppixel = tex2D<uchar4>(nTex, u, v).x;
 	float est = 0.0f; //(isnan(disp)) ? tex2D<float>(prev, u, v) : disp;
 	int nn = 0; //(isnan(disp)) ? 0 : 1;
 	int neigh_sq = 0;
 	int neigh_sum = 0;
+	
+	/*if (abs(ppixel-pixel) <= FILTER_SIM_THRESH) {
+		f(v,u) = disp;
+	} else {
+		f(v,u) = NAN;
+	}
+	return;*/
+	
+	if (!isnan(pdisp) && isnan(disp) && abs(pixel-ppixel) <= FILTER_SIM_THRESH) {
+		disp = pdisp;
+	}
 
 	for (int m=-FILTER_WINDOW_R; m<=FILTER_WINDOW_R; m++) {
 		for (int n=-FILTER_WINDOW_R; n<=FILTER_WINDOW_R; n++) {
-			int neigh = tex2D<unsigned char>(t, u+n, v+m);
+			int neigh = tex2D<uchar4>(t, u+n, v+m).x;
 			neigh_sq += neigh*neigh;
 			neigh_sum += neigh;
 
 			float ndisp = tex2D<float>(d,u+n,v+m);
-			if (isnan(ndisp)) {
+			/*if (isnan(ndisp)) {
 				ndisp = tex2D<float>(nDisp,u+n,v+m);
-				neigh = tex2D<unsigned char>(nTex, u+n, v+m);
-			}
+				neigh = tex2D<uchar4>(nTex, u+n, v+m).x;
+			}*/
 
-			if (m+n == 0) continue;
+			//if (isnan(tex2D<float>(nDisp,u+n,v+m))) continue;
+			if (m == 0 && n == 0) continue;
 
 			if (ndisp > 1.0f && !isnan(ndisp) && (abs(neigh-pixel) <= FILTER_SIM_THRESH)) { // && (isnan(disp) || abs(ndisp-disp) < FILTER_DISP_THRESH)) {
 				est += ndisp;
@@ -341,21 +348,25 @@ __global__ void filter_kernel(cudaTextureObject_t t, cudaTextureObject_t d,
 
 	// Texture map filtering
 	int tm = (neigh_sq / (FILTER_WINDOW*FILTER_WINDOW)) - ((neigh_sum*neigh_sum) / (FILTER_WINDOW*FILTER_WINDOW));
-	if (tm >= -9000000) {
+	//if (tm >= -9000000) {
 			 // ) {
-		if (!isnan(disp) && (abs(ppixel-pixel) > FILTER_SIM_THRESH || abs(pdisp - disp) <= FILTER_DISP_THRESH)) {
+		/*if (!isnan(disp) && disp > 1.0f) { // && (abs(ppixel-pixel) > FILTER_SIM_THRESH || abs(pdisp - disp) <= FILTER_DISP_THRESH)) {
 			f(v,u) = disp;
-		} else if (nn > 2) f(v,u) = (nn==0) ? NAN : est / nn;
-		else f(v,u) = NAN;
-	} else {
-		f(v,u) = NAN;
-	}
+		} else*/
+		if (nn > 4) {
+			f(v,u) = (est+disp) / (nn+1);
+		} else if (!isnan(pdisp) && abs(pixel-ppixel) <= FILTER_SIM_THRESH) {
+			f(v,u) = pdisp;
+		} else f(v,u) = NAN;
+	//} else {
+	//	f(v,u) = NAN;
+	//}
 }
 
 cudaTextureObject_t prevDisp = 0;
 cudaTextureObject_t prevImage = 0;
 
-void rtcensus_call(const PtrStepSzb &l, const PtrStepSzb &r, const PtrStepSz<float> &disp, size_t num_disp, const int &stream) {
+void rtcensus_call(const PtrStepSz<uchar4> &l, const PtrStepSz<uchar4> &r, const PtrStepSz<float> &disp, size_t num_disp, const int &stream) {
 	dim3 grid(1,1,1);
     dim3 threads(BLOCK_W, 1, 1);
 
@@ -389,8 +400,8 @@ void rtcensus_call(const PtrStepSzb &l, const PtrStepSzb &r, const PtrStepSz<flo
 	memset(&texDesc, 0, sizeof(texDesc));
 	texDesc.readMode = cudaReadModeElementType;
   
-	cudaTextureObject_t texLeft = makeTexture2D<unsigned char>(l);
-	cudaTextureObject_t texRight = makeTexture2D<unsigned char>(r);
+	cudaTextureObject_t texLeft = makeTexture2D<uchar4>(l);
+	cudaTextureObject_t texRight = makeTexture2D<uchar4>(r);
 
 	//size_t smem_size = (2 * l.cols * l.rows) * sizeof(uint64_t);
 	
diff --git a/cv-node/src/main.cpp b/cv-node/src/main.cpp
index ad055b968e111d7e352b88f153867f2d3e85a5ee..157a1d65be95af2f3f549063c25381b0ea26c7b2 100644
--- a/cv-node/src/main.cpp
+++ b/cv-node/src/main.cpp
@@ -134,19 +134,24 @@ static void run(const string &file) {
 		sync->get(RIGHT,r);
 		
 		// Black and white
-        cvtColor(l,  lbw,  COLOR_BGR2GRAY);
-        cvtColor(r, rbw, COLOR_BGR2GRAY);
+        cvtColor(l, lbw,  COLOR_BGR2HSV);
+        cvtColor(r, rbw, COLOR_BGR2HSV);
+        int from_to[] = {0,0,1,1,2,2,-1,3};
+        Mat hsval(lbw.size(), CV_8UC4);
+        Mat hsvar(lbw.size(), CV_8UC4);
+        mixChannels(&lbw, 1, &hsval, 1, from_to, 4);
+        mixChannels(&rbw, 1, &hsvar, 1, from_to, 4);
         
-        disparity->compute(lbw,rbw,disparity32F);
+        disparity->compute(hsval,hsvar,disparity32F);
 		//LOG(INFO) << "Disparity complete ";
 		
 		disparity32F.convertTo(disparity32F, CV_32F);
-		disparity32F += 50.0f; // TODO REMOVE
+		//disparity32F += 10.0f; // TODO REMOVE
 		
 		// Clip the left edge
 		Rect rect((int)config["disparity"]["maximum"],7,disparity32F.cols-(int)config["disparity"]["maximum"],disparity32F.rows-14);
-		disparity32F = disparity32F(rect);
-		l = l(rect);
+		//disparity32F = disparity32F(rect);
+		//l = l(rect);
 		
 		// HACK to make bad pixels invisible.
 		//normalize(disparity32F, depth32F, 0, 255, NORM_MINMAX, CV_8U);
@@ -202,6 +207,7 @@ static void run(const string &file) {
 		        break;
 		    }
         } else if (config["display"]["disparity"]) {
+        	disparity32F = disparity32F / (float)config["disparity"]["maximum"];
         	//normalize(disparity32F, disparity32F, 0, 255, NORM_MINMAX, CV_8U);
 			cv::imshow("Disparity", disparity32F);
 			if(cv::waitKey(10) == 27){