diff --git a/cv-node/CMakeLists.txt b/cv-node/CMakeLists.txt
index 1531cc233eee7057a9e1a06eb7746fdcf7452788..f0678311f69a4e4574933651723fb0da87db3194 100644
--- a/cv-node/CMakeLists.txt
+++ b/cv-node/CMakeLists.txt
@@ -94,7 +94,8 @@ if (CUDA_FOUND)
 		"src/algorithms/opencv_cuda_bp.cpp"
 		"src/algorithms/rtcensus.cu"
 		"src/algorithms/consistency.cu"
-		"src/algorithms/sparse_census.cu")
+		"src/algorithms/sparse_census.cu"
+		"src/algorithms/tex_filter.cu")
 endif (CUDA_FOUND)
 
 add_executable(cv-node ${CVNODESRC})
diff --git a/cv-node/include/ftl/cuda_algorithms.hpp b/cv-node/include/ftl/cuda_algorithms.hpp
index f4c4298ba5efcfdf3377911c092eb77676a25c17..38d1129ef2644f32c361e8af1908cd7728a925cd 100644
--- a/cv-node/include/ftl/cuda_algorithms.hpp
+++ b/cv-node/include/ftl/cuda_algorithms.hpp
@@ -12,6 +12,9 @@ namespace cuda {
 	void sparse_census(const TextureObject<uchar4> &l, const TextureObject<uchar4> &r,
 			TextureObject<uint2> &cl, TextureObject<uint2> &cr);
 
+	void texture_filter(const TextureObject<uchar4> &t, const TextureObject<float> &d,
+			TextureObject<float> &f, int num_disp, double thresh);
+
 }
 }
 
diff --git a/cv-node/include/ftl/cuda_common.hpp b/cv-node/include/ftl/cuda_common.hpp
index 834e2af362b2fa74ab31c2de23a33c14ac23d45d..f57c4c0ba081e35e82d58774263a9569019d6a9f 100644
--- a/cv-node/include/ftl/cuda_common.hpp
+++ b/cv-node/include/ftl/cuda_common.hpp
@@ -25,14 +25,15 @@ class TextureObject {
 	
 	int pitch() const { return pitch_; }
 	T *devicePtr() { return ptr_; };
-	int width() const { return width_; }
-	int height() const { return height_; }
+	__host__ __device__ T *devicePtr(int v) { return &ptr_[v*pitch2_]; }
+	__host__ __device__ int width() const { return width_; }
+	__host__ __device__ int height() const { return height_; }
 	cudaTextureObject_t cudaTexture() const { return texobj_; }
 	__device__ inline T tex2D(int u, int v) { return ::tex2D<T>(texobj_, u, v); }
 	__device__ inline T tex2D(float u, float v) { return ::tex2D<T>(texobj_, u, v); }
 	
-	inline const T &operator()(int u, int v) const { return ptr_[u+v*pitch_]; }
-	inline T &operator()(int u, int v) { return ptr_[u+v*pitch_]; }
+	__host__ __device__ inline const T &operator()(int u, int v) const { return ptr_[u+v*pitch2_]; }
+	__host__ __device__ inline T &operator()(int u, int v) { return ptr_[u+v*pitch2_]; }
 	
 	void free() {
 		if (texobj_ != 0) cudaSafeCall( cudaDestroyTextureObject (texobj_) );
@@ -44,6 +45,7 @@ class TextureObject {
 	private:
 	cudaTextureObject_t texobj_;
 	size_t pitch_;
+	size_t pitch2_; // in T units
 	int width_;
 	int height_;
 	T *ptr_;
@@ -73,6 +75,7 @@ TextureObject<T>::TextureObject(const cv::cuda::PtrStepSz<T> &d) {
 	cudaCreateTextureObject(&tex, &resDesc, &texDesc, NULL);
 	texobj_ = tex;
 	pitch_ = d.step;
+	pitch2_ = pitch_ / sizeof(T);
 	ptr_ = d.data;
 	width_ = d.cols;
 	height_ = d.rows;
@@ -103,6 +106,7 @@ TextureObject<T>::TextureObject(T *ptr, int pitch, int width, int height) {
 	cudaCreateTextureObject(&tex, &resDesc, &texDesc, NULL);
 	texobj_ = tex;
 	pitch_ = pitch;
+	pitch2_ = pitch_ / sizeof(T);
 	ptr_ = ptr;
 	width_ = width;
 	height_ = height;
@@ -133,6 +137,7 @@ TextureObject<T>::TextureObject(size_t width, size_t height) {
 	width_ = width;
 	height_ = height;
 	needsfree_ = true;
+	pitch2_ = pitch_ / sizeof(T);
 	//needsdestroy_ = true;
 }
 
@@ -143,6 +148,7 @@ TextureObject<T>::TextureObject(const TextureObject &p) {
 	width_ = p.width_;
 	height_ = p.height_;
 	pitch_ = p.pitch_;
+	pitch2_ = pitch_ / sizeof(T);
 	needsfree_ = p.needsfree_;
 	//needsdestroy_ = false;
 }
diff --git a/cv-node/src/algorithms/rtcensus.cu b/cv-node/src/algorithms/rtcensus.cu
index 0fc81808888101fbb51bcf2b022ca7f9c2c254b5..d0432668d1b81c750c3eb01a0ede62dea6dc8cf9 100644
--- a/cv-node/src/algorithms/rtcensus.cu
+++ b/cv-node/src/algorithms/rtcensus.cu
@@ -58,7 +58,7 @@ __global__ void disp_kernel(float *disp_l, float *disp_r,
 		size_t ds) {	
 	//extern __shared__ uint64_t cache[];
 
-	const int gamma = 20;
+	const int gamma = 35;
 	
 	int u = (blockIdx.x * BLOCK_W) + threadIdx.x + RADIUS2;
 	int v_start = (blockIdx.y * ROWSperTHREAD) + RADIUS2;
@@ -331,7 +331,8 @@ void rtcensus_call(const PtrStepSz<uchar4> &l, const PtrStepSz<uchar4> &r, const
 	ftl::cuda::TextureObject<uint2> censusTexRight(r.cols, r.rows);
 	ftl::cuda::TextureObject<float> dispTexLeft(l.cols, l.rows);
 	ftl::cuda::TextureObject<float> dispTexRight(r.cols, r.rows);
-	ftl::cuda::TextureObject<float> dispTex(disp); //r.cols, r.rows);
+	ftl::cuda::TextureObject<float> dispTex(r.cols, r.rows);
+	ftl::cuda::TextureObject<float> output(disp);
 	
 	// Calculate the census for left and right
 	ftl::cuda::sparse_census(texLeft, texRight, censusTexLeft, censusTexRight);
@@ -353,7 +354,7 @@ void rtcensus_call(const PtrStepSz<uchar4> &l, const PtrStepSz<uchar4> &r, const
 	// Check consistency between L and R disparities.
 	consistency(dispTexLeft, dispTexRight, dispTex);
 
-	
+	texture_filter(texLeft, dispTex, output, num_disp, 20.0);
 
 	/*grid.x = 4;
 	grid.y = l.rows;
@@ -378,6 +379,7 @@ void rtcensus_call(const PtrStepSz<uchar4> &l, const PtrStepSz<uchar4> &r, const
 	dispTexLeft.free();
 	dispTexRight.free();
 	dispTex.free();
+	output.free();
 }
 
 };
diff --git a/cv-node/src/algorithms/tex_filter.cu b/cv-node/src/algorithms/tex_filter.cu
index b5934c6f3292b9ea897a1913cb92f5843f6f36d8..3962d1263fff7fe2af6e741f02ac26177f42f9dc 100644
--- a/cv-node/src/algorithms/tex_filter.cu
+++ b/cv-node/src/algorithms/tex_filter.cu
@@ -1,47 +1,48 @@
 #include <ftl/cuda_common.hpp>
 
-#define FILTER_WINDOW 11
+#define FILTER_WINDOW 11.0
 #define FILTER_WINDOW_R	5
 
 __global__ void texture_filter_kernel(cudaTextureObject_t t, cudaTextureObject_t d,
-		ftl::cuda::TextureObject<float> f, int num_disp, int thresh) { // Thresh = -5000000
+		ftl::cuda::TextureObject<float> f, int num_disp, double thresh) { // Thresh = -5000000
 
-	float disp = tex2D<float>(d,u,v);
-	int neigh_sq = 0;
-	int neigh_sum = 0;
+	for (STRIDE_Y(v,f.height())) {
+	for (STRIDE_X(u,f.width())) {
+		float disp = tex2D<float>(d,u,v);
+		double neigh_sq = 0.0;
+		double neigh_sum = 0.0;
 
-	for (STRIDE_Y(v,h)) {
-	for (STRIDE_X(u,w)) {
 		for (int m=-FILTER_WINDOW_R; m<=FILTER_WINDOW_R; m++) {
 			for (int n=-FILTER_WINDOW_R; n<=FILTER_WINDOW_R; n++) {
 				uchar4 neigh = tex2D<uchar4>(t, u+n, v+m);
-				neigh_sq += neigh*neigh;
-				neigh_sum += neigh;
+				neigh_sq += (double)(neigh.z*neigh.z);
+				neigh_sum += (double)neigh.z;
 			}	
 		}
-	}
-	}
 
-	// Texture map filtering
-	int tm = (neigh_sq / (FILTER_WINDOW*FILTER_WINDOW)) -
-			((neigh_sum*neigh_sum) / (FILTER_WINDOW*FILTER_WINDOW));
+		// Texture map filtering
+		double tm = (neigh_sq / (FILTER_WINDOW*FILTER_WINDOW)) -
+				//((neigh_sum*neigh_sum) / (FILTER_WINDOW*FILTER_WINDOW));
+				((neigh_sum / (FILTER_WINDOW*FILTER_WINDOW)) * (neigh_sum / (FILTER_WINDOW*FILTER_WINDOW)));
 
-	if (tm < thesh) {
-		f(u,v) = disp;
-	} else {
-		f(u,v) = NAN;
+		if (tm >= thresh) {
+			f(u,v) = disp;
+		} else {
+			f(u,v) = NAN;
+		}
+	}
 	}
 }
 
 namespace ftl {
 namespace cuda {
 	void texture_filter(const TextureObject<uchar4> &t, const TextureObject<float> &d,
-			TextureObject<float> &f, int num_disp, int thresh) {
+			TextureObject<float> &f, int num_disp, double thresh) {
 		dim3 grid(1,1,1);
     	dim3 threads(128, 1, 1);
-    	grid.x = cv::cuda::device::divUp(disp.width(), 128);
-		grid.y = cv::cuda::device::divUp(disp.height(), 1);
-		texture_filter_kernel<<<grid, threads>>>
+    	grid.x = cv::cuda::device::divUp(d.width(), 128);
+		grid.y = cv::cuda::device::divUp(d.height(), 1);
+		texture_filter_kernel<<<grid, threads>>>(
 			t.cudaTexture(),
 			d.cudaTexture(),
 			f,