diff --git a/cv-node/src/algorithms/rtcensus.cu b/cv-node/src/algorithms/rtcensus.cu
index 2ec8078e97914e46acdd051d9c4fe9cca7dd6aba..d9a92df23435f3bd09750f44254b1284ebed24e8 100644
--- a/cv-node/src/algorithms/rtcensus.cu
+++ b/cv-node/src/algorithms/rtcensus.cu
@@ -29,7 +29,7 @@ namespace gpu {
 // --- SUPPORT -----------------------------------------------------------------
 
 template <typename T>
-cudaTextureObject_t makeTexture2D(const PtrStepSzb &d) {
+cudaTextureObject_t makeTexture2D(const PtrStepSz<T> &d) {
 	cudaResourceDesc resDesc;
 	memset(&resDesc, 0, sizeof(resDesc));
 	resDesc.resType = cudaResourceTypePitch2D;
@@ -141,7 +141,7 @@ __global__ void disp_kernel(float *disp_l, float *disp_r,
 		size_t ds) {	
 	//extern __shared__ uint64_t cache[];
 
-	const int gamma = 10;
+	const int gamma = 50;
 	
 	int u = (blockIdx.x * BLOCK_W) + threadIdx.x + RADIUS2;
 	int v_start = (blockIdx.y * ROWSperTHREAD) + RADIUS2;
@@ -283,37 +283,71 @@ __global__ void consistency_kernel(cudaTextureObject_t d_sub_l,
 }
 
 #define FILTER_WINDOW_R	7
-#define FILTER_SIM_THRESH 10
+#define FILTER_SIM_THRESH 20
+#define FILTER_DISP_THRESH 10.0f
 
-__global__ void filter_kernel(cudaTextureObject_t t, cudaTextureObject_t d, PtrStepSz<float> f) {
+__global__ void filter_kernel(cudaTextureObject_t t, cudaTextureObject_t d,
+		cudaTextureObject_t prevD,
+		cudaTextureObject_t prevT, PtrStepSz<float> f, int num_disp) {
 	size_t u = (blockIdx.x * BLOCK_W) + threadIdx.x + RADIUS;
-	size_t v = blockIdx.y;
+	size_t v = blockIdx.y + RADIUS;
+
+	if (u+num_disp > f.cols) {
+		f(v,u) = NAN;
+		return;
+	}
 
 	float disp = tex2D<float>(d,u,v);
-	if (!isnan(disp)) {
+	/*if (!isnan(disp)) {
 		f(v,u) = disp;
 		return;
-	}
+	}*/
+	//if (isnan(disp)) disp = 100000.0f; //tex2D<float>(prev, u, v);
+
+	cudaTextureObject_t nTex = (prevT) ? prevT : t;
+	cudaTextureObject_t nDisp = (prevD) ? prevD : d;
 
+	float pdisp = tex2D<float>(nDisp,u,v);
+	if (isnan(pdisp)) pdisp = disp;
+	if (isnan(disp)) disp = pdisp;
 	int pixel = tex2D<unsigned char>(t, u, v);
-	float est = 0.0f;
-	int nn = 0;
+	int ppixel = tex2D<unsigned char>(nTex, u, v);
+	float est = 0.0f; //(isnan(disp)) ? tex2D<float>(prev, u, v) : disp;
+	int nn = 0; //(isnan(disp)) ? 0 : 1;
+	int neigh_sq = 0;
+	int neigh_sum = 0;
 
 	for (int m=-FILTER_WINDOW_R; m<=FILTER_WINDOW_R; m++) {
 		for (int n=-FILTER_WINDOW_R; n<=FILTER_WINDOW_R; n++) {
 			int neigh = tex2D<unsigned char>(t, u+n, v+m);
+			neigh_sq += neigh*neigh;
+			neigh_sum += neigh;
+
 			float ndisp = tex2D<float>(d,u+n,v+m);
+			if (isnan(ndisp)) {
+				ndisp = tex2D<float>(nDisp,u+n,v+m);
+				neigh = tex2D<unsigned char>(nTex, u+n, v+m);
+			}
 
-			if (!isnan(ndisp) && (abs(neigh-pixel) <= FILTER_SIM_THRESH)) {
+			if (ndisp > 1.0f && !isnan(ndisp) && (abs(neigh-pixel) <= FILTER_SIM_THRESH)) { // && (isnan(disp) || abs(ndisp-disp) < FILTER_DISP_THRESH)) {
 				est += ndisp;
 				nn++;
 			}
 		}	
 	}
 
-	f(v,u) = (nn==0) ? NAN : est / nn;
+	// Texture map filtering
+	int tm = (neigh_sq / (15*15)) - ((neigh_sum*neigh_sum) / (15*15));
+	if (tm >= -9000000 && (abs(ppixel-pixel) > FILTER_SIM_THRESH || abs(pdisp - disp) <= FILTER_DISP_THRESH) ) {
+		f(v,u) = disp; // = (nn==0) ? NAN : est / nn;
+	} else {
+		f(v,u) = NAN;
+	}
 }
 
+cudaTextureObject_t prevDisp = 0;
+cudaTextureObject_t prevImage = 0;
+
 void rtcensus_call(const PtrStepSzb &l, const PtrStepSzb &r, const PtrStepSz<float> &disp, size_t num_disp, const int &stream) {
 	dim3 grid(1,1,1);
     dim3 threads(BLOCK_W, 1, 1);
@@ -378,13 +412,19 @@ void rtcensus_call(const PtrStepSzb &l, const PtrStepSzb &r, const PtrStepSz<flo
 
 	cudaTextureObject_t dispTex = makeTexture2D<float>(disp_raw, pitchD, r.cols, r.rows);
 
-	filter_kernel<<<grid, threads>>>(texLeft, dispTex, disp);
+	grid.x = cv::cuda::device::divUp(l.cols - 2 * RADIUS2, BLOCK_W);
+	filter_kernel<<<grid, threads>>>(texLeft, dispTex, prevDisp, prevImage, disp, num_disp);
 	cudaSafeCall( cudaGetLastError() );
+
+	if (prevDisp) cudaSafeCall( cudaDestroyTextureObject (prevDisp) );
+	prevDisp = makeTexture2D<float>(disp);
+	if (prevImage) cudaSafeCall( cudaDestroyTextureObject (prevImage) );
+	prevImage = texLeft;
 	
 	//if (&stream == Stream::Null())
 	cudaSafeCall( cudaDeviceSynchronize() );
 		
-	cudaSafeCall( cudaDestroyTextureObject (texLeft) );
+	//cudaSafeCall( cudaDestroyTextureObject (texLeft) );
 	cudaSafeCall( cudaDestroyTextureObject (texRight) );
 	cudaSafeCall( cudaDestroyTextureObject (censusTexLeft) );
 	cudaSafeCall( cudaDestroyTextureObject (censusTexRight) );