diff --git a/cv-node/CMakeLists.txt b/cv-node/CMakeLists.txt
index f0678311f69a4e4574933651723fb0da87db3194..2830734c87e28c52073d3ea40e2b8bb0a6237cc7 100644
--- a/cv-node/CMakeLists.txt
+++ b/cv-node/CMakeLists.txt
@@ -95,7 +95,9 @@ if (CUDA_FOUND)
 		"src/algorithms/rtcensus.cu"
 		"src/algorithms/consistency.cu"
 		"src/algorithms/sparse_census.cu"
-		"src/algorithms/tex_filter.cu")
+		"src/algorithms/tex_filter.cu"
+		"src/algorithms/nick1.cu"
+		"src/algorithms/nick.cpp")
 endif (CUDA_FOUND)
 
 add_executable(cv-node ${CVNODESRC})
diff --git a/cv-node/include/ftl/algorithms/nick.hpp b/cv-node/include/ftl/algorithms/nick.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..685882ba062f49b0b22f0f709ad8d1f9cb4ddb1e
--- /dev/null
+++ b/cv-node/include/ftl/algorithms/nick.hpp
@@ -0,0 +1,29 @@
+#ifndef _FTL_ALGORITHMS_NICK_HPP_
+#define _FTL_ALGORITHMS_NICK_HPP_
+
+#include <opencv2/core.hpp>
+#include <opencv2/opencv.hpp>
+#include <opencv2/cudastereo.hpp>
+#include <ftl/disparity.hpp>
+
+namespace ftl {
+namespace algorithms {
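+/**
+ * Experimental disparity algorithm, registered with the factory under the
+ * name "nick". The stereo pair is converted to 4-channel HSV on the CPU and
+ * processed by the edge-based CUDA kernels in nick1.cu.
+ */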
+class NickCuda : public ftl::Disparity {
+	public:
+	NickCuda(nlohmann::json &config);
+	
+	void compute(const cv::Mat &l, const cv::Mat &r, cv::Mat &disp);
+
+	static inline Disparity *create(nlohmann::json &config) { return new NickCuda(config); }
+	
+	private:
+	cv::cuda::GpuMat disp_;
+	//cv::cuda::GpuMat filtered_;
+	cv::cuda::GpuMat left_;
+	cv::cuda::GpuMat right_;
+};
+}  // namespace algorithms
+}  // namespace ftl
+
+#endif // _FTL_ALGORITHMS_NICK_HPP_
+
diff --git a/cv-node/include/ftl/cuda_algorithms.hpp b/cv-node/include/ftl/cuda_algorithms.hpp
index 38d1129ef2644f32c361e8af1908cd7728a925cd..a7f3412390d9561cfceb05547105ae705991b90c 100644
--- a/cv-node/include/ftl/cuda_algorithms.hpp
+++ b/cv-node/include/ftl/cuda_algorithms.hpp
@@ -14,6 +14,9 @@ namespace cuda {
 
 	void texture_filter(const TextureObject<uchar4> &t, const TextureObject<float> &d,
 			TextureObject<float> &f, int num_disp, double thresh);
+			
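+	/**
+	 * Compute a texture-complexity map: each output value is the local
+	 * variance of the .z channel of the input image (see texture_map_kernel
+	 * in tex_filter.cu).
+	 */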
+	void texture_map(const TextureObject<uchar4> &t,
+			TextureObject<float> &f);
 
 }
 }
diff --git a/cv-node/src/algorithms/consistency.cu b/cv-node/src/algorithms/consistency.cu
index 3924ace27891b58f1a896aab1ab150e0b356c84d..ca9a3df22af36ac3623ac9b9e316275e25946e7f 100644
--- a/cv-node/src/algorithms/consistency.cu
+++ b/cv-node/src/algorithms/consistency.cu
@@ -1,3 +1,7 @@
+/*
+ * Algorithms for checking the consistency of two disparity maps.
+ */
+ 
 #include <ftl/cuda_common.hpp>
 
 #define CONSISTENCY_THRESHOLD	1.0f
diff --git a/cv-node/src/algorithms/nick.cpp b/cv-node/src/algorithms/nick.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..5699023e7683b1f8c57aeebd02d6c99a8f71559a
--- /dev/null
+++ b/cv-node/src/algorithms/nick.cpp
@@ -0,0 +1,41 @@
+#include <ftl/algorithms/nick.hpp>
+#include <vector_types.h>
+
+using ftl::algorithms::NickCuda;
+using namespace cv;
+using namespace cv::cuda;
+
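+// Register this algorithm with the Disparity factory under the name "nick".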
+static ftl::Disparity::Register nickcuda("nick", NickCuda::create);
+
+NickCuda::NickCuda(nlohmann::json &config) : Disparity(config) {
+
+}
+
+namespace ftl { namespace gpu {
+void nick1_call(const PtrStepSz<uchar4> &l, const PtrStepSz<uchar4> &r, const PtrStepSz<float> &disp, size_t num_disp);
+}}
+
+void NickCuda::compute(const cv::Mat &l, const cv::Mat &r, cv::Mat &disp) {
+	if (disp_.empty()) disp_ = cuda::GpuMat(l.size(), CV_32FC1);
+	if (left_.empty()) left_ = cuda::GpuMat(l.size(), CV_8UC4);
+	if (right_.empty()) right_ = cuda::GpuMat(l.size(), CV_8UC4);
+	
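+	// Convert both images to HSV and pad to four channels (mixChannels
+	// zero-fills the fourth) so they can be uploaded as uchar4 images.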
+	Mat lhsv, rhsv;
+	cv::cvtColor(l, lhsv,  COLOR_BGR2HSV);
+	cv::cvtColor(r, rhsv, COLOR_BGR2HSV);
+	int from_to[] = {0,0,1,1,2,2,-1,3};
+	Mat hsval(lhsv.size(), CV_8UC4);
+	Mat hsvar(rhsv.size(), CV_8UC4);
+	mixChannels(&lhsv, 1, &hsval, 1, from_to, 4);
+	mixChannels(&rhsv, 1, &hsvar, 1, from_to, 4);
+	
+	left_.upload(hsval);
+	right_.upload(hsvar);
+	
+	ftl::gpu::nick1_call(left_, right_, disp_, 200);
+	
+	disp_.download(disp);
+}
+
+
+
diff --git a/cv-node/src/algorithms/nick1.cu b/cv-node/src/algorithms/nick1.cu
new file mode 100644
index 0000000000000000000000000000000000000000..7c7702b7fad61bb37c15c678758f945c0907cc20
--- /dev/null
+++ b/cv-node/src/algorithms/nick1.cu
@@ -0,0 +1,264 @@
+/*
+ * Author: Nicolas Pope
+ * Based initially on rtcensus
+ *
+ */
+ 
+#include <ftl/cuda_common.hpp>
+#include <ftl/cuda_algorithms.hpp>
+
+using namespace cv::cuda;
+using namespace cv;
+
+
+#define BLOCK_W 60
+#define RADIUS 7
+#define RADIUS2 2
+#define ROWSperTHREAD 1
+
+template <typename T>
+__host__ __device__
+inline T lerp(T v0, T v1, T t) {
+    return fma(t, v1, fma(-t, v0, v0));
+}
+
+#define FILTER_WINDOW 21
+#define FILTER_WINDOW_R	10
+#define EDGE_SENSITIVITY 10.0f
+
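+/*
+ * Estimate the disparity at an edge pixel: average the disparities of
+ * neighbouring pixels (FILTER_WINDOW x FILTER_WINDOW window) whose V channel
+ * is within EDGE_SENSITIVITY of the given pixel. Returns NAN if no neighbour
+ * qualifies.
+ */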
+__device__ float calculate_edge_disp(cudaTextureObject_t t, cudaTextureObject_t d, cudaTextureObject_t pT, cudaTextureObject_t pD, uchar4 pixel, int u, int v) {
+	float est = 0.0;
+	int nn = 0;
+	//float pest = 0.0;
+	//int pnn = 0;
+	
+	//cudaTextureObject_t nTex = (pT) ? pT : t;
+	//cudaTextureObject_t nDisp = (pD) ? pD : d;
+
+	for (int m=-FILTER_WINDOW_R; m<=FILTER_WINDOW_R; m++) {
+		for (int n=-FILTER_WINDOW_R; n<=FILTER_WINDOW_R; n++) {
+			uchar4 neigh = tex2D<uchar4>(t, u+n, v+m);
+			float ndisp = tex2D<float>(d,u+n,v+m);
+			
+			//uchar4 pneigh = tex2D<uchar4>(nTex, u+n, v+m);
+			//float pndisp = tex2D<float>(nDisp,u+n,v+m);
+
+			//if (isnan(tex2D<float>(nDisp,u+n,v+m))) continue;
+			//if (m == 0 && n == 0) continue;
+
+			if (!isnan(ndisp) && (abs(neigh.z-pixel.z) <= EDGE_SENSITIVITY)) { // && (isnan(disp) || abs(ndisp-disp) < FILTER_DISP_THRESH)) {
+				est += ndisp;
+				nn++;
+			}
+			
+			//if (!isnan(pndisp) && (abs(pneigh.z-pixel.z) <= EDGE_SENSITIVITY)) { // && (isnan(disp) || abs(ndisp-disp) < FILTER_DISP_THRESH)) {
+			//	pest += pndisp;
+			//	pnn++;
+			//}
+		}	
+	}
+	
+	est = (nn > 0) ? est/nn : NAN;
+	//pest = (pnn > 0) ? pest/pnn : NAN;
+	
+	return est;
+}
+
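+/* Weighted per-channel difference of two HSV pixels (x=H, y=S, z=V);
+ * the V channel dominates the measure. */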
+__device__ float colour_error(uchar4 v1, uchar4 v2) {
+	float dx = 0.05*abs(v1.x-v2.x);
+	float dy = 0.1*abs(v1.y-v2.y);
+	float dz = 0.85*abs(v1.z-v2.z);
+	return dx + dz + dy;
+}
+
+// TODO Use HUE also and perhaps increase window?
+// Or use more complex notion of texture?
+
+/* Just crossed and currently on edge */
+__device__ bool is_edge_left(uchar4 *line, int x, int n) {
+	if (x < 1 || x >= n-1) return false;
+	return (colour_error(line[x-1],line[x]) > EDGE_SENSITIVITY && colour_error(line[x],line[x+1]) <= EDGE_SENSITIVITY);
+}
+
+/* Just crossed but not on edge now */
+__device__ bool is_edge_right(uchar4 *line, int x, int n) {
+	if (x < 1 || x >= n-1) return false;
+	return (colour_error(line[x-1],line[x]) <= EDGE_SENSITIVITY && colour_error(line[x],line[x+1]) > EDGE_SENSITIVITY);
+}
+
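+/*
+ * Experimental edge-aware disparity fill. A row of the HSV image is cached in
+ * shared memory; right-edge pixels receive an edge-averaged disparity and all
+ * other pixels are set to NAN. The span-filling code after the early
+ * 'continue' is currently disabled.
+ */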
+__global__ void filter_kernel(cudaTextureObject_t t, cudaTextureObject_t d,
+		cudaTextureObject_t prevD,
+		cudaTextureObject_t prevT, PtrStepSz<float> f, int num_disp) {
+
+
+	extern __shared__ uchar4 line[]; // One entire line of hsv image
+	
+	for (STRIDE_Y(v,f.rows)) {
+		for (STRIDE_X(u,f.cols)) {
+			line[u] = tex2D<uchar4>(t, u, v);
+		}
+		__syncthreads();
+		
+		for (STRIDE_X(u,f.cols)) {
+			if (is_edge_right(line, u, f.cols)) {
+				float edge_disp = calculate_edge_disp(t,d,prevT,prevD,line[u],u+2,v); // tex2D<float>(d, u, v);
+				f(v,u) = edge_disp;
+				continue;
+				
+				float est = 0.0f;
+				int nn = 0;
+				
+				if (!isnan(edge_disp)) {
+					est += edge_disp;
+					nn++;
+				}
+				//f(v,u) = edge_disp;
+				
+				// TODO, find other edge first to get disparity
+				// Use middle disparities to:
+				//		estimate curve or linear (choose equation)
+				//		or ignore as noise if impossible
+				
+				// TODO For edge disparity, use a window to:
+				//		a) find a missing disparity
+				//		b) make sure disparity has some consensus (above or below mostly)
+				
+				// TODO Use past values?
+				// Another way to fill blanks and gain consensus
+				
+				// TODO Maintain a disparity stack to pop back to background?
+				// Issue of background disparity not being detected.
+				// Only if hsv also matches previous background
+				
+				// TODO Edge prediction (in vertical direction at least) could
+				// help fill both edge and disparity gaps. Propagate disparity
+				// along edges
+				
+				float last_disp = edge_disp;
+				
+				int i;
+				for (i=1; u+i<f.cols; i++) {
+					if (is_edge_right(line, u+i, f.cols)) {
+						//float end_disp = calculate_edge_disp(t,d,prevT,prevD,line[u+i-1],u+i-3,v);
+						//if (!isnan(end_disp)) last_disp = end_disp;
+						break;
+					}
+					
+					float di = tex2D<float>(d,u+i,v);
+					if (!isnan(di)) {
+						est += di;
+						nn++;
+					}
+					//f(v,u+i) = edge_disp;
+				}
+				
+				est = (nn > 0) ? est / nn : NAN;
+				//for (int j=1; j<i; j++) {
+				//	f(v,u+j) = est; //lerp(edge_disp, last_disp, (float)j / (float)(i-1));
+				//}
+			} else f(v,u) = NAN;
+		}
+	}
+}
+
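+/*
+ * First edge invariant: 3x3 averaged differences of the V channel give a
+ * gradient magnitude and orientation per pixel; pixels with magnitude below
+ * the threshold are marked NAN.
+ */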
+__global__ void edge_invar1_kernel(cudaTextureObject_t t, ftl::cuda::TextureObject<float2> o) {
+	for (STRIDE_Y(v,o.height())) {
+		for (STRIDE_X(u,o.width())) {
+			float gx = ((tex2D<uchar4>(t, u-1, v-1).z - tex2D<uchar4>(t, u+1, v-1).z) +
+						(tex2D<uchar4>(t, u-1, v).z - tex2D<uchar4>(t, u+1, v).z) +
+						(tex2D<uchar4>(t, u-1, v+1).z - tex2D<uchar4>(t, u+1, v+1).z)) / 3;
+			float gy = ((tex2D<uchar4>(t, u-1, v-1).z - tex2D<uchar4>(t, u-1, v+1).z) +
+						(tex2D<uchar4>(t, u, v-1).z - tex2D<uchar4>(t, u, v+1).z) +
+						(tex2D<uchar4>(t, u+1, v-1).z - tex2D<uchar4>(t, u+1, v+1).z)) / 3;
+						
+			float g = sqrt(gx*gx+gy*gy);
+			float a = atan2(gy,gx);
+			
+			// TODO: adapt the threshold using hysteresis
+			o(u,v) = (g > 10.0f) ? make_float2(g,a) : make_float2(NAN,NAN);
+		}	
+	}
+}
+
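+/*
+ * Second edge invariant: for each edge pixel, search along a horizontal or
+ * vertical line (chosen from the gradient orientation, offset by two pixels)
+ * for the edge pixel with the most similar gradient, and output a scaled
+ * measure of the change in gradient angle, or NAN if no close match exists.
+ */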
+__global__ void edge_invar2_kernel(cudaTextureObject_t i1, ftl::cuda::TextureObject<float> o) {
+	for (STRIDE_Y(v,o.height())) {
+		for (STRIDE_X(u,o.width())) {
+			float2 pixel_i1 = tex2D<float2>(i1,u,v);
+			
+			if (isnan(pixel_i1.x)) {
+				o(u,v) = NAN;
+				continue;
+			}
+			
+			int dx = (pixel_i1.y >= 0.785 && pixel_i1.y <= 2.356) ? 0 : 1;
+			int dy = (dx == 1) ? 0 : 1;
+			
+			float2 next_pix;
+			next_pix.x = NAN;
+			next_pix.y = NAN;
+			float diff = 10000.0f;
+			
+			for (int i=-10; i<=10; i++) {
+				float2 pix = tex2D<float2>(i1,u+dx*i+dy*2, v+dy*i+dx*2);
+				if (isnan(pix.x)) continue;
+				
+				float d = abs(pix.x-pixel_i1.x)*abs(pix.y-pixel_i1.y);
+				if (d < diff) {
+					next_pix = pix;
+					diff = d;
+				}
+			}
+			
+			if (!isnan(next_pix.x) && diff < 2.0f) {
+				o(u,v) = abs(pixel_i1.y - next_pix.y)*81.0f;
+			} else {
+				o(u,v) = NAN;
+			}
+			
+			// Search in two directions for next edge pixel
+			// Calculate curvature by monitoring gradient angle change
+			// Calculate length by stopping when change exceeds threshold
+		}	
+	}
+}
+
+ftl::cuda::TextureObject<float> prevDisp;
+ftl::cuda::TextureObject<uchar4> prevImage;
+
+namespace ftl {
+namespace gpu {
+
+void nick1_call(const PtrStepSz<uchar4> &l, const PtrStepSz<uchar4> &r, const PtrStepSz<float> &disp, size_t num_disp) {
+	// Make all the required texture steps
+	// TODO Could reduce re-allocations by caching these
+	ftl::cuda::TextureObject<uchar4> texLeft(l);
+	ftl::cuda::TextureObject<uchar4> texRight(r);
+	ftl::cuda::TextureObject<float2> inv1(l.cols, l.rows);
+	ftl::cuda::TextureObject<float> output(disp);
+	
+	dim3 grid(1,1,1);
+	dim3 threads(BLOCK_W, 1, 1);
+	grid.x = cv::cuda::device::divUp(l.cols - 2 * RADIUS2, BLOCK_W);
+	grid.y = cv::cuda::device::divUp(l.rows - 2 * RADIUS2, ROWSperTHREAD);
+	
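+	// Only the two edge-invariant kernels are run at this stage; 'output'
+	// receives the invariant-2 measure rather than a disparity map.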
+	edge_invar1_kernel<<<grid,threads>>>(texLeft.cudaTexture(), inv1);
+	cudaSafeCall( cudaGetLastError() );
+	
+	edge_invar2_kernel<<<grid,threads>>>(inv1.cudaTexture(), output);
+	cudaSafeCall( cudaGetLastError() );
+	
+	//prevImage.free();
+	//prevImage = texLeft;
+	
+	//if (&stream == Stream::Null())
+	cudaSafeCall( cudaDeviceSynchronize() );
+	
+	texLeft.free();
+	texRight.free();
+	inv1.free();
+	output.free();
+}
+
+}
+}
+
diff --git a/cv-node/src/algorithms/rtcensus.cpp b/cv-node/src/algorithms/rtcensus.cpp
index ee7650cfa45bd2ada7092651d883cc0221f908c8..2c32225523394e128f2da92ae953a1f19013732a 100644
--- a/cv-node/src/algorithms/rtcensus.cpp
+++ b/cv-node/src/algorithms/rtcensus.cpp
@@ -6,6 +6,8 @@
  *     for embedded real-time systems
  * [2] Humenberger, Zinner, Kubinger: Performance Evaluation of Census-Based
  *     Stereo Matching Algorithm on Embedded and Multi-Core Hardware
+ * [3] Humenberger, Engelke, Kubinger: A Census-Based Stereo Vision Algorithm Using Modified Semi-Global Matching
+ *     and Plane Fitting to Improve Matching Quality.
  *
  * Equation numbering uses [1] unless otherwise stated
  */
diff --git a/cv-node/src/algorithms/rtcensus.cu b/cv-node/src/algorithms/rtcensus.cu
index d0432668d1b81c750c3eb01a0ede62dea6dc8cf9..d2aa073eb8db0730e1e108b381f8fe6b322571ff 100644
--- a/cv-node/src/algorithms/rtcensus.cu
+++ b/cv-node/src/algorithms/rtcensus.cu
@@ -6,6 +6,8 @@
  *     for embedded real-time systems
  * [2] Humenberger, Zinner, Kubinger: Performance Evaluation of Census-Based
  *     Stereo Matching Algorithm on Embedded and Multi-Core Hardware
+ * [3] Humenberger, Engelke, Kubinger: A Census-Based Stereo Vision Algorithm Using Modified Semi-Global Matching
+ *     and Plane Fitting to Improve Matching Quality.
  *
  * Equation numbering uses [1] unless otherwise stated
  *
@@ -49,7 +51,7 @@ __forceinline__ __device__ uint64_t uint2asull (uint2 a) {
 }
 
 /*
- * Generate left and right disparity images from census data. (19)
+ * Generate left and right disparity images from census data. (18)(19)(25)
  */
 __global__ void disp_kernel(float *disp_l, float *disp_r,
 		int pitchL, int pitchR,
@@ -68,25 +70,11 @@ __global__ void disp_kernel(float *disp_l, float *disp_r,
 	// Local cache
 	uint64_t l_cache_l1[5][5];
 	uint64_t l_cache_l2[5][5];
-
-	// Prepare the cache load
-	//const int cache_thread_width = (BLOCK_W+ds / BLOCK_W + RADIUS2*2 + 1)*2;
-	//uint64_t *cache_ptr = cache + (threadIdx.x * cache_thread_width);
 	
 	if (v_end >= height) v_end = height;
 	if (u+maxdisp >= width) maxdisp = width-u;
 	
 	for (int v=v_start; v<v_end; v++) {
-		/*const int cache_start = v*width*2 + cache_thread_width*blockIdx.x;
-		for (int i=0; i<cache_thread_width; i+=2) {
-			cache_ptr[i] = census[cache_start+i];
-			cache_ptr[i+1] = census[cache_start+i+1];
-		}
-		
-		__syncthreads();*/
-		
-		// Fill local cache for window 5x5
-		// TODO Use shared memory?
 		for (int m=-2; m<=2; m++) {
 			for (int n=-2; n<=2; n++) {
 				l_cache_l2[m+2][n+2] = uint2asull(tex2D<uint2>(censusL,u+n,v+m));
@@ -107,7 +95,7 @@ __global__ void disp_kernel(float *disp_l, float *disp_r,
 		int dix1 = 0;
 		int dix2 = 0;
 		
-		// TODO Use prediction textures to narrow range
+		// (19)
 		for (int d=0; d<maxdisp; d++) {
 			uint16_t hamming1 = 0;
 			uint16_t hamming2 = 0;
@@ -119,18 +107,17 @@ __global__ void disp_kernel(float *disp_l, float *disp_r,
 				for (int n=-2; n<=2; n++) {
 					const auto u_ = u + n;
 					
+					// (18)
 					auto l1 = l_cache_l1[m+2][n+2];
 					auto l2 = l_cache_l2[m+2][n+2];
-					
-					// TODO Somehow might use shared memory
 					auto r1 = uint2asull(tex2D<uint2>(censusL, u_+d, v_));
 					auto r2 = uint2asull(tex2D<uint2>(censusR, u_-d, v_));
-					
 					hamming1 += __popcll(r1^l1);
 					hamming2 += __popcll(r2^l2);
 				}
 			}
 			
+			// Find the two minimum costs
 			if (hamming1 < min_disp1) {
 				min_before1 = last_ham1;
 				min_disp1 = hamming1;
@@ -157,171 +144,16 @@ __global__ void disp_kernel(float *disp_l, float *disp_r,
 		//float d2 = (dix2 == 0 || dix2 == ds-1) ? (float)dix2 : fit_parabola(dix2, min_disp2, min_before2, min_after2);
 	
 		// TODO Allow for discontinuities with threshold
+		// Subpixel disparity (20)
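+		// fit_parabola() interpolates the minimum of a parabola through the
+		// best cost and the costs at the two neighbouring disparities.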
 		float d1 = fit_parabola(dix1, min_disp1, min_before1, min_after1);
 		float d2 = fit_parabola(dix2, min_disp2, min_before2, min_after2);
 	
-		// Confidence filter (25)
-		// TODO choice of gamma to depend on disparity variance
-		// Variance with next option, variance with neighbours, variance with past value
+		// Confidence filter based on (25)
 		disp_l[v*pitchL+u] = ((min_disp2b - min_disp2) >= gamma) ? d2 : NAN;
 		disp_r[v*pitchR+u] = ((min_disp1b - min_disp1) >= gamma) ? d1 : NAN;
-
-		// TODO If disparity is 0.0f, perhaps
-		// Use previous value unless it conflicts with present
-		// Use neighbour values if texture matches 
-	}
-}
-
-
-
-template <typename T>
-__host__ __device__
-inline T lerp(T v0, T v1, T t) {
-    return fma(t, v1, fma(-t, v0, v0));
-}
-
-#define FILTER_WINDOW 21
-#define FILTER_WINDOW_R	10
-#define EDGE_SENSITIVITY 10.0f
-
-__device__ float calculate_edge_disp(cudaTextureObject_t t, cudaTextureObject_t d, cudaTextureObject_t pT, cudaTextureObject_t pD, uchar4 pixel, int u, int v) {
-	float est = 0.0;
-	int nn = 0;
-	//float pest = 0.0;
-	//int pnn = 0;
-	
-	//cudaTextureObject_t nTex = (pT) ? pT : t;
-	//cudaTextureObject_t nDisp = (pD) ? pD : d;
-
-	for (int m=-FILTER_WINDOW_R; m<=FILTER_WINDOW_R; m++) {
-		for (int n=-FILTER_WINDOW_R; n<=FILTER_WINDOW_R; n++) {
-			uchar4 neigh = tex2D<uchar4>(t, u+n, v+m);
-			float ndisp = tex2D<float>(d,u+n,v+m);
-			
-			//uchar4 pneigh = tex2D<uchar4>(nTex, u+n, v+m);
-			//float pndisp = tex2D<float>(nDisp,u+n,v+m);
-
-			//if (isnan(tex2D<float>(nDisp,u+n,v+m))) continue;
-			//if (m == 0 && n == 0) continue;
-
-			if (!isnan(ndisp) && (abs(neigh.z-pixel.z) <= EDGE_SENSITIVITY)) { // && (isnan(disp) || abs(ndisp-disp) < FILTER_DISP_THRESH)) {
-				est += ndisp;
-				nn++;
-			}
-			
-			//if (!isnan(pndisp) && (abs(pneigh.z-pixel.z) <= EDGE_SENSITIVITY)) { // && (isnan(disp) || abs(ndisp-disp) < FILTER_DISP_THRESH)) {
-			//	pest += pndisp;
-			//	pnn++;
-			//}
-		}	
 	}
-	
-	est = (nn > 0) ? est/nn : NAN;
-	//pest = (pnn > 0) ? pest/pnn : NAN;
-	
-	return est;
 }
 
-__device__ float colour_error(uchar4 v1, uchar4 v2) {
-	float dx = 0.05*abs(v1.x-v2.x);
-	float dy = 0.1*abs(v1.y-v2.y);
-	float dz = 0.85*abs(v1.z-v2.z);
-	return dx + dz + dy;
-}
-
-// TODO Use HUE also and perhaps increase window?
-// Or use more complex notion of texture?
-
-/* Just crossed and currently on edge */
-__device__ bool is_edge_left(uchar4 *line, int x, int n) {
-	if (x < 1 || x >= n-1) return false;
-	return (colour_error(line[x-1],line[x]) > EDGE_SENSITIVITY && colour_error(line[x],line[x+1]) <= EDGE_SENSITIVITY);
-}
-
-/* Just crossed but not on edge now */
-__device__ bool is_edge_right(uchar4 *line, int x, int n) {
-	if (x < 1 || x >= n-1) return false;
-	return (colour_error(line[x-1],line[x]) <= EDGE_SENSITIVITY && colour_error(line[x],line[x+1]) > EDGE_SENSITIVITY);
-}
-
-__global__ void filter_kernel(cudaTextureObject_t t, cudaTextureObject_t d,
-		cudaTextureObject_t prevD,
-		cudaTextureObject_t prevT, PtrStepSz<float> f, int num_disp) {
-
-
-	extern __shared__ uchar4 line[]; // One entire line of hsv image
-	
-	for (STRIDE_Y(v,f.rows)) {
-		for (STRIDE_X(u,f.cols)) {
-			line[u] = tex2D<uchar4>(t, u, v);
-		}
-		__syncthreads();
-		
-		for (STRIDE_X(u,f.cols)) {
-			if (is_edge_right(line, u, f.cols)) {
-				float edge_disp = calculate_edge_disp(t,d,prevT,prevD,line[u],u+2,v); // tex2D<float>(d, u, v);
-				f(v,u) = edge_disp;
-				continue;
-				
-				float est = 0.0f;
-				int nn = 0;
-				
-				if (!isnan(edge_disp)) {
-					est += edge_disp;
-					nn++;
-				}
-				//f(v,u) = edge_disp;
-				
-				// TODO, find other edge first to get disparity
-				// Use middle disparities to:
-				//		estimate curve or linear (choose equation)
-				//		or ignore as noise if impossible
-				
-				// TODO For edge disparity, use a window to:
-				//		a) find a missing disparity
-				//		b) make sure disparity has some consensus (above or below mostly)
-				
-				// TODO Use past values?
-				// Another way to fill blanks and gain concensus
-				
-				// TODO Maintain a disparity stack to pop back to background?
-				// Issue of background disparity not being detected.
-				// Only if hsv also matches previous background
-				
-				// TODO Edge prediction (in vertical direction at least) could
-				// help fill both edge and disparity gaps. Propagate disparity
-				// along edges
-				
-				float last_disp = edge_disp;
-				
-				int i;
-				for (i=1; u+i<f.cols; i++) {
-					if (is_edge_right(line, u+i, f.cols)) {
-						//float end_disp = calculate_edge_disp(t,d,prevT,prevD,line[u+i-1],u+i-3,v);
-						//if (!isnan(end_disp)) last_disp = end_disp;
-						break;
-					}
-					
-					float di = tex2D<float>(d,u+i,v);
-					if (!isnan(di)) {
-						est += di;
-						nn++;
-					}
-					//f(v,u+i) = edge_disp;
-				}
-				
-				est = (nn > 0) ? est / nn : NAN;
-				//for (int j=1; j<i; j++) {
-				//	f(v,u+j) = est; //lerp(edge_disp, last_disp, (float)j / (float)(i-1));
-				//}
-			} else f(v,u) = NAN;
-		}
-	}
-}
-
-ftl::cuda::TextureObject<float> prevDisp;
-ftl::cuda::TextureObject<uchar4> prevImage;
-
 void rtcensus_call(const PtrStepSz<uchar4> &l, const PtrStepSz<uchar4> &r, const PtrStepSz<float> &disp, size_t num_disp, const int &stream) {
 	// Make all the required texture steps
 	// TODO Could reduce re-allocations by caching these
@@ -334,7 +166,7 @@ void rtcensus_call(const PtrStepSz<uchar4> &l, const PtrStepSz<uchar4> &r, const
 	ftl::cuda::TextureObject<float> dispTex(r.cols, r.rows);
 	ftl::cuda::TextureObject<float> output(disp);
 	
-	// Calculate the census for left and right
+	// Calculate the census for left and right (14)(15)(16)
 	ftl::cuda::sparse_census(texLeft, texRight, censusTexLeft, censusTexRight);
 
 	dim3 grid(1,1,1);
@@ -342,7 +174,7 @@ void rtcensus_call(const PtrStepSz<uchar4> &l, const PtrStepSz<uchar4> &r, const
 	grid.x = cv::cuda::device::divUp(l.cols - 2 * RADIUS2, BLOCK_W);
 	grid.y = cv::cuda::device::divUp(l.rows - 2 * RADIUS2, ROWSperTHREAD);
 	
-	// Calculate L and R disparities
+	// Calculate L and R disparities (18)(19)(20)(21)(22)(25)
 	disp_kernel<<<grid, threads>>>(
 		dispTexLeft.devicePtr(), dispTexRight.devicePtr(),
 		dispTexLeft.pitch()/sizeof(float), dispTexRight.pitch()/sizeof(float),
@@ -351,28 +183,15 @@ void rtcensus_call(const PtrStepSz<uchar4> &l, const PtrStepSz<uchar4> &r, const
 		num_disp);
 	cudaSafeCall( cudaGetLastError() );
 	
-	// Check consistency between L and R disparities.
+	// Check consistency between L and R disparities. (23)(24)
 	consistency(dispTexLeft, dispTexRight, dispTex);
 
-	texture_filter(texLeft, dispTex, output, num_disp, 20.0);
-
-	/*grid.x = 4;
-	grid.y = l.rows;
-	threads.x = l.cols;
-	size_t filter_smem = sizeof(uchar4) * l.cols;
-	filter_kernel<<<grid, threads, filter_smem>>>(texLeft.cudaTexture(), dispTex.cudaTexture(), prevDisp.cudaTexture(), prevImage.cudaTexture(), disp, num_disp);
-	cudaSafeCall( cudaGetLastError() );*/
+	// Texture filter using the TM measure from eq. (7) of [3]; eq. (26) in [1] is incorrect.
+	texture_filter(texLeft, dispTex, output, num_disp, 10.0);
 
-	//prevDisp.free();
-	//prevDisp = disp;
-	prevImage.free();
-	prevImage = texLeft;
-	
-	//if (&stream == Stream::Null())
 	cudaSafeCall( cudaDeviceSynchronize() );
-		
-	//cudaSafeCall( cudaDestroyTextureObject (texLeft) );
 	
+	texLeft.free();
 	texRight.free();
 	censusTexLeft.free();
 	censusTexRight.free();
diff --git a/cv-node/src/algorithms/tex_filter.cu b/cv-node/src/algorithms/tex_filter.cu
index 3962d1263fff7fe2af6e741f02ac26177f42f9dc..12c9157267b9f433752462c70f8a6b7f071c628c 100644
--- a/cv-node/src/algorithms/tex_filter.cu
+++ b/cv-node/src/algorithms/tex_filter.cu
@@ -3,6 +3,8 @@
 #define FILTER_WINDOW 11.0
 #define FILTER_WINDOW_R	5
 
+// --- Filter by texture complexity --------------------------------------------
+
 __global__ void texture_filter_kernel(cudaTextureObject_t t, cudaTextureObject_t d,
 		ftl::cuda::TextureObject<float> f, int num_disp, double thresh) { // Thresh = -5000000
 
@@ -22,14 +24,15 @@ __global__ void texture_filter_kernel(cudaTextureObject_t t, cudaTextureObject_t
 
 		// Texture map filtering
 		double tm = (neigh_sq / (FILTER_WINDOW*FILTER_WINDOW)) -
-				//((neigh_sum*neigh_sum) / (FILTER_WINDOW*FILTER_WINDOW));
 				((neigh_sum / (FILTER_WINDOW*FILTER_WINDOW)) * (neigh_sum / (FILTER_WINDOW*FILTER_WINDOW)));
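+		// i.e. tm = E[z^2] - (E[z])^2, the variance of the .z channel over the
+		// 11x11 window; disparities in low-variance (untextured) regions are
+		// discarded below.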
 
 		if (tm >= thresh) {
 			f(u,v) = disp;
 		} else {
-			f(u,v) = NAN;
+			f(u,v) = NAN; // (tm <= 200.0) ? 0 : 200.0f;
 		}
+		
+		//f(u,v) = tm;
 	}
 	}
 }
@@ -53,3 +56,46 @@ namespace cuda {
 }
 }
 
+// --- Generate a texture map --------------------------------------------------
+
+__global__ void texture_map_kernel(cudaTextureObject_t t,
+		ftl::cuda::TextureObject<float> f) {
+
+	for (STRIDE_Y(v,f.height())) {
+	for (STRIDE_X(u,f.width())) {
+		double neigh_sq = 0.0;
+		double neigh_sum = 0.0;
+
+		for (int m=-FILTER_WINDOW_R; m<=FILTER_WINDOW_R; m++) {
+			for (int n=-FILTER_WINDOW_R; n<=FILTER_WINDOW_R; n++) {
+				uchar4 neigh = tex2D<uchar4>(t, u+n, v+m);
+				neigh_sq += (double)(neigh.z*neigh.z);
+				neigh_sum += (double)neigh.z;
+			}	
+		}
+
+		// Texture measure: local variance of the .z channel over the window
+		double tm = (neigh_sq / (FILTER_WINDOW*FILTER_WINDOW)) -
+				((neigh_sum / (FILTER_WINDOW*FILTER_WINDOW)) * (neigh_sum / (FILTER_WINDOW*FILTER_WINDOW)));
+		
+		f(u,v) = tm;
+	}
+	}
+}
+
+namespace ftl {
+namespace cuda {
+	void texture_map(const TextureObject<uchar4> &t,
+			TextureObject<float> &f) {
+		dim3 grid(1,1,1);
+		dim3 threads(128, 1, 1);
+		grid.x = cv::cuda::device::divUp(f.width(), 128);
+		grid.y = cv::cuda::device::divUp(f.height(), 1);
+		texture_map_kernel<<<grid, threads>>>(
+			t.cudaTexture(),
+			f);
+		cudaSafeCall( cudaGetLastError() );
+	}
+}
+}
+