diff --git a/applications/reconstruct/CMakeLists.txt b/applications/reconstruct/CMakeLists.txt
index a2d3b43e33e3798532f494d576c694e8a409a99b..dce81e19c91a41f7e8bba90a2149a6602d73e5aa 100644
--- a/applications/reconstruct/CMakeLists.txt
+++ b/applications/reconstruct/CMakeLists.txt
@@ -17,6 +17,9 @@ set(REPSRC
 	#src/depth_camera.cpp
 	src/ilw/ilw.cpp
 	src/ilw/ilw.cu
+	src/ilw/fill.cu
+	src/ilw/discontinuity.cu
+	src/ilw/correspondence.cu
 )
 
 add_executable(ftl-reconstruct ${REPSRC})
diff --git a/applications/reconstruct/src/ilw/correspondence.cu b/applications/reconstruct/src/ilw/correspondence.cu
new file mode 100644
index 0000000000000000000000000000000000000000..8fb9d94b75df5a5d2687fa81da01d999f2bcfeeb
--- /dev/null
+++ b/applications/reconstruct/src/ilw/correspondence.cu
@@ -0,0 +1,198 @@
+#include "ilw_cuda.hpp"
+#include <ftl/cuda/weighting.hpp>
+
+using ftl::cuda::TextureObject;
+using ftl::rgbd::Camera;
+using ftl::cuda::Mask;
+
+#define T_PER_BLOCK 8
+
+template<int FUNCTION>
+__device__ float weightFunction(const ftl::cuda::ILWParams &params, float dweight, float cweight);
+
+template <>
+__device__ inline float weightFunction<0>(const ftl::cuda::ILWParams &params, float dweight, float cweight) {
+	return (params.cost_ratio * (cweight) + (1.0f - params.cost_ratio) * dweight);
+}
+
+template <>
+__device__ inline float weightFunction<1>(const ftl::cuda::ILWParams &param, float dweight, float cweight) {
+	return (cweight * cweight * dweight);
+}
+
+template <>
+__device__ inline float weightFunction<2>(const ftl::cuda::ILWParams &param, float dweight, float cweight) {
+	return (dweight * dweight * cweight);
+}
+
+template <>
+__device__ inline float weightFunction<3>(const ftl::cuda::ILWParams &params, float dweight, float cweight) {
+	return (dweight == 0.0f) ? 0.0f : (params.cost_ratio * (cweight) + (1.0f - params.cost_ratio) * dweight);
+}
+
+template <>
+__device__ inline float weightFunction<4>(const ftl::cuda::ILWParams &params, float dweight, float cweight) {
+	return cweight;
+}
+
+template<int COR_STEPS, int FUNCTION> 
+__global__ void correspondence_energy_vector_kernel(
+        TextureObject<float> d1,
+        TextureObject<float> d2,
+        TextureObject<uchar4> c1,
+        TextureObject<uchar4> c2,
+        TextureObject<float> dout,
+		TextureObject<float> conf,
+		TextureObject<int> mask,
+        float4x4 pose1,
+        float4x4 pose1_inv,
+        float4x4 pose2,  // Inverse
+        Camera cam1,
+        Camera cam2, ftl::cuda::ILWParams params) {
+
+    // Each warp picks point in p1
+    //const int tid = (threadIdx.x + threadIdx.y * blockDim.x);
+	const int x = (blockIdx.x*blockDim.x + threadIdx.x); // / WARP_SIZE;
+    const int y = blockIdx.y*blockDim.y + threadIdx.y;
+    
+    //const float3 world1 = make_float3(p1.tex2D(x, y));
+    const float depth1 = d1.tex2D(x,y); //(pose1_inv * world1).z;  // Initial starting depth
+	if (depth1 < cam1.minDepth || depth1 > cam1.maxDepth) return;
+
+	// TODO: Temporary hack to ensure depth1 is present
+	//const float4 temp = vout.tex2D(x,y);
+	//vout(x,y) =  make_float4(depth1, 0.0f, temp.z, temp.w);
+	
+	const float3 world1 = pose1 * cam1.screenToCam(x,y,depth1);
+
+    const uchar4 colour1 = c1.tex2D(x, y);
+
+	float bestdepth = 0.0f;
+	float bestweight = 0.0f;
+	float bestcolour = 0.0f;
+	float bestdweight = 0.0f;
+	float totalcolour = 0.0f;
+	int count = 0;
+	float contrib = 0.0f;
+    
+	const float step_interval = params.range / (COR_STEPS / 2);
+	
+	const float3 rayStep_world = pose1.getFloat3x3() * cam1.screenToCam(x,y,step_interval);
+	const float3 rayStart_2 = pose2 * world1;
+	const float3 rayStep_2 = pose2.getFloat3x3() * rayStep_world;
+
+    // Project to p2 using cam2
+    // Each thread takes a possible correspondence and calculates a weighting
+    //const int lane = tid % WARP_SIZE;
+	for (int i=0; i<COR_STEPS; ++i) {
+		const int j = i - (COR_STEPS/2);
+		const float depth_adjust = (float)j * step_interval + depth1;
+
+        // Calculate adjusted depth 3D point in camera 2 space
+        const float3 worldPos = world1 + j * rayStep_world; //(pose1 * cam1.screenToCam(x, y, depth_adjust));
+        const float3 camPos = rayStart_2 + j * rayStep_2; //pose2 * worldPos;
+        const uint2 screen = cam2.camToScreen<uint2>(camPos);
+
+        if (screen.x >= cam2.width || screen.y >= cam2.height) continue;
+
+		// Generate a depth correspondence value
+		const float depth2 = d2.tex2D((int)screen.x, (int)screen.y);
+		const float dweight = ftl::cuda::weighting(fabs(depth2 - camPos.z), params.spatial_smooth);
+		//const float dweight = ftl::cuda::weighting(fabs(depth_adjust - depth1), 2.0f*params.range);
+		
+		// Generate a colour correspondence value
+		const uchar4 colour2 = c2.tex2D((int)screen.x, (int)screen.y);
+		const float cweight = ftl::cuda::colourWeighting(colour1, colour2, params.colour_smooth);
+
+		const float weight = weightFunction<FUNCTION>(params, dweight, cweight);
+
+		++count;
+		contrib += weight;
+		bestcolour = max(cweight, bestcolour);
+		bestdweight = max(dweight, bestdweight);
+		totalcolour += cweight;
+		if (weight > bestweight) {
+			bestweight = weight;
+			bestdepth = depth_adjust;
+		}
+    }
+
+	const float avgcolour = totalcolour/(float)count;
+	const float confidence = bestcolour / totalcolour; //bestcolour - avgcolour;
+	
+	Mask m(mask.tex2D(x,y));
+
+    //if (bestweight > 0.0f) {
+        float old = conf.tex2D(x,y);
+
+        if (bestweight * confidence > old) {
+			dout(x,y) = bestdepth;
+			conf(x,y) = bestweight * confidence;
+		}
+	//}
+	
+	// If a good enough match is found, mark dodgy depth as solid
+	if ((m.isFilled() || m.isDiscontinuity()) && (bestweight > params.match_threshold)) mask(x,y) = 0;
+}
+
+void ftl::cuda::correspondence(
+        TextureObject<float> &d1,
+        TextureObject<float> &d2,
+        TextureObject<uchar4> &c1,
+        TextureObject<uchar4> &c2,
+        TextureObject<float> &dout,
+		TextureObject<float> &conf,
+		TextureObject<int> &mask,
+        float4x4 &pose1,
+        float4x4 &pose1_inv,
+        float4x4 &pose2,
+        const Camera &cam1,
+        const Camera &cam2, const ILWParams &params, int func,
+        cudaStream_t stream) {
+
+	const dim3 gridSize((d1.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (d1.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
+
+    //printf("COR SIZE %d,%d\n", p1.width(), p1.height());
+
+	switch (func) {
+    case 0: correspondence_energy_vector_kernel<16,0><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, dout, conf, mask, pose1, pose1_inv, pose2, cam1, cam2, params); break;
+	case 1: correspondence_energy_vector_kernel<16,1><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, dout, conf, mask, pose1, pose1_inv, pose2, cam1, cam2, params); break;
+	case 2: correspondence_energy_vector_kernel<16,2><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, dout, conf, mask, pose1, pose1_inv, pose2, cam1, cam2, params); break;
+	case 3: correspondence_energy_vector_kernel<16,3><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, dout, conf, mask, pose1, pose1_inv, pose2, cam1, cam2, params); break;
+	case 4: correspondence_energy_vector_kernel<16,4><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, dout, conf, mask, pose1, pose1_inv, pose2, cam1, cam2, params); break;
+	}
+
+    cudaSafeCall( cudaGetLastError() );
+}
+
+//==============================================================================
+
+__global__ void mask_filter_kernel(
+		TextureObject<float> depth,
+		TextureObject<int> mask) {
+	
+	const int x = (blockIdx.x*blockDim.x + threadIdx.x);
+	const int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	if (x >= 0 && x < depth.width() && y >=0 && y < depth.height()) {
+		Mask m(mask.tex2D(x,y));
+		if (m.isFilled()) {
+			depth(x,y) = MINF;
+		}
+	}
+}
+
+
+void ftl::cuda::mask_filter(
+		TextureObject<float> &depth,
+		TextureObject<int> &mask,
+		cudaStream_t stream) {
+	const dim3 gridSize((depth.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
+
+	mask_filter_kernel<<<gridSize, blockSize, 0, stream>>>(
+		depth, mask
+	);
+	cudaSafeCall( cudaGetLastError() );
+}
diff --git a/applications/reconstruct/src/ilw/discontinuity.cu b/applications/reconstruct/src/ilw/discontinuity.cu
new file mode 100644
index 0000000000000000000000000000000000000000..fe78d47158e81ce02be82953601151f5bf31703a
--- /dev/null
+++ b/applications/reconstruct/src/ilw/discontinuity.cu
@@ -0,0 +1,52 @@
+#include "ilw_cuda.hpp"
+
+#define T_PER_BLOCK 8
+
+using ftl::cuda::Mask;
+
+template <int RADIUS>
+__global__ void discontinuity_kernel(ftl::cuda::TextureObject<int> mask_out, ftl::cuda::TextureObject<float> depth, ftl::rgbd::Camera params) {
+	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
+	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	if (x < params.width && y < params.height) {
+		Mask mask(0);
+
+		const float d = depth.tex2D((int)x, (int)y);
+
+		// Calculate depth between 0.0 and 1.0
+		//float p = (d - params.minDepth) / (params.maxDepth - params.minDepth);
+
+		if (d >= params.minDepth && d <= params.maxDepth) {
+			/* Orts-Escolano S. et al. 2016. Holoportation: Virtual 3D teleportation in real-time. */
+			// Is there a discontinuity nearby?
+			for (int u=-RADIUS; u<=RADIUS; ++u) {
+				for (int v=-RADIUS; v<=RADIUS; ++v) {
+					// If yes, the flag using w = -1
+					if (fabs(depth.tex2D((int)x+u, (int)y+v) - d) > 0.1f) mask.isDiscontinuity(true);
+				}
+			}
+        }
+        
+        mask_out(x,y) = (int)mask;
+	}
+}
+
+void ftl::cuda::discontinuity(ftl::cuda::TextureObject<int> &mask_out, ftl::cuda::TextureObject<float> &depth, const ftl::rgbd::Camera &params, uint discon, cudaStream_t stream) {
+	const dim3 gridSize((params.width + T_PER_BLOCK - 1)/T_PER_BLOCK, (params.height + T_PER_BLOCK - 1)/T_PER_BLOCK);
+	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
+
+	switch (discon) {
+    case 5 :	discontinuity_kernel<5><<<gridSize, blockSize, 0, stream>>>(mask_out, depth, params); break;
+	case 4 :	discontinuity_kernel<4><<<gridSize, blockSize, 0, stream>>>(mask_out, depth, params); break;
+	case 3 :	discontinuity_kernel<3><<<gridSize, blockSize, 0, stream>>>(mask_out, depth, params); break;
+	case 2 :	discontinuity_kernel<2><<<gridSize, blockSize, 0, stream>>>(mask_out, depth, params); break;
+	case 1 :	discontinuity_kernel<1><<<gridSize, blockSize, 0, stream>>>(mask_out, depth, params); break;
+	default:	break;
+	}
+	cudaSafeCall( cudaGetLastError() );
+
+#ifdef _DEBUG
+	cudaSafeCall(cudaDeviceSynchronize());
+#endif
+}
diff --git a/applications/reconstruct/src/ilw/fill.cu b/applications/reconstruct/src/ilw/fill.cu
new file mode 100644
index 0000000000000000000000000000000000000000..4109c1034d045ecdede9e28582cd19df670f5a63
--- /dev/null
+++ b/applications/reconstruct/src/ilw/fill.cu
@@ -0,0 +1,71 @@
+#include "ilw_cuda.hpp"
+#include <ftl/cuda/weighting.hpp>
+
+using ftl::cuda::Mask;
+
+#define T_PER_BLOCK 8
+
+template <int RADIUS>
+__global__ void preprocess_kernel(
+    	ftl::cuda::TextureObject<float> depth_in,
+		ftl::cuda::TextureObject<float> depth_out,
+		ftl::cuda::TextureObject<uchar4> colour,
+		ftl::cuda::TextureObject<int> mask,
+		ftl::rgbd::Camera camera,
+		ftl::cuda::ILWParams params) {
+
+    const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
+    const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+    float d = depth_in.tex2D((int)x,(int)y);
+    Mask main_mask(mask.tex2D((int)x,(int)y));
+	uchar4 c = colour.tex2D((int)x,(int)y);
+
+	// Calculate discontinuity mask
+
+	// Fill missing depths
+	if (d < camera.minDepth || d > camera.maxDepth) {
+		float depth_accum = 0.0f;
+		float contrib = 0.0f;
+
+        int v=0;
+		//for (int v=-RADIUS; v<=RADIUS; ++v) {
+			for (int u=-RADIUS; u<=RADIUS; ++u) {
+				uchar4 c2 = colour.tex2D((int)x+u,(int)y+v);
+                float d2 = depth_in.tex2D((int)x+u,(int)y+v);
+                Mask m(mask.tex2D((int)x+u,(int)y+v));
+
+				if (!m.isDiscontinuity() && d2 >= camera.minDepth && d2 <= camera.maxDepth) {
+					float w = ftl::cuda::colourWeighting(c, c2, params.fill_match);
+					depth_accum += d2*w;
+					contrib += w;
+				}
+			}
+		//}
+
+		if (contrib > params.fill_threshold) {
+            d = depth_accum / contrib;
+            main_mask.isFilled(true);
+        }
+	}
+
+    mask(x,y) = (int)main_mask;
+	depth_out(x,y) = d;
+}
+
+void ftl::cuda::preprocess_depth(
+		ftl::cuda::TextureObject<float> &depth_in,
+		ftl::cuda::TextureObject<float> &depth_out,
+		ftl::cuda::TextureObject<uchar4> &colour,
+		ftl::cuda::TextureObject<int> &mask,
+		const ftl::rgbd::Camera &camera,
+		const ftl::cuda::ILWParams &params,
+		cudaStream_t stream) {
+
+	const dim3 gridSize((depth_in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
+
+	preprocess_kernel<10><<<gridSize, blockSize, 0, stream>>>(depth_in, depth_out, colour, mask, camera, params);
+
+	cudaSafeCall( cudaGetLastError() );
+}
diff --git a/applications/reconstruct/src/ilw/ilw.cpp b/applications/reconstruct/src/ilw/ilw.cpp
index c4de69f0a50c8a53afe4daf0557df3408a260cba..93fd75a188b5e48e48e387c5ddf96ea43bcb1eec 100644
--- a/applications/reconstruct/src/ilw/ilw.cpp
+++ b/applications/reconstruct/src/ilw/ilw.cpp
@@ -38,7 +38,7 @@ ILW::ILW(nlohmann::json &config) : ftl::Configurable(config) {
 	fill_depth_ = value("fill_depth", false);
 
     on("fill_depth", [this](const ftl::config::Event &e) {
-        fill_depth_ = value("fill_depth", true);
+        fill_depth_ = value("fill_depth", false);
     });
 
 	on("ilw_align", [this](const ftl::config::Event &e) {
@@ -148,6 +148,9 @@ bool ILW::process(ftl::rgbd::FrameSet &fs) {
     _phase0(fs, stream_);
 
 	params_.range = value("search_range", 0.05f);
+    params_.fill_match = value("fill_match", 50.0f);
+    params_.fill_threshold = value("fill_threshold", 0.0f);
+	params_.match_threshold = value("match_threshold", 0.3f);
 
     for (int i=0; i<iterations_; ++i) {
         _phase1(fs, value("cost_function",3), stream_);
@@ -215,6 +218,8 @@ bool ILW::_phase0(ftl::rgbd::FrameSet &fs, cudaStream_t stream) {
 		f.createTexture<int>(Channel::Mask, Format<int>(f.get<GpuMat>(Channel::Colour).size()));
         f.createTexture<uchar4>(Channel::Colour);
 		f.createTexture<float>(Channel::Depth);
+
+		f.get<GpuMat>(Channel::Mask).setTo(cv::Scalar(0));
     }
 
 	//cudaSafeCall(cudaStreamSynchronize(stream_));
@@ -226,6 +231,20 @@ bool ILW::_phase1(ftl::rgbd::FrameSet &fs, int win, cudaStream_t stream) {
     // Run correspondence kernel to create an energy vector
     cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream);
 
+    // Find discontinuity mask
+    for (size_t i=0; i<fs.frames.size(); ++i) {
+        auto &f = fs.frames[i];
+        auto s = fs.sources[i];
+
+        ftl::cuda::discontinuity(
+            f.getTexture<int>(Channel::Mask),
+            f.getTexture<float>(Channel::Depth),
+            s->parameters(),
+            discon_mask_,
+            stream
+        );
+    }
+
 	// First do any preprocessing
 	if (fill_depth_) {
 		for (size_t i=0; i<fs.frames.size(); ++i) {
@@ -287,6 +306,7 @@ bool ILW::_phase1(ftl::rgbd::FrameSet &fs, int win, cudaStream_t stream) {
                 // TODO: Add normals and other things...
                 f1.getTexture<float>(Channel::Depth2),
                 f1.getTexture<float>(Channel::Confidence),
+				f1.getTexture<int>(Channel::Mask),
                 pose1,
                 pose1_inv,
                 pose2,
@@ -332,6 +352,11 @@ bool ILW::_phase2(ftl::rgbd::FrameSet &fs, float rate, cudaStream_t stream) {
              motion_window_,
              stream
         );
+
+        if (f.hasChannel(Channel::Mask)) {
+			ftl::cuda::mask_filter(f.getTexture<float>(Channel::Depth),
+				f.getTexture<int>(Channel::Mask), stream_);
+		}
     }
 
     return true;
diff --git a/applications/reconstruct/src/ilw/ilw.cu b/applications/reconstruct/src/ilw/ilw.cu
index 97ea74eebdc8f3b3d8aeec75f5153ce468808a33..710f7521d4a508dd8fc968f4648a5402f8045fd7 100644
--- a/applications/reconstruct/src/ilw/ilw.cu
+++ b/applications/reconstruct/src/ilw/ilw.cu
@@ -26,214 +26,11 @@ __device__ inline float warpSum(float e) {
 
 //==============================================================================
 
-template <int RADIUS>
-__global__ void preprocess_kernel(
-    	ftl::cuda::TextureObject<float> depth_in,
-		ftl::cuda::TextureObject<float> depth_out,
-		ftl::cuda::TextureObject<uchar4> colour,
-		ftl::cuda::TextureObject<int> mask,
-		ftl::rgbd::Camera camera,
-		ftl::cuda::ILWParams params) {
 
-    const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
-    const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
-
-	float d = depth_in.tex2D((int)x,(int)y);
-	uchar4 c = colour.tex2D((int)x,(int)y);
-
-	// Calculate discontinuity mask
-
-	// Fill missing depths
-	if (d < camera.minDepth || d > camera.maxDepth) {
-		float depth_accum = 0.0f;
-		float contrib = 0.0f;
-
-		for (int v=-RADIUS; v<=RADIUS; ++v) {
-			for (int u=-RADIUS; u<=RADIUS; ++u) {
-				uchar4 c2 = colour.tex2D((int)x+u,(int)y+v);
-				float d2 = depth_in.tex2D((int)x+u,(int)y+v);
-				if (d2 >= camera.minDepth && d2 <= camera.maxDepth) {
-					float w = ftl::cuda::colourWeighting(c, c2, params.colour_smooth);
-					depth_accum += d2*w;
-					contrib += w;
-				}
-			}
-		}
-
-		if (contrib >= 0.0f) d = depth_accum / contrib;
-	}
-
-	depth_out(x,y) = d;
-}
-
-void ftl::cuda::preprocess_depth(
-		ftl::cuda::TextureObject<float> &depth_in,
-		ftl::cuda::TextureObject<float> &depth_out,
-		ftl::cuda::TextureObject<uchar4> &colour,
-		ftl::cuda::TextureObject<int> &mask,
-		const ftl::rgbd::Camera &camera,
-		const ftl::cuda::ILWParams &params,
-		cudaStream_t stream) {
-
-	const dim3 gridSize((depth_in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
-	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
-
-	preprocess_kernel<3><<<gridSize, blockSize, 0, stream>>>(depth_in, depth_out, colour, mask, camera, params);
-
-	cudaSafeCall( cudaGetLastError() );
-}
 
 //==============================================================================
 
-template<int FUNCTION>
-__device__ float weightFunction(const ftl::cuda::ILWParams &params, float dweight, float cweight);
-
-template <>
-__device__ inline float weightFunction<0>(const ftl::cuda::ILWParams &params, float dweight, float cweight) {
-	return (params.cost_ratio * (cweight) + (1.0f - params.cost_ratio) * dweight);
-}
-
-template <>
-__device__ inline float weightFunction<1>(const ftl::cuda::ILWParams &param, float dweight, float cweight) {
-	return (cweight * cweight * dweight);
-}
-
-template <>
-__device__ inline float weightFunction<2>(const ftl::cuda::ILWParams &param, float dweight, float cweight) {
-	return (dweight * dweight * cweight);
-}
-
-template <>
-__device__ inline float weightFunction<3>(const ftl::cuda::ILWParams &params, float dweight, float cweight) {
-	return (dweight == 0.0f) ? 0.0f : (params.cost_ratio * (cweight) + (1.0f - params.cost_ratio) * dweight);
-}
-
-template <>
-__device__ inline float weightFunction<4>(const ftl::cuda::ILWParams &params, float dweight, float cweight) {
-	return cweight;
-}
 
-template<int COR_STEPS, int FUNCTION> 
-__global__ void correspondence_energy_vector_kernel(
-        TextureObject<float> d1,
-        TextureObject<float> d2,
-        TextureObject<uchar4> c1,
-        TextureObject<uchar4> c2,
-        TextureObject<float> dout,
-        TextureObject<float> conf,
-        float4x4 pose1,
-        float4x4 pose1_inv,
-        float4x4 pose2,  // Inverse
-        Camera cam1,
-        Camera cam2, ftl::cuda::ILWParams params) {
-
-    // Each warp picks point in p1
-    //const int tid = (threadIdx.x + threadIdx.y * blockDim.x);
-	const int x = (blockIdx.x*blockDim.x + threadIdx.x); // / WARP_SIZE;
-    const int y = blockIdx.y*blockDim.y + threadIdx.y;
-    
-    //const float3 world1 = make_float3(p1.tex2D(x, y));
-    const float depth1 = d1.tex2D(x,y); //(pose1_inv * world1).z;  // Initial starting depth
-	if (depth1 < cam1.minDepth || depth1 > cam1.maxDepth) return;
-
-	// TODO: Temporary hack to ensure depth1 is present
-	//const float4 temp = vout.tex2D(x,y);
-	//vout(x,y) =  make_float4(depth1, 0.0f, temp.z, temp.w);
-	
-	const float3 world1 = pose1 * cam1.screenToCam(x,y,depth1);
-
-    const uchar4 colour1 = c1.tex2D(x, y);
-
-	float bestdepth = 0.0f;
-	float bestweight = 0.0f;
-	float bestcolour = 0.0f;
-	float totalcolour = 0.0f;
-	int count = 0;
-	float contrib = 0.0f;
-    
-	const float step_interval = params.range / (COR_STEPS / 2);
-	
-	const float3 rayStep_world = pose1.getFloat3x3() * cam1.screenToCam(x,y,step_interval);
-	const float3 rayStart_2 = pose2 * world1;
-	const float3 rayStep_2 = pose2.getFloat3x3() * rayStep_world;
-
-    // Project to p2 using cam2
-    // Each thread takes a possible correspondence and calculates a weighting
-    //const int lane = tid % WARP_SIZE;
-	for (int i=0; i<COR_STEPS; ++i) {
-		const int j = i - (COR_STEPS/2);
-		const float depth_adjust = (float)j * step_interval + depth1;
-
-        // Calculate adjusted depth 3D point in camera 2 space
-        const float3 worldPos = world1 + j * rayStep_world; //(pose1 * cam1.screenToCam(x, y, depth_adjust));
-        const float3 camPos = rayStart_2 + j * rayStep_2; //pose2 * worldPos;
-        const uint2 screen = cam2.camToScreen<uint2>(camPos);
-
-        if (screen.x >= cam2.width || screen.y >= cam2.height) continue;
-
-		// Generate a depth correspondence value
-		const float depth2 = d2.tex2D((int)screen.x, (int)screen.y);
-		const float dweight = ftl::cuda::weighting(fabs(depth2 - camPos.z), params.spatial_smooth);
-		//const float dweight = ftl::cuda::weighting(fabs(depth_adjust - depth1), 2.0f*params.range);
-		
-		// Generate a colour correspondence value
-		const uchar4 colour2 = c2.tex2D((int)screen.x, (int)screen.y);
-		const float cweight = ftl::cuda::colourWeighting(colour1, colour2, params.colour_smooth);
-
-		const float weight = weightFunction<FUNCTION>(params, dweight, cweight);
-
-		++count;
-		contrib += weight;
-		bestcolour = max(cweight, bestcolour);
-		totalcolour += cweight;
-		if (weight > bestweight) {
-			bestweight = weight;
-			bestdepth = depth_adjust;
-		}
-    }
-
-	const float avgcolour = totalcolour/(float)count;
-    const float confidence = bestcolour / totalcolour; //bestcolour - avgcolour;
-
-    //if (bestweight > 0.0f) {
-        float old = conf.tex2D(x,y);
-
-        if (bestweight * confidence >= old) {
-			dout(x,y) = bestdepth;
-			conf(x,y) = bestweight * confidence;
-		}
-    //}
-}
-
-void ftl::cuda::correspondence(
-        TextureObject<float> &d1,
-        TextureObject<float> &d2,
-        TextureObject<uchar4> &c1,
-        TextureObject<uchar4> &c2,
-        TextureObject<float> &dout,
-        TextureObject<float> &conf,
-        float4x4 &pose1,
-        float4x4 &pose1_inv,
-        float4x4 &pose2,
-        const Camera &cam1,
-        const Camera &cam2, const ILWParams &params, int func,
-        cudaStream_t stream) {
-
-	const dim3 gridSize((d1.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (d1.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
-	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
-
-    //printf("COR SIZE %d,%d\n", p1.width(), p1.height());
-
-	switch (func) {
-    case 0: correspondence_energy_vector_kernel<16,0><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, dout, conf, pose1, pose1_inv, pose2, cam1, cam2, params); break;
-	case 1: correspondence_energy_vector_kernel<16,1><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, dout, conf, pose1, pose1_inv, pose2, cam1, cam2, params); break;
-	case 2: correspondence_energy_vector_kernel<16,2><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, dout, conf, pose1, pose1_inv, pose2, cam1, cam2, params); break;
-	case 3: correspondence_energy_vector_kernel<16,3><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, dout, conf, pose1, pose1_inv, pose2, cam1, cam2, params); break;
-	case 4: correspondence_energy_vector_kernel<16,4><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, dout, conf, pose1, pose1_inv, pose2, cam1, cam2, params); break;
-	}
-
-    cudaSafeCall( cudaGetLastError() );
-}
 
 //==============================================================================
 
diff --git a/applications/reconstruct/src/ilw/ilw_cuda.hpp b/applications/reconstruct/src/ilw/ilw_cuda.hpp
index aac0493a86af1cfd69925aa054ee3d34c901ec5f..fad97afbdb8385d9381eff5af40fb272f172ca1a 100644
--- a/applications/reconstruct/src/ilw/ilw_cuda.hpp
+++ b/applications/reconstruct/src/ilw/ilw_cuda.hpp
@@ -4,6 +4,7 @@
 #include <ftl/cuda_common.hpp>
 #include <ftl/rgbd/camera.hpp>
 #include <ftl/cuda_matrix_util.hpp>
+#include <ftl/cuda/mask.hpp>
 
 namespace ftl {
 namespace cuda {
@@ -11,6 +12,9 @@ namespace cuda {
 struct ILWParams {
     float spatial_smooth;
     float colour_smooth;
+	float fill_match;
+	float fill_threshold;
+	float match_threshold;
     float cost_ratio;
     float cost_threshold;
 	float range;
@@ -22,35 +26,17 @@ static const uint kILWFlag_RestrictZ = 0x0002;
 static const uint kILWFlag_SkipBadColour = 0x0004;
 static const uint kILWFlag_ColourConfidenceOnly = 0x0008;
 
-/**
- * Wrap an int mask value used to flag individual depth pixels.
- */
-class ILWMask {
-	public:
-	__device__ explicit inline ILWMask(int v) : v_(v) {}
-	#ifdef __CUDACC__
-	__device__ inline ILWMask(const ftl::cuda::TextureObject<int> &m, int x, int y) : v_(m.tex2D(x,y)) {}
-	#endif
-	__device__ inline operator int() const { return v_; }
-
-	__device__ inline bool isFilled() const { return v_ & kMask_Filled; }
-	__device__ inline bool isDiscontinuity() const { return v_ & kMask_Discontinuity; }
-	__device__ inline bool hasCorrespondence() const { return v_ & kMask_Correspondence; }
-	__device__ inline bool isBad() const { return v_ & kMask_Bad; }
-
-	__device__ inline void isFilled(bool v) { v_ = (v) ? v_ | kMask_Filled : v_ & (~kMask_Filled); }
-	__device__ inline void isDiscontinuity(bool v) { v_ = (v) ? v_ | kMask_Discontinuity : v_ & (~kMask_Discontinuity); }
-	__device__ inline void hasCorrespondence(bool v) { v_ = (v) ? v_ | kMask_Correspondence : v_ & (~kMask_Correspondence); }
-	__device__ inline void isBad(bool v) { v_ = (v) ? v_ | kMask_Bad : v_ & (~kMask_Bad); }
-
-	private:
-	int v_;
+void discontinuity(
+	ftl::cuda::TextureObject<int> &mask_out,
+	ftl::cuda::TextureObject<float> &depth,
+	const ftl::rgbd::Camera &params,
+	uint discon, cudaStream_t stream
+);
 
-	static const int kMask_Filled = 0x0001;
-	static const int kMask_Discontinuity = 0x0002;
-	static const int kMask_Correspondence = 0x0004;
-	static const int kMask_Bad = 0x0008;
-};
+void mask_filter(
+	ftl::cuda::TextureObject<float> &depth,
+	ftl::cuda::TextureObject<int> &mask,
+	cudaStream_t stream);
 
 void preprocess_depth(
 	ftl::cuda::TextureObject<float> &depth_in,
@@ -69,6 +55,7 @@ void correspondence(
     ftl::cuda::TextureObject<uchar4> &c2,
     ftl::cuda::TextureObject<float> &dout,
     ftl::cuda::TextureObject<float> &conf,
+	ftl::cuda::TextureObject<int> &mask,
     float4x4 &pose1,
     float4x4 &pose1_inv,
     float4x4 &pose2,
diff --git a/components/renderers/cpp/CMakeLists.txt b/components/renderers/cpp/CMakeLists.txt
index 63cf8f1e75663c61e05f7c68d26217de30e964e6..8fc7996efcbad8502bcc3dce0fef4fee8ffd327d 100644
--- a/components/renderers/cpp/CMakeLists.txt
+++ b/components/renderers/cpp/CMakeLists.txt
@@ -3,6 +3,7 @@ add_library(ftlrender
 	src/splatter.cu
 	src/points.cu
 	src/normals.cu
+	src/mask.cu
 )
 
 # These cause errors in CI build and are being removed from PCL in newer versions
diff --git a/components/renderers/cpp/include/ftl/cuda/mask.hpp b/components/renderers/cpp/include/ftl/cuda/mask.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..3f49f3bfc65e67186cc8caf321743ae2800805e7
--- /dev/null
+++ b/components/renderers/cpp/include/ftl/cuda/mask.hpp
@@ -0,0 +1,43 @@
+#ifndef _FTL_CUDA_MASK_HPP_
+#define _FTL_CUDA_MASK_HPP_
+
+namespace ftl {
+namespace cuda {
+
+/**
+ * Wrap an int mask value used to flag individual depth pixels.
+ */
+class Mask {
+	public:
+	__device__ inline Mask() : v_(0) {}
+	__device__ explicit inline Mask(int v) : v_(v) {}
+	#ifdef __CUDACC__
+	__device__ inline Mask(const ftl::cuda::TextureObject<int> &m, int x, int y) : v_(m.tex2D(x,y)) {}
+	#endif
+	__device__ inline operator int() const { return v_; }
+
+    __device__ inline bool is(int m) const { return v_ & m; }
+
+	__device__ inline bool isFilled() const { return v_ & kMask_Filled; }
+	__device__ inline bool isDiscontinuity() const { return v_ & kMask_Discontinuity; }
+	__device__ inline bool hasCorrespondence() const { return v_ & kMask_Correspondence; }
+	__device__ inline bool isBad() const { return v_ & kMask_Bad; }
+
+	__device__ inline void isFilled(bool v) { v_ = (v) ? v_ | kMask_Filled : v_ & (~kMask_Filled); }
+	__device__ inline void isDiscontinuity(bool v) { v_ = (v) ? v_ | kMask_Discontinuity : v_ & (~kMask_Discontinuity); }
+	__device__ inline void hasCorrespondence(bool v) { v_ = (v) ? v_ | kMask_Correspondence : v_ & (~kMask_Correspondence); }
+	__device__ inline void isBad(bool v) { v_ = (v) ? v_ | kMask_Bad : v_ & (~kMask_Bad); }
+
+    static constexpr int kMask_Filled = 0x0001;
+	static constexpr int kMask_Discontinuity = 0x0002;
+	static constexpr int kMask_Correspondence = 0x0004;
+	static constexpr int kMask_Bad = 0x0008;
+
+	private:
+	int v_;
+};
+
+}
+}
+
+#endif
diff --git a/components/renderers/cpp/include/ftl/render/splat_render.hpp b/components/renderers/cpp/include/ftl/render/splat_render.hpp
index bd83d8bd1bda862d71e2ef12f5239a578e2d86e6..3b36e8ec98dd37aaf40b0e3a7e12cad6b3b5a14e 100644
--- a/components/renderers/cpp/include/ftl/render/splat_render.hpp
+++ b/components/renderers/cpp/include/ftl/render/splat_render.hpp
@@ -53,6 +53,7 @@ class Splatter : public ftl::render::Renderer {
 	uchar4 light_ambient_;
 	ftl::render::SplatParams params_;
 	cudaStream_t stream_;
+	float3 light_pos_;
 
 	template <typename T>
 	void __blendChannel(ftl::rgbd::Frame &, ftl::codecs::Channel in, ftl::codecs::Channel out, cudaStream_t);
diff --git a/components/renderers/cpp/src/mask.cu b/components/renderers/cpp/src/mask.cu
new file mode 100644
index 0000000000000000000000000000000000000000..6e9621c8064359021de616c8662f3b4227ba1806
--- /dev/null
+++ b/components/renderers/cpp/src/mask.cu
@@ -0,0 +1,39 @@
+#include "splatter_cuda.hpp"
+#include <ftl/cuda/mask.hpp>
+
+using ftl::cuda::TextureObject;
+using ftl::cuda::Mask;
+using ftl::render::SplatParams;
+
+#define T_PER_BLOCK 16
+
+__global__ void show_mask_kernel(
+        TextureObject<uchar4> colour,
+        TextureObject<int> mask,
+        int id, uchar4 style) {
+        
+	const int x = (blockIdx.x*blockDim.x + threadIdx.x);
+	const int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	if (x >= 0 && x < colour.width() && y >=0 && y < colour.height()) {
+        Mask m(mask.tex2D(x,y));
+
+        if (m.is(id)) {
+            colour(x,y) = style;
+        }
+    }
+}
+
+
+void ftl::cuda::show_mask(
+        TextureObject<uchar4> &colour,
+        TextureObject<int> &mask,
+        int id, uchar4 style, cudaStream_t stream) {
+	const dim3 gridSize((colour.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (colour.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
+
+    show_mask_kernel<<<gridSize, blockSize, 0, stream>>>(
+        colour, mask, id, style
+    );
+    cudaSafeCall( cudaGetLastError() );
+}
diff --git a/components/renderers/cpp/src/normals.cu b/components/renderers/cpp/src/normals.cu
index 626015ce7ad9f9471079917b0678369bbb23f5cb..14357d02ed00f212577e463051d315e6e5a90078 100644
--- a/components/renderers/cpp/src/normals.cu
+++ b/components/renderers/cpp/src/normals.cu
@@ -77,6 +77,8 @@ __global__ void smooth_normals_kernel(ftl::cuda::TextureObject<float4> norms,
     float3 nsum = make_float3(0.0f);
     float contrib = 0.0f;
 
+    output(x,y) = make_float4(0.0f,0.0f,0.0f,0.0f);
+
     if (p0.x == MINF) return;
 
     for (int v=-RADIUS; v<=RADIUS; ++v) {
@@ -120,6 +122,8 @@ __global__ void smooth_normals_kernel(ftl::cuda::TextureObject<float4> norms,
     float3 nsum = make_float3(0.0f);
     float contrib = 0.0f;
 
+    output(x,y) = make_float4(0.0f,0.0f,0.0f,0.0f);
+
     if (p0.z < camera.minDepth || p0.z > camera.maxDepth) return;
 
     for (int v=-RADIUS; v<=RADIUS; ++v) {
@@ -149,6 +153,33 @@ __global__ void smooth_normals_kernel(ftl::cuda::TextureObject<float4> norms,
     output(x,y) = (contrib > 0.0f) ? make_float4(pose*nsum, 1.0f) : make_float4(0.0f);
 }
 
+template <>
+__global__ void smooth_normals_kernel<0>(ftl::cuda::TextureObject<float4> norms,
+        ftl::cuda::TextureObject<float4> output,
+        ftl::cuda::TextureObject<int> depth,
+        ftl::rgbd::Camera camera, float3x3 pose, float smoothing) {
+    const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
+    const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+    if(x >= depth.width() || y >= depth.height()) return;
+
+    output(x,y) = make_float4(0.0f,0.0f,0.0f,0.0f);
+
+    const float3 p0 = camera.screenToCam(x,y, (float)depth.tex2D((int)x,(int)y) / 1000.0f);
+
+    if (p0.z < camera.minDepth || p0.z > camera.maxDepth) return;
+
+    // Compute dot product of normal with camera to obtain measure of how
+    // well this point faces the source camera, a measure of confidence
+    //float3 ray = camera.screenToCam(x, y, 1.0f);
+    //ray = ray / length(ray);
+    //nsum /= contrib;
+    //nsum /= length(nsum);
+
+    const float4 n = norms.tex2D((int)x,(int)y);
+    output(x,y) = n;
+}
+
 void ftl::cuda::normals(ftl::cuda::TextureObject<float4> &output,
         ftl::cuda::TextureObject<float4> &temp,
 		ftl::cuda::TextureObject<float4> &input,
@@ -166,7 +197,10 @@ void ftl::cuda::normals(ftl::cuda::TextureObject<float4> &output,
 	case 9: smooth_normals_kernel<9><<<gridSize, blockSize, 0, stream>>>(temp, output, input, camera, pose, smoothing); break;
 	case 7: smooth_normals_kernel<7><<<gridSize, blockSize, 0, stream>>>(temp, output, input, camera, pose, smoothing); break;
 	case 5: smooth_normals_kernel<5><<<gridSize, blockSize, 0, stream>>>(temp, output, input, camera, pose, smoothing); break;
-	case 3: smooth_normals_kernel<3><<<gridSize, blockSize, 0, stream>>>(temp, output, input, camera, pose, smoothing); break;
+    case 3: smooth_normals_kernel<3><<<gridSize, blockSize, 0, stream>>>(temp, output, input, camera, pose, smoothing); break;
+    case 2: smooth_normals_kernel<2><<<gridSize, blockSize, 0, stream>>>(temp, output, input, camera, pose, smoothing); break;
+    case 1: smooth_normals_kernel<1><<<gridSize, blockSize, 0, stream>>>(temp, output, input, camera, pose, smoothing); break;
+    case 0: smooth_normals_kernel<0><<<gridSize, blockSize, 0, stream>>>(temp, output, input, camera, pose, smoothing); break;
 	}
     cudaSafeCall( cudaGetLastError() );
 
diff --git a/components/renderers/cpp/src/points.cu b/components/renderers/cpp/src/points.cu
index f1f23974fb7c118e64a0d2cc0caaa1706d1d33c0..1b8dbf84ed7b1cc58e4a19c28928075d495759cc 100644
--- a/components/renderers/cpp/src/points.cu
+++ b/components/renderers/cpp/src/points.cu
@@ -97,7 +97,7 @@ __global__ void clipping_kernel(ftl::cuda::TextureObject<float> depth, ftl::rgbd
 		float4 p = make_float4(camera.screenToCam(x,y,d), 0.0f);
 
 		if (isClipped(p, clip)) {
-			depth(x,y) = 0.0f;
+			depth(x,y) = MINF;
 		}
 	}
 }
diff --git a/components/renderers/cpp/src/splat_render.cpp b/components/renderers/cpp/src/splat_render.cpp
index 9ea54526db9273a3787c510f4974157ef71e414e..4b94e29c355fdd675566c44c52876fdff7866760 100644
--- a/components/renderers/cpp/src/splat_render.cpp
+++ b/components/renderers/cpp/src/splat_render.cpp
@@ -3,6 +3,7 @@
 #include "splatter_cuda.hpp"
 #include <ftl/cuda/points.hpp>
 #include <ftl/cuda/normals.hpp>
+#include <ftl/cuda/mask.hpp>
 
 #include <opencv2/core/cuda_stream_accessor.hpp>
 
@@ -14,6 +15,7 @@ using ftl::codecs::Channels;
 using ftl::rgbd::Format;
 using cv::cuda::GpuMat;
 using std::stoul;
+using ftl::cuda::Mask;
 
 static Eigen::Affine3d create_rotation_matrix(float ax, float ay, float az) {
   Eigen::Affine3d rx =
@@ -121,6 +123,11 @@ Splatter::Splatter(nlohmann::json &config, ftl::rgbd::FrameSet *fs) : ftl::rende
 		light_ambient_ = parseCUDAColour(value("ambient", std::string("#0e0e0e")));
 	});
 
+	light_pos_ = make_float3(value("light_x", 0.3f), value("light_y", 0.2f), value("light_z", 1.0f));
+	on("light_x", [this](const ftl::config::Event &e) { light_pos_.x = value("light_x", 0.3f); });
+	on("light_y", [this](const ftl::config::Event &e) { light_pos_.y = value("light_y", 0.3f); });
+	on("light_z", [this](const ftl::config::Event &e) { light_pos_.z = value("light_z", 0.3f); });
+
 	cudaSafeCall(cudaStreamCreate(&stream_));
 }
 
@@ -313,14 +320,6 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out) {
 
 
 	if (scene_->frames.size() == 0) return false;
-
-	int aligned_source = value("aligned_source",-1);
-	if (aligned_source >= 0 && aligned_source < scene_->frames.size()) {
-		// FIXME: Output may not be same resolution as source!
-		scene_->frames[aligned_source].swapTo(Channel::Depth + Channel::Colour, out);
-		return true;
-	}
-
 	auto &g = scene_->frames[0].get<GpuMat>(Channel::Colour);
 
 	temp_.create<GpuMat>(Channel::Colour, Format<float4>(camera.width, camera.height));
@@ -334,7 +333,7 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out) {
 	// Parameters object to pass to CUDA describing the camera
 	SplatParams &params = params_;
 	params.m_flags = 0;
-	if (value("show_discontinuity_mask", false)) params.m_flags |= ftl::render::kShowDisconMask;
+	//if () params.m_flags |= ftl::render::kShowDisconMask;
 	if (value("normal_weight_colours", true)) params.m_flags |= ftl::render::kNormalWeightColours;
 	params.m_viewMatrix = MatrixConversion::toCUDA(src->getPose().cast<float>().inverse());
 	params.m_viewMatrixInverse = MatrixConversion::toCUDA(src->getPose().cast<float>());
@@ -347,6 +346,9 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out) {
 
 	//LOG(INFO) << "Render ready: " << camera.width << "," << camera.height;
 
+	bool show_discon = value("show_discontinuity_mask", false);
+	bool show_fill = value("show_filled", false);
+
 	temp_.createTexture<int>(Channel::Depth);
 	//temp_.get<GpuMat>(Channel::Normals).setTo(cv::Scalar(0.0f,0.0f,0.0f,0.0f), cvstream);
 
@@ -356,6 +358,15 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out) {
 		auto &f = scene_->frames[i];
 		auto s = scene_->sources[i];
 
+		if (f.hasChannel(Channel::Mask)) {
+			if (show_discon) {
+				ftl::cuda::show_mask(f.getTexture<uchar4>(Channel::Colour), f.getTexture<int>(Channel::Mask), Mask::kMask_Discontinuity, make_uchar4(0,0,255,255), stream_);
+			}
+			if (show_fill) {
+				ftl::cuda::show_mask(f.getTexture<uchar4>(Channel::Colour), f.getTexture<int>(Channel::Mask), Mask::kMask_Filled, make_uchar4(0,255,0,255), stream_);
+			}
+		}
+
 		// Needs to create points channel first?
 		if (!f.hasChannel(Channel::Points)) {
 			//LOG(INFO) << "Creating points... " << s->parameters().width;
@@ -380,7 +391,7 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out) {
 			ftl::cuda::normals(f.createTexture<float4>(Channel::Normals, Format<float4>(g.cols, g.rows)),
 				temp_.getTexture<float4>(Channel::Normals),
 				f.getTexture<float4>(Channel::Points),
-				3, 0.04f,
+				1, 0.02f,
 				s->parameters(), pose.getFloat3x3(), stream_);
 
 			if (norm_filter_ > -0.1f) {
@@ -389,10 +400,33 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out) {
 		}
 	}
 
+	Channel chan = src->getChannel();
+
+	int aligned_source = value("aligned_source",-1);
+	if (aligned_source >= 0 && aligned_source < scene_->frames.size()) {
+		// FIXME: Output may not be same resolution as source!
+		cudaSafeCall(cudaStreamSynchronize(stream_));
+		scene_->frames[aligned_source].copyTo(Channel::Depth + Channel::Colour, out);
+
+		if (chan == Channel::Normals) {
+			// Convert normal to single float value
+			temp_.create<GpuMat>(Channel::Colour, Format<uchar4>(camera.width, camera.height)).setTo(cv::Scalar(0,0,0,0), cvstream);
+			ftl::cuda::normal_visualise(scene_->frames[aligned_source].getTexture<float4>(Channel::Normals), temp_.createTexture<uchar4>(Channel::Colour),
+					light_pos_,
+					light_diffuse_,
+					light_ambient_, stream_);
+
+			// Put in output as single float
+			cv::cuda::swap(temp_.get<GpuMat>(Channel::Colour), out.create<GpuMat>(Channel::Normals));
+			out.resetTexture(Channel::Normals);
+		}
+
+		return true;
+	}
+
 	_dibr(stream_);
 	_renderChannel(out, Channel::Colour, Channel::Colour, stream_);
 	
-	Channel chan = src->getChannel();
 	if (chan == Channel::Depth)
 	{
 		//temp_.get<GpuMat>(Channel::Depth).convertTo(out.get<GpuMat>(Channel::Depth), CV_32F, 1.0f / 1000.0f, cvstream);
@@ -403,9 +437,9 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out) {
 		_renderChannel(out, Channel::Normals, Channel::Normals, stream_);
 
 		// Convert normal to single float value
-		temp_.create<GpuMat>(Channel::Colour, Format<uchar4>(camera.width, camera.height));
+		temp_.create<GpuMat>(Channel::Colour, Format<uchar4>(camera.width, camera.height)).setTo(cv::Scalar(0,0,0,0), cvstream);
 		ftl::cuda::normal_visualise(out.getTexture<float4>(Channel::Normals), temp_.createTexture<uchar4>(Channel::Colour),
-				make_float3(0.3f, 0.2f, 1.0f),
+				light_pos_,
 				light_diffuse_,
 				light_ambient_, stream_);
 
diff --git a/components/renderers/cpp/src/splatter.cu b/components/renderers/cpp/src/splatter.cu
index cd69098ea6f80c269f66deb935e652ffc33398c8..3a9270e542ee58e836410e0de598cbd4ab9e269c 100644
--- a/components/renderers/cpp/src/splatter.cu
+++ b/components/renderers/cpp/src/splatter.cu
@@ -44,8 +44,10 @@ using ftl::cuda::warpSum;
 	// Compile time enable/disable of culling back facing points
 	if (CULLING) {
 		float3 ray = params.m_viewMatrixInverse.getFloat3x3() * params.camera.screenToCam(x,y,1.0f);
-		ray = ray / length(ray);
-		float3 n = make_float3(normals.tex2D((int)x,(int)y));
+        ray = ray / length(ray);
+        float4 n4 = normals.tex2D((int)x,(int)y);
+        if (n4.w == 0.0f) return;
+		float3 n = make_float3(n4);
 		float l = length(n);
 		if (l == 0) {
 			return;
diff --git a/components/renderers/cpp/src/splatter_cuda.hpp b/components/renderers/cpp/src/splatter_cuda.hpp
index 684b0de5597f79ab8df78188d6552c08b5734c9a..42ec8a0c7dbca51b1c3fe7e22ba2c38eb2447169 100644
--- a/components/renderers/cpp/src/splatter_cuda.hpp
+++ b/components/renderers/cpp/src/splatter_cuda.hpp
@@ -39,6 +39,11 @@ namespace cuda {
 		ftl::cuda::TextureObject<B> &out,
 		ftl::cuda::TextureObject<float> &contribs,
 		cudaStream_t stream);
+
+	void show_mask(
+        ftl::cuda::TextureObject<uchar4> &colour,
+		ftl::cuda::TextureObject<int> &mask,
+        int id, uchar4 style, cudaStream_t stream);
 }
 }
 
diff --git a/components/rgbd-sources/include/ftl/rgbd/frame.hpp b/components/rgbd-sources/include/ftl/rgbd/frame.hpp
index 5e38fe5cc319e2ac9930199386c3c25ec55b23df..b08673c973f42253670b51a3da8e28735406c952 100644
--- a/components/rgbd-sources/include/ftl/rgbd/frame.hpp
+++ b/components/rgbd-sources/include/ftl/rgbd/frame.hpp
@@ -59,6 +59,11 @@ public:
 
 	void swapChannels(ftl::codecs::Channel, ftl::codecs::Channel);
 
+	/**
+	 * Does a host or device memory copy into the given frame.
+	 */
+	void copyTo(ftl::codecs::Channels, Frame &);
+
 	/**
 	 * Create a channel with a given format. This will discard any existing
 	 * data associated with the channel and ensure all data structures and
diff --git a/components/rgbd-sources/src/frame.cpp b/components/rgbd-sources/src/frame.cpp
index 851e75af76b96543f1f404303b50a2e3682652ae..212ca55375993789bb832e678fe4f1a8f22d5c48 100644
--- a/components/rgbd-sources/src/frame.cpp
+++ b/components/rgbd-sources/src/frame.cpp
@@ -85,6 +85,19 @@ void Frame::swapChannels(ftl::codecs::Channel a, ftl::codecs::Channel b) {
 	m1.tex = std::move(temptex);
 }
 
+void Frame::copyTo(ftl::codecs::Channels channels, Frame &f) {
+	f.reset();
+
+	// For all channels in this frame object
+	for (auto c : channels_) {
+		// Should we copy this channel?
+		if (channels.has(c)) {
+			if (isCPU(c)) get<cv::Mat>(c).copyTo(f.create<cv::Mat>(c));
+			else get<cv::cuda::GpuMat>(c).copyTo(f.create<cv::cuda::GpuMat>(c));
+		}
+	}
+}
+
 template<> cv::Mat& Frame::get(ftl::codecs::Channel channel) {
 	if (channel == Channel::None) {
 		DLOG(WARNING) << "Cannot get the None channel from a Frame";