diff --git a/components/operators/src/smoothing_cuda.hpp b/components/operators/include/ftl/operators/cuda/smoothing_cuda.hpp
similarity index 93%
rename from components/operators/src/smoothing_cuda.hpp
rename to components/operators/include/ftl/operators/cuda/smoothing_cuda.hpp
index c44c7787b9fe8dd18d6e148d96c8d71803ba368c..8c99b5e9f2dc130bcc236b8f4cdea9c2932ab7f4 100644
--- a/components/operators/src/smoothing_cuda.hpp
+++ b/components/operators/include/ftl/operators/cuda/smoothing_cuda.hpp
@@ -17,6 +17,15 @@ void mls_smooth(
 		const ftl::rgbd::Camera &camera,
 		cudaStream_t stream);
 
+void mls_smooth(
+		ftl::cuda::TextureObject<half4> &normals_in,   // read: normals to be smoothed
+		ftl::cuda::TextureObject<half4> &normals_out,  // written: smoothed normals
+		ftl::cuda::TextureObject<float> &depth_in,     // read only; this overload has no depth output
+		float smoothing,                               // forwarded to the spatial weighting function
+		int radius,                                    // window half-width; kernels are instantiated for 1..5
+		const ftl::rgbd::Camera &camera,               // supplies depth limits and back-projection
+		cudaStream_t stream);                          // stream the kernel is launched on
+
 void colour_mls_smooth(
 		ftl::cuda::TextureObject<half4> &normals_in,
 		ftl::cuda::TextureObject<half4> &normals_out,
diff --git a/components/operators/src/fusion/mvmls.cpp b/components/operators/src/fusion/mvmls.cpp
index 38328f33ea724a1b687163bc961c2ccec1d6b4fa..15742b05544fea94846db4927ee0434a22c20967 100644
--- a/components/operators/src/fusion/mvmls.cpp
+++ b/components/operators/src/fusion/mvmls.cpp
@@ -1,5 +1,5 @@
 #include <ftl/operators/mvmls.hpp>
-#include "smoothing_cuda.hpp"
+#include <ftl/operators/cuda/smoothing_cuda.hpp>
 #include <ftl/utility/matrix_conversion.hpp>
 #include "mvmls_cuda.hpp"
 #include <ftl/cuda/normals.hpp>
diff --git a/components/operators/src/mls.cu b/components/operators/src/mls.cu
index 55813446ab7a62d0913b96d107187b2bb62f0543..d677de67ff7d278c4884739318be55a06b8df04b 100644
--- a/components/operators/src/mls.cu
+++ b/components/operators/src/mls.cu
@@ -1,4 +1,4 @@
-#include "smoothing_cuda.hpp"
+#include <ftl/operators/cuda/smoothing_cuda.hpp>
 
 #include <ftl/cuda/weighting.hpp>
 
@@ -32,6 +32,7 @@ using ftl::cuda::TextureObject;
 
 	float d0 = depth_in.tex2D(x, y);
 	depth_out(x,y) = d0;
+	normals_out(x,y) = normals_in(x,y);
 	if (d0 < camera.minDepth || d0 > camera.maxDepth) return;
 	float3 X = camera.screenToCam((int)(x),(int)(y),d0);
 
@@ -100,6 +101,95 @@ void ftl::cuda::mls_smooth(
 	#endif
 }
 
+/*
+ * Smooth normals using Moving Least Squares. Depth is read, never written.
+ *
+ * Launch layout: 2D grid, one thread per pixel of depth_in; threads that fall
+ * outside the image return immediately.
+ *
+ * normals_in   Source normals, sampled over the search window.
+ * normals_out  Receives the smoothed unit normal (fourth component 0). Pixels
+ *              that cannot be smoothed keep a copy of normals_in.
+ * depth_in     Depth used to back-project pixels into camera space.
+ * smoothing    Third argument handed to ftl::cuda::spatialWeighting.
+ * camera       Supplies minDepth/maxDepth and screenToCam().
+ *
+ * SEARCH_RADIUS is the half-width of the square window, in pixels.
+ */
+ template <int SEARCH_RADIUS>
+ __global__ void mls_smooth_kernel(
+		TextureObject<half4> normals_in,
+		TextureObject<half4> normals_out,
+		TextureObject<float> depth_in,        // Virtual depth map
+		float smoothing,
+		ftl::rgbd::Camera camera) {
+
+	const int x = blockIdx.x*blockDim.x + threadIdx.x;
+	const int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	if (x < 0 || y < 0 || x >= depth_in.width() || y >= depth_in.height()) return;
+
+	// Default output is the unsmoothed normal, so every early exit below still
+	// leaves a defined value at this pixel.
+	normals_out(x,y) = normals_in(x,y);
+
+	const float d0 = depth_in.tex2D(x, y);
+	if (d0 < camera.minDepth || d0 > camera.maxDepth) return;
+	const float3 X = camera.screenToCam(x, y, d0);
+
+	// Weighted sum of neighbouring normals. It is not divided by the total
+	// weight because the final normalisation removes any positive scale.
+	float3 nX = make_float3(0.0f,0.0f,0.0f);
+
+	for (int v=-SEARCH_RADIUS; v<=SEARCH_RADIUS; ++v) {
+	for (int u=-SEARCH_RADIUS; u<=SEARCH_RADIUS; ++u) {
+		const float d = depth_in.tex2D(x+u, y+v);
+		if (d < camera.minDepth || d > camera.maxDepth) continue;
+
+		// Point and normal of neighbour
+		const float3 Xi = camera.screenToCam(x+u, y+v, d);
+		const float3 Ni = make_float3(normals_in.tex2D(x+u, y+v));
+
+		// Weight by the camera-space points of centre and neighbour
+		nX += Ni * ftl::cuda::spatialWeighting(X,Xi,smoothing);
+	}
+	}
+
+	// A zero or NaN sum has no direction: dividing by its length would store
+	// NaN, so keep the copied input normal instead.
+	const float len = length(nX);
+	if (!(len > 0.0f)) return;
+
+	normals_out(x,y) = make_half4(nX / len, 0.0f);
+}
+
+// Host launcher for mls_smooth_kernel: one thread per depth_in pixel, on `stream`.
+// radius is clamped to 1..5, the only kernel instantiations available.
+void ftl::cuda::mls_smooth(
+		ftl::cuda::TextureObject<half4> &normals_in,
+		ftl::cuda::TextureObject<half4> &normals_out,
+		ftl::cuda::TextureObject<float> &depth_in,
+		float smoothing,
+		int radius,
+		const ftl::rgbd::Camera &camera,
+		cudaStream_t stream) {
+	const dim3 gridSize((depth_in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
+	const int r = (radius < 1) ? 1 : (radius > 5) ? 5 : radius;  // never fall through without a launch
+	switch (r) {
+		case 5: mls_smooth_kernel<5><<<gridSize, blockSize, 0, stream>>>(normals_in, normals_out, depth_in, smoothing, camera); break;
+		case 4: mls_smooth_kernel<4><<<gridSize, blockSize, 0, stream>>>(normals_in, normals_out, depth_in, smoothing, camera); break;
+		case 3: mls_smooth_kernel<3><<<gridSize, blockSize, 0, stream>>>(normals_in, normals_out, depth_in, smoothing, camera); break;
+		case 2: mls_smooth_kernel<2><<<gridSize, blockSize, 0, stream>>>(normals_in, normals_out, depth_in, smoothing, camera); break;
+		case 1: mls_smooth_kernel<1><<<gridSize, blockSize, 0, stream>>>(normals_in, normals_out, depth_in, smoothing, camera); break;
+	}
+	cudaSafeCall( cudaGetLastError() );  // surfaces launch-configuration errors
+
+	#ifdef _DEBUG
+	cudaSafeCall(cudaDeviceSynchronize());
+	#endif
+}
+
 
 // ===== Colour MLS Smooth =====================================================
 
diff --git a/components/operators/src/smoothchan.cu b/components/operators/src/smoothchan.cu
index ea40854fccd7579fc3973a6acb41543ea7bc75d5..2766e6d23ab37f92e3a56bc0938a91df9ac45bd0 100644
--- a/components/operators/src/smoothchan.cu
+++ b/components/operators/src/smoothchan.cu
@@ -1,4 +1,4 @@
-#include "smoothing_cuda.hpp"
+#include <ftl/operators/cuda/smoothing_cuda.hpp>
 
 #include <ftl/cuda/weighting.hpp>
 
diff --git a/components/operators/src/smoothing.cpp b/components/operators/src/smoothing.cpp
index 347b6b7754b49363be98578a94e1d049b6ffec1a..44fcea6b0f58ad956f4c532c3022eb888aa1e2a7 100644
--- a/components/operators/src/smoothing.cpp
+++ b/components/operators/src/smoothing.cpp
@@ -1,5 +1,5 @@
 #include <ftl/operators/smoothing.hpp>
-#include "smoothing_cuda.hpp"
+#include <ftl/operators/cuda/smoothing_cuda.hpp>
 
 #define LOGURU_REPLACE_GLOG 1
 #include <loguru.hpp>
diff --git a/components/operators/src/smoothing.cu b/components/operators/src/smoothing.cu
index a2b7377fba011243c08cf5347f3dea4800b8ab51..a1184475317a174e65c30ea087a4922f20e3ccb0 100644
--- a/components/operators/src/smoothing.cu
+++ b/components/operators/src/smoothing.cu
@@ -1,4 +1,4 @@
-#include "smoothing_cuda.hpp"
+#include <ftl/operators/cuda/smoothing_cuda.hpp>
 
 #include <ftl/cuda/weighting.hpp>
 
diff --git a/components/renderers/cpp/src/CUDARender.cpp b/components/renderers/cpp/src/CUDARender.cpp
index 418ffd9132cec9e3013f8d9ca0934f9aafa7edbe..d4f7cb4515ed9514e3f2e0b8abaf683b0c9f53e7 100644
--- a/components/renderers/cpp/src/CUDARender.cpp
+++ b/components/renderers/cpp/src/CUDARender.cpp
@@ -6,6 +6,7 @@
 #include <ftl/operators/cuda/mask.hpp>
 #include <ftl/render/colouriser.hpp>
 #include <ftl/cuda/transform.hpp>
+#include <ftl/operators/cuda/smoothing_cuda.hpp>
 
 #include <ftl/cuda/colour_cuda.hpp>
 
@@ -241,12 +242,12 @@ void CUDARender::_mesh(ftl::rgbd::Frame &out, const Eigen::Matrix4d &t, cudaStre
 
 	bool do_blend = value("mesh_blend", false);
 	float blend_alpha = value("blend_alpha", 0.02f);
-	if (do_blend) {
-		temp_.set<GpuMat>(Channel::Depth).setTo(cv::Scalar(0x7FFFFFFF), cvstream);
-		temp_.set<GpuMat>(Channel::Weights).setTo(cv::Scalar(0.0f), cvstream);
-	} else {
+	//if (do_blend) {
+	//	temp_.set<GpuMat>(Channel::Depth).setTo(cv::Scalar(0x7FFFFFFF), cvstream);
+	//	temp_.set<GpuMat>(Channel::Weights).setTo(cv::Scalar(0.0f), cvstream);
+	//} else {
 		temp_.set<GpuMat>(Channel::Depth2).setTo(cv::Scalar(0x7FFFFFFF), cvstream);
-	}
+	//}
 
 	int valid_count = 0;
 
@@ -307,9 +308,9 @@ void CUDARender::_mesh(ftl::rgbd::Frame &out, const Eigen::Matrix4d &t, cudaStre
 		}
 
 		// Must reset depth channel if blending
-		if (do_blend) {
-			temp_.set<GpuMat>(Channel::Depth).setTo(cv::Scalar(0x7FFFFFFF), cvstream);
-		}
+		//if (do_blend) {
+		//	temp_.set<GpuMat>(Channel::Depth).setTo(cv::Scalar(0x7FFFFFFF), cvstream);
+		//}
 
 		depth_out_.to_gpumat().setTo(cv::Scalar(1000.0f), cvstream);
 
@@ -324,7 +325,7 @@ void CUDARender::_mesh(ftl::rgbd::Frame &out, const Eigen::Matrix4d &t, cudaStre
 		// TODO: Reproject here
 		// And merge based upon weight adjusted distances
 
-		if (do_blend) {
+		/*if (do_blend) {
 			// Blend this sources mesh with previous meshes
 			ftl::cuda::mesh_blender(
 				temp_.getTexture<int>(Channel::Depth),
@@ -338,7 +339,7 @@ void CUDARender::_mesh(ftl::rgbd::Frame &out, const Eigen::Matrix4d &t, cudaStre
 				blend_alpha,
 				stream
 			);
-		}
+		}*/
 	}
 
 	if (valid_count == 0) return;
@@ -346,7 +347,7 @@ void CUDARender::_mesh(ftl::rgbd::Frame &out, const Eigen::Matrix4d &t, cudaStre
 	// Convert from int depth to float depth
 	//temp_.get<GpuMat>(Channel::Depth2).convertTo(out.get<GpuMat>(Channel::Depth), CV_32F, 1.0f / 100000.0f, cvstream);
 
-	if (do_blend) {
+	/*if (do_blend) {
 		ftl::cuda::dibr_normalise(
 			//out.getTexture<float>(_getDepthChannel()),
 			//out.getTexture<float>(_getDepthChannel()),
@@ -355,54 +356,79 @@ void CUDARender::_mesh(ftl::rgbd::Frame &out, const Eigen::Matrix4d &t, cudaStre
 			temp_.getTexture<float>(Channel::Weights),
 			stream_
 		);
-	} else {
+	} else {*/
 		//ftl::cuda::merge_convert_depth(temp_.getTexture<int>(Channel::Depth2), out.createTexture<float>(_getDepthChannel()), 1.0f / 100000.0f, stream_);
 		ftl::cuda::merge_convert_depth(temp_.createTexture<int>(Channel::Depth2), depth_out_, 1.0f / 100000.0f, stream_);
-	}
+	//}
 
 	// Now merge new render to any existing frameset render, detecting collisions
 	ftl::cuda::touch_merge(depth_out_, out.createTexture<float>(_getDepthChannel()), collisions_, 1024, touch_dist_, stream_);
 
 	// For each source depth map, verify results
-	for (size_t i=0; i < scene_->frames.size(); ++i) {
-		//if (!scene_->hasFrame(i)) continue;
-		auto &f = scene_->frames[i].cast<ftl::rgbd::Frame>();
-		//auto *s = scene_->sources[i];
-
-		if (!f.has(Channel::Colour)) {
-			//LOG(ERROR) << "Missing required channel";
-			continue;
-		}
+	if (value("carve_result", true)) {
+		for (size_t i=0; i < scene_->frames.size(); ++i) {
+			//if (!scene_->hasFrame(i)) continue;
+			auto &f = scene_->frames[i].cast<ftl::rgbd::Frame>();
+			//auto *s = scene_->sources[i];
+
+			if (!f.has(Channel::Colour)) {
+				//LOG(ERROR) << "Missing required channel";
+				continue;
+			}
 
-		// We have the needed depth data?
-		if (use_depth && !f.hasOwn(Channel::Depth) && !f.hasOwn(Channel::GroundTruth)) {
-			continue;
-		}
+			// We have the needed depth data?
+			if (use_depth && !f.hasOwn(Channel::Depth) && !f.hasOwn(Channel::GroundTruth)) {
+				continue;
+			}
 
-		//auto pose = MatrixConversion::toCUDA(t.cast<float>() * f.getPose().cast<float>());
-		auto transformR = MatrixConversion::toCUDA(f.getPose().cast<float>().inverse() * t.cast<float>().inverse()) * poseInverse_;
-		auto transform = pose_ * MatrixConversion::toCUDA(t.cast<float>() * f.getPose().cast<float>());
+			//auto pose = MatrixConversion::toCUDA(t.cast<float>() * f.getPose().cast<float>());
+			auto transformR = MatrixConversion::toCUDA(f.getPose().cast<float>().inverse() * t.cast<float>().inverse()) * poseInverse_;
+			auto transform = pose_ * MatrixConversion::toCUDA(t.cast<float>() * f.getPose().cast<float>());
 
-		ftl::cuda::reverse_verify(
-			out.getTexture<float>(_getDepthChannel()),
-			f.getTexture<float>(Channel::Depth),
-			transformR,
-			transform,
-			params_.camera,
-			f.getLeft(),
-			stream_
-		);
+			ftl::cuda::reverse_verify(
+				out.getTexture<float>(_getDepthChannel()),
+				f.getTexture<float>(Channel::Depth),
+				transformR,
+				transform,
+				params_.camera,
+				f.getLeft(),
+				stream_
+			);
+		}
 	}
 
 	//filters_->filter(out, src, stream);
 
 	// Generate normals for final virtual image
-	ftl::cuda::normals(
+	/*ftl::cuda::normals(
 		out.createTexture<half4>(_getNormalsChannel()),
 		temp_.createTexture<half4>(Channel::Normals),
 		out.getTexture<float>(_getDepthChannel()),
 		value("normal_radius", 1), value("normal_smoothing", 0.02f),
-		params_.camera, pose_.getFloat3x3(), poseInverse_.getFloat3x3(), stream_);
+		params_.camera, pose_.getFloat3x3(), poseInverse_.getFloat3x3(), stream_);*/
+
+	ftl::cuda::normals(
+		//temp_.createTexture<half4>(Channel::Normals),
+		out.createTexture<half4>(_getNormalsChannel()),
+		out.getTexture<float>(_getDepthChannel()),
+		params_.camera, stream_);
+
+	/*ftl::cuda::mls_smooth(
+		temp_.createTexture<half4>(Channel::Normals),
+		out.createTexture<half4>(_getNormalsChannel()),
+		out.getTexture<float>(_getDepthChannel()),
+		//out.getTexture<float>(_getDepthChannel()),
+		value("mls_smooth", 0.01f),
+		value("mls_radius", 2),
+		params_.camera,
+		stream_
+	);*/
+
+	ftl::cuda::transform_normals(
+		out.createTexture<half4>(_getNormalsChannel()),
+		poseInverse_.getFloat3x3(),
+		stream_
+	);
 }
 
 void CUDARender::_allocateChannels(ftl::rgbd::Frame &out, ftl::codecs::Channel chan) {
diff --git a/components/renderers/cpp/src/triangle_render.cu b/components/renderers/cpp/src/triangle_render.cu
index 40ee1250e2b05e90c8f4d8122022493925ced613..6bc257aa01a347f642249234c6503da9b69ffa87 100644
--- a/components/renderers/cpp/src/triangle_render.cu
+++ b/components/renderers/cpp/src/triangle_render.cu
@@ -241,19 +241,24 @@ __global__ void reverse_check_kernel(
 
 	float d = depth_in.tex2D(x,y);
 	
-	float3 campos = transformR * vintrin.screenToCam(x,y,d);
-	int2 spos = ointrin.camToScreen<int2>(campos);
-	int ox = spos.x;
-	int oy = spos.y;
-
-	if (campos.z > 0.0f && ox >= 0 && ox < ointrin.width && oy >= 0 && oy < ointrin.height) {
-		float d2 = depth_original.tex2D(ox,oy);
-
-		if (d2 < ointrin.maxDepth && d2 - campos.z > d2*0.001f) {
-			//printf("Original %f, %f\n", d2, campos.z);
-			//depth_in(x,y) = 1.5f; //(transform * ointrin.screenToCam(ox,oy,d2)).z;
-			d = 0.0f;
-		}
+	// FIXME: This is dangerous, need to check through alternates instead
+	while (true) {
+		float3 campos = transformR * vintrin.screenToCam(x,y,d);
+		int2 spos = ointrin.camToScreen<int2>(campos);
+		int ox = spos.x;
+		int oy = spos.y;
+
+		if (campos.z > 0.0f && ox >= 0 && ox < ointrin.width && oy >= 0 && oy < ointrin.height) {
+			float d2 = depth_original.tex2D(ox,oy);
+
+			if (!(d2 < ointrin.maxDepth && d2 - campos.z > d2*0.001f)) {
+				//printf("Original %f, %f\n", d2, campos.z);
+				//depth_in(x,y) = 1.5f; //(transform * ointrin.screenToCam(ox,oy,d2)).z;
+				//d = 0.0f;
+				break;
+			}
+			d += 0.002f;
+		} else break;
 	}
 
 	depth_in(x,y) = d;