diff --git a/components/renderers/cpp/include/ftl/render/CUDARender.hpp b/components/renderers/cpp/include/ftl/render/CUDARender.hpp
index 7369d5c9cc9762159b6aaf241f1768ac908fda3c..59eb1377685af76a5eed1d78314dd5c9c76b698d 100644
--- a/components/renderers/cpp/include/ftl/render/CUDARender.hpp
+++ b/components/renderers/cpp/include/ftl/render/CUDARender.hpp
@@ -59,6 +59,9 @@ class CUDARender : public ftl::render::FSRenderer {
 	ftl::cuda::TextureObject<int> contrib_;
 	//ftl::cuda::TextureObject<half4> normals_;
 	cv::cuda::GpuMat colour_scale_;
+	cv::cuda::GpuMat mls_contrib_;
+	cv::cuda::GpuMat mls_centroid_;
+	cv::cuda::GpuMat mls_normals_;
 
 	std::list<cv::cuda::GpuMat*> screen_buffers_;
 	std::list<cv::cuda::GpuMat*> depth_buffers_;
diff --git a/components/renderers/cpp/src/CUDARender.cpp b/components/renderers/cpp/src/CUDARender.cpp
index 9ee7b0457d13acd5cffa8ea33789224a1c6d235a..9dab392f3b651a6250e5d455986d8d8efff0348c 100644
--- a/components/renderers/cpp/src/CUDARender.cpp
+++ b/components/renderers/cpp/src/CUDARender.cpp
@@ -308,11 +308,11 @@ void CUDARender::_mesh(ftl::rgbd::Frame &out, const Eigen::Matrix4d &t, cudaStre
 				);
 			}
 
-			ftl::cuda::apply_colour_scaling(colour_scale_, f.create<cv::cuda::GpuMat>(Channel::Colour), 3, stream_);
+			//ftl::cuda::apply_colour_scaling(colour_scale_, f.create<cv::cuda::GpuMat>(Channel::Colour), 3, stream_);
 		}
 	}
 
-	if (!colour_scale_.empty()) ftl::utility::show_image(colour_scale_, "CScale", 1.0f, ftl::utility::ImageVisualisation::HEAT_MAPPED);
+	//if (!colour_scale_.empty()) ftl::utility::show_image(colour_scale_, "CScale", 1.0f, ftl::utility::ImageVisualisation::HEAT_MAPPED);
 
 	// For each source depth map
 	for (size_t i=0; i < scene_->frames.size(); ++i) {
@@ -424,17 +424,21 @@ void CUDARender::_mesh(ftl::rgbd::Frame &out, const Eigen::Matrix4d &t, cudaStre
 	// Now merge new render to any existing frameset render, detecting collisions
 	ftl::cuda::touch_merge(depth_out_, out.createTexture<float>(_getDepthChannel()), collisions_, 1024, touch_dist_, stream_);
 
-	// For each source depth map, verify results
-	/*if (value("post_carve_result", false)) {
+	// Generate actual depth map using MLS with mesh as estimate
+	float mls_smoothing = value("mls_smooth", 0.005f);
+	if (value("mls_full", true)) {
+		// Clear buffers
+		mls_centroid_.create(params_.camera.height, params_.camera.width, CV_32FC4);
+		mls_contrib_.create(params_.camera.height, params_.camera.width, CV_32F);
+		mls_normals_.create(params_.camera.height, params_.camera.width, CV_16FC4);
+		mls_centroid_.setTo(cv::Scalar(0,0,0,0), cvstream);
+		mls_contrib_.setTo(cv::Scalar(0), cvstream);
+		mls_normals_.setTo(cv::Scalar(0,0,0,0), cvstream);
+
 		for (size_t i=0; i < scene_->frames.size(); ++i) {
-			//if (!scene_->hasFrame(i)) continue;
 			auto &f = scene_->frames[i].cast<ftl::rgbd::Frame>();
-			//auto *s = scene_->sources[i];
 
-			if (!f.has(Channel::Colour)) {
-				//LOG(ERROR) << "Missing required channel";
-				continue;
-			}
+			if (!f.has(Channel::Colour)) continue;
 
 			// We have the needed depth data?
 			if (use_depth && !f.hasOwn(Channel::Depth) && !f.hasOwn(Channel::GroundTruth)) {
@@ -442,54 +446,63 @@ void CUDARender::_mesh(ftl::rgbd::Frame &out, const Eigen::Matrix4d &t, cudaStre
 			}
 
 			//auto pose = MatrixConversion::toCUDA(t.cast<float>() * f.getPose().cast<float>());
+			// VCAM to Original
 			auto transformR = MatrixConversion::toCUDA(f.getPose().cast<float>().inverse() * t.cast<float>().inverse()) * poseInverse_;
-			//auto transform = pose_ * MatrixConversion::toCUDA(t.cast<float>() * f.getPose().cast<float>());
+			// Original to VCAM
+			auto transform = pose_ * MatrixConversion::toCUDA(t.cast<float>() * f.getPose().cast<float>());
 
-			ftl::cuda::depth_carve(
-				out.create<cv::cuda::GpuMat>(_getDepthChannel()),
+			ftl::cuda::mls_gather(
+				f.get<cv::cuda::GpuMat>(Channel::Normals),
+				mls_normals_,
+				out.get<cv::cuda::GpuMat>(_getDepthChannel()),
 				f.get<cv::cuda::GpuMat>(Channel::Depth),
+				mls_centroid_,
+				mls_contrib_,
+				mls_smoothing,
 				transformR,
+				transform,
 				params_.camera,
 				f.getLeft(),
 				stream_
 			);
 		}
-	}*/
-
-	//filters_->filter(out, src, stream);
-
-	// Generate normals for final virtual image
-	/*ftl::cuda::normals(
-		out.createTexture<half4>(_getNormalsChannel()),
-		temp_.createTexture<half4>(Channel::Normals),
-		out.getTexture<float>(_getDepthChannel()),
-		value("normal_radius", 1), value("normal_smoothing", 0.02f),
-		params_.camera, pose_.getFloat3x3(), poseInverse_.getFloat3x3(), stream_);*/
-
-	float mls_smoothing = value("mls_smooth", 0.01f);
-	int mls_radius = value("mls_radius", 0);
 
-	if (mls_radius == 0) {
-		ftl::cuda::normals(
-			out.createTexture<half4>(_getNormalsChannel()),
-			out.getTexture<float>(_getDepthChannel()),
-			params_.camera, stream_);
-	} else {
-		ftl::cuda::normals(
-			temp_.createTexture<half4>(Channel::Normals),
-			out.getTexture<float>(_getDepthChannel()),
-			params_.camera, stream_);
-
-		ftl::cuda::mls_smooth(
-			temp_.createTexture<half4>(Channel::Normals),
-			out.createTexture<half4>(_getNormalsChannel()),
-			out.getTexture<float>(_getDepthChannel()),
-			//out.getTexture<float>(_getDepthChannel()),
-			value("mls_smooth", 0.01f),
-			value("mls_radius", 2),
+		// Now reduce MLS results to new depth+normals
+		ftl::cuda::mls_reduce(
+			mls_centroid_,
+			mls_normals_,
+			mls_contrib_,
+			out.create<cv::cuda::GpuMat>(_getNormalsChannel()),
+			out.create<cv::cuda::GpuMat>(_getDepthChannel()),
 			params_.camera,
 			stream_
 		);
+
+	} else {
+		int mls_radius = value("mls_radius", 0);
+
+		if (mls_radius == 0) {
+			ftl::cuda::normals(
+				out.createTexture<half4>(_getNormalsChannel()),
+				out.getTexture<float>(_getDepthChannel()),
+				params_.camera, stream_);
+		} else {
+			ftl::cuda::normals(
+				temp_.createTexture<half4>(Channel::Normals),
+				out.getTexture<float>(_getDepthChannel()),
+				params_.camera, stream_);
+
+			ftl::cuda::mls_smooth(
+				temp_.createTexture<half4>(Channel::Normals),
+				out.createTexture<half4>(_getNormalsChannel()),
+				out.getTexture<float>(_getDepthChannel()),
+				//out.getTexture<float>(_getDepthChannel()),
+				value("mls_smooth", 0.01f),
+				value("mls_radius", 2),
+				params_.camera,
+				stream_
+			);
+		}
 	}
 
 	ftl::cuda::transform_normals(
diff --git a/components/renderers/cpp/src/carver.cu b/components/renderers/cpp/src/carver.cu
index 79c78fc36823f16f3138edfb14eef738161940fa..edc4a5315cdf6df8382895d524735a4039fa6520 100644
--- a/components/renderers/cpp/src/carver.cu
+++ b/components/renderers/cpp/src/carver.cu
@@ -1,5 +1,6 @@
 #include "carver.hpp"
 #include <cudatl/fixed.hpp>
+#include <ftl/cuda/weighting.hpp>
 
 __device__ inline float depthErrorCoef(const ftl::rgbd::Camera &cam, float disps=1.0f) {
 	return disps / (cam.baseline*cam.fx);
@@ -103,9 +104,9 @@ __global__ void reverse_check_kernel(
 	}
 
 	// We found a match, so do a colour check
-	float idiff = 0.0f;
+	//float idiff = 127.0f;
 	//if (match) {
-		// Generate colour scaling
+	/*	// Generate colour scaling
 		const float ximgscale = float(cwidth) / float(ointrin.width);
 		ox = float(ox) * ximgscale;
 		const float yimgscale = float(cheight) / float(ointrin.height);
@@ -126,10 +127,10 @@ __global__ void reverse_check_kernel(
 		//const float scaleZ = (vcol.z == 0) ? 1.0f : float(ocol.z) / float(vcol.z);
 		//scale = (0.2126f*scaleZ + 0.7152f*scaleY + 0.0722f*scaleX);
 	//}
-	colour_scale[x+pitch*y] = int8_t(max(-127.0f,min(127.0f,idiff)));
+	colour_scale[x+pitch*y] = int8_t(max(-127.0f,min(127.0f,idiff)));*/
 
 	// Too much carving means just outright remove the point.
-	depth_in[y*pitch4+x] = (count < 0 || fabsf(idiff) > 50.0f) ? 0.0f : d;
+	depth_in[y*pitch4+x] = (count < 0) ? 0.0f : d;
 }
 
 void ftl::cuda::depth_carve(
@@ -170,6 +171,193 @@ void ftl::cuda::depth_carve(
 	cudaSafeCall( cudaGetLastError() );
 }
 
+// ==== Multi image MLS ========================================================
+
+/*
+ * Gather points for Moving Least Squares, from each source image. One launch adds
+ * one source image's weighted points, normals and weights to the per-pixel sums.
+ */
+template <int SEARCH_RADIUS>
+__global__ void mls_gather_kernel(
+	const half4* __restrict__ normals_in,   // Source image normals
+	half4* __restrict__ normals_out,        // Sum of weighted normals, read and rewritten
+	const float* __restrict__ depth_origin, // Depth estimate per origin pixel
+	const float* __restrict__ depth_in,     // Source image depth
+	float4* __restrict__ centroid_out,      // Sum of weighted points, read and rewritten
+	float* __restrict__ contrib_out,        // Sum of weights, read and rewritten
+	float smoothing,                        // Passed to spatialWeighting
+	float4x4 o_2_in,                        // Origin camera space to source camera space
+	float4x4 in_2_o,                        // Source camera space to origin camera space
+	ftl::rgbd::Camera camera_origin,
+	ftl::rgbd::Camera camera_in,
+	int npitch_out,                         // Row pitch of normals_out, in half4 elements
+	int cpitch_out,                         // Row pitch of centroid_out, in float4 elements
+	int wpitch_out,                         // Row pitch of contrib_out, in floats
+	int dpitch_o,                           // Row pitch of depth_origin, in floats
+	int dpitch_i,                           // Row pitch of depth_in, in floats
+	int npitch_in                           // Row pitch of normals_in, in half4 elements
+) {
+	const int x = blockIdx.x*blockDim.x + threadIdx.x;
+	const int y = blockIdx.y*blockDim.y + threadIdx.y;
+	if (x < 0 || y < 0 || x >= camera_origin.width || y >= camera_origin.height) return;
+
+	// Sums accumulated so far for this pixel; added to below and written back
+	float3 nX = make_float3(normals_out[y*npitch_out+x]);
+	float3 aX = make_float3(centroid_out[y*cpitch_out+x]);
+	float contrib = contrib_out[y*wpitch_out+x];
+
+	float d0 = depth_origin[x+y*dpitch_o];
+	if (d0 <= camera_origin.minDepth || d0 >= camera_origin.maxDepth) return;
+
+	// Estimated point for this pixel, and the source image pixel it projects to
+	float3 X = camera_origin.screenToCam((int)(x),(int)(y),d0);
+	int2 s = camera_in.camToScreen<int2>(o_2_in * X);
+
+	for (int v=-SEARCH_RADIUS; v<=SEARCH_RADIUS; ++v) {
+	for (int u=-SEARCH_RADIUS; u<=SEARCH_RADIUS; ++u) {
+		const float d = (s.x+u >= 0 && s.x+u < camera_in.width && s.y+v >= 0 && s.y+v < camera_in.height) ? depth_in[s.x+u+(s.y+v)*dpitch_i] : 0.0f;
+		if (d <= camera_in.minDepth || d >= camera_in.maxDepth) continue;
+
+		// Point and normal of neighbour
+		const float3 Xi = in_2_o * camera_in.screenToCam(s.x+u, s.y+v, d);
+		const float3 Ni = in_2_o.getFloat3x3() * make_float3(normals_in[s.x+u+(s.y+v)*npitch_in]);
+
+		// Gauss approx weighting function using point distance. An all-zero normal
+		// marks a missing sample; test its squared length, not its component sum.
+		const float w = (Ni.x*Ni.x + Ni.y*Ni.y + Ni.z*Ni.z > 0.0f) ? ftl::cuda::spatialWeighting(X,Xi,smoothing) : 0.0f;
+		aX += Xi*w;
+		nX += Ni*w;
+		contrib += w;
+	}
+	}
+
+	normals_out[y*npitch_out+x] = make_half4(nX, 0.0f);
+	centroid_out[y*cpitch_out+x] = make_float4(aX, 0.0f);
+	contrib_out[y*wpitch_out+x] = contrib;
+}
+
+/**
+ * Convert accumulated values into estimate of depth and normals at pixel.
+ * A pixel with nothing accumulated keeps its depth and gets a zero normal.
+ */
+__global__ void mls_reduce_kernel(
+	const float4* __restrict__ centroid,    // Sum of weighted points
+	const half4* __restrict__ normals,      // Sum of weighted normals
+	const float* __restrict__ contrib_out,  // Sum of weights
+	half4* __restrict__ normals_out,        // Receives the unit-length averaged normal
+	float* __restrict__ depth,              // Depth estimate in, adjusted depth out
+	ftl::rgbd::Camera camera,
+	int npitch_in,                          // Row pitch of normals, in half4 elements
+	int cpitch_in,                          // Row pitch of centroid, in float4 elements
+	int wpitch,                             // Row pitch of contrib_out, in floats
+	int npitch,                             // Row pitch of normals_out, in half4 elements
+	int dpitch                              // Row pitch of depth, in floats
+) {
+	const int x = blockIdx.x*blockDim.x + threadIdx.x;
+	const int y = blockIdx.y*blockDim.y + threadIdx.y;
+	if (x < 0 || y < 0 || x >= camera.width || y >= camera.height) return;
+	float3 nX = make_float3(normals[y*npitch_in+x]);
+	float3 aX = make_float3(centroid[y*cpitch_in+x]);
+	float contrib = contrib_out[y*wpitch+x];
+	// Zero normal is the result for every pixel that returns early below
+	normals_out[x+y*npitch] = make_half4(0.0f, 0.0f, 0.0f, 0.0f);
+
+	float d0 = depth[x+y*dpitch];
+	if (d0 < camera.minDepth || d0 > camera.maxDepth) return;
+	// Nothing gathered: dividing by a zero weight would write NaN/inf depth
+	if (!(contrib > 0.0f)) return;
+
+	nX /= contrib;  // Weighted average normal
+	aX /= contrib;  // Weighted average point (centroid)
+	const float nlen = length(nX);
+	if (!(nlen > 0.0f)) return;  // Averaged normal has no direction to normalise
+
+	float3 X = camera.screenToCam((int)(x),(int)(y),d0);
+	// Signed-Distance Field function
+	float fX = nX.x * (X.x - aX.x) + nX.y * (X.y - aX.y) + nX.z * (X.z - aX.z);
+	// Calculate new point using SDF function to adjust depth (and position)
+	X = X - nX * fX;
+	depth[x+y*dpitch] = X.z;
+	normals_out[x+y*npitch] = make_half4(nX / nlen, 0.0f);
+}
+
+#define T_PER_BLOCK 8
+
+void ftl::cuda::mls_gather(
+	const cv::cuda::GpuMat &normals_in,		// Source frame
+	cv::cuda::GpuMat &normals_out,
+	const cv::cuda::GpuMat &depth_origin,  // Rendered image
+	const cv::cuda::GpuMat &depth_in,
+	cv::cuda::GpuMat &centroid_out,
+	cv::cuda::GpuMat &contrib_out,
+	float smoothing,
+	const float4x4 &o_2_in,
+	const float4x4 &in_2_o,
+	const ftl::rgbd::Camera &camera_origin,  // Virtual camera
+	const ftl::rgbd::Camera &camera_in,
+	cudaStream_t stream
+) {
+	// The three accumulation images are sized to match the rendered depth image
+	const auto dims = depth_origin.size();
+	normals_out.create(dims, CV_16FC4);
+	centroid_out.create(dims, CV_32FC4);
+	contrib_out.create(dims, CV_32F);
+
+	// One thread per rendered pixel, with the grid rounded up to whole blocks
+	const dim3 threads(T_PER_BLOCK, T_PER_BLOCK);
+	const dim3 blocks(
+		(depth_origin.cols + T_PER_BLOCK - 1) / T_PER_BLOCK,
+		(depth_origin.rows + T_PER_BLOCK - 1) / T_PER_BLOCK);
+
+	mls_gather_kernel<2><<<blocks, threads, 0, stream>>>(
+		normals_in.ptr<half4>(), normals_out.ptr<half4>(),
+		depth_origin.ptr<float>(), depth_in.ptr<float>(),
+		centroid_out.ptr<float4>(), contrib_out.ptr<float>(),
+		smoothing,
+		o_2_in, in_2_o,
+		camera_origin, camera_in,
+		// Row pitches; the four-channel images are divided by 4 to index whole pixels
+		normals_out.step1()/4,
+		centroid_out.step1()/4,
+		contrib_out.step1(),
+		depth_origin.step1(),
+		depth_in.step1(),
+		normals_in.step1()/4
+	);
+	cudaSafeCall( cudaGetLastError() );
+}
+
+void ftl::cuda::mls_reduce(
+	const cv::cuda::GpuMat &centroid,
+	const cv::cuda::GpuMat &normals,
+	const cv::cuda::GpuMat &contrib,
+	cv::cuda::GpuMat &normals_out,
+	cv::cuda::GpuMat &depth,
+	const ftl::rgbd::Camera &camera,
+	cudaStream_t stream
+) {
+	// Output normals are sized to match the depth image
+	normals_out.create(depth.size(), CV_16FC4);
+
+	// One thread per depth pixel, with the grid rounded up to whole blocks
+	const dim3 threads(T_PER_BLOCK, T_PER_BLOCK);
+	const dim3 blocks(
+		(depth.cols + T_PER_BLOCK - 1) / T_PER_BLOCK,
+		(depth.rows + T_PER_BLOCK - 1) / T_PER_BLOCK);
+
+	mls_reduce_kernel<<<blocks, threads, 0, stream>>>(
+		centroid.ptr<float4>(), normals.ptr<half4>(), contrib.ptr<float>(),
+		normals_out.ptr<half4>(), depth.ptr<float>(),
+		camera,
+		normals.step1()/4,
+		centroid.step1()/4,
+		contrib.step1(),
+		normals_out.step1()/4,
+		depth.step1()
+	);
+	cudaSafeCall( cudaGetLastError() );
+}
+
 // ==== Apply colour scale =====================================================
 
 template <int RADIUS>
@@ -191,23 +379,27 @@ __global__ void apply_colour_scaling_kernel(
 		int sy = (float(sheight) / float(cheight)) * float(y);
 
 		float s = 0.0f;
+		int count = 0;
 		//float mindiff = 100.0f;
 
 		for (int v=-RADIUS; v<=RADIUS; ++v) {
 			#pragma unroll
 			for (int u=-RADIUS; u<=RADIUS; ++u) {
 				float ns = (sx >= RADIUS && sy >= RADIUS && sx < swidth-RADIUS && sy < sheight-RADIUS) ? scale[sx+u+(sy+v)*spitch] : 0.0f;
-				s += ns;
+				if (fabsf(ns) < 30) {
+					s += ns;
+					++count;
+				}
 			}
 		}
 
-		s /= float((2*RADIUS+1)*(2*RADIUS+1));
+		if (count > 0) s /= float(count);
 
 		uchar4 c = colour[x+y*cpitch];
 		colour[x+y*cpitch] = make_uchar4(
-			max(0.0f, min(255.0f, float(c.x) + 0.0722f*s)),
-			max(0.0f, min(255.0f, float(c.y) + 0.7152f*s)),
-			max(0.0f, min(255.0f, float(c.z) + 0.2126f*s)),
+			max(0.0f, min(255.0f, float(c.x) + s)),
+			max(0.0f, min(255.0f, float(c.y) + s)),
+			max(0.0f, min(255.0f, float(c.z) + s)),
 			255.0f
 		);
 	}
diff --git a/components/renderers/cpp/src/carver.hpp b/components/renderers/cpp/src/carver.hpp
index a50a8d396d725db63572df3cfca7a070d62d4f35..41e39350135cd68937bb9aed5afec56e423a8470 100644
--- a/components/renderers/cpp/src/carver.hpp
+++ b/components/renderers/cpp/src/carver.hpp
@@ -28,6 +28,31 @@ void apply_colour_scaling(
 	int radius,
 	cudaStream_t stream);
 
+void mls_reduce(
+	const cv::cuda::GpuMat &centroid,		// Accumulated weighted points
+	const cv::cuda::GpuMat &normals,		// Accumulated weighted normals
+	const cv::cuda::GpuMat &contrib,		// Accumulated weights
+	cv::cuda::GpuMat &normals_out,			// Receives the resulting normals
+	cv::cuda::GpuMat &depth,				// Depth estimate in, adjusted depth out
+	const ftl::rgbd::Camera &camera,		// Camera of the depth image
+	cudaStream_t stream
+);
+
+void mls_gather(
+	const cv::cuda::GpuMat &normals_in,		// Source frame
+	cv::cuda::GpuMat &normals_out,			// Accumulated weighted normals
+	const cv::cuda::GpuMat &depth_origin,  // Rendered image
+	const cv::cuda::GpuMat &depth_in,		// Source frame depth
+	cv::cuda::GpuMat &centroid_out,			// Accumulated weighted points
+	cv::cuda::GpuMat &contrib_out,			// Accumulated weights
+	float smoothing,						// Forwarded to the spatial weighting function
+	const float4x4 &o_2_in,					// Virtual camera space to source camera space
+	const float4x4 &in_2_o,					// Source camera space to virtual camera space
+	const ftl::rgbd::Camera &camera_origin,  // Virtual camera
+	const ftl::rgbd::Camera &camera_in,		// Source frame camera
+	cudaStream_t stream
+);
+
 }
 }