diff --git a/components/renderers/cpp/src/CUDARender.cpp b/components/renderers/cpp/src/CUDARender.cpp
index 9dab392f3b651a6250e5d455986d8d8efff0348c..6d6fccf6a1e58c22b66cf43d2f2819a54416aef7 100644
--- a/components/renderers/cpp/src/CUDARender.cpp
+++ b/components/renderers/cpp/src/CUDARender.cpp
@@ -425,12 +425,15 @@ void CUDARender::_mesh(ftl::rgbd::Frame &out, const Eigen::Matrix4d &t, cudaStre
 	ftl::cuda::touch_merge(depth_out_, out.createTexture<float>(_getDepthChannel()), collisions_, 1024, touch_dist_, stream_);
 
 	// Generate actual depth map using MLS with mesh as estimate
-	float mls_smoothing = value("mls_smooth", 0.005f);
+	float mls_smoothing = value("mls_smooth", 0.05f);
+	int mls_iter = value("mls_iter", 3);
 	if (value("mls_full", true)) {
 		// Clear buffers
 		mls_centroid_.create(params_.camera.height, params_.camera.width, CV_32FC4);
 		mls_contrib_.create(params_.camera.height, params_.camera.width, CV_32F);
 		mls_normals_.create(params_.camera.height, params_.camera.width, CV_16FC4);
+
+		for (int iter=0; iter<mls_iter; ++iter) {
 		mls_centroid_.setTo(cv::Scalar(0,0,0,0), cvstream);
 		mls_contrib_.setTo(cv::Scalar(0), cvstream);
 		mls_normals_.setTo(cv::Scalar(0,0,0,0), cvstream);
@@ -450,6 +453,7 @@ void CUDARender::_mesh(ftl::rgbd::Frame &out, const Eigen::Matrix4d &t, cudaStre
 			auto transformR = MatrixConversion::toCUDA(f.getPose().cast<float>().inverse() * t.cast<float>().inverse()) * poseInverse_;
 			// Original to VCAM
 			auto transform = pose_ * MatrixConversion::toCUDA(t.cast<float>() * f.getPose().cast<float>());
+			auto transform33 = pose_.getFloat3x3() * MatrixConversion::toCUDA(t.cast<float>() * f.getPose().cast<float>()).getFloat3x3();
 
 			ftl::cuda::mls_gather(
 				f.get<cv::cuda::GpuMat>(Channel::Normals),
@@ -461,6 +465,7 @@ void CUDARender::_mesh(ftl::rgbd::Frame &out, const Eigen::Matrix4d &t, cudaStre
 				mls_smoothing,
 				transformR,
 				transform,
+				transform33,
 				params_.camera,
 				f.getLeft(),
 				stream_
@@ -477,6 +482,7 @@ void CUDARender::_mesh(ftl::rgbd::Frame &out, const Eigen::Matrix4d &t, cudaStre
 			params_.camera,
 			stream_
 		);
+		}
 
 	} else {
 		int mls_radius = value("mls_radius", 0);
diff --git a/components/renderers/cpp/src/carver.cu b/components/renderers/cpp/src/carver.cu
index edc4a5315cdf6df8382895d524735a4039fa6520..12faea3299d8fa31fcb4fa9e338a513ff07275a6 100644
--- a/components/renderers/cpp/src/carver.cu
+++ b/components/renderers/cpp/src/carver.cu
@@ -187,6 +187,7 @@ void ftl::cuda::depth_carve(
 	float smoothing,
 	float4x4 o_2_in,
 	float4x4 in_2_o,
+	float3x3 in_2_o33,
 	ftl::rgbd::Camera camera_origin,
 	ftl::rgbd::Camera camera_in,
 	int npitch_out,
@@ -220,7 +221,7 @@ void ftl::cuda::depth_carve(
 
 		// Point and normal of neighbour
 		const float3 Xi = in_2_o * camera_in.screenToCam(s.x+u, s.y+v, d);
-		const float3 Ni = in_2_o.getFloat3x3() * make_float3(normals_in[s.x+u+(s.y+v)*npitch_in]);
+		const float3 Ni = in_2_o33 * make_float3(normals_in[s.x+u+(s.y+v)*npitch_in]);
 
 		// Gauss approx weighting function using point distance
 		const float w = (Ni.x+Ni.y+Ni.z > 0.0f) ? ftl::cuda::spatialWeighting(X,Xi,smoothing) : 0.0f;
@@ -264,7 +265,7 @@ __global__ void mls_reduce_kernel(
 		normals_out[x+y*npitch] = make_half4(0.0f, 0.0f, 0.0f, 0.0f);
 
 		float d0 = depth[x+y*dpitch];
-		if (d0 < camera.minDepth || d0 > camera.maxDepth) return;
+		if (d0 < camera.minDepth || d0 > camera.maxDepth || contrib == 0.0f) return;
 		float3 X = camera.screenToCam((int)(x),(int)(y),d0);
 		
 		nX /= contrib;  // Weighted average normal
@@ -293,6 +294,7 @@ void ftl::cuda::mls_gather(
 	float smoothing,
 	const float4x4 &o_2_in,
 	const float4x4 &in_2_o,
+	const float3x3 &in_2_o33,
 	const ftl::rgbd::Camera &camera_origin,  // Virtual camera
 	const ftl::rgbd::Camera &camera_in,
 	cudaStream_t stream
@@ -305,7 +307,7 @@ void ftl::cuda::mls_gather(
 	centroid_out.create(depth_origin.size(), CV_32FC4);
 	contrib_out.create(depth_origin.size(), CV_32F);
 
-	mls_gather_kernel<2><<<gridSize, blockSize, 0, stream>>>(
+	mls_gather_kernel<3><<<gridSize, blockSize, 0, stream>>>(
 		normals_in.ptr<half4>(),
 		normals_out.ptr<half4>(),
 		depth_origin.ptr<float>(),
@@ -315,6 +317,7 @@ void ftl::cuda::mls_gather(
 		smoothing,
 		o_2_in,
 		in_2_o,
+		in_2_o33,
 		camera_origin,
 		camera_in,
 		normals_out.step1()/4,
diff --git a/components/renderers/cpp/src/carver.hpp b/components/renderers/cpp/src/carver.hpp
index 41e39350135cd68937bb9aed5afec56e423a8470..3f9a66ec567b20ca6ae71869e6c2d6086644ce93 100644
--- a/components/renderers/cpp/src/carver.hpp
+++ b/components/renderers/cpp/src/carver.hpp
@@ -48,6 +48,7 @@ void mls_gather(
 	float smoothing,
 	const float4x4 &o_2_in,
 	const float4x4 &in_2_o,
+	const float3x3 &in_2_o33,
 	const ftl::rgbd::Camera &camera_origin,  // Virtual camera
 	const ftl::rgbd::Camera &camera_in,
 	cudaStream_t stream