diff --git a/components/operators/src/correspondence.cu b/components/operators/src/correspondence.cu
index 1611a8483f8faf2bcb6b56640c3f7a07edc3fb22..05d60106059df6855b65003af076bb751176e56f 100644
--- a/components/operators/src/correspondence.cu
+++ b/components/operators/src/correspondence.cu
@@ -64,6 +64,15 @@ __device__ inline int halfWarpCensus(float e) {
 	return c;
 }
 
+__device__ inline float halfWarpBest(float e, float c) {
+	for (int i = WARP_SIZE/4; i > 0; i /= 2) {
+		const float o1 = __shfl_xor_sync(FULL_MASK, e, i, WARP_SIZE);
+		const float o2 = __shfl_xor_sync(FULL_MASK, c, i, WARP_SIZE);
+		e = (o2 > c) ? o1 : e;
+	}
+	return e;
+}
+
 __device__ inline float4 relativeDelta(const float4 &e) {
 	const float e0x = __shfl_sync(FULL_MASK, e.x, 0, WARP_SIZE/2);
 	const float e0y = __shfl_sync(FULL_MASK, e.y, 0, WARP_SIZE/2);
@@ -142,14 +151,14 @@ __global__ void corresponding_point_kernel(
         float depthPos2 = camPosOrigin.z - (float((COR_STEPS/2)) * depthM2);
         
         uint badMask = 0;
-        int bestStep = 0;
+        int bestStep = COR_STEPS/2;
 
 
         // Project to p2 using cam2
         // Each thread takes a possible correspondence and calculates a weighting
         //const int lane = tid % WARP_SIZE;
         for (int i=0; i<COR_STEPS; ++i) {			
-			float weight = (linePos.x >= cam2.width || linePos.y >= cam2.height) ? 0.0f : 1.0f;
+			//float weight = 1.0f; //(linePos.x >= cam2.width || linePos.y >= cam2.height) ? 0.0f : 1.0f;
 
 			// Generate a colour correspondence value
             const auto colour2 = c2.tex2D(linePos.x, linePos.y);
@@ -161,10 +170,18 @@ __global__ void corresponding_point_kernel(
             const float depth2 = d2.tex2D(int(linePos.x+0.5f), int(linePos.y+0.5f));
             
             // Record which correspondences are invalid
-            badMask |= (depth2 <= cam2.minDepth || depth2 >= cam2.maxDepth) ? 1 << i : 0;
+            badMask |= (
+					depth2 <= cam2.minDepth ||
+					depth2 >= cam2.maxDepth ||
+					linePos.x < 0.5f ||
+					linePos.y < 0.5f ||
+					linePos.x >= d2.width()-0.5f ||
+					linePos.y >= d2.height()-0.5f
+				) ? 1 << i : 0;
 			
 			//if (FUNCTION == 1) {
-				weight *= ftl::cuda::weighting(fabs(depth2 - depthPos2), cweight*params.spatial_smooth);
+			float weight = ftl::cuda::weighting(fabs(depth2 - depthPos2), cweight*params.spatial_smooth);
+			//weight = ftl::cuda::halfWarpSum(weight);
 			//} else {
 			//	const float dweight = ftl::cuda::weighting(fabs(depth2 - depthPos2), params.spatial_smooth);
             //	weight *= weightFunction<FUNCTION>(params, dweight, cweight);
@@ -191,23 +208,26 @@ __global__ void corresponding_point_kernel(
         }
 
         //const float avgcolour = totalcolour/(float)count;
-        const float confidence = bestcolour / totalcolour; //bestcolour - avgcolour;
-        const float bestadjust = float(bestStep-(COR_STEPS/2))*depthM;
+        const float confidence = ((bestcolour / totalcolour) - (1.0f / 16.0f)) * (1.0f + (1.0f/16.0f));
+        float bestadjust = float(bestStep-(COR_STEPS/2))*depthM;
 
         // Detect matches to boundaries, and discard those
         uint stepMask = 1 << bestStep;
-        if ((stepMask & (badMask << 1)) || (stepMask & (badMask >> 1))) bestweight = 0.0f;
+		if ((stepMask & badMask) || (stepMask & (badMask << 1)) || (stepMask & (badMask >> 1))) bestweight = 0.0f;
+		
+		//bestadjust = halfWarpBest(bestadjust, (bestweight > 0.0f) ? confidence : 0.0f);
 
         //Mask m(mask.tex2D(x,y));
 
         //if (bestweight > 0.0f) {
             float old = conf.tex2D(x,y);
 
-            if (bestweight * confidence > old) {
+            if (bestweight > 0.0f) {
 				d1(x,y) = (0.4f*bestadjust) + depth1;
 				//d2(bestScreen.x, bestScreen.y) = bestdepth2;
                 //screenOut(x,y) = bestScreen;
-                conf(x,y) = bestweight * confidence;
+				conf(x,y) = max(old,confidence); //bestweight * confidence;
+				//conf(x,y) = max(old,fabs(bestadjust));
             }
         //}
         
diff --git a/components/operators/src/mvmls.cpp b/components/operators/src/mvmls.cpp
index 4a61e63223bc14e07b9dd90695a6600b4be99b7e..8ff3c89dd8ce9f10882e4c5386b4de631f7836b9 100644
--- a/components/operators/src/mvmls.cpp
+++ b/components/operators/src/mvmls.cpp
@@ -72,6 +72,8 @@ bool MultiViewMLS::apply(ftl::rgbd::FrameSet &in, ftl::rgbd::FrameSet &out, cuda
         f.createTexture<float>(Channel::Confidence);
         f.create<GpuMat>(Channel::Screen, Format<short2>(size));
         f.createTexture<short2>(Channel::Screen);
+
+        f.get<GpuMat>(Channel::Confidence).setTo(cv::Scalar(0.0f), cvstream);
     }
 
     for (int iter=0; iter<iters; ++iter) {
@@ -84,7 +86,7 @@ bool MultiViewMLS::apply(ftl::rgbd::FrameSet &in, ftl::rgbd::FrameSet &out, cuda
             for (size_t i=0; i<in.frames.size(); ++i) {
                 auto &f1 = in.frames[i];
                 //f1.get<GpuMat>(Channel::Depth2).setTo(cv::Scalar(0.0f), cvstream);
-                f1.get<GpuMat>(Channel::Confidence).setTo(cv::Scalar(0.0f), cvstream);
+                //f1.get<GpuMat>(Channel::Confidence).setTo(cv::Scalar(0.0f), cvstream);
 
                 Eigen::Vector4d d1(0.0, 0.0, 1.0, 0.0);
                 d1 = in.sources[i]->getPose() * d1;
diff --git a/components/renderers/cpp/include/ftl/cuda/warp.hpp b/components/renderers/cpp/include/ftl/cuda/warp.hpp
index 8c0fdef9802c1ffb86e226c803d806ea56ea003e..10fc460f3f33a30fadce45912caf72d4a7a86ac3 100644
--- a/components/renderers/cpp/include/ftl/cuda/warp.hpp
+++ b/components/renderers/cpp/include/ftl/cuda/warp.hpp
@@ -76,6 +76,14 @@ __device__ inline int halfWarpSum(int e) {
 	return e;
 }
 
+__device__ inline float halfWarpMul(float e) {
+	for (int i = WARP_SIZE/4; i > 0; i /= 2) {
+		const float other = __shfl_xor_sync(FULL_MASK, e, i, WARP_SIZE);
+		e *= other;
+	}
+	return e;
+}
+
 
 }
 }
diff --git a/components/renderers/cpp/src/reprojection.cu b/components/renderers/cpp/src/reprojection.cu
index b23ad81e9c462f9d4aeb2b6a83d53331487b1db5..9c414f8927572f93795ea4bfb2e17c491f2deb44 100644
--- a/components/renderers/cpp/src/reprojection.cu
+++ b/components/renderers/cpp/src/reprojection.cu
@@ -97,7 +97,8 @@ __global__ void reprojection_kernel(
 	const auto input = in.tex2D(screenPos.x, screenPos.y); //generateInput(in.tex2D((int)screenPos.x, (int)screenPos.y), params, worldPos);
 
 	// TODO: Z checks need to interpolate between neighbors if large triangles are used
-	float weight = ftl::cuda::weighting(fabs(camPos.z - d2), 0.02f);
+	//float weight = ftl::cuda::weighting(fabs(camPos.z - d2), params.depthThreshold);
+	float weight = (fabs(camPos.z - d2) <= params.depthThreshold) ? 1.0f : 0.0f;
 
 	/* Buehler C. et al. 2001. Unstructured Lumigraph Rendering. */
 	/* Orts-Escolano S. et al. 2016. Holoportation: Virtual 3D teleportation in real-time. */
diff --git a/components/renderers/cpp/src/triangle_render.cu b/components/renderers/cpp/src/triangle_render.cu
index 2e1966c4252fcf751639cc70a94dbfbdccd43b3b..61f44f320e0b889649d1f82c5f2c090d326e1221 100644
--- a/components/renderers/cpp/src/triangle_render.cu
+++ b/components/renderers/cpp/src/triangle_render.cu
@@ -77,7 +77,7 @@ __device__ static
  float3 calculateBarycentricCoordinate(const short2 (&tri)[3], const short2 &point) {
 	 float beta = calculateBarycentricCoordinateValue(tri[0], point, tri[2], tri);
 	 float gamma = calculateBarycentricCoordinateValue(tri[0], tri[1], point, tri);
-	 float alpha = 1.0 - beta - gamma;
+	 float alpha = 1.0f - beta - gamma;
 	 return make_float3(alpha, beta, gamma);
  }
  
@@ -86,9 +86,9 @@ __device__ static
   */
  __host__ __device__ static
  bool isBarycentricCoordInBounds(const float3 &barycentricCoord) {
-	 return barycentricCoord.x >= 0.0 && barycentricCoord.x <= 1.0 &&
-			barycentricCoord.y >= 0.0 && barycentricCoord.y <= 1.0 &&
-			barycentricCoord.z >= 0.0 && barycentricCoord.z <= 1.0;
+	 return barycentricCoord.x >= -0.0001f && //barycentricCoord.x <= 1.0f &&
+			barycentricCoord.y >= -0.0001f && //barycentricCoord.y <= 1.0f &&
+			barycentricCoord.z >= -0.0001f; // &&barycentricCoord.z <= 1.0f;
  }
 
  /**
@@ -137,6 +137,11 @@ float getZAtCoordinate(const float3 &barycentricCoord, const float (&tri)[3]) {
 	const int maxX = max(v[0].x, max(v[1].x, v[2].x));
 	const int maxY = max(v[0].y, max(v[1].y, v[2].y));
 
+	// Ensure the points themselves are drawn
+	//atomicMin(&depth_out(v[0].x,v[0].y), int(d[0]*100000.0f));
+	//atomicMin(&depth_out(v[1].x,v[1].y), int(d[1]*100000.0f));
+	//atomicMin(&depth_out(v[2].x,v[2].y), int(d[2]*100000.0f));
+
 	// Remove really large triangles
 	if ((maxX - minX) * (maxY - minY) > params.triangle_limit) return;