diff --git a/applications/reconstruct/include/ftl/voxel_hash.hpp b/applications/reconstruct/include/ftl/voxel_hash.hpp
index c0a228834337e60f5807313735eedee25eb51dcb..40b981740caef12869ad977f42293276879578cb 100644
--- a/applications/reconstruct/include/ftl/voxel_hash.hpp
+++ b/applications/reconstruct/include/ftl/voxel_hash.hpp
@@ -73,6 +73,7 @@ struct __align__(16) HashEntry {
 
 	HashEntryHead head;
 	uint voxels[16];  // 512 bits, 1 bit per voxel
+	uint validity[16];  // Is the voxel valid, 512 bit
 
 	/*__device__ void operator=(const struct HashEntry& e) {
 		((long long*)this)[0] = ((const long long*)&e)[0];
diff --git a/applications/reconstruct/src/integrators.cu b/applications/reconstruct/src/integrators.cu
index 984fd29dce023bd1491c2637b73b98accaf303d9..9abb28c8431893f9d33ce5da54990002b8b81c13 100644
--- a/applications/reconstruct/src/integrators.cu
+++ b/applications/reconstruct/src/integrators.cu
@@ -47,10 +47,10 @@ __device__ bool colordiff(const uchar4 &pa, const uchar3 &pb, float epsilon) {
  */
 __device__ float spatialWeighting(float r) {
 	const float h = c_hashParams.m_spatialSmoothing;
+	if (r >= h) return 0.0f;
 	float rh = r / h;
 	rh = 1.0f - rh*rh;
-	rh = rh*rh*rh*rh;
-	return (rh < h) ? rh : 0.0f;
+	return rh*rh*rh*rh;
 }
 
 
@@ -103,6 +103,10 @@ __global__ void integrateDepthMapsKernel(HashData hashData, HashParams hashParam
 		//printf("screen pos %d\n", color.x);
 		//return;
 
+		// TODO:(Nick) Accumulate weighted positions
+		// TODO:(Nick) Accumulate weighted normals
+		// TODO:(Nick) Accumulate weights
+
 		// Depth is within accepted max distance from camera
 		if (depth > 0.01f && depth < hashParams.m_maxIntegrationDistance) { // valid depth and color (Nick: removed colour check)
 			//camdepths[count] = depth;
@@ -181,6 +185,102 @@ __global__ void integrateDepthMapsKernel(HashData hashData, HashParams hashParam
 	}
 }
 
+#define WINDOW_RADIUS 7
+
+__global__ void integrateMLSKernel(HashData hashData, HashParams hashParams, int numcams) {
+	__shared__ uint voxels[16];
+	__shared__ uint valid[16];
+
+	const uint i = threadIdx.x;	//inside of an SDF block
+	const int3 po = make_int3(hashData.delinearizeVoxelIndex(i));
+
+	// Stride over all allocated blocks
+	for (int bi=blockIdx.x; bi<*hashData.d_hashCompactifiedCounter; bi+=NUM_CUDA_BLOCKS) {
+
+	//TODO check if we should load this in shared memory
+	//HashEntryHead entry = hashData.d_hashCompactified[bi]->head;
+
+	int3 pi_base = hashData.SDFBlockToVirtualVoxelPos(make_int3(hashData.d_hashCompactified[bi]->head.posXYZ));
+
+	//uint idx = entry.offset + i;
+	int3 pi = pi_base + po;
+	float3 pfb = hashData.virtualVoxelPosToWorld(pi);
+	int count = 0;
+	//float camdepths[MAX_CAMERAS];
+
+	Voxel oldVoxel; // = hashData.d_SDFBlocks[idx];
+	hashData.deleteVoxel(oldVoxel);
+
+	float3 wpos = make_float3(0.0f);
+	float3 wnorm = make_float3(0.0f);
+	float weights = 0.0f;
+
+	for (uint cam=0; cam<numcams; ++cam) {
+		const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam];
+		const uint height = camera.params.m_imageHeight;
+		const uint width = camera.params.m_imageWidth;
+
+		float3 pf = camera.poseInverse * pfb;
+		uint2 screenPos = make_uint2(camera.params.cameraToKinectScreenInt(pf));
+
+		for (int v=-WINDOW_RADIUS; v<=WINDOW_RADIUS; ++v) {
+			for (int u=-WINDOW_RADIUS; u<=WINDOW_RADIUS; ++u) {
+				// For this voxel in hash, get its screen position and check it is on screen
+				if (screenPos.x+u < width && screenPos.y+v < height) { //on screen
+					float depth = tex2D<float>(camera.depth, screenPos.x+u, screenPos.y+v);
+					float4 normal = tex2D<float4>(camera.normal, screenPos.x+u, screenPos.y+v);
+					float3 worldPos = camera.pose * camera.params.kinectDepthToSkeleton(screenPos.x+u, screenPos.y+v, depth);
+
+					float weight = spatialWeighting(length(pfb - worldPos));
+
+					wpos += weight*worldPos;
+					wnorm += weight*make_float3(normal);
+					weights += weight;
+				}
+			}
+		}
+	}
+
+	wpos /= weights;
+	wnorm /= weights;
+
+	float sdf = length(pfb - wpos); //wnorm.x * (pfb.x - wpos.x) + wnorm.y * (pfb.y - wpos.y) + wnorm.z * (pfb.z - wpos.z);
+
+	//printf("WEIGHTS: %f\n", weights);
+
+	//if (weights < 0.00001f) sdf = 0.0f;
+
+	// Calculate voxel sign values across a warp
+	int warpNum = i / WARP_SIZE;
+	uint solid_ballot = __ballot_sync(0xFFFFFFFF, (sdf >= 0.0f) ? 0 : 1);
+	uint valid_ballot = __ballot_sync(0xFFFFFFFF, (weights >= 2.0f) ? 1 : 0);
+
+	// Aggregate each warp result into voxel mask
+	if (i % WARP_SIZE == 0) {
+		voxels[warpNum] = solid_ballot;
+		valid[warpNum] = valid_ballot;
+	}
+
+	__syncthreads();
+
+	// Work out if block is occupied or not and save voxel masks
+	// TODO:(Nick) Is it faster to do this in a separate garbage kernel?
+	if (i < 16) {
+		const uint v = voxels[i];
+		hashData.d_hashCompactified[bi]->voxels[i] = v;
+		hashData.d_hashCompactified[bi]->validity[i] = valid[i];
+		const uint mask = 0x0000FFFF;
+		uint b1 = __ballot_sync(mask, v == 0xFFFFFFFF);
+		uint b2 = __ballot_sync(mask, v == 0);
+		if (i == 0) {
+			if (b1 != mask && b2 != mask) hashData.d_hashCompactified[bi]->head.flags |= ftl::voxhash::kFlagSurface;
+			else hashData.d_hashCompactified[bi]->head.flags &= ~ftl::voxhash::kFlagSurface;
+		}
+	}
+
+	}
+}
+
 void ftl::cuda::integrateDepthMaps(HashData& hashData, const HashParams& hashParams, int numcams, cudaStream_t stream) {
@@ -189,7 +289,7 @@
 	const dim3 gridSize(NUM_CUDA_BLOCKS, 1);
 	const dim3 blockSize(threadsPerBlock, 1);
 
 	//if (hashParams.m_numOccupiedBlocks > 0) {	//this guard is important if there is no depth in the current frame (i.e., no blocks were allocated)
-		integrateDepthMapsKernel << <gridSize, blockSize, 0, stream >> >(hashData, hashParams, numcams);
+		integrateMLSKernel << <gridSize, blockSize, 0, stream >> >(hashData, hashParams, numcams);
 	//}
 
 	//cudaSafeCall( cudaGetLastError() );
diff --git a/applications/reconstruct/src/splat_render.cu b/applications/reconstruct/src/splat_render.cu
index 0e3598f129bf40c72294ab19501b1fd3c95bb1e9..58e4bbf2a4b1fbdb87877d4deee70665661f9813 100644
--- a/applications/reconstruct/src/splat_render.cu
+++ b/applications/reconstruct/src/splat_render.cu
@@ -79,6 +79,7 @@ __device__ inline bool getVoxel(uint *voxels, int ix) {
 __global__ void isosurface_image_kernel(ftl::voxhash::HashData hashData, TextureObject<uint> depth, SplatParams params) {
 	// TODO:(Nick) Reduce bank conflicts by aligning these
 	__shared__ uint voxels[16];
+	__shared__ uint valid[16];
 	__shared__ ftl::voxhash::HashEntryHead block;
 
 	// Stride over all allocated blocks
@@ -88,7 +89,10 @@ __global__ void isosurface_image_kernel(ftl::voxhash::HashData hashData, Texture
 	const uint i = threadIdx.x;	//inside of an SDF block
 
 	if (i == 0) block = hashData.d_hashCompactified[bi]->head;
-	if (i < 16) voxels[i] = hashData.d_hashCompactified[bi]->voxels[i];
+	if (i < 16) {
+		voxels[i] = hashData.d_hashCompactified[bi]->voxels[i];
+		valid[i] = hashData.d_hashCompactified[bi]->validity[i];
+	}
 
 	// Make sure all hash entries are cached
 	__syncthreads();
@@ -118,7 +122,7 @@ __global__ void isosurface_image_kernel(ftl::voxhash::HashData hashData, Texture
 		bool is_surface = false; //((params.m_flags & ftl::render::kShowBlockBorders) && edgeX + edgeY + edgeZ >= 2);
 		//if (is_surface) color = make_uchar4(255,(vp.x == 0 && vp.y == 0 && vp.z == 0) ? 255 : 0,0,255);
 
-		if (!is_surface && v) continue;
+		if (!getVoxel(valid, i)) continue;
 
 		//if (vp.z == 7) voxels[j].color = make_uchar3(0,255,(voxels[j].sdf < 0.0f) ? 255 : 0);
@@ -136,7 +140,7 @@ __global__ void isosurface_image_kernel(ftl::voxhash::HashData hashData, Texture
 			//if (uvi.x == 8 || uvi.z == 8 || uvi.y == 8) continue;
 
 			const bool vox = getVoxel(voxels, hashData.linearizeVoxelPos(uvi));
-			if (vox) {
+			if (getVoxel(valid, hashData.linearizeVoxelPos(uvi))) {
 				is_surface = true;
 				// Should break but is slower?
 			}