diff --git a/applications/reconstruct/CMakeLists.txt b/applications/reconstruct/CMakeLists.txt
index dce81e19c91a41f7e8bba90a2149a6602d73e5aa..931802aedce15c0f94617385d05aeed141d24fdd 100644
--- a/applications/reconstruct/CMakeLists.txt
+++ b/applications/reconstruct/CMakeLists.txt
@@ -20,6 +20,7 @@ set(REPSRC
 	src/ilw/fill.cu
 	src/ilw/discontinuity.cu
 	src/ilw/correspondence.cu
+	src/filters/smoothing.cu
 )
 
 add_executable(ftl-reconstruct ${REPSRC})
diff --git a/applications/reconstruct/src/filters/smoothing.cu b/applications/reconstruct/src/filters/smoothing.cu
new file mode 100644
index 0000000000000000000000000000000000000000..b5557ed0ba636eb887e615e4229a91f45e6b5cfc
--- /dev/null
+++ b/applications/reconstruct/src/filters/smoothing.cu
@@ -0,0 +1,83 @@
+#include "smoothing.hpp"
+
+#include <ftl/cuda/weighting.hpp>
+
+#define T_PER_BLOCK 8
+
+// Colour-guided depth smoothing: each thread replaces one pixel's depth with a weighted average
+// over a (2*RADIUS+1)^2 window. Pixels outside [camera.minDepth, camera.maxDepth] are written as 0.
+template <int RADIUS>
+__global__ void depth_smooth_kernel(
+		ftl::cuda::TextureObject<float> depth_in,
+		ftl::cuda::TextureObject<uchar4> colour_in,
+		ftl::cuda::TextureObject<float> depth_out,
+		ftl::rgbd::Camera camera, float factor, float thresh) {
+	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
+	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
+	const int width = (int)depth_in.width(), height = (int)depth_in.height();
+
+	// Stay inside both textures: the launch grid is rounded up and may be sized from either one.
+	if ((int)x >= width || (int)y >= height) return;
+	if (x >= depth_out.width() || y >= depth_out.height()) return;
+
+	const float d = depth_in.tex2D((int)x,(int)y);
+	depth_out(x,y) = 0.0f;
+	if (d < camera.minDepth || d > camera.maxDepth) return;
+
+	const uchar4 c = colour_in.tex2D((int)x, (int)y);
+	const float3 pos = camera.screenToCam(x,y,d);
+	float contrib = 0.0f, new_depth = 0.0f;
+	for (int u=-RADIUS; u<=RADIUS; ++u) {
+		for (int v=-RADIUS; v<=RADIUS; ++v) {
+			// Skip neighbours outside the image; x+u in unsigned arithmetic would wrap at the left/top border.
+			const int nx = (int)x + u, ny = (int)y + v;
+			if (nx < 0 || ny < 0 || nx >= width || ny >= height) continue;
+			// Skip neighbours without a valid depth (including the 0 markers) so they cannot bias the average.
+			const float dN = depth_in.tex2D(nx, ny);
+			if (dN < camera.minDepth || dN > camera.maxDepth) continue;
+			// The colour difference to the centre scales the third argument of the spatial weighting.
+			const uchar4 cN = colour_in.tex2D(nx, ny);
+			const float colourWeight = ftl::cuda::colourWeighting(c, cN, thresh);
+			const float3 posN = camera.screenToCam((unsigned int)nx, (unsigned int)ny, dN);
+			const float weight = ftl::cuda::spatialWeighting(posN, pos, factor * colourWeight);
+			contrib += weight;
+			new_depth += dN * weight;
+		}
+	}
+	if (contrib > 0.0f) depth_out(x,y) = new_depth / contrib;
+}
+
+// Launches one smoothing pass (src -> dst) on `stream` using the kernel instantiated
+// for `radius`. Radii outside 1..5 launch nothing and leave dst untouched.
+static void depth_smooth_pass(
+		ftl::cuda::TextureObject<float> &src, ftl::cuda::TextureObject<uchar4> &colour,
+		ftl::cuda::TextureObject<float> &dst, const ftl::rgbd::Camera &camera,
+		int radius, float factor, float thresh, const dim3 &grid, const dim3 &block, cudaStream_t stream) {
+	switch (radius) {
+	case 5 :	depth_smooth_kernel<5><<<grid, block, 0, stream>>>(src, colour, dst, camera, factor, thresh); break;
+	case 4 :	depth_smooth_kernel<4><<<grid, block, 0, stream>>>(src, colour, dst, camera, factor, thresh); break;
+	case 3 :	depth_smooth_kernel<3><<<grid, block, 0, stream>>>(src, colour, dst, camera, factor, thresh); break;
+	case 2 :	depth_smooth_kernel<2><<<grid, block, 0, stream>>>(src, colour, dst, camera, factor, thresh); break;
+	case 1 :	depth_smooth_kernel<1><<<grid, block, 0, stream>>>(src, colour, dst, camera, factor, thresh); break;
+	default:	break;
+	}
+	cudaSafeCall( cudaGetLastError() );
+}
+
+// Smooths depth in two passes: depth_in -> depth_out, then depth_out -> depth_in.
+// The final result therefore ends up in depth_in; depth_out keeps the first-pass result.
+// NOTE(review): the grid is sized from depth_out only; both depth textures presumably share one size - confirm.
+void ftl::cuda::depth_smooth(
+		ftl::cuda::TextureObject<float> &depth_in,
+		ftl::cuda::TextureObject<uchar4> &colour_in,
+		ftl::cuda::TextureObject<float> &depth_out,
+		const ftl::rgbd::Camera &camera,
+		int radius, float factor, float thresh, cudaStream_t stream) {
+	const dim3 gridSize((depth_out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
+	depth_smooth_pass(depth_in, colour_in, depth_out, camera, radius, factor, thresh, gridSize, blockSize, stream);
+	depth_smooth_pass(depth_out, colour_in, depth_in, camera, radius, factor, thresh, gridSize, blockSize, stream);
+#ifdef _DEBUG
+	cudaSafeCall(cudaDeviceSynchronize());
+#endif
+}
diff --git a/applications/reconstruct/src/filters/smoothing.hpp b/applications/reconstruct/src/filters/smoothing.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..61f85e2727b8fe733e12fafe8d38311b919ab4a0
--- /dev/null
+++ b/applications/reconstruct/src/filters/smoothing.hpp
@@ -0,0 +1,21 @@
+#ifndef _FTL_CUDA_SMOOTHING_HPP_
+#define _FTL_CUDA_SMOOTHING_HPP_
+
+#include <ftl/rgbd/camera.hpp>
+#include <ftl/cuda_common.hpp>
+
+namespace ftl {
+namespace cuda {
+// Colour-guided smoothing of a depth texture, run as two kernel passes on `stream`.
+void depth_smooth(
+	ftl::cuda::TextureObject<float> &depth_in,   // input depth; also receives the second-pass result
+	ftl::cuda::TextureObject<uchar4> &colour_in, // colour image sampled at the same pixel coordinates
+	ftl::cuda::TextureObject<float> &depth_out,  // first-pass result; its size determines the launch grid
+	const ftl::rgbd::Camera &camera,             // supplies minDepth/maxDepth and screenToCam()
+	int radius, float factor, float thresh,      // radius: 1..5 (else no launch); factor/thresh feed the weighting functions
+	cudaStream_t stream);                        // CUDA stream used for both kernel launches
+
+}  // namespace cuda
+}  // namespace ftl
+
+#endif  // _FTL_CUDA_SMOOTHING_HPP_
diff --git a/applications/reconstruct/src/ilw.cpp b/applications/reconstruct/src/ilw.cpp
deleted file mode 100644
index 435cd886eba1b83d7530f19f28ccfb09f7e37f3a..0000000000000000000000000000000000000000
--- a/applications/reconstruct/src/ilw.cpp
+++ /dev/null
@@ -1,125 +0,0 @@
-#include "ilw.hpp"
-#include <ftl/utility/matrix_conversion.hpp>
-#include <ftl/rgbd/source.hpp>
-#include <ftl/cuda/points.hpp>
-#include <loguru.hpp>
-
-#include "ilw_cuda.hpp"
-
-using ftl::ILW;
-using ftl::detail::ILWData;
-using ftl::codecs::Channel;
-using ftl::codecs::Channels;
-using ftl::rgbd::Format;
-using cv::cuda::GpuMat;
-
-ILW::ILW(nlohmann::json &config) : ftl::Configurable(config) {
-
-}
-
-ILW::~ILW() {
-
-}
-
-bool ILW::process(ftl::rgbd::FrameSet &fs, cudaStream_t stream) {
-    _phase0(fs, stream);
-
-    //for (int i=0; i<2; ++i) {
-        _phase1(fs, stream);
-        //for (int j=0; j<3; ++j) {
-        //    _phase2(fs);
-        //}
-
-		// TODO: Break if no time left
-    //}
-
-    return true;
-}
-
-bool ILW::_phase0(ftl::rgbd::FrameSet &fs, cudaStream_t stream) {
-    // Make points channel...
-    for (size_t i=0; i<fs.frames.size(); ++i) {
-		auto &f = fs.frames[i];
-		auto *s = fs.sources[i];
-
-		if (f.empty(Channel::Depth + Channel::Colour)) {
-			LOG(ERROR) << "Missing required channel";
-			continue;
-		}
-			
-        auto &t = f.createTexture<float4>(Channel::Points, Format<float4>(f.get<GpuMat>(Channel::Colour).size()));
-        auto pose = MatrixConversion::toCUDA(s->getPose().cast<float>()); //.inverse());
-        ftl::cuda::point_cloud(t, f.createTexture<float>(Channel::Depth), s->parameters(), pose, stream);
-
-        // TODO: Create energy vector texture and clear it
-        // Create energy and clear it
-
-        // Convert colour from BGR to BGRA if needed
-		if (f.get<GpuMat>(Channel::Colour).type() == CV_8UC3) {
-			// Convert to 4 channel colour
-			auto &col = f.get<GpuMat>(Channel::Colour);
-			GpuMat tmp(col.size(), CV_8UC4);
-			cv::cuda::swap(col, tmp);
-			cv::cuda::cvtColor(tmp,col, cv::COLOR_BGR2BGRA);
-		}
-
-        f.createTexture<float4>(Channel::EnergyVector, Format<float4>(f.get<GpuMat>(Channel::Colour).size()));
-        f.createTexture<float>(Channel::Energy, Format<float>(f.get<GpuMat>(Channel::Colour).size()));
-        f.createTexture<uchar4>(Channel::Colour);
-
-		cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream);
-
-		f.get<GpuMat>(Channel::EnergyVector).setTo(cv::Scalar(0.0f,0.0f,0.0f,0.0f), cvstream);
-		f.get<GpuMat>(Channel::Energy).setTo(cv::Scalar(0.0f), cvstream);
-    }
-
-    return true;
-}
-
-bool ILW::_phase1(ftl::rgbd::FrameSet &fs, cudaStream_t stream) {
-    // Run correspondence kernel to create an energy vector
-
-	// For each camera combination
-    for (size_t i=0; i<fs.frames.size(); ++i) {
-        for (size_t j=0; j<fs.frames.size(); ++j) {
-            if (i == j) continue;
-
-            LOG(INFO) << "Running phase1";
-
-            auto &f1 = fs.frames[i];
-            auto &f2 = fs.frames[j];
-            //auto s1 = fs.frames[i];
-            auto s2 = fs.sources[j];
-
-            auto pose = MatrixConversion::toCUDA(s2->getPose().cast<float>().inverse());
-
-            try {
-            //Calculate energy vector to best correspondence
-            ftl::cuda::correspondence_energy_vector(
-                f1.getTexture<float4>(Channel::Points),
-                f2.getTexture<float4>(Channel::Points),
-                f1.getTexture<uchar4>(Channel::Colour),
-                f2.getTexture<uchar4>(Channel::Colour),
-                // TODO: Add normals and other things...
-                f1.getTexture<float4>(Channel::EnergyVector),
-                f1.getTexture<float>(Channel::Energy),
-                pose,
-                s2->parameters(),
-                stream
-            );
-            } catch (ftl::exception &e) {
-                LOG(ERROR) << "Exception in correspondence: " << e.what();
-            }
-
-            LOG(INFO) << "Correspondences done... " << i;
-        }
-    }
-
-    return true;
-}
-
-bool ILW::_phase2(ftl::rgbd::FrameSet &fs) {
-    // Run energies and motion kernel
-
-    return true;
-}
diff --git a/applications/reconstruct/src/ilw.cu b/applications/reconstruct/src/ilw.cu
deleted file mode 100644
index 999b5ec9031eed08fc4bc527471961c3236d7445..0000000000000000000000000000000000000000
--- a/applications/reconstruct/src/ilw.cu
+++ /dev/null
@@ -1,90 +0,0 @@
-#include "ilw_cuda.hpp"
-
-using ftl::cuda::TextureObject;
-using ftl::rgbd::Camera;
-
-#define WARP_SIZE 32
-#define T_PER_BLOCK 8
-#define FULL_MASK 0xffffffff
-
-__device__ inline float warpMax(float e) {
-	for (int i = WARP_SIZE/2; i > 0; i /= 2) {
-		const float other = __shfl_xor_sync(FULL_MASK, e, i, WARP_SIZE);
-		e = max(e, other);
-	}
-	return e;
-}
-
-__global__ void correspondence_energy_vector_kernel(
-        TextureObject<float4> p1,
-        TextureObject<float4> p2,
-        TextureObject<uchar4> c1,
-        TextureObject<uchar4> c2,
-        TextureObject<float4> vout,
-        TextureObject<float> eout,
-        float4x4 pose2,  // Inverse
-        Camera cam2) {
-
-    // Each warp picks point in p1
-    const int tid = (threadIdx.x + threadIdx.y * blockDim.x);
-	const int x = (blockIdx.x*blockDim.x + threadIdx.x) / WARP_SIZE;
-    const int y = blockIdx.y*blockDim.y + threadIdx.y;
-    
-	const float3 world1 = make_float3(p1.tex2D(x, y));
-	if (world1.x == MINF) {
-        vout(x,y) = make_float4(0.0f);
-        eout(x,y) = 0.0f;
-        return;
-    }
-    const float3 camPos2 = pose2 * world1;
-	const uint2 screen2 = cam2.camToScreen<uint2>(camPos2);
-
-    const int upsample = 8;
-
-    // Project to p2 using cam2
-    // Each thread takes a possible correspondence and calculates a weighting
-    const int lane = tid % WARP_SIZE;
-	for (int i=lane; i<upsample*upsample; i+=WARP_SIZE) {
-		const float u = (i % upsample) - (upsample / 2);
-        const float v = (i / upsample) - (upsample / 2);
-        
-		const float3 world2 = make_float3(p2.tex2D(screen2.x+u, screen2.y+v));
-		if (world2.x == MINF) continue;
-
-        // Determine degree of correspondence
-        const float confidence = 1.0f / length(world1 - world2);
-        const float maxconf = warpMax(confidence);
-
-        // This thread has best confidence value
-        if (maxconf == confidence) {
-            vout(x,y) = vout.tex2D(x, y) + make_float4(
-                (world1.x - world2.x) * maxconf,
-                (world1.y - world2.y) * maxconf,
-                (world1.z - world2.z) * maxconf,
-                maxconf);
-            eout(x,y) = eout.tex2D(x,y) + length(world1 - world2)*maxconf;
-        }
-    }
-}
-
-void ftl::cuda::correspondence_energy_vector(
-        TextureObject<float4> &p1,
-        TextureObject<float4> &p2,
-        TextureObject<uchar4> &c1,
-        TextureObject<uchar4> &c2,
-        TextureObject<float4> &vout,
-        TextureObject<float> &eout,
-        float4x4 &pose2,
-        const Camera &cam2,
-        cudaStream_t stream) {
-
-    const dim3 gridSize((p1.width() + 2 - 1)/2, (p1.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
-    const dim3 blockSize(2*WARP_SIZE, T_PER_BLOCK);
-
-    printf("COR SIZE %d,%d\n", p1.width(), p1.height());
-
-    correspondence_energy_vector_kernel<<<gridSize, blockSize, 0, stream>>>(
-        p1, p2, c1, c2, vout, eout, pose2, cam2
-    );
-    cudaSafeCall( cudaGetLastError() );
-}
diff --git a/applications/reconstruct/src/main.cpp b/applications/reconstruct/src/main.cpp
index 9ed20069fc53a5e72d08b1947c08a285a5c788e1..4da112c8ac13d09c974e783a9fd3c60970499e20 100644
--- a/applications/reconstruct/src/main.cpp
+++ b/applications/reconstruct/src/main.cpp
@@ -30,6 +30,7 @@
 #include <opencv2/opencv.hpp>
 #include <ftl/net/universe.hpp>
 
+#include "filters/smoothing.hpp"
 #include <ftl/registration.hpp>
 
 #include <cuda_profiler_api.h>
@@ -268,6 +269,37 @@ static void run(ftl::Configurable *root) {
 
 			UNIQUE_LOCK(scene_A.mtx, lk);
 
+			cv::cuda::GpuMat tmp;
+			float factor = align->value("smooth_factor", 0.04f);
+			float colour_limit = align->value("colour_limit", 50.0f);
+			bool do_smooth = align->value("pre_smooth", false);
+
+			if (do_smooth) {
+				// Presmooth...
+				for (int i=0; i<scene_A.frames.size(); ++i) {
+					auto &f = scene_A.frames[i];
+					auto s = scene_A.sources[i];
+
+					// Convert colour from BGR to BGRA if needed
+					if (f.get<cv::cuda::GpuMat>(Channel::Colour).type() == CV_8UC3) {
+						//cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream);
+						// Convert to 4 channel colour
+						auto &col = f.get<cv::cuda::GpuMat>(Channel::Colour);
+						tmp.create(col.size(), CV_8UC4);
+						cv::cuda::swap(col, tmp);
+						cv::cuda::cvtColor(tmp,col, cv::COLOR_BGR2BGRA, 0);
+					}
+
+					ftl::cuda::depth_smooth(
+						f.createTexture<float>(Channel::Depth),
+						f.createTexture<uchar4>(Channel::Colour),
+						f.createTexture<float>(Channel::Depth2, ftl::rgbd::Format<float>(f.get<cv::cuda::GpuMat>(Channel::Depth).size())),
+						s->parameters(),
+						1, factor, colour_limit, 0
+					);
+				}
+			}
+
 			// Send all frames to GPU, block until done?
 			//scene_A.upload(Channel::Colour + Channel::Depth);  // TODO: (Nick) Add scene stream.
 			align->process(scene_A);