diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0f866eda014e6b6353824435cbacbc500fe2c683..af6a4173c4f2a08d18b2bb1f3a68336898240fa5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -225,6 +225,7 @@ add_subdirectory(components/codecs)
 add_subdirectory(components/net)
 add_subdirectory(components/rgbd-sources)
 add_subdirectory(components/control/cpp)
+add_subdirectory(components/filters)
 add_subdirectory(applications/calibration)
 add_subdirectory(applications/groupview)
 add_subdirectory(applications/player)
diff --git a/applications/gui/src/camera.cpp b/applications/gui/src/camera.cpp
index cca8a40f35c6e679597185668661fe4a814e504f..debb8e73d5cf6f1f1bc1d685bb7298350014614c 100644
--- a/applications/gui/src/camera.cpp
+++ b/applications/gui/src/camera.cpp
@@ -433,6 +433,7 @@ const GLTexture &ftl::gui::Camera::captureFrame() {
 		cv::Mat tmp;
 
 		switch(channel_) {
+			case Channel::Smoothing:
 			case Channel::Confidence:
 				if (im2_.rows == 0) { break; }
 				visualizeEnergy(im2_, tmp, 1.0);
@@ -462,7 +463,7 @@ const GLTexture &ftl::gui::Camera::captureFrame() {
 				texture2_.update(tmp);*/
 				break;
 
-		case Channel::Flow:
+		//case Channel::Flow:
 		case Channel::Normals:
 		case Channel::Right:
 				if (im2_.rows == 0 || im2_.type() != CV_8UC3) { break; }
diff --git a/applications/reconstruct/CMakeLists.txt b/applications/reconstruct/CMakeLists.txt
index 931802aedce15c0f94617385d05aeed141d24fdd..1896417ead0e05d137e818af2338e243753a059a 100644
--- a/applications/reconstruct/CMakeLists.txt
+++ b/applications/reconstruct/CMakeLists.txt
@@ -20,7 +20,6 @@ set(REPSRC
 	src/ilw/fill.cu
 	src/ilw/discontinuity.cu
 	src/ilw/correspondence.cu
-	src/filters/smoothing.cu
 )
 
 add_executable(ftl-reconstruct ${REPSRC})
@@ -37,6 +36,6 @@ set_property(TARGET ftl-reconstruct PROPERTY CUDA_SEPARABLE_COMPILATION ON)
 endif()
 
 #target_include_directories(cv-node PUBLIC ${PROJECT_SOURCE_DIR}/include)
-target_link_libraries(ftl-reconstruct ftlcommon ftlrgbd Threads::Threads ${OpenCV_LIBS} ftlctrl ftlnet ftlrender)
+target_link_libraries(ftl-reconstruct ftlcommon ftlrgbd Threads::Threads ${OpenCV_LIBS} ftlctrl ftlnet ftlrender ftlfilter)
 
 
diff --git a/applications/reconstruct/src/filters/smoothing.cu b/applications/reconstruct/src/filters/smoothing.cu
deleted file mode 100644
index 21f30cafda69682e15c987b692a8f712e4373d7d..0000000000000000000000000000000000000000
--- a/applications/reconstruct/src/filters/smoothing.cu
+++ /dev/null
@@ -1,85 +0,0 @@
-#include "smoothing.hpp"
-
-#include <ftl/cuda/weighting.hpp>
-
-#define T_PER_BLOCK 8
-
-template <int RADIUS>
-__global__ void depth_smooth_kernel(
-		ftl::cuda::TextureObject<float> depth_in,
-		ftl::cuda::TextureObject<uchar4> colour_in,
-		ftl::cuda::TextureObject<float> depth_out,
-		ftl::rgbd::Camera camera,
-		float factor, float thresh) {
-
-	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
-	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
-
-	if (x < depth_in.width() && y < depth_in.height()) {
-		float d = depth_in.tex2D((int)x,(int)y);
-		depth_out(x,y) = 0.0f;
-
-		if (d < camera.minDepth || d > camera.maxDepth) return;
-
-		uchar4 c = colour_in.tex2D((int)x, (int)y);
-		float3 pos = camera.screenToCam(x,y,d);
-
-		float contrib = 0.0f;
-		float new_depth = 0.0f;
-
-		for (int v=-RADIUS; v<=RADIUS; ++v) {
-			for (int u=-RADIUS; u<=RADIUS; ++u) {
-				// Get colour difference to center
-				const uchar4 cN = colour_in.tex2D((int)x+u, (int)y+v);
-				const float colourWeight = ftl::cuda::colourWeighting(c, cN, thresh);
-				const float dN = depth_in.tex2D((int)x + u, (int)y + v);
-				const float3 posN = camera.screenToCam(x+u, y+v, dN);
-				const float weight = ftl::cuda::spatialWeighting(posN, pos, factor * colourWeight);
-				
-				contrib += weight;
-				new_depth += dN * weight;
-			}
-		}
-
-		if (contrib > 0.0f) {
-			depth_out(x,y) = new_depth / contrib;
-		}
-	}
-}
-
-void ftl::cuda::depth_smooth(
-		ftl::cuda::TextureObject<float> &depth_in,
-		ftl::cuda::TextureObject<uchar4> &colour_in,
-		ftl::cuda::TextureObject<float> &depth_out,
-		const ftl::rgbd::Camera &camera,
-		int radius, float factor, float thresh, int iters, cudaStream_t stream) {
-
-	const dim3 gridSize((depth_out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
-	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
-
-	for (int n=0; n<iters; ++n) {
-		switch (radius) {
-		case 5 :	depth_smooth_kernel<5><<<gridSize, blockSize, 0, stream>>>(depth_in, colour_in, depth_out, camera, factor, thresh); break;
-		case 4 :	depth_smooth_kernel<4><<<gridSize, blockSize, 0, stream>>>(depth_in, colour_in, depth_out, camera, factor, thresh); break;
-		case 3 :	depth_smooth_kernel<3><<<gridSize, blockSize, 0, stream>>>(depth_in, colour_in, depth_out, camera, factor, thresh); break;
-		case 2 :	depth_smooth_kernel<2><<<gridSize, blockSize, 0, stream>>>(depth_in, colour_in, depth_out, camera, factor, thresh); break;
-		case 1 :	depth_smooth_kernel<1><<<gridSize, blockSize, 0, stream>>>(depth_in, colour_in, depth_out, camera, factor, thresh); break;
-		default:	break;
-		}
-		cudaSafeCall( cudaGetLastError() );
-
-		switch (radius) {
-		case 5 :	depth_smooth_kernel<5><<<gridSize, blockSize, 0, stream>>>(depth_out, colour_in, depth_in, camera, factor, thresh); break;
-		case 4 :	depth_smooth_kernel<4><<<gridSize, blockSize, 0, stream>>>(depth_out, colour_in, depth_in, camera, factor, thresh); break;
-		case 3 :	depth_smooth_kernel<3><<<gridSize, blockSize, 0, stream>>>(depth_out, colour_in, depth_in, camera, factor, thresh); break;
-		case 2 :	depth_smooth_kernel<2><<<gridSize, blockSize, 0, stream>>>(depth_out, colour_in, depth_in, camera, factor, thresh); break;
-		case 1 :	depth_smooth_kernel<1><<<gridSize, blockSize, 0, stream>>>(depth_out, colour_in, depth_in, camera, factor, thresh); break;
-		default:	break;
-		}
-		cudaSafeCall( cudaGetLastError() );
-	}
-
-#ifdef _DEBUG
-	cudaSafeCall(cudaDeviceSynchronize());
-#endif
-}
diff --git a/applications/reconstruct/src/main.cpp b/applications/reconstruct/src/main.cpp
index e6a7d2d8b3e7f39bb47bbd3b43b4cd3ce166db38..a3e5504a41b11ef0f2b020710b88dae050b0d9c1 100644
--- a/applications/reconstruct/src/main.cpp
+++ b/applications/reconstruct/src/main.cpp
@@ -30,7 +30,8 @@
 #include <opencv2/opencv.hpp>
 #include <ftl/net/universe.hpp>
 
-#include "filters/smoothing.hpp"
+#include <ftl/filters/smoothing.hpp>
+#include <ftl/cuda/normals.hpp>
 #include <ftl/registration.hpp>
 
 #include <cuda_profiler_api.h>
@@ -245,11 +246,11 @@ static void run(ftl::Configurable *root) {
 
 	bool busy = false;
 
-	auto *filter = ftl::config::create<ftl::Configurable>(root, "filters");
+	auto *smooth = ftl::config::create<ftl::DepthSmoother>(root, "filters");
 
 	group->setLatency(4);
 	group->setName("ReconGroup");
-	group->sync([splat,virt,&busy,&slave,&scene_A,&scene_B,&align,controls,filter](ftl::rgbd::FrameSet &fs) -> bool {
+	group->sync([splat,virt,&busy,&slave,&scene_A,&scene_B,&align,controls,smooth](ftl::rgbd::FrameSet &fs) -> bool {
 		//cudaSetDevice(scene->getCUDADevice());
 
 		//if (slave.isPaused()) return true;
@@ -264,7 +265,7 @@ static void run(ftl::Configurable *root) {
 		// Swap the entire frameset to allow rapid return
 		fs.swapTo(scene_A);
 
-		ftl::pool.push([&scene_B,&scene_A,&busy,&slave,&align, filter](int id) {
+		ftl::pool.push([&scene_B,&scene_A,&busy,&slave,&align, smooth](int id) {
 			//cudaSetDevice(scene->getCUDADevice());
 			// TODO: Release frameset here...
 			//cudaSafeCall(cudaStreamSynchronize(scene->getIntegrationStream()));
@@ -272,13 +273,14 @@ static void run(ftl::Configurable *root) {
 			UNIQUE_LOCK(scene_A.mtx, lk);
 
 			cv::cuda::GpuMat tmp;
-			float factor = filter->value("smooth_factor", 0.4f);
+			/*float factor = filter->value("smooth_factor", 0.4f);
 			float colour_limit = filter->value("colour_limit", 30.0f);
 			bool do_smooth = filter->value("pre_smooth", false);
 			int iters = filter->value("iterations", 3);
 			int radius = filter->value("radius", 5);
+			float var_thesh = filter->value("variance_threshold", 0.02f);*/
 
-			if (do_smooth) {
+			//if (do_smooth) {
 				// Presmooth...
 				for (int i=0; i<scene_A.frames.size(); ++i) {
 					auto &f = scene_A.frames[i];
@@ -294,15 +296,27 @@ static void run(ftl::Configurable *root) {
 						cv::cuda::cvtColor(tmp,col, cv::COLOR_BGR2BGRA, 0);
 					}
 
-					ftl::cuda::depth_smooth(
+					smooth->smooth(f, s);
+
+					/*ftl::cuda::smoothing_factor(
+						f.createTexture<float>(Channel::Depth),
+						f.createTexture<float>(Channel::Depth2, ftl::rgbd::Format<float>(f.get<cv::cuda::GpuMat>(Channel::Depth).size())),
+						f.createTexture<float>(Channel::Energy, ftl::rgbd::Format<float>(f.get<cv::cuda::GpuMat>(Channel::Depth).size())),
+						//f.createTexture<uchar4>(Channel::Colour),
+						f.createTexture<float>(Channel::Smoothing, ftl::rgbd::Format<float>(f.get<cv::cuda::GpuMat>(Channel::Depth).size())),
+						var_thesh,
+						s->parameters(), 0
+					);*/
+
+					/*ftl::cuda::depth_smooth(
 						f.createTexture<float>(Channel::Depth),
 						f.createTexture<uchar4>(Channel::Colour),
 						f.createTexture<float>(Channel::Depth2, ftl::rgbd::Format<float>(f.get<cv::cuda::GpuMat>(Channel::Depth).size())),
 						s->parameters(),
 						radius, factor, colour_limit, iters, 0
-					);
+					);*/
 				}
-			}
+			//}
 
 			// Send all frames to GPU, block until done?
 			//scene_A.upload(Channel::Colour + Channel::Depth);  // TODO: (Nick) Add scene stream.
diff --git a/components/codecs/include/ftl/codecs/channels.hpp b/components/codecs/include/ftl/codecs/channels.hpp
index 11262579e73d6d62424f2515ce021265d94ea529..85cd9fffc45ddcda1b6cc13a160c74c30797b804 100644
--- a/components/codecs/include/ftl/codecs/channels.hpp
+++ b/components/codecs/include/ftl/codecs/channels.hpp
@@ -24,6 +24,7 @@ enum struct Channel : int {
     Contribution	= 7,	// 32F
     EnergyVector	= 8,	// 32FC4
     Flow			= 9,	// 32F
+    Smoothing       = 9,    // 32F, shares the value 9 with Flow so the two names alias
     Energy			= 10,	// 32F
 	Mask			= 11,	// 32U
 	Density			= 12,	// 32F
@@ -124,6 +125,7 @@ inline bool isFloatChannel(ftl::codecs::Channel chan) {
 	case Channel::Depth		:
     //case Channel::Normals   :
 	case Channel::Confidence:
+    case Channel::Flow      :
 	case Channel::Density:
 	case Channel::Energy	: return true;
 	default					: return false;
diff --git a/components/filters/CMakeLists.txt b/components/filters/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..92601b3714ac5e166d4e4f8eecf22c8f04edc58f
--- /dev/null
+++ b/components/filters/CMakeLists.txt
@@ -0,0 +1,16 @@
+add_library(ftlfilter
+    src/smoothing.cpp
+    src/smoothing.cu
+)
+
+# PCL_DEFINITIONS cause errors in CI build and are being removed from PCL in newer versions
+# target_compile_options(ftlfilter PUBLIC ${PCL_DEFINITIONS})
+
+target_include_directories(ftlfilter PUBLIC
+	${PCL_INCLUDE_DIRS}
+	$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
+	$<INSTALL_INTERFACE:include>
+	PRIVATE src)
+target_link_libraries(ftlfilter ftlrender ftlrgbd ftlcommon Eigen3::Eigen Threads::Threads ${OpenCV_LIBS})
+
+#ADD_SUBDIRECTORY(test)
diff --git a/components/filters/include/ftl/filters/smoothing.hpp b/components/filters/include/ftl/filters/smoothing.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..5e5035f58981107d94f20ad948afe80297254edb
--- /dev/null
+++ b/components/filters/include/ftl/filters/smoothing.hpp
@@ -0,0 +1,25 @@
+#ifndef _FTL_SMOOTHING_HPP_
+#define _FTL_SMOOTHING_HPP_
+
+#include <ftl/configurable.hpp>
+#include <ftl/cuda_common.hpp>
+#include <ftl/rgbd/source.hpp>
+#include <ftl/rgbd/frame.hpp>
+
+namespace ftl {
+
+class DepthSmoother : public ftl::Configurable {  // configurable depth smoothing filter
+    public:
+    explicit DepthSmoother(nlohmann::json &config);
+    ~DepthSmoother();
+    // Smooths the depth of `frame`, taking camera parameters from `src`; see smoothing.cpp.
+    void smooth(ftl::rgbd::Frame &frame, ftl::rgbd::Source *src);
+
+    private:
+    cv::cuda::GpuMat temp_;        // resize target for a level's Smoothing result
+    ftl::rgbd::Frame frames_[4];   // one reduced resolution working frame per level
+};
+
+}
+
+#endif  // _FTL_SMOOTHING_HPP_
diff --git a/components/filters/src/smoothing.cpp b/components/filters/src/smoothing.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..edd6a072fbb51cc498b243c8530f3abdb550e592
--- /dev/null
+++ b/components/filters/src/smoothing.cpp
@@ -0,0 +1,55 @@
+#include <ftl/filters/smoothing.hpp>
+#include "smoothing_cuda.hpp"
+
+using ftl::DepthSmoother;
+using ftl::codecs::Channel;
+using cv::cuda::GpuMat;
+
+DepthSmoother::DepthSmoother(nlohmann::json &config) : ftl::Configurable(config) {
+    // No extra setup: config is forwarded to the ftl::Configurable base class.
+}
+
+DepthSmoother::~DepthSmoother() {
+    // Nothing is released explicitly here.
+}
+
+// Runs the smoothing kernels on the Depth channel of f when "pre_smooth" is set; s supplies the camera parameters.
+void DepthSmoother::smooth(ftl::rgbd::Frame &f, ftl::rgbd::Source *s) {
+    float var_thresh = value("variance_threshold", 0.0002f);
+    const bool enabled = value("pre_smooth", false);
+    const int levels = max(0, min(value("levels",0), 4));  // frames_ has 4 slots
+    const int iters = value("iterations",5);
+    if (!enabled) return;
+
+    // Full resolution passes.
+    for (int pass=0; pass<iters; ++pass) {
+        const cv::Size full_size = f.get<cv::cuda::GpuMat>(Channel::Depth).size();
+        ftl::cuda::smoothing_factor(
+            f.createTexture<float>(Channel::Depth),
+            f.createTexture<float>(Channel::Energy, ftl::rgbd::Format<float>(full_size)),
+            f.createTexture<float>(Channel::Smoothing, ftl::rgbd::Format<float>(full_size)),
+            var_thresh, s->parameters(), 0);
+    }
+    // NOTE(review): INFO log on every enabled call - looks like debug output, confirm it should stay.
+    LOG(INFO) << "PARAMS DEPTHS  " << s->parameters().minDepth << "," << s->parameters().maxDepth;
+
+    // Reduced resolution passes; each level's Smoothing result is resized and added to that of f.
+    // NOTE(review): the size divisors are 2, 4, 6, 8 rather than powers of two - confirm intent.
+    for (int lvl=0; lvl<levels; ++lvl) {
+        var_thresh *= 2.0f;
+        const int divisor = 2*(lvl+1);
+        auto &full_depth = f.get<GpuMat>(Channel::Depth);
+        cv::cuda::resize(full_depth, frames_[lvl].create<GpuMat>(Channel::Depth), cv::Size(full_depth.cols / divisor, full_depth.rows / divisor), 0.0, 0.0, cv::INTER_NEAREST);
+
+        const cv::Size lvl_size = frames_[lvl].get<GpuMat>(Channel::Depth).size();
+        ftl::cuda::smoothing_factor(
+            frames_[lvl].createTexture<float>(Channel::Depth),
+            frames_[lvl].createTexture<float>(Channel::Energy, ftl::rgbd::Format<float>(lvl_size)),
+            frames_[lvl].createTexture<float>(Channel::Smoothing, ftl::rgbd::Format<float>(lvl_size)),
+            var_thresh, s->parameters(), 0);
+
+        cv::cuda::resize(frames_[lvl].get<GpuMat>(Channel::Smoothing), temp_, f.get<cv::cuda::GpuMat>(Channel::Depth).size(), 0.0, 0.0, cv::INTER_LINEAR);
+        cv::cuda::add(temp_, f.get<GpuMat>(Channel::Smoothing), f.get<GpuMat>(Channel::Smoothing));
+    }
+}
+
diff --git a/components/filters/src/smoothing.cu b/components/filters/src/smoothing.cu
new file mode 100644
index 0000000000000000000000000000000000000000..43cb1f6bd0a56760aa2c6d4d3fff4c664631c404
--- /dev/null
+++ b/components/filters/src/smoothing.cu
@@ -0,0 +1,258 @@
+#include "smoothing_cuda.hpp"
+
+#include <ftl/cuda/weighting.hpp>
+
+using ftl::cuda::TextureObject;
+
+#define T_PER_BLOCK 8
+
+// Colour guided depth smoothing over a (2*RADIUS+1)^2 window, one thread per pixel of depth_in.
+template <int RADIUS>
+__global__ void depth_smooth_kernel(
+		ftl::cuda::TextureObject<float> depth_in,
+		ftl::cuda::TextureObject<uchar4> colour_in,
+		ftl::cuda::TextureObject<float> depth_out,
+		ftl::rgbd::Camera camera,
+		float factor, float thresh) {
+	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
+	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	// Threads beyond the image edge have nothing to do.
+	if (!(x < depth_in.width() && y < depth_in.height())) return;
+
+	float d = depth_in.tex2D((int)x,(int)y);
+	depth_out(x,y) = 0.0f;  // stays zero unless a weighted average is produced below
+
+	// A centre depth outside [minDepth, maxDepth] keeps the zero output.
+	if (d < camera.minDepth || d > camera.maxDepth) return;
+
+	uchar4 centre_colour = colour_in.tex2D((int)x, (int)y);
+	float3 centre_pos = camera.screenToCam(x,y,d);
+
+	float weight_sum = 0.0f;
+	float depth_sum = 0.0f;
+
+	for (int v=-RADIUS; v<=RADIUS; ++v) {
+		for (int u=-RADIUS; u<=RADIUS; ++u) {
+			// The colour weighting against the centre scales the spatial weighting parameter.
+			const uchar4 cN = colour_in.tex2D((int)x+u, (int)y+v);
+			const float dN = depth_in.tex2D((int)x + u, (int)y + v);
+			const float3 posN = camera.screenToCam(x+u, y+v, dN);
+			const float scale = factor * ftl::cuda::colourWeighting(centre_colour, cN, thresh);
+			const float w = ftl::cuda::spatialWeighting(posN, centre_pos, scale);
+
+			weight_sum += w;
+			depth_sum += dN * w;
+		}
+	}
+
+	if (weight_sum > 0.0f) depth_out(x,y) = depth_sum / weight_sum;
+}
+
+// Runs one src -> dst pass of depth_smooth_kernel; radii outside 1..5 launch nothing.
+static void depth_smooth_pass(
+		ftl::cuda::TextureObject<float> &src, ftl::cuda::TextureObject<uchar4> &colour,
+		ftl::cuda::TextureObject<float> &dst, const ftl::rgbd::Camera &camera,
+		int radius, float factor, float thresh,
+		const dim3 &gridSize, const dim3 &blockSize, cudaStream_t stream) {
+	switch (radius) {
+	case 5 :	depth_smooth_kernel<5><<<gridSize, blockSize, 0, stream>>>(src, colour, dst, camera, factor, thresh); break;
+	case 4 :	depth_smooth_kernel<4><<<gridSize, blockSize, 0, stream>>>(src, colour, dst, camera, factor, thresh); break;
+	case 3 :	depth_smooth_kernel<3><<<gridSize, blockSize, 0, stream>>>(src, colour, dst, camera, factor, thresh); break;
+	case 2 :	depth_smooth_kernel<2><<<gridSize, blockSize, 0, stream>>>(src, colour, dst, camera, factor, thresh); break;
+	case 1 :	depth_smooth_kernel<1><<<gridSize, blockSize, 0, stream>>>(src, colour, dst, camera, factor, thresh); break;
+	default:	break;
+	}
+	cudaSafeCall( cudaGetLastError() );
+}
+
+// Smooths depth_in with `iters` ping-pong rounds (in -> out -> in); the result ends up back in depth_in.
+void ftl::cuda::depth_smooth(
+		ftl::cuda::TextureObject<float> &depth_in,
+		ftl::cuda::TextureObject<uchar4> &colour_in,
+		ftl::cuda::TextureObject<float> &depth_out,
+		const ftl::rgbd::Camera &camera,
+		int radius, float factor, float thresh, int iters, cudaStream_t stream) {
+	const dim3 gridSize((depth_out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
+
+	for (int n=0; n<iters; ++n) {
+		depth_smooth_pass(depth_in, colour_in, depth_out, camera, radius, factor, thresh, gridSize, blockSize, stream);
+		depth_smooth_pass(depth_out, colour_in, depth_in, camera, radius, factor, thresh, gridSize, blockSize, stream);
+	}
+
+#ifdef _DEBUG
+	cudaSafeCall(cudaDeviceSynchronize());
+#endif
+}
+
+// ==== SMOOTHING FACTOR ==========
+
+// Mean of the two samples of d at offsets (x1,y1) and (x2,y2) from pixel (x,y).
+template <bool DERIV>
+__device__ inline float getAverage(const ftl::rgbd::Camera &cam, float dd, const TextureObject<float> &d, int x, int y, int x1, int y1, int x2, int y2);
+// DERIV == false: returns dd instead when either sample lies outside (minDepth, maxDepth].
+template <>
+__device__ inline float getAverage<false>(const ftl::rgbd::Camera &cam, float dd, const TextureObject<float> &d, int x, int y, int x1, int y1, int x2, int y2) {
+	const float first = d.tex2D(x+x1,y+y1);
+	const float second = d.tex2D(x+x2,y+y2);
+	if (first <= cam.minDepth || first > cam.maxDepth || second <= cam.minDepth || second > cam.maxDepth) return dd;
+	return (first+second) / 2.0f;
+}
+// DERIV == true: plain mean of the two samples, no range check.
+template <>
+__device__ inline float getAverage<true>(const ftl::rgbd::Camera &cam, float dd, const TextureObject<float> &d, int x, int y, int x1, int y1, int x2, int y2) {
+	return (d.tex2D(x+x1,y+y1) + d.tex2D(x+x2,y+y2)) / 2.0f;
+}
+
+__device__ inline void absmin(float &minvar, float v) {
+	minvar = (fabs(v) < fabs(minvar)) ? v : minvar;  // keep whichever has the smaller magnitude
+}
+
+/**
+ * For each pixel of depth_in, stores in `smoothing` the signed difference between
+ * the pixel and the mean of one opposing pair of its 8 neighbours, choosing the
+ * pair whose difference has the smallest magnitude.
+ *
+ * With DERIV == false a pair containing a sample outside the camera depth range
+ * contributes a difference of zero (getAverage returns the pixel's own value);
+ * with DERIV == true the pair mean is used unconditionally.
+ * Expects a 2D launch with at least one thread per pixel of depth_in.
+ */
+template <bool DERIV>
+__global__ void smoothing_factor_kernel(
+		ftl::cuda::TextureObject<float> depth_in,
+		ftl::cuda::TextureObject<float> smoothing,
+		ftl::rgbd::Camera camera) {
+
+	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
+	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	if (x < depth_in.width() && y < depth_in.height()) {
+		// The pixel's own depth is not range checked.
+		float d = depth_in.tex2D((int)x,(int)y);
+
+		// Starting value: a difference with magnitude 10 or more never replaces
+		// it, so 10 is written when no pair is closer than that.
+		float min_var = 10.0f;
+
+		// Pair (-1,-1) / (1,1)
+		absmin(min_var, d - getAverage<DERIV>(camera, d, depth_in, x, y, -1, -1, 1, 1));
+		// Pair (0,-1) / (0,1)
+		absmin(min_var, d - getAverage<DERIV>(camera, d, depth_in, x, y, 0, -1, 0, 1));
+		// Pair (1,-1) / (-1,1)
+		absmin(min_var, d - getAverage<DERIV>(camera, d, depth_in, x, y, 1, -1, -1, 1));
+		// Pair (-1,0) / (1,0)
+		absmin(min_var, d - getAverage<DERIV>(camera, d, depth_in, x, y, -1, 0, 1, 0));
+
+		// Stored as is; no clamping happens in this kernel.
+		smoothing(x,y) = min_var;
+	}
+}
+
+
+// Stores error_value scaled by min(1, |min(in, thresh)| / thresh) into out.
+__global__ void norm_thresh_kernel(
+		ftl::cuda::TextureObject<float> in,
+		ftl::cuda::TextureObject<float> error_value,
+		ftl::cuda::TextureObject<float> out,
+		float thresh) {
+	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
+	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	// Skip threads that fall outside the input image.
+	if (x >= in.width() || y >= in.height()) return;
+
+	const float clamped = min(in.tex2D((int)x,(int)y), thresh);
+	const float strength = min(1.0f, (fabs(clamped) / thresh));
+	out(x,y) = strength * error_value(x,y);
+}
+
+// Subtracts the per pixel value in smooth_strength from depth, in place.
+__global__ void do_smooth_kernel(
+		ftl::cuda::TextureObject<float> smooth_strength,
+		ftl::cuda::TextureObject<float> depth) {
+	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
+	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	// Threads outside the depth image do nothing.
+	if (x >= depth.width() || y >= depth.height()) return;
+
+	depth(x,y) -= smooth_strength(x,y);
+}
+
+// Spatially weighted mean of depth_in over a (2*RADIUS+1)^2 window around each pixel.
+// Writes 0 when the weights sum to zero instead of dividing by zero (as depth_smooth_kernel does).
+template <int RADIUS>
+__global__ void sum_neighbors_kernel(
+		ftl::cuda::TextureObject<float> depth_in,
+		ftl::cuda::TextureObject<float> depth_out,
+		ftl::rgbd::Camera camera, float alpha) {
+	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
+	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	if (x < depth_out.width() && y < depth_out.height()) {
+		float avg = 0.0f;
+		float contrib = 0.0f;
+		float d0 = depth_in.tex2D((int)x, (int)y);
+		float3 pos0 = camera.screenToCam(x,y,d0);
+
+		for (int v=-RADIUS; v<=RADIUS; ++v) {
+			#pragma unroll
+			for (int u=-RADIUS; u<=RADIUS; ++u) {
+				float dN = depth_in.tex2D((int)x + u, (int)y + v);
+				float3 posN = camera.screenToCam(x+u,y+v,dN);
+				float weight = ftl::cuda::spatialWeighting(pos0, posN, alpha);
+				avg += weight * dN;
+				contrib += weight;
+			}
+		}
+		// A zero weight sum would otherwise turn avg / contrib into NaN.
+		depth_out(x,y) = (contrib > 0.0f) ? avg / contrib : 0.0f;
+	}
+}
+
+/**
+ * Runs one in place smoothing step on depth_in as four kernels on `stream`:
+ *   1. smoothing_factor_kernel<false>: depth_in -> temp
+ *   2. smoothing_factor_kernel<true>:  temp -> smoothing
+ *   3. norm_thresh_kernel: smoothing = strength(smoothing, thresh) * temp
+ *   4. do_smooth_kernel:   depth_in -= smoothing
+ *
+ * @param depth_in   Depth texture, modified in place by stage 4.
+ * @param temp       Scratch texture, overwritten by stage 1.
+ * @param smoothing  Receives the value that stage 4 subtracts from depth_in.
+ * @param thresh     Threshold handed to norm_thresh_kernel.
+ * @param camera     Camera parameters handed to the first two kernels.
+ * @param stream     CUDA stream that all four kernels are launched on.
+ *
+ * The launch grid is sized from `smoothing` while the kernels bounds check
+ * against their own textures, so all three are presumably expected to share
+ * one size (TODO confirm against callers).
+ */
+void ftl::cuda::smoothing_factor(
+		ftl::cuda::TextureObject<float> &depth_in,
+		ftl::cuda::TextureObject<float> &temp,
+		ftl::cuda::TextureObject<float> &smoothing,
+		float thresh,
+		const ftl::rgbd::Camera &camera,
+		cudaStream_t stream) {
+
+	const dim3 block(T_PER_BLOCK, T_PER_BLOCK);
+	const dim3 grid(
+		(smoothing.width() + T_PER_BLOCK - 1)/T_PER_BLOCK,
+		(smoothing.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+
+	// All four launches go to the same stream, in the stage order listed above.
+	smoothing_factor_kernel<false><<<grid, block, 0, stream>>>(depth_in, temp, camera);
+	smoothing_factor_kernel<true><<<grid, block, 0, stream>>>(temp, smoothing, camera);
+	norm_thresh_kernel<<<grid, block, 0, stream>>>(smoothing, temp, smoothing, thresh);
+	do_smooth_kernel<<<grid, block, 0, stream>>>(smoothing, depth_in);
+
+	// Single launch error check after the four launches.
+	cudaSafeCall( cudaGetLastError() );
+
+	#ifdef _DEBUG
+	cudaSafeCall(cudaDeviceSynchronize());
+	#endif
+}
diff --git a/applications/reconstruct/src/filters/smoothing.hpp b/components/filters/src/smoothing_cuda.hpp
similarity index 59%
rename from applications/reconstruct/src/filters/smoothing.hpp
rename to components/filters/src/smoothing_cuda.hpp
index d5d6a47053140dc33d5ee97fa606f4b00650036d..da800f883f2e39a44253929fbc7675360e3a543d 100644
--- a/applications/reconstruct/src/filters/smoothing.hpp
+++ b/components/filters/src/smoothing_cuda.hpp
@@ -15,6 +15,16 @@ void depth_smooth(
 	int radius, float factor, float thresh, int iters,
 	cudaStream_t stream);
 
+void smoothing_factor(
+	ftl::cuda::TextureObject<float> &depth_in,  // depth, modified in place
+	// Scratch texture, overwritten on every call.
+	ftl::cuda::TextureObject<float> &temp,
+	// Output: the per pixel value that was subtracted from depth_in.
+	ftl::cuda::TextureObject<float> &smoothing,
+	float thresh,
+	const ftl::rgbd::Camera &camera,
+	cudaStream_t stream);
+
 }
 }
 
diff --git a/components/renderers/cpp/include/ftl/cuda/normals.hpp b/components/renderers/cpp/include/ftl/cuda/normals.hpp
index da2247723206cc9a1167ffbb0bc659ec847208c2..227ab536208b51321131b1e3eb7de98775bed971 100644
--- a/components/renderers/cpp/include/ftl/cuda/normals.hpp
+++ b/components/renderers/cpp/include/ftl/cuda/normals.hpp
@@ -24,6 +24,11 @@ void normals(ftl::cuda::TextureObject<float4> &output,
         const ftl::rgbd::Camera &camera,
         const float3x3 &pose_inv, const float3x3 &pose, cudaStream_t stream);
 
+void normals(ftl::cuda::TextureObject<float4> &output,
+        ftl::cuda::TextureObject<float> &input,  // Float depth values
+        const ftl::rgbd::Camera &camera,
+        cudaStream_t stream);
+
 void normal_visualise(ftl::cuda::TextureObject<float4> &norm,
         ftl::cuda::TextureObject<uchar4> &output,
         const float3 &light, const uchar4 &diffuse, const uchar4 &ambient,
diff --git a/components/renderers/cpp/include/ftl/render/splat_params.hpp b/components/renderers/cpp/include/ftl/render/splat_params.hpp
index 0509ee37f0d85163fde2b462ad0871f8a824070b..7c6b8416896f10aae816565c52813108c80bf385 100644
--- a/components/renderers/cpp/include/ftl/render/splat_params.hpp
+++ b/components/renderers/cpp/include/ftl/render/splat_params.hpp
@@ -18,6 +18,7 @@ struct __align__(16) SplatParams {
 	uint m_flags;
 	//float voxelSize;
 	float depthThreshold;
+	int triangle_limit;
 
 	ftl::rgbd::Camera camera;
 };
diff --git a/components/renderers/cpp/src/normals.cu b/components/renderers/cpp/src/normals.cu
index 976b5804120bac3e000d493e87a90f2ec4b30d1c..7dcdf2f5fc705f78538cd799a196c1630f9dfc42 100644
--- a/components/renderers/cpp/src/normals.cu
+++ b/components/renderers/cpp/src/normals.cu
@@ -63,6 +63,34 @@ __global__ void computeNormals_kernel(ftl::cuda::TextureObject<float4> output,
 	}
 }
 
+// Per pixel normals from float depth using the four axis neighbours; pixels without
+// a usable normal (image border, failed isValid, zero length) are written as (0,0,0,0).
+__global__ void computeNormals_kernel(ftl::cuda::TextureObject<float4> output,
+        ftl::cuda::TextureObject<float> input, ftl::rgbd::Camera camera) {
+    const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
+    const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
+    if(x >= input.width() || y >= input.height()) return;
+
+    output(x,y) = make_float4(0, 0, 0, 0);
+
+    // Border pixels lack a full set of neighbours and keep the zero normal.
+    if(!(x > 0 && x < input.width()-1 && y > 0 && y < input.height()-1)) return;
+
+    const float3 p_c  = camera.screenToCam(x+0, y+0, input.tex2D((int)x+0, (int)y+0));
+    const float3 p_yp = camera.screenToCam(x+0, y+1, input.tex2D((int)x+0, (int)y+1));
+    const float3 p_xp = camera.screenToCam(x+1, y+0, input.tex2D((int)x+1, (int)y+0));
+    const float3 p_ym = camera.screenToCam(x+0, y-1, input.tex2D((int)x+0, (int)y-1));
+    const float3 p_xm = camera.screenToCam(x-1, y+0, input.tex2D((int)x-1, (int)y+0));
+
+    const bool usable = isValid(camera,p_c) && isValid(camera,p_yp) && isValid(camera,p_xp) && isValid(camera,p_ym) && isValid(camera,p_xm);
+    if (!usable) return;
+
+    const float3 n = cross(p_yp-p_ym, p_xp-p_xm);
+    const float  l = length(n);
+
+    if(l > 0.0f) output(x,y) = make_float4((n/-l), 1.0f);
+}
+
 template <int RADIUS>
 __global__ void smooth_normals_kernel(ftl::cuda::TextureObject<float4> norms,
         ftl::cuda::TextureObject<float4> output,
@@ -238,6 +266,22 @@ void ftl::cuda::normals(ftl::cuda::TextureObject<float4> &output,
 	#endif
 }
 
+// Host launcher for the float depth computeNormals_kernel overload on `stream`.
+void ftl::cuda::normals(ftl::cuda::TextureObject<float4> &output,
+        ftl::cuda::TextureObject<float> &input,
+        const ftl::rgbd::Camera &camera,
+        cudaStream_t stream) {
+    const dim3 block(T_PER_BLOCK, T_PER_BLOCK);
+    const dim3 grid((input.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (input.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+
+    computeNormals_kernel<<<grid, block, 0, stream>>>(output, input, camera);
+    cudaSafeCall( cudaGetLastError() );
+
+    #ifdef _DEBUG
+    cudaSafeCall(cudaDeviceSynchronize());  // debug builds wait for the kernel here
+    #endif
+}
+
 //==============================================================================
 
 __global__ void vis_normals_kernel(ftl::cuda::TextureObject<float4> norm,
diff --git a/components/renderers/cpp/src/reprojection.cu b/components/renderers/cpp/src/reprojection.cu
index f07b8956369492cb9002b2cc2068f87894fa817e..c11d23dd137972f51628dd844a76b20e3a25993a 100644
--- a/components/renderers/cpp/src/reprojection.cu
+++ b/components/renderers/cpp/src/reprojection.cu
@@ -93,6 +93,8 @@ __global__ void reprojection_kernel(
     
 	const float d2 = depth_src.tex2D((int)screenPos.x, (int)screenPos.y);
 	const A input = in.tex2D((int)screenPos.x, (int)screenPos.y); //generateInput(in.tex2D((int)screenPos.x, (int)screenPos.y), params, worldPos);
+
+	// TODO: Z checks need to interpolate between neighbors if large triangles are used
 	float weight = ftl::cuda::weighting(fabs(camPos.z - d2), 0.02f);
 
 	/* Buehler C. et al. 2001. Unstructured Lumigraph Rendering. */
diff --git a/components/renderers/cpp/src/tri_render.cpp b/components/renderers/cpp/src/tri_render.cpp
index be84e5a64063210a74e4314f86eb52f22dc650e5..7f93a8f683507d6c5c393bb4b9bfbeedf5cab756 100644
--- a/components/renderers/cpp/src/tri_render.cpp
+++ b/components/renderers/cpp/src/tri_render.cpp
@@ -406,6 +406,8 @@ bool Triangular::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out) {
 
 	// Parameters object to pass to CUDA describing the camera
 	SplatParams &params = params_;
+	params.triangle_limit = value("triangle_limit", 200);
+	params.depthThreshold = value("depth_threshold", 0.04f);
 	params.m_flags = 0;
 	//if () params.m_flags |= ftl::render::kShowDisconMask;
 	if (value("normal_weight_colours", true)) params.m_flags |= ftl::render::kNormalWeightColours;
@@ -478,7 +480,7 @@ bool Triangular::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out) {
 	if (aligned_source >= 0 && aligned_source < scene_->frames.size()) {
 		// FIXME: Output may not be same resolution as source!
 		cudaSafeCall(cudaStreamSynchronize(stream_));
-		scene_->frames[aligned_source].copyTo(Channel::Depth + Channel::Colour, out);
+		scene_->frames[aligned_source].copyTo(Channel::Depth + Channel::Colour + Channel::Smoothing, out);
 
 		if (chan == Channel::Normals) {
 			// Convert normal to single float value
diff --git a/components/renderers/cpp/src/triangle_render.cu b/components/renderers/cpp/src/triangle_render.cu
index cdf42fff9dff9fd1acf3b1cb2e1ea585b22292ef..7311e50b9bbbd7b7ba78f106e360998b937646d8 100644
--- a/components/renderers/cpp/src/triangle_render.cu
+++ b/components/renderers/cpp/src/triangle_render.cu
@@ -121,7 +121,7 @@ float getZAtCoordinate(const float3 &barycentricCoord, const float (&tri)[3]) {
     d[2] = depth_in.tex2D(x,y+B);
 
     // Is this triangle valid
-	if (fabs(d[0] - d[1]) > 0.04f || fabs(d[0] - d[2]) > 0.04f) return;
+	if (fabs(d[0] - d[1]) > params.depthThreshold || fabs(d[0] - d[2]) > params.depthThreshold) return;
 	if (d[0] < params.camera.minDepth || d[0] > params.camera.maxDepth) return;
 
     short2 v[3];
@@ -138,7 +138,7 @@ float getZAtCoordinate(const float3 &barycentricCoord, const float (&tri)[3]) {
 	const int maxY = max(v[0].y, max(v[1].y, v[2].y));
 
 	// Remove really large triangles
-	if ((maxX - minX) * (maxY - minY) > 200) return;
+	if ((maxX - minX) * (maxY - minY) > params.triangle_limit) return;
 
 	for (int sy=minY; sy <= maxY; ++sy) {
 		for (int sx=minX; sx <= maxX; ++sx) {