diff --git a/components/renderers/cpp/include/ftl/cuda/normals.hpp b/components/renderers/cpp/include/ftl/cuda/normals.hpp
index 50f2e881a7c0550652ff49377382476c53f5bd11..f692e02a44b773ad07b7f5f982788f5ff83c7070 100644
--- a/components/renderers/cpp/include/ftl/cuda/normals.hpp
+++ b/components/renderers/cpp/include/ftl/cuda/normals.hpp
@@ -9,6 +9,7 @@ namespace ftl {
 namespace cuda {
 
 void normals(ftl::cuda::TextureObject<float4> &output,
+        ftl::cuda::TextureObject<float4> &temp,
         ftl::cuda::TextureObject<float4> &input, cudaStream_t stream);
 
 void normal_visualise(ftl::cuda::TextureObject<float4> &norm,
diff --git a/components/renderers/cpp/src/normals.cu b/components/renderers/cpp/src/normals.cu
index 22654a3a4bacbd30f3830b3ba25e628c53fdf5c6..040f761b0f42b92ceacfeeca63be843e55e39ac3 100644
--- a/components/renderers/cpp/src/normals.cu
+++ b/components/renderers/cpp/src/normals.cu
@@ -1,4 +1,5 @@
 #include <ftl/cuda/normals.hpp>
+#include <ftl/cuda/weighting.hpp>
 
 #define T_PER_BLOCK 16
 #define MINF __int_as_float(0xff800000)
@@ -30,14 +31,54 @@ __global__ void computeNormals_kernel(ftl::cuda::TextureObject<float4> output,
 	}
 }
 
+template <int RADIUS>
+__global__ void smooth_normals_kernel(ftl::cuda::TextureObject<float4> norms,
+        ftl::cuda::TextureObject<float4> output,
+        ftl::cuda::TextureObject<float4> points, float smoothing) {
+    const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
+    const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+    if(x >= points.width() || y >= points.height()) return;
+
+    const float3 p0 = make_float3(points.tex2D((int)x,(int)y));
+    float3 nsum = make_float3(0.0f);
+    float contrib = 0.0f;
+
+    if (p0.x == MINF) return;
+
+    for (int v=-RADIUS; v<=RADIUS; ++v) {
+        for (int u=-RADIUS; u<=RADIUS; ++u) {
+            const float3 p = make_float3(points.tex2D((int)x+u,(int)y+v));
+            if (p.x == MINF) continue;
+            const float s = ftl::cuda::spatialWeighting(p0, p, smoothing);
+
+            if (s > 0.0f) {
+                const float4 n = norms.tex2D((int)x+u,(int)y+v);
+                if (n.w > 0.0f) {
+                    nsum += make_float3(n) * s;
+                    contrib += s;
+                }
+            }
+        }
+    }
+
+    // FIXME: USE A DIFFERENT OUTPUT BUFFER
+    //__syncthreads();
+    output(x,y) = (contrib > 0.0f) ? make_float4(nsum / contrib, 1.0f) : make_float4(0.0f);
+}
+
 void ftl::cuda::normals(ftl::cuda::TextureObject<float4> &output,
+        ftl::cuda::TextureObject<float4> &temp,
         ftl::cuda::TextureObject<float4> &input, cudaStream_t stream) {
 	const dim3 gridSize((input.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (input.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
 	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
 
-	computeNormals_kernel<<<gridSize, blockSize, 0, stream>>>(output, input);
+	computeNormals_kernel<<<gridSize, blockSize, 0, stream>>>(temp, input);
+    cudaSafeCall( cudaGetLastError() );
 
+    smooth_normals_kernel<1><<<gridSize, blockSize, 0, stream>>>(temp, output, input, 0.04f);
     cudaSafeCall( cudaGetLastError() );
+
 #ifdef _DEBUG
 	cudaSafeCall(cudaDeviceSynchronize());
 	//cutilCheckMsg(__FUNCTION__);
@@ -62,7 +103,7 @@ __global__ void vis_normals_kernel(ftl::cuda::TextureObject<float4> norm,
     if (l == 0) return;
     n /= l;
 
-    output(x,y) = (1.0f + dot(ray, n))*3.5f;
+    output(x,y) = (1.0f + dot(ray, n))*3.5f;  // FIXME: Do not hard code these value scalings
 }
 
 void ftl::cuda::normal_visualise(ftl::cuda::TextureObject<float4> &norm,
diff --git a/components/renderers/cpp/src/splat_render.cpp b/components/renderers/cpp/src/splat_render.cpp
index 50264124bc7a04ffd70831aa726f69c4429857a7..ceaff532557ee274ebf5ec6390f7110d07a1b961 100644
--- a/components/renderers/cpp/src/splat_render.cpp
+++ b/components/renderers/cpp/src/splat_render.cpp
@@ -265,11 +265,14 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cuda
 		//ftl::cuda::normal_visualise(temp_.getTexture<float4>(Channel::Normals), temp_.getTexture<float>(Channel::Contribution), camera);
 
 		// First make sure each input has normals
+		temp_.createTexture<float4>(Channel::Normals);
 		for (auto &f : scene_->frames) {
 			if (!f.hasChannel(Channel::Normals)) {
 				auto &g = f.get<GpuMat>(Channel::Colour);
 				LOG(INFO) << "Make normals channel";
-				ftl::cuda::normals(f.createTexture<float4>(Channel::Normals, Format<float4>(g.cols, g.rows)), f.getTexture<float4>(Channel::Points), stream);
+				ftl::cuda::normals(f.createTexture<float4>(Channel::Normals, Format<float4>(g.cols, g.rows)),
+					temp_.getTexture<float4>(Channel::Normals),  // FIXME: Uses assumption of vcam res same as input res
+					f.getTexture<float4>(Channel::Points), stream);
 			}
 		}