diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 512a4d19087bd66c5c895f3f4c5595190122132c..5a81ac8cbd6f27bb4388eab11d6d4b1b15875410 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -25,7 +25,7 @@ linux:
   script:
     - mkdir build
     - cd build
-    - cmake .. -DWITH_OPTFLOW=TRUE -DBUILD_CALIBRATION=TRUE
+    - cmake .. -DWITH_OPTFLOW=TRUE -DBUILD_CALIBRATION=TRUE -DCMAKE_BUILD_TYPE=Release
     - make
     - ctest --output-on-failure
 
diff --git a/applications/gui/src/camera.cpp b/applications/gui/src/camera.cpp
index 32d78a4a0ff8bbccfed60b13293b1277ab599792..abb7fdb80100d85491dbf929909a8f1524365b2b 100644
--- a/applications/gui/src/camera.cpp
+++ b/applications/gui/src/camera.cpp
@@ -376,6 +376,7 @@ const GLTexture &ftl::gui::Camera::captureFrame() {
 		cv::Mat tmp;
 
 		switch(channel_) {
+			case Channel::Normals:
 			case Channel::Energy:
 				if (depth_.rows == 0) { break; }
 				visualizeEnergy(depth_, tmp, 10.0);
@@ -399,7 +400,6 @@ const GLTexture &ftl::gui::Camera::captureFrame() {
 
 		case Channel::Flow:
 		case Channel::Confidence:
-		case Channel::Normals:
 		case Channel::Right:
 				if (depth_.rows == 0 || depth_.type() != CV_8UC3) { break; }
 				texture_.update(depth_);
diff --git a/components/renderers/cpp/CMakeLists.txt b/components/renderers/cpp/CMakeLists.txt
index b575721587262e2e468d6cb48cf8c44c6771e6fc..63cf8f1e75663c61e05f7c68d26217de30e964e6 100644
--- a/components/renderers/cpp/CMakeLists.txt
+++ b/components/renderers/cpp/CMakeLists.txt
@@ -2,6 +2,7 @@ add_library(ftlrender
 	src/splat_render.cpp
 	src/splatter.cu
 	src/points.cu
+	src/normals.cu
 )
 
 # These cause errors in CI build and are being removed from PCL in newer versions
diff --git a/components/renderers/cpp/include/ftl/cuda/normals.hpp b/components/renderers/cpp/include/ftl/cuda/normals.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..f692e02a44b773ad07b7f5f982788f5ff83c7070
--- /dev/null
+++ b/components/renderers/cpp/include/ftl/cuda/normals.hpp
@@ -0,0 +1,23 @@
+#ifndef _FTL_CUDA_NORMALS_HPP_
+#define _FTL_CUDA_NORMALS_HPP_
+
+#include <ftl/cuda_common.hpp>
+#include <ftl/rgbd/camera.hpp>
+#include <ftl/cuda_matrix_util.hpp>
+
+namespace ftl {
+namespace cuda {
+
+void normals(ftl::cuda::TextureObject<float4> &output,
+        ftl::cuda::TextureObject<float4> &temp,
+        ftl::cuda::TextureObject<float4> &input, cudaStream_t stream);
+
+void normal_visualise(ftl::cuda::TextureObject<float4> &norm,
+        ftl::cuda::TextureObject<float> &output,
+        const ftl::rgbd::Camera &camera, const float4x4 &pose,
+        cudaStream_t stream);
+
+}
+}
+
+#endif  // _FTL_CUDA_NORMALS_HPP_
diff --git a/components/renderers/cpp/src/normals.cu b/components/renderers/cpp/src/normals.cu
new file mode 100644
index 0000000000000000000000000000000000000000..040f761b0f42b92ceacfeeca63be843e55e39ac3
--- /dev/null
+++ b/components/renderers/cpp/src/normals.cu
@@ -0,0 +1,124 @@
+#include <ftl/cuda/normals.hpp>
+#include <ftl/cuda/weighting.hpp>
+
+#define T_PER_BLOCK 16
+#define MINF __int_as_float(0xff800000)
+
+__global__ void computeNormals_kernel(ftl::cuda::TextureObject<float4> output,
+        ftl::cuda::TextureObject<float4> input) {
+	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
+	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	if(x >= input.width() || y >= input.height()) return;
+
+	output(x,y) = make_float4(0, 0, 0, 0);
+
+	if(x > 0 && x < input.width()-1 && y > 0 && y < input.height()-1) {
+		const float3 CC = make_float3(input.tex2D((int)x+0, (int)y+0)); //[(y+0)*width+(x+0)];
+		const float3 PC = make_float3(input.tex2D((int)x+0, (int)y+1)); //[(y+1)*width+(x+0)];
+		const float3 CP = make_float3(input.tex2D((int)x+1, (int)y+0)); //[(y+0)*width+(x+1)];
+		const float3 MC = make_float3(input.tex2D((int)x+0, (int)y-1)); //[(y-1)*width+(x+0)];
+		const float3 CM = make_float3(input.tex2D((int)x-1, (int)y+0)); //[(y+0)*width+(x-1)];
+
+		//if(CC.x != MINF && PC.x != MINF && CP.x != MINF && MC.x != MINF && CM.x != MINF) {
+			const float3 n = cross(PC-MC, CP-CM);
+			const float  l = length(n);
+
+			if(l > 0.0f) {
+				output(x,y) = make_float4(n/-l, 1.0f);
+			}
+		//}
+	}
+}
+
+template <int RADIUS>
+__global__ void smooth_normals_kernel(ftl::cuda::TextureObject<float4> norms,
+        ftl::cuda::TextureObject<float4> output,
+        ftl::cuda::TextureObject<float4> points, float smoothing) {
+    const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
+    const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+    if(x >= points.width() || y >= points.height()) return;
+
+    const float3 p0 = make_float3(points.tex2D((int)x,(int)y));
+    float3 nsum = make_float3(0.0f);
+    float contrib = 0.0f;
+
+    if (p0.x == MINF) return;
+
+    for (int v=-RADIUS; v<=RADIUS; ++v) {
+        for (int u=-RADIUS; u<=RADIUS; ++u) {
+            const float3 p = make_float3(points.tex2D((int)x+u,(int)y+v));
+            if (p.x == MINF) continue;
+            const float s = ftl::cuda::spatialWeighting(p0, p, smoothing);
+
+            if (s > 0.0f) {
+                const float4 n = norms.tex2D((int)x+u,(int)y+v);
+                if (n.w > 0.0f) {
+                    nsum += make_float3(n) * s;
+                    contrib += s;
+                }
+            }
+        }
+    }
+
+    // FIXME: USE A DIFFERENT OUTPUT BUFFER
+    //__syncthreads();
+    output(x,y) = (contrib > 0.0f) ? make_float4(nsum / contrib, 1.0f) : make_float4(0.0f);
+}
+
+void ftl::cuda::normals(ftl::cuda::TextureObject<float4> &output,
+        ftl::cuda::TextureObject<float4> &temp,
+        ftl::cuda::TextureObject<float4> &input, cudaStream_t stream) {
+	const dim3 gridSize((input.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (input.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
+
+	computeNormals_kernel<<<gridSize, blockSize, 0, stream>>>(temp, input);
+    cudaSafeCall( cudaGetLastError() );
+
+    smooth_normals_kernel<1><<<gridSize, blockSize, 0, stream>>>(temp, output, input, 0.04f);
+    cudaSafeCall( cudaGetLastError() );
+
+#ifdef _DEBUG
+	cudaSafeCall(cudaDeviceSynchronize());
+	//cutilCheckMsg(__FUNCTION__);
+#endif
+}
+
+//==============================================================================
+
+__global__ void vis_normals_kernel(ftl::cuda::TextureObject<float4> norm,
+        ftl::cuda::TextureObject<float> output,
+        ftl::rgbd::Camera camera, float4x4 pose) {
+    const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
+    const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+    if(x >= norm.width() || y >= norm.height()) return;
+
+    output(x,y) = 0.0f;
+    float3 ray = make_float3(0.0f, 0.0f, 1.0f); //pose * camera.screenToCam(x,y,1.0f);
+    ray = ray / length(ray);
+    float3 n = make_float3(norm.tex2D((int)x,(int)y));
+    float l = length(n);
+    if (l == 0) return;
+    n /= l;
+
+    output(x,y) = (1.0f + dot(ray, n))*3.5f;  // FIXME: Do not hard code these value scalings
+}
+
+void ftl::cuda::normal_visualise(ftl::cuda::TextureObject<float4> &norm,
+        ftl::cuda::TextureObject<float> &output,
+        const ftl::rgbd::Camera &camera, const float4x4 &pose,
+        cudaStream_t stream) {
+
+    const dim3 gridSize((norm.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (norm.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+    const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
+
+    vis_normals_kernel<<<gridSize, blockSize, 0, stream>>>(norm, output, camera, pose);
+
+    cudaSafeCall( cudaGetLastError() );
+#ifdef _DEBUG
+    cudaSafeCall(cudaDeviceSynchronize());
+    //cutilCheckMsg(__FUNCTION__);
+#endif
+}
diff --git a/components/renderers/cpp/src/splat_render.cpp b/components/renderers/cpp/src/splat_render.cpp
index 425e32b80283abf4036bea6d6938e6c72a7d7105..ceaff532557ee274ebf5ec6390f7110d07a1b961 100644
--- a/components/renderers/cpp/src/splat_render.cpp
+++ b/components/renderers/cpp/src/splat_render.cpp
@@ -2,6 +2,7 @@
 #include <ftl/utility/matrix_conversion.hpp>
 #include "splatter_cuda.hpp"
 #include <ftl/cuda/points.hpp>
+#include <ftl/cuda/normals.hpp>
 
 #include <opencv2/core/cuda_stream_accessor.hpp>
 
@@ -65,7 +66,9 @@ void Splatter::renderChannel(
 	temp_.get<GpuMat>(Channel::Colour).setTo(cv::Scalar(0.0f,0.0f,0.0f,0.0f), cvstream);
 	temp_.get<GpuMat>(Channel::Contribution).setTo(cv::Scalar(0.0f), cvstream);
 
-	bool is_float = ftl::rgbd::isFloatChannel(channel);
+	if (scene_->frames.size() < 1) return;
+	bool is_float = scene_->frames[0].get<GpuMat>(channel).type() == CV_32F; //ftl::rgbd::isFloatChannel(channel);
+	bool is_4chan = scene_->frames[0].get<GpuMat>(channel).type() == CV_32FC4;
 	
 	// Render each camera into virtual view
 	for (size_t i=0; i < scene_->frames.size(); ++i) {
@@ -127,6 +130,15 @@ void Splatter::renderChannel(
 				temp_.getTexture<float>(Channel::Contribution),
 				params, stream
 			);
+		} else if (is_4chan) {
+			ftl::cuda::dibr_attribute(
+				f.createTexture<float4>(channel),
+				f.createTexture<float4>(Channel::Points),
+				temp_.getTexture<int>(Channel::Depth),
+				temp_.getTexture<float4>(Channel::Colour),
+				temp_.getTexture<float>(Channel::Contribution),
+				params, stream
+			);
 		} else if (channel == Channel::Colour || channel == Channel::Right) {
 			ftl::cuda::dibr_attribute(
 				f.createTexture<uchar4>(Channel::Colour),
@@ -148,7 +160,15 @@ void Splatter::renderChannel(
 		}
 	}
 
-	if (is_float) {
+	if (is_4chan) {
+		// Normalise attribute contributions
+		ftl::cuda::dibr_normalise(
+			temp_.createTexture<float4>(Channel::Colour),
+			out.createTexture<float4>(channel),
+			temp_.createTexture<float>(Channel::Contribution),
+			stream
+		);
+	} else if (is_float) {
 		// Normalise attribute contributions
 		ftl::cuda::dibr_normalise(
 			temp_.createTexture<float4>(Channel::Colour),
@@ -237,6 +257,36 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cuda
 	if (chan == Channel::Depth)
 	{
 		temp_.get<GpuMat>(Channel::Depth).convertTo(out.get<GpuMat>(Channel::Depth), CV_32F, 1.0f / 1000.0f, cvstream);
+	} else if (chan == Channel::Normals) {
+		//temp_.get<GpuMat>(Channel::Depth).convertTo(out.get<GpuMat>(Channel::Depth), CV_32F, 1.0f / 1000.0f, cvstream);
+		//ftl::cuda::point_cloud(temp_.createTexture<float4>(Channel::Points, Format<float4>(camera.width, camera.height)),
+		//	temp_.createTexture<float>(Channel::Depth), camera, params.m_viewMatrixInverse, stream);
+		//ftl::cuda::normals(temp_.getTexture<float4>(Channel::Normals), temp_.getTexture<float4>(Channel::Points), stream);
+		//ftl::cuda::normal_visualise(temp_.getTexture<float4>(Channel::Normals), temp_.getTexture<float>(Channel::Contribution), camera);
+
+		// First make sure each input has normals
+		temp_.createTexture<float4>(Channel::Normals);
+		for (auto &f : scene_->frames) {
+			if (!f.hasChannel(Channel::Normals)) {
+				auto &g = f.get<GpuMat>(Channel::Colour);
+				LOG(INFO) << "Make normals channel";
+				ftl::cuda::normals(f.createTexture<float4>(Channel::Normals, Format<float4>(g.cols, g.rows)),
+					temp_.getTexture<float4>(Channel::Normals),  // FIXME: Uses assumption of vcam res same as input res
+					f.getTexture<float4>(Channel::Points), stream);
+			}
+		}
+
+		out.create<GpuMat>(Channel::Normals, Format<float4>(camera.width, camera.height));
+
+		// Render normal attribute
+		renderChannel(params, out, Channel::Normals, stream);
+
+		// Convert normal to single float value
+		ftl::cuda::normal_visualise(out.getTexture<float4>(Channel::Normals), temp_.getTexture<float>(Channel::Contribution), camera, params.m_viewMatrix, stream);
+
+		// Put in output as single float
+		cv::cuda::swap(temp_.get<GpuMat>(Channel::Contribution), out.create<GpuMat>(Channel::Normals));
+		out.resetTexture(Channel::Normals);
 	}
 	else if (chan == Channel::Contribution)
 	{
diff --git a/components/renderers/cpp/src/splatter.cu b/components/renderers/cpp/src/splatter.cu
index c7456e57bf4425a6f16020a66c885d58dbdda5a4..b89b6083ee25318f294943df9336ca6d1700f148 100644
--- a/components/renderers/cpp/src/splatter.cu
+++ b/components/renderers/cpp/src/splatter.cu
@@ -203,6 +203,80 @@ __global__ void dibr_attribute_contrib_kernel(
     }
 }
 
+/*
+ * Pass 2: Accumulate attribute contributions if the points pass a visibility test.
+ */
+ __global__ void dibr_attribute_contrib_kernel(
+        TextureObject<float4> colour_in,    // Original colour image
+        TextureObject<float4> points,       // Original 3D points
+        TextureObject<int> depth_in,        // Virtual depth map
+        TextureObject<float4> colour_out,   // Accumulated output
+        //TextureObject<float4> normal_out,
+        TextureObject<float> contrib_out,
+        SplatParams params) {
+        
+    //const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam];
+
+    const int tid = (threadIdx.x + threadIdx.y * blockDim.x);
+    //const int warp = tid / WARP_SIZE;
+    const int x = (blockIdx.x*blockDim.x + threadIdx.x) / WARP_SIZE;
+    const int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+    const float3 worldPos = make_float3(points.tex2D(x, y));
+    //const float3 normal = make_float3(tex2D<float4>(camera.normal, x, y));
+    if (worldPos.x == MINF) return;
+    //const float r = (camera.poseInverse * worldPos).z / camera.params.fx;
+
+    const float3 camPos = params.m_viewMatrix * worldPos;
+    if (camPos.z < params.camera.minDepth) return;
+    if (camPos.z > params.camera.maxDepth) return;
+    const uint2 screenPos = params.camera.camToScreen<uint2>(camPos);
+
+    //const int upsample = 8; //min(UPSAMPLE_MAX, int((5.0f*r) * params.camera.fx / camPos.z));
+
+    // Not on screen so stop now...
+    if (screenPos.x >= depth_in.width() || screenPos.y >= depth_in.height()) return;
+            
+    // Is this point near the actual surface and therefore a contributor?
+    const float d = ((float)depth_in.tex2D((int)screenPos.x, (int)screenPos.y)/1000.0f);
+    //if (abs(d - camPos.z) > DEPTH_THRESHOLD) return;
+
+    // TODO:(Nick) Should just one thread load these to shared mem?
+    const float4 colour = (colour_in.tex2D(x, y));
+    //const float4 normal = tex2D<float4>(camera.normal, x, y);
+
+    // Each thread in warp takes an upsample point and updates corresponding depth buffer.
+    const int lane = tid % WARP_SIZE;
+    for (int i=lane; i<ACCUM_DIAMETER*ACCUM_DIAMETER; i+=WARP_SIZE) {
+        const float u = (i % ACCUM_DIAMETER) - (ACCUM_DIAMETER / 2);
+        const float v = (i / ACCUM_DIAMETER) - (ACCUM_DIAMETER / 2);
+
+        // Use the depth buffer to determine this pixels 3D position in camera space
+        const float d = ((float)depth_in.tex2D(screenPos.x+u, screenPos.y+v)/1000.0f);
+        const float3 nearest = params.camera.screenToCam((int)(screenPos.x+u),(int)(screenPos.y+v),d);
+
+        // What is contribution of our current point at this pixel?
+        const float weight = ftl::cuda::spatialWeighting(nearest, camPos, SMOOTHING_MULTIPLIER_C*(nearest.z/params.camera.fx));
+        if (screenPos.x+u < colour_out.width() && screenPos.y+v < colour_out.height() && weight > 0.0f) {  // TODO: Use confidence threshold here
+            const float4 wcolour = colour * weight;
+            //const float4 wnormal = normal * weight;
+            
+            //printf("Z %f\n", d);
+
+            // Add this points contribution to the pixel buffer
+            atomicAdd((float*)&colour_out(screenPos.x+u, screenPos.y+v), wcolour.x);
+            atomicAdd((float*)&colour_out(screenPos.x+u, screenPos.y+v)+1, wcolour.y);
+            atomicAdd((float*)&colour_out(screenPos.x+u, screenPos.y+v)+2, wcolour.z);
+            atomicAdd((float*)&colour_out(screenPos.x+u, screenPos.y+v)+3, wcolour.w);
+            //atomicAdd((float*)&normal_out(screenPos.x+u, screenPos.y+v), wnormal.x);
+            //atomicAdd((float*)&normal_out(screenPos.x+u, screenPos.y+v)+1, wnormal.y);
+            //atomicAdd((float*)&normal_out(screenPos.x+u, screenPos.y+v)+2, wnormal.z);
+            //atomicAdd((float*)&normal_out(screenPos.x+u, screenPos.y+v)+3, wnormal.w);
+            atomicAdd(&contrib_out(screenPos.x+u, screenPos.y+v), weight);
+        }
+    }
+}
+
 void ftl::cuda::dibr_attribute(
         TextureObject<uchar4> &colour_in,    // Original colour image
         TextureObject<float4> &points,       // Original 3D points
@@ -247,6 +321,28 @@ void ftl::cuda::dibr_attribute(
     cudaSafeCall( cudaGetLastError() );
 }
 
+void ftl::cuda::dibr_attribute(
+        TextureObject<float4> &colour_in,    // Original colour image
+        TextureObject<float4> &points,       // Original 3D points
+        TextureObject<int> &depth_in,        // Virtual depth map
+        TextureObject<float4> &colour_out,   // Accumulated output
+        //TextureObject<float4> normal_out,
+        TextureObject<float> &contrib_out,
+        SplatParams &params, cudaStream_t stream) {
+    const dim3 gridSize((depth_in.width() + 2 - 1)/2, (depth_in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+    const dim3 blockSize(2*WARP_SIZE, T_PER_BLOCK);
+
+    dibr_attribute_contrib_kernel<<<gridSize, blockSize, 0, stream>>>(
+        colour_in,
+        points,
+        depth_in,
+        colour_out,
+        contrib_out,
+        params
+    );
+    cudaSafeCall( cudaGetLastError() );
+}
+
 //==============================================================================
 
 __global__ void dibr_normalise_kernel(
@@ -289,6 +385,26 @@ __global__ void dibr_normalise_kernel(
     }
 }
 
+__global__ void dibr_normalise_kernel(
+        TextureObject<float4> colour_in,
+        TextureObject<float4> colour_out,
+        //TextureObject<float4> normals,
+        TextureObject<float> contribs) {
+    const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
+    const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+    if (x < colour_in.width() && y < colour_in.height()) {
+        const float4 colour = colour_in.tex2D((int)x,(int)y);
+        //const float4 normal = normals.tex2D((int)x,(int)y);
+        const float contrib = contribs.tex2D((int)x,(int)y);
+
+        if (contrib > 0.0f) {
+            colour_out(x,y) = make_float4(colour.x / contrib, colour.y / contrib, colour.z / contrib, 0);
+            //normals(x,y) = normal / contrib;
+        }
+    }
+}
+
 void ftl::cuda::dibr_normalise(TextureObject<float4> &colour_in, TextureObject<uchar4> &colour_out, TextureObject<float> &contribs, cudaStream_t stream) {
     const dim3 gridSize((colour_in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (colour_in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
     const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
@@ -304,3 +420,11 @@ void ftl::cuda::dibr_normalise(TextureObject<float4> &colour_in, TextureObject<f
     dibr_normalise_kernel<<<gridSize, blockSize, 0, stream>>>(colour_in, colour_out, contribs);
     cudaSafeCall( cudaGetLastError() );
 }
+
+void ftl::cuda::dibr_normalise(TextureObject<float4> &colour_in, TextureObject<float4> &colour_out, TextureObject<float> &contribs, cudaStream_t stream) {
+    const dim3 gridSize((colour_in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (colour_in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+    const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
+
+    dibr_normalise_kernel<<<gridSize, blockSize, 0, stream>>>(colour_in, colour_out, contribs);
+    cudaSafeCall( cudaGetLastError() );
+}
diff --git a/components/renderers/cpp/src/splatter_cuda.hpp b/components/renderers/cpp/src/splatter_cuda.hpp
index 8c57d58486c04aff5960f215c6a6308719e6af7b..7b39e0991686cc288b8046f3cc01fb3cdb41ddad 100644
--- a/components/renderers/cpp/src/splatter_cuda.hpp
+++ b/components/renderers/cpp/src/splatter_cuda.hpp
@@ -30,6 +30,15 @@ namespace cuda {
 		ftl::cuda::TextureObject<float> &contrib_out,
 		ftl::render::SplatParams &params, cudaStream_t stream);
 
+	void dibr_attribute(
+		ftl::cuda::TextureObject<float4> &in,	// Original colour image
+		ftl::cuda::TextureObject<float4> &points,		// Original 3D points
+		ftl::cuda::TextureObject<int> &depth_in,		// Virtual depth map
+		ftl::cuda::TextureObject<float4> &out,	// Accumulated output
+		//TextureObject<float4> normal_out,
+		ftl::cuda::TextureObject<float> &contrib_out,
+		ftl::render::SplatParams &params, cudaStream_t stream);
+
 	void dibr_normalise(
 		ftl::cuda::TextureObject<float4> &in,
 		ftl::cuda::TextureObject<uchar4> &out,
@@ -41,6 +50,12 @@ namespace cuda {
 		ftl::cuda::TextureObject<float> &out,
 		ftl::cuda::TextureObject<float> &contribs,
 		cudaStream_t stream);
+
+	void dibr_normalise(
+		ftl::cuda::TextureObject<float4> &in,
+		ftl::cuda::TextureObject<float4> &out,
+		ftl::cuda::TextureObject<float> &contribs,
+		cudaStream_t stream);
 }
 }
 
diff --git a/components/rgbd-sources/include/ftl/rgbd/channels.hpp b/components/rgbd-sources/include/ftl/rgbd/channels.hpp
index 9bf731a5319fa47c501a91e09f1e2acc48c5a4a8..d1d8dcdbf2f66841f5e12da01d04a4438e145d23 100644
--- a/components/rgbd-sources/include/ftl/rgbd/channels.hpp
+++ b/components/rgbd-sources/include/ftl/rgbd/channels.hpp
@@ -103,6 +103,7 @@ static const Channels kAllChannels(0xFFFFFFFFu);
 inline bool isFloatChannel(ftl::rgbd::Channel chan) {
 	switch (chan) {
 	case Channel::Depth		:
+    case Channel::Normals   :
 	case Channel::Energy	: return true;
 	default					: return false;
 	}
diff --git a/components/rgbd-sources/include/ftl/rgbd/frame.hpp b/components/rgbd-sources/include/ftl/rgbd/frame.hpp
index 252ff271de36336938d51d616cdd5a9e87d52187..3d7703fe10173008de03d9fa2c308ddc3237d848 100644
--- a/components/rgbd-sources/include/ftl/rgbd/frame.hpp
+++ b/components/rgbd-sources/include/ftl/rgbd/frame.hpp
@@ -83,6 +83,8 @@ public:
 	template <typename T>
 	ftl::cuda::TextureObject<T> &createTexture(ftl::rgbd::Channel c);
 
+	void resetTexture(ftl::rgbd::Channel c);
+
 	/**
 	 * Reset all channels without releasing memory.
 	 */
@@ -203,6 +205,7 @@ ftl::cuda::TextureObject<T> &Frame::createTexture(ftl::rgbd::Channel c, const ft
 	}
 
 	if (m.gpu.type() != ftl::traits::OpenCVType<T>::value) {
+		LOG(ERROR) << "Texture type mismatch: " << (int)c << " " << m.gpu.type() << " != " << ftl::traits::OpenCVType<T>::value;
 		throw ftl::exception("Texture type does not match underlying data type");
 	}
 
@@ -235,6 +238,7 @@ ftl::cuda::TextureObject<T> &Frame::createTexture(ftl::rgbd::Channel c) {
 	}
 
 	if (m.gpu.type() != ftl::traits::OpenCVType<T>::value) {
+		LOG(ERROR) << "Texture type mismatch: " << (int)c << " " << m.gpu.type() << " != " << ftl::traits::OpenCVType<T>::value;
 		throw ftl::exception("Texture type does not match underlying data type");
 	}
 
diff --git a/components/rgbd-sources/src/frame.cpp b/components/rgbd-sources/src/frame.cpp
index a56a19355526d9cdcdf25faaecdc67c05d09469d..d18ff36cec7b3182a77c05f7b3591d9c759c7320 100644
--- a/components/rgbd-sources/src/frame.cpp
+++ b/components/rgbd-sources/src/frame.cpp
@@ -199,3 +199,8 @@ template <> cv::cuda::GpuMat &Frame::create(ftl::rgbd::Channel c) {
 	return m;
 }
 
+void Frame::resetTexture(ftl::rgbd::Channel c) {
+	auto &m = _get(c);
+	m.tex.free();
+}
+