diff --git a/components/renderers/cpp/include/ftl/cuda/warp.hpp b/components/renderers/cpp/include/ftl/cuda/warp.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..9164b0eeeb8b3ef606aef4930f55b38a1afacdc4
--- /dev/null
+++ b/components/renderers/cpp/include/ftl/cuda/warp.hpp
@@ -0,0 +1,48 @@
+#ifndef _FTL_CUDA_WARP_HPP_
+#define _FTL_CUDA_WARP_HPP_
+
+#ifndef WARP_SIZE
+#define WARP_SIZE 32
+#endif
+
+#define FULL_MASK 0xffffffff
+
+namespace ftl {
+namespace cuda {
+
+__device__ inline float warpMin(float e) {
+	for (int i = WARP_SIZE/2; i > 0; i /= 2) {
+		const float other = __shfl_xor_sync(FULL_MASK, e, i, WARP_SIZE);
+		e = min(e, other);
+	}
+	return e;
+}
+
+__device__ inline float warpMax(float e) {
+	for (int i = WARP_SIZE/2; i > 0; i /= 2) {
+		const float other = __shfl_xor_sync(FULL_MASK, e, i, WARP_SIZE);
+		e = max(e, other);
+	}
+	return e;
+}
+
+__device__ inline float warpSum(float e) {
+	for (int i = WARP_SIZE/2; i > 0; i /= 2) {
+		const float other = __shfl_xor_sync(FULL_MASK, e, i, WARP_SIZE);
+		e += other;
+	}
+	return e;
+}
+
+__device__ inline int warpSum(int e) {
+	for (int i = WARP_SIZE/2; i > 0; i /= 2) {
+		const float other = __shfl_xor_sync(FULL_MASK, e, i, WARP_SIZE);
+		e += other;
+	}
+	return e;
+}
+
+}
+}
+
+#endif  // _FTL_CUDA_WARP_HPP_
diff --git a/components/renderers/cpp/include/ftl/render/splat_render.hpp b/components/renderers/cpp/include/ftl/render/splat_render.hpp
index 40bfa459b35e850d2147f74e33eabfaaddb3a261..5f8a8ba4940e910efd1e9b6f5a0f0e900fde1ff7 100644
--- a/components/renderers/cpp/include/ftl/render/splat_render.hpp
+++ b/components/renderers/cpp/include/ftl/render/splat_render.hpp
@@ -45,6 +45,8 @@ class Splatter : public ftl::render::Renderer {
 	bool clipping_;
 	float norm_filter_;
 	bool backcull_;
+	cv::Scalar background_;
+	bool splat_;
 };
 
 }
diff --git a/components/renderers/cpp/src/normals.cu b/components/renderers/cpp/src/normals.cu
index ad6c34cf4cadcda6f46fa677f48502099ef1a013..eec540ccd1ef022b91d6fb003b3cfa1d62a0a0a6 100644
--- a/components/renderers/cpp/src/normals.cu
+++ b/components/renderers/cpp/src/normals.cu
@@ -20,7 +20,7 @@ __global__ void computeNormals_kernel(ftl::cuda::TextureObject<float4> output,
 		const float3 MC = make_float3(input.tex2D((int)x+0, (int)y-1)); //[(y-1)*width+(x+0)];
 		const float3 CM = make_float3(input.tex2D((int)x-1, (int)y+0)); //[(y+0)*width+(x-1)];
 
-		if(CC.x != MINF) { // && PC.x != MINF && CP.x != MINF && MC.x != MINF && CM.x != MINF) {
+		if(CC.x != MINF && PC.x != MINF && CP.x != MINF && MC.x != MINF && CM.x != MINF) {
 			const float3 n = cross(PC-MC, CP-CM);
 			const float  l = length(n);
 
diff --git a/components/renderers/cpp/src/splat_render.cpp b/components/renderers/cpp/src/splat_render.cpp
index e66c11b651a6def941ed82ee767d76285ff86e34..26f8e893ee3846a06adadfa6d0fd42393d1e4c83 100644
--- a/components/renderers/cpp/src/splat_render.cpp
+++ b/components/renderers/cpp/src/splat_render.cpp
@@ -6,11 +6,14 @@
 
 #include <opencv2/core/cuda_stream_accessor.hpp>
 
+#include <string>
+
 using ftl::render::Splatter;
 using ftl::rgbd::Channel;
 using ftl::rgbd::Channels;
 using ftl::rgbd::Format;
 using cv::cuda::GpuMat;
+using std::stoul;
 
 static Eigen::Affine3d create_rotation_matrix(float ax, float ay, float az) {
   Eigen::Affine3d rx =
@@ -22,6 +25,22 @@ static Eigen::Affine3d create_rotation_matrix(float ax, float ay, float az) {
   return rz * rx * ry;
 }
 
+static cv::Scalar parseColour(const std::string &colour) {
+	std::string c = colour;
+	if (c[0] == '#') {
+		c.erase(0, 1);
+		unsigned long value = stoul(c.c_str(), nullptr, 16);
+		return cv::Scalar(
+			(value >> 0) & 0xff,
+			(value >> 8) & 0xff,
+			(value >> 16) & 0xff,
+			(value >> 24) & 0xff
+		);
+	}
+
+	return cv::Scalar(0,0,0,0);
+}
+
 Splatter::Splatter(nlohmann::json &config, ftl::rgbd::FrameSet *fs) : ftl::render::Renderer(config), scene_(fs) {
 	if (config["clipping"].is_object()) {
 		auto &c = config["clipping"];
@@ -59,6 +78,16 @@ Splatter::Splatter(nlohmann::json &config, ftl::rgbd::FrameSet *fs) : ftl::rende
 	on("back_cull", [this](const ftl::config::Event &e) {
 		backcull_ = value("back_cull", true);
 	});
+
+	splat_ = value("splatting", true);
+	on("splatting", [this](const ftl::config::Event &e) {
+		splat_ = value("splatting", true);
+	});
+
+	background_ = parseColour(value("background", std::string("#e0e0e0")));
+	on("background", [this](const ftl::config::Event &e) {
+		background_ = parseColour(value("background", std::string("#e0e0e0")));
+	});
 }
 
 Splatter::~Splatter() {
@@ -73,12 +102,12 @@ void Splatter::renderChannel(
 	cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream);
 	temp_.get<GpuMat>(Channel::Depth).setTo(cv::Scalar(0x7FFFFFFF), cvstream);
 	temp_.get<GpuMat>(Channel::Depth2).setTo(cv::Scalar(0x7FFFFFFF), cvstream);
-	temp_.get<GpuMat>(Channel::Colour).setTo(cv::Scalar(0.0f,0.0f,0.0f,0.0f), cvstream);
-	temp_.get<GpuMat>(Channel::Contribution).setTo(cv::Scalar(0.0f), cvstream);
+	//temp_.get<GpuMat>(Channel::Colour).setTo(cv::Scalar(0.0f,0.0f,0.0f,0.0f), cvstream);
+	//temp_.get<GpuMat>(Channel::Contribution).setTo(cv::Scalar(0.0f), cvstream);
 
 	if (scene_->frames.size() < 1) return;
-	bool is_float = scene_->frames[0].get<GpuMat>(channel).type() == CV_32F; //ftl::rgbd::isFloatChannel(channel);
-	bool is_4chan = scene_->frames[0].get<GpuMat>(channel).type() == CV_32FC4;
+	bool is_float = out.get<GpuMat>(channel).type() == CV_32F; //ftl::rgbd::isFloatChannel(channel);
+	bool is_4chan = out.get<GpuMat>(channel).type() == CV_32FC4;
 	
 	// Render each camera into virtual view
 	// TODO: Move out of renderChannel, this is a common step to all channels
@@ -94,19 +123,43 @@ void Splatter::renderChannel(
 		ftl::cuda::dibr_merge(
 			f.createTexture<float4>(Channel::Points),
 			f.createTexture<float4>(Channel::Normals),
-			temp_.getTexture<int>(Channel::Depth),
+			temp_.createTexture<int>(Channel::Depth2),
 			params, backcull_, stream
 		);
 
 		//LOG(INFO) << "DIBR DONE";
 	}
 
-	// TODO: Add the depth splatting step..
+	//temp_.createTexture<float4>(Channel::Colour);
+	//temp_.createTexture<float>(Channel::Contribution);
+	out.create<GpuMat>(Channel::Normals, Format<float4>(params.camera.width, params.camera.height));
 
-	temp_.createTexture<float4>(Channel::Colour);
-	temp_.createTexture<float>(Channel::Contribution);
+	// Create normals first
+	for (auto &f : scene_->frames) {
+		ftl::cuda::dibr_attribute(
+			f.createTexture<float4>(Channel::Normals),
+			f.createTexture<float4>(Channel::Points),
+			temp_.getTexture<int>(Channel::Depth2),
+			out.createTexture<float4>(Channel::Normals),
+			params, stream
+		);
+	}
+
+	//temp_.get<GpuMat>(Channel::Colour).setTo(cv::Scalar(0.0f,0.0f,0.0f,0.0f), cvstream);
+	//temp_.get<GpuMat>(Channel::Contribution).setTo(cv::Scalar(0.0f), cvstream);
+
+	if (is_4chan) {
+		temp_.create<GpuMat>(Channel::Colour2, Format<float4>(params.camera.width, params.camera.height));
+		temp_.get<GpuMat>(Channel::Colour2).setTo(cv::Scalar(0.0f,0.0f,0.0f,0.0f), cvstream);
+	} else if (is_float) {
+		temp_.create<GpuMat>(Channel::Colour2, Format<float>(params.camera.width, params.camera.height));
+		temp_.get<GpuMat>(Channel::Colour2).setTo(cv::Scalar(0.0f), cvstream);
+	} else {
+		temp_.create<GpuMat>(Channel::Colour2, Format<uchar4>(params.camera.width, params.camera.height));
+		temp_.get<GpuMat>(Channel::Colour2).setTo(cv::Scalar(0,0,0,0), cvstream);
+	}
 
-	// Accumulate attribute contributions for each pixel
+	// Create attribute first
 	for (auto &f : scene_->frames) {
 		// Convert colour from BGR to BGRA if needed
 		if (f.get<GpuMat>(channel).type() == CV_8UC3) {
@@ -116,70 +169,66 @@ void Splatter::renderChannel(
 			cv::cuda::swap(col, tmp);
 			cv::cuda::cvtColor(tmp,col, cv::COLOR_BGR2BGRA);
 		}
-	
-		if (is_float) {
-			ftl::cuda::dibr_attribute(
-				f.createTexture<float>(channel),
-				f.createTexture<float4>(Channel::Points),
-				temp_.getTexture<int>(Channel::Depth),
-				temp_.getTexture<float4>(Channel::Colour),
-				temp_.getTexture<float>(Channel::Contribution),
-				params, stream
-			);
-		} else if (is_4chan) {
+		
+		if (is_4chan) {
 			ftl::cuda::dibr_attribute(
 				f.createTexture<float4>(channel),
 				f.createTexture<float4>(Channel::Points),
-				temp_.getTexture<int>(Channel::Depth),
-				temp_.getTexture<float4>(Channel::Colour),
-				temp_.getTexture<float>(Channel::Contribution),
+				temp_.getTexture<int>(Channel::Depth2),
+				(splat_) ? temp_.createTexture<float4>(Channel::Colour2) : out.createTexture<float4>(channel),
 				params, stream
 			);
-		} else if (channel == Channel::Colour || channel == Channel::Right) {
+		} else if (is_float) {
 			ftl::cuda::dibr_attribute(
-				f.createTexture<uchar4>(Channel::Colour),
+				f.createTexture<float>(channel),
 				f.createTexture<float4>(Channel::Points),
-				temp_.getTexture<int>(Channel::Depth),
-				temp_.getTexture<float4>(Channel::Colour),
-				temp_.getTexture<float>(Channel::Contribution),
+				temp_.getTexture<int>(Channel::Depth2),
+				(splat_) ? temp_.createTexture<float>(Channel::Colour2) : out.createTexture<float>(channel),
 				params, stream
 			);
 		} else {
 			ftl::cuda::dibr_attribute(
 				f.createTexture<uchar4>(channel),
 				f.createTexture<float4>(Channel::Points),
-				temp_.getTexture<int>(Channel::Depth),
-				temp_.getTexture<float4>(Channel::Colour),
-				temp_.getTexture<float>(Channel::Contribution),
+				temp_.getTexture<int>(Channel::Depth2),
+				(splat_) ? temp_.createTexture<uchar4>(Channel::Colour2) : out.createTexture<uchar4>(channel),
 				params, stream
 			);
 		}
 	}
 
-	if (is_4chan) {
-		// Normalise attribute contributions
-		ftl::cuda::dibr_normalise(
-			temp_.createTexture<float4>(Channel::Colour),
-			out.createTexture<float4>(channel),
-			temp_.createTexture<float>(Channel::Contribution),
-			stream
-		);
-	} else if (is_float) {
-		// Normalise attribute contributions
-		ftl::cuda::dibr_normalise(
-			temp_.createTexture<float4>(Channel::Colour),
-			out.createTexture<float>(channel),
-			temp_.createTexture<float>(Channel::Contribution),
-			stream
-		);
-	} else {
-		// Normalise attribute contributions
-		ftl::cuda::dibr_normalise(
-			temp_.createTexture<float4>(Channel::Colour),
-			out.createTexture<uchar4>(channel),
-			temp_.createTexture<float>(Channel::Contribution),
-			stream
-		);
+	//out.get<GpuMat>(Channel::Left).setTo(cv::Scalar(0,0,0,0), cvstream);
+
+	// Now splat the points
+	if (splat_) {
+		if (is_4chan) {
+			ftl::cuda::splat(
+				out.getTexture<float4>(Channel::Normals),
+				temp_.getTexture<float4>(Channel::Colour2),
+				temp_.getTexture<int>(Channel::Depth2),
+				out.createTexture<float>(Channel::Depth),
+				out.createTexture<float4>(channel),
+				params, stream
+			);
+		} else if (is_float) {
+			ftl::cuda::splat(
+				out.getTexture<float4>(Channel::Normals),
+				temp_.getTexture<float>(Channel::Colour2),
+				temp_.getTexture<int>(Channel::Depth2),
+				out.createTexture<float>(Channel::Depth),
+				out.createTexture<float>(channel),
+				params, stream
+			);
+		} else {
+			ftl::cuda::splat(
+				out.getTexture<float4>(Channel::Normals),
+				temp_.getTexture<uchar4>(Channel::Colour2),
+				temp_.getTexture<int>(Channel::Depth2),
+				out.createTexture<float>(Channel::Depth),
+				out.createTexture<uchar4>(channel),
+				params, stream
+			);
+		}
 	}
 }
 
@@ -196,38 +245,14 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cuda
 	out.create<GpuMat>(Channel::Colour, Format<uchar4>(camera.width, camera.height));
 
 	// FIXME: Use source resolutions, not virtual resolution
-	temp_.create<GpuMat>(Channel::Colour, Format<float4>(camera.width, camera.height));
-	temp_.create<GpuMat>(Channel::Colour2, Format<uchar4>(camera.width, camera.height));
-	temp_.create<GpuMat>(Channel::Contribution, Format<float>(camera.width, camera.height));
+	//temp_.create<GpuMat>(Channel::Colour, Format<float4>(camera.width, camera.height));
+	//temp_.create<GpuMat>(Channel::Contribution, Format<float>(camera.width, camera.height));
 	temp_.create<GpuMat>(Channel::Depth, Format<int>(camera.width, camera.height));
 	temp_.create<GpuMat>(Channel::Depth2, Format<int>(camera.width, camera.height));
 	temp_.create<GpuMat>(Channel::Normals, Format<float4>(camera.width, camera.height));
 
 	cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream);
 
-	// Create buffers if they don't exist
-	/*if ((unsigned int)depth1_.width() != camera.width || (unsigned int)depth1_.height() != camera.height) {
-		depth1_ = ftl::cuda::TextureObject<int>(camera.width, camera.height);
-	}
-	if ((unsigned int)depth3_.width() != camera.width || (unsigned int)depth3_.height() != camera.height) {
-		depth3_ = ftl::cuda::TextureObject<int>(camera.width, camera.height);
-	}
-	if ((unsigned int)colour1_.width() != camera.width || (unsigned int)colour1_.height() != camera.height) {
-		colour1_ = ftl::cuda::TextureObject<uchar4>(camera.width, camera.height);
-	}
-	if ((unsigned int)colour_tmp_.width() != camera.width || (unsigned int)colour_tmp_.height() != camera.height) {
-		colour_tmp_ = ftl::cuda::TextureObject<float4>(camera.width, camera.height);
-	}
-	if ((unsigned int)normal1_.width() != camera.width || (unsigned int)normal1_.height() != camera.height) {
-		normal1_ = ftl::cuda::TextureObject<float4>(camera.width, camera.height);
-	}
-	if ((unsigned int)depth2_.width() != camera.width || (unsigned int)depth2_.height() != camera.height) {
-		depth2_ = ftl::cuda::TextureObject<float>(camera.width, camera.height);
-	}
-	if ((unsigned int)colour2_.width() != camera.width || (unsigned int)colour2_.height() != camera.height) {
-		colour2_ = ftl::cuda::TextureObject<uchar4>(camera.width, camera.height);
-	}*/
-
 	// Parameters object to pass to CUDA describing the camera
 	SplatParams params;
 	params.m_flags = 0;
@@ -242,7 +267,7 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cuda
 	// Clear all channels to 0 or max depth
 
 	out.get<GpuMat>(Channel::Depth).setTo(cv::Scalar(1000.0f), cvstream);
-	out.get<GpuMat>(Channel::Colour).setTo(cv::Scalar(76,76,76), cvstream);
+	out.get<GpuMat>(Channel::Colour).setTo(background_, cvstream);
 
 	//LOG(INFO) << "Render ready: " << camera.width << "," << camera.height;
 
@@ -289,7 +314,7 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cuda
 	Channel chan = src->getChannel();
 	if (chan == Channel::Depth)
 	{
-		temp_.get<GpuMat>(Channel::Depth).convertTo(out.get<GpuMat>(Channel::Depth), CV_32F, 1.0f / 1000.0f, cvstream);
+		//temp_.get<GpuMat>(Channel::Depth).convertTo(out.get<GpuMat>(Channel::Depth), CV_32F, 1.0f / 1000.0f, cvstream);
 	} else if (chan == Channel::Normals) {
 		out.create<GpuMat>(Channel::Normals, Format<float4>(camera.width, camera.height));
 
@@ -297,7 +322,8 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cuda
 		renderChannel(params, out, Channel::Normals, stream);
 
 		// Convert normal to single float value
-		ftl::cuda::normal_visualise(out.getTexture<float4>(Channel::Normals), temp_.getTexture<float>(Channel::Contribution), camera, params.m_viewMatrixInverse, stream);
+		temp_.create<GpuMat>(Channel::Contribution, Format<float>(camera.width, camera.height));
+		ftl::cuda::normal_visualise(out.getTexture<float4>(Channel::Normals), temp_.createTexture<float>(Channel::Contribution), camera, params.m_viewMatrixInverse, stream);
 
 		// Put in output as single float
 		cv::cuda::swap(temp_.get<GpuMat>(Channel::Contribution), out.create<GpuMat>(Channel::Normals));
@@ -315,7 +341,7 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cuda
 		params.m_viewMatrixInverse = MatrixConversion::toCUDA(matrix);
 		
 		out.create<GpuMat>(Channel::Right, Format<uchar4>(camera.width, camera.height));
-		out.get<GpuMat>(Channel::Right).setTo(cv::Scalar(76,76,76), cvstream);
+		out.get<GpuMat>(Channel::Right).setTo(background_, cvstream);
 		renderChannel(params, out, Channel::Right, stream);
 	} else if (chan != Channel::None) {
 		if (ftl::rgbd::isFloatChannel(chan)) {
@@ -323,7 +349,7 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cuda
 			out.get<GpuMat>(chan).setTo(cv::Scalar(0.0f), cvstream);
 		} else {
 			out.create<GpuMat>(chan, Format<uchar4>(camera.width, camera.height));
-			out.get<GpuMat>(chan).setTo(cv::Scalar(76,76,76,255), cvstream);
+			out.get<GpuMat>(chan).setTo(background_, cvstream);
 		}
 		renderChannel(params, out, chan, stream);
 	}
diff --git a/components/renderers/cpp/src/splatter.cu b/components/renderers/cpp/src/splatter.cu
index c91673756663d1e2c4aefea5e785ff52d25a487d..a4f5d97ff91b62561cfa4fda990c44fc7790c72f 100644
--- a/components/renderers/cpp/src/splatter.cu
+++ b/components/renderers/cpp/src/splatter.cu
@@ -4,6 +4,8 @@
 #include <ftl/cuda_common.hpp>
 
 #include <ftl/cuda/weighting.hpp>
+#include <ftl/cuda/intersections.hpp>
+#include <ftl/cuda/warp.hpp>
 
 #define T_PER_BLOCK 8
 #define UPSAMPLE_FACTOR 1.8f
@@ -13,8 +15,17 @@
 #define MAX_ITERATIONS 32  // Note: Must be multiple of 32
 #define SPATIAL_SMOOTHING 0.005f
 
+#define ENERGY_THRESHOLD 0.1f
+#define SMOOTHING_MULTIPLIER_A 10.0f	// For surface search
+#define SMOOTHING_MULTIPLIER_B 4.0f		// For z contribution
+#define SMOOTHING_MULTIPLIER_C 2.0f		// For colour contribution
+
+#define ACCUM_DIAMETER 8
+
 using ftl::cuda::TextureObject;
 using ftl::render::SplatParams;
+using ftl::cuda::warpMin;
+using ftl::cuda::warpSum;
 
 /*
  * Pass 1: Directly render each camera into virtual view but with no upsampling
@@ -76,163 +87,59 @@ __device__ inline float4 make_float4(const uchar4 &c) {
     return make_float4(c.x,c.y,c.z,c.w);
 }
 
-
-#define ENERGY_THRESHOLD 0.1f
-#define SMOOTHING_MULTIPLIER_A 10.0f	// For surface search
-#define SMOOTHING_MULTIPLIER_B 4.0f		// For z contribution
-#define SMOOTHING_MULTIPLIER_C 1.0f		// For colour contribution
-
-#define ACCUM_DIAMETER 8
-
-/*
- * Pass 2: Accumulate attribute contributions if the points pass a visibility test.
- */
-__global__ void dibr_attribute_contrib_kernel(
-        TextureObject<uchar4> colour_in,    // Original colour image
-        TextureObject<float4> points,       // Original 3D points
-        TextureObject<int> depth_in,        // Virtual depth map
-        TextureObject<float4> colour_out,   // Accumulated output
-        //TextureObject<float4> normal_out,
-        TextureObject<float> contrib_out,
-        SplatParams params) {
-        
-	//const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam];
-
-	const int tid = (threadIdx.x + threadIdx.y * blockDim.x);
-	//const int warp = tid / WARP_SIZE;
-	const int x = (blockIdx.x*blockDim.x + threadIdx.x) / WARP_SIZE;
-	const int y = blockIdx.y*blockDim.y + threadIdx.y;
-
-	const float4 worldPos = points.tex2D(x, y);
-	//const float3 normal = make_float3(tex2D<float4>(camera.normal, x, y));
-	if (worldPos.x == MINF) return;
-    //const float r = (camera.poseInverse * worldPos).z / camera.params.fx;
-
-	const float3 camPos = params.m_viewMatrix * make_float3(worldPos);
-	if (camPos.z < params.camera.minDepth) return;
-	if (camPos.z > params.camera.maxDepth) return;
-	const uint2 screenPos = params.camera.camToScreen<uint2>(camPos);
-
-    //const int upsample = 8; //min(UPSAMPLE_MAX, int((5.0f*r) * params.camera.fx / camPos.z));
-
-	// Not on screen so stop now...
-	if (screenPos.x >= depth_in.width() || screenPos.y >= depth_in.height()) return;
-            
-    // Is this point near the actual surface and therefore a contributor?
-    const float d = ((float)depth_in.tex2D((int)screenPos.x, (int)screenPos.y)/1000.0f);
-    //if (abs(d - camPos.z) > DEPTH_THRESHOLD) return;
-
-	const float4 colour = (params.m_flags & ftl::render::kShowDisconMask && worldPos.w < 0.0f) ?
-			make_float4(0.0f,0.0f,255.0f,255.0f) :  // Show discontinuity mask in red
-			make_float4(colour_in.tex2D(x, y));
-    //const float4 normal = tex2D<float4>(camera.normal, x, y);
-
-	// Each thread in warp takes an upsample point and updates corresponding depth buffer.
-	const int lane = tid % WARP_SIZE;
-	for (int i=lane; i<ACCUM_DIAMETER*ACCUM_DIAMETER; i+=WARP_SIZE) {
-		const float u = (i % ACCUM_DIAMETER) - (ACCUM_DIAMETER / 2);
-		const float v = (i / ACCUM_DIAMETER) - (ACCUM_DIAMETER / 2);
-
-        // Use the depth buffer to determine this pixels 3D position in camera space
-        const float d = ((float)depth_in.tex2D(screenPos.x+u, screenPos.y+v)/1000.0f);
-		const float3 nearest = params.camera.screenToCam((int)(screenPos.x+u),(int)(screenPos.y+v),d);
-
-        // What is contribution of our current point at this pixel?
-        const float weight = ftl::cuda::spatialWeighting(nearest, camPos, SMOOTHING_MULTIPLIER_C*(nearest.z/params.camera.fx));
-        if (screenPos.x+u < colour_out.width() && screenPos.y+v < colour_out.height() && weight > 0.0f) {  // TODO: Use confidence threshold here
-            const float4 wcolour = colour * weight;
-			//const float4 wnormal = normal * weight;
-			
-			//printf("Z %f\n", d);
-
-            // Add this points contribution to the pixel buffer
-            atomicAdd((float*)&colour_out(screenPos.x+u, screenPos.y+v), wcolour.x);
-            atomicAdd((float*)&colour_out(screenPos.x+u, screenPos.y+v)+1, wcolour.y);
-            atomicAdd((float*)&colour_out(screenPos.x+u, screenPos.y+v)+2, wcolour.z);
-            atomicAdd((float*)&colour_out(screenPos.x+u, screenPos.y+v)+3, wcolour.w);
-            //atomicAdd((float*)&normal_out(screenPos.x+u, screenPos.y+v), wnormal.x);
-            //atomicAdd((float*)&normal_out(screenPos.x+u, screenPos.y+v)+1, wnormal.y);
-            //atomicAdd((float*)&normal_out(screenPos.x+u, screenPos.y+v)+2, wnormal.z);
-            //atomicAdd((float*)&normal_out(screenPos.x+u, screenPos.y+v)+3, wnormal.w);
-            atomicAdd(&contrib_out(screenPos.x+u, screenPos.y+v), weight);
-        }
-	}
+__device__ inline float4 make_float4(const float4 &v) {
+	return v;
 }
 
-__global__ void dibr_attribute_contrib_kernel(
-    TextureObject<float> colour_in,    // Original colour image
-    TextureObject<float4> points,       // Original 3D points
-    TextureObject<int> depth_in,        // Virtual depth map
-    TextureObject<float4> colour_out,   // Accumulated output
-    //TextureObject<float4> normal_out,
-    TextureObject<float> contrib_out,
-    SplatParams params) {
-    
-    //const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam];
-
-    const int tid = (threadIdx.x + threadIdx.y * blockDim.x);
-    //const int warp = tid / WARP_SIZE;
-    const int x = (blockIdx.x*blockDim.x + threadIdx.x) / WARP_SIZE;
-    const int y = blockIdx.y*blockDim.y + threadIdx.y;
+template <typename T>
+__device__ inline T make();
 
-    const float3 worldPos = make_float3(points.tex2D(x, y));
-    //const float3 normal = make_float3(tex2D<float4>(camera.normal, x, y));
-    if (worldPos.x == MINF) return;
-    //const float r = (camera.poseInverse * worldPos).z / camera.params.fx;
-
-    const float3 camPos = params.m_viewMatrix * worldPos;
-    if (camPos.z < params.camera.minDepth) return;
-    if (camPos.z > params.camera.maxDepth) return;
-    const uint2 screenPos = params.camera.camToScreen<uint2>(camPos);
+template <>
+__device__ inline uchar4 make() {
+	return make_uchar4(0,0,0,0);
+}
 
-    const int upsample = 8; //min(UPSAMPLE_MAX, int((5.0f*r) * params.camera.fx / camPos.z));
+template <>
+__device__ inline float4 make() {
+	return make_float4(0.0f,0.0f,0.0f,0.0f);
+}
 
-    // Not on screen so stop now...
-    if (screenPos.x >= depth_in.width() || screenPos.y >= depth_in.height()) return;
-            
-    // Is this point near the actual surface and therefore a contributor?
-    const float d = ((float)depth_in.tex2D((int)screenPos.x, (int)screenPos.y)/1000.0f);
-    //if (abs(d - camPos.z) > DEPTH_THRESHOLD) return;
+template <>
+__device__ inline float make() {
+	return 0.0f;
+}
 
-    // TODO:(Nick) Should just one thread load these to shared mem?
-    const float colour = (colour_in.tex2D(x, y));
-    //const float4 normal = tex2D<float4>(camera.normal, x, y);
+template <typename T>
+__device__ inline T make(const float4 &);
 
-    // Each thread in warp takes an upsample point and updates corresponding depth buffer.
-    const int lane = tid % WARP_SIZE;
-    for (int i=lane; i<upsample*upsample; i+=WARP_SIZE) {
-        const float u = (i % upsample) - (upsample / 2);
-        const float v = (i / upsample) - (upsample / 2);
+template <>
+__device__ inline uchar4 make(const float4 &v) {
+	return make_uchar4((int)v.x, (int)v.y, (int)v.z, (int)v.w);
+}
 
-        // Use the depth buffer to determine this pixels 3D position in camera space
-        const float d = ((float)depth_in.tex2D(screenPos.x+u, screenPos.y+v)/1000.0f);
-        const float3 nearest = params.camera.screenToCam((int)(screenPos.x+u),(int)(screenPos.y+v),d);
-
-        // What is contribution of our current point at this pixel?
-        const float weight = ftl::cuda::spatialWeighting(nearest, camPos, SMOOTHING_MULTIPLIER_C*(nearest.z/params.camera.fx));
-        if (screenPos.x+u < colour_out.width() && screenPos.y+v < colour_out.height() && weight > 0.0f) {  // TODO: Use confidence threshold here
-            const float wcolour = colour * weight;
-            //const float4 wnormal = normal * weight;
-            
-            //printf("Z %f\n", d);
+template <>
+__device__ inline float4 make(const float4 &v) {
+	return v;
+}
 
-            // Add this points contribution to the pixel buffer
-            atomicAdd((float*)&colour_out(screenPos.x+u, screenPos.y+v), wcolour);
-            atomicAdd(&contrib_out(screenPos.x+u, screenPos.y+v), weight);
-        }
-    }
+template <>
+__device__ inline float make(const float4 &v) {
+	return v.x;
 }
 
 /*
- * Pass 2: Accumulate attribute contributions if the points pass a visibility test.
+ * Pass 1b: Expand splats to full size and merge
  */
- __global__ void dibr_attribute_contrib_kernel(
-        TextureObject<float4> colour_in,    // Original colour image
-        TextureObject<float4> points,       // Original 3D points
+ template <int SEARCH_DIAMETER, typename T>
+ __global__ void splat_kernel(
+        //TextureObject<float4> points,       // Original 3D points
+        TextureObject<float4> normals,
+        TextureObject<T> in,
         TextureObject<int> depth_in,        // Virtual depth map
-        TextureObject<float4> colour_out,   // Accumulated output
-        //TextureObject<float4> normal_out,
-        TextureObject<float> contrib_out,
+        TextureObject<float> depth_out,   // Accumulated output
+        TextureObject<T> out,
+        //ftl::rgbd::Camera camera,
+        //float4x4 pose_inv,
         SplatParams params) {
         
     //const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam];
@@ -242,130 +149,247 @@ __global__ void dibr_attribute_contrib_kernel(
     const int x = (blockIdx.x*blockDim.x + threadIdx.x) / WARP_SIZE;
     const int y = blockIdx.y*blockDim.y + threadIdx.y;
 
-    const float3 worldPos = make_float3(points.tex2D(x, y));
-    //const float3 normal = make_float3(tex2D<float4>(camera.normal, x, y));
-    if (worldPos.x == MINF) return;
-    //const float r = (camera.poseInverse * worldPos).z / camera.params.fx;
+    if (x < 0 || y < 0 || x >= depth_in.width() || y >= depth_in.height()) return;
 
-    const float3 camPos = params.m_viewMatrix * worldPos;
-    if (camPos.z < params.camera.minDepth) return;
-    if (camPos.z > params.camera.maxDepth) return;
-    const uint2 screenPos = params.camera.camToScreen<uint2>(camPos);
+    const float3 origin = params.m_viewMatrixInverse * make_float3(0.0f);
+    float3 ray = params.camera.screenToCam(x,y,1.0f);
+    ray = ray / length(ray);
+    const float scale = ray.z;
+    ray = params.m_viewMatrixInverse.getFloat3x3() * ray;
 
-    //const int upsample = 8; //min(UPSAMPLE_MAX, int((5.0f*r) * params.camera.fx / camPos.z));
+    //float depth = 0.0f;
+    //float contrib = 0.0f;
+    float depth = 1000.0f;
 
-    // Not on screen so stop now...
-    if (screenPos.x >= depth_in.width() || screenPos.y >= depth_in.height()) return;
-            
-    // Is this point near the actual surface and therefore a contributor?
-    const float d = ((float)depth_in.tex2D((int)screenPos.x, (int)screenPos.y)/1000.0f);
-    //if (abs(d - camPos.z) > DEPTH_THRESHOLD) return;
+    struct Result {
+        float weight;
+        float depth;
+        T attr;
+    };
 
-    // TODO:(Nick) Should just one thread load these to shared mem?
-    const float4 colour = (colour_in.tex2D(x, y));
-    //const float4 normal = tex2D<float4>(camera.normal, x, y);
+    Result results[(SEARCH_DIAMETER*SEARCH_DIAMETER) / WARP_SIZE];
 
     // Each thread in warp takes an upsample point and updates corresponding depth buffer.
     const int lane = tid % WARP_SIZE;
-    for (int i=lane; i<ACCUM_DIAMETER*ACCUM_DIAMETER; i+=WARP_SIZE) {
-        const float u = (i % ACCUM_DIAMETER) - (ACCUM_DIAMETER / 2);
-        const float v = (i / ACCUM_DIAMETER) - (ACCUM_DIAMETER / 2);
+    for (int i=lane; i<SEARCH_DIAMETER*SEARCH_DIAMETER; i+=WARP_SIZE) {
+        const float u = (i % SEARCH_DIAMETER) - (SEARCH_DIAMETER / 2);
+        const float v = (i / SEARCH_DIAMETER) - (SEARCH_DIAMETER / 2);
+
+        results[i/WARP_SIZE] = {0.0f, 0.0f, make<T>()};
 
         // Use the depth buffer to determine this pixels 3D position in camera space
-        const float d = ((float)depth_in.tex2D(screenPos.x+u, screenPos.y+v)/1000.0f);
-        const float3 nearest = params.camera.screenToCam((int)(screenPos.x+u),(int)(screenPos.y+v),d);
-
-        // What is contribution of our current point at this pixel?
-        const float weight = ftl::cuda::spatialWeighting(nearest, camPos, SMOOTHING_MULTIPLIER_C*(nearest.z/params.camera.fx));
-        if (screenPos.x+u < colour_out.width() && screenPos.y+v < colour_out.height() && weight > 0.0f) {  // TODO: Use confidence threshold here
-            const float4 wcolour = colour * weight;
-            //const float4 wnormal = normal * weight;
-            
-            //printf("Z %f\n", d);
-
-            // Add this points contribution to the pixel buffer
-            atomicAdd((float*)&colour_out(screenPos.x+u, screenPos.y+v), wcolour.x);
-            atomicAdd((float*)&colour_out(screenPos.x+u, screenPos.y+v)+1, wcolour.y);
-            atomicAdd((float*)&colour_out(screenPos.x+u, screenPos.y+v)+2, wcolour.z);
-            atomicAdd((float*)&colour_out(screenPos.x+u, screenPos.y+v)+3, wcolour.w);
-            //atomicAdd((float*)&normal_out(screenPos.x+u, screenPos.y+v), wnormal.x);
-            //atomicAdd((float*)&normal_out(screenPos.x+u, screenPos.y+v)+1, wnormal.y);
-            //atomicAdd((float*)&normal_out(screenPos.x+u, screenPos.y+v)+2, wnormal.z);
-            //atomicAdd((float*)&normal_out(screenPos.x+u, screenPos.y+v)+3, wnormal.w);
-            atomicAdd(&contrib_out(screenPos.x+u, screenPos.y+v), weight);
+        const float d = ((float)depth_in.tex2D(x+u, y+v)/1000.0f);
+
+        if (d < params.camera.minDepth || d > params.camera.maxDepth) continue;
+
+        const float3 camPos = params.camera.screenToCam((int)(x+u),(int)(y+v),d);
+        const float3 camPos2 = params.camera.screenToCam((int)(x),(int)(y),d);
+        //if (length(camPos - camPos2) > 2.0f*(camPos.z/params.camera.fx)) continue;
+        const float3 worldPos = params.m_viewMatrixInverse * camPos;
+        //const float3 camPos2 = pose_inv * worldPos;
+        //const uint2 screenPos = camera.camToScreen<uint2>(camPos2);
+
+        //if (screenPos.x < points.width() && screenPos.y < points.height()) {
+            // Can now read points, normals and colours from source cam
+
+            // What is contribution of our current point at this pixel?
+            //const float3 p = make_float3(points.tex2D((int)screenPos.x, (int)screenPos.y));
+            //const float weight = ftl::cuda::spatialWeighting(worldPos, p, (camPos.z/params.camera.fx)); //*(camPos2.z/camera.fx));
+            //if (weight <= 0.0f) continue;
+
+            float3 n = make_float3(normals.tex2D((int)(x+u), (int)(y+v)));
+            const float l = length(n);
+            if (l == 0.0f) continue;
+            n /= l;
+
+            // Does the ray intersect plane of splat?
+            float t = 1000.0f;
+            if (ftl::cuda::intersectPlane(n, worldPos, origin, ray, t)) { //} && fabs(t-camPos.z) < 0.01f) {
+                //t *= (params.m_viewMatrix.getFloat3x3() * ray).z;
+                t *= scale;
+                const float3 camPos3 = params.camera.screenToCam((int)(x),(int)(y),t);
+                const float weight = ftl::cuda::spatialWeighting(camPos, camPos3, 2.0f*(camPos3.z/params.camera.fx)); //*(camPos2.z/camera.fx));
+                if (weight == 0.0f) continue;
+                //depth += t * weight;
+                //contrib += weight;
+                depth = min(depth, t);
+                results[i/WARP_SIZE] = {weight, t, in.tex2D((int)x+u, (int)y+v)}; //make_float2(t, weight);
+                //atomicMin(&depth_out(x,y), (int)(depth * 1000.0f));
+            }
+        //}
+    }
+
+    depth = warpMin(depth);
+
+    float adepth = 0.0f;
+    float contrib = 0.0f;
+    float4 attr = make_float4(0.0f);
+
+    // Loop over results array
+    for (int i=0; i<(SEARCH_DIAMETER*SEARCH_DIAMETER) / WARP_SIZE; ++i) {
+        if (results[i].depth - depth < 0.04f) {
+            adepth += results[i].depth * results[i].weight;
+            attr += make_float4(results[i].attr) * results[i].weight;
+            contrib += results[i].weight;
         }
     }
+
+    // Sum all attributes and contributions
+    adepth = warpSum(adepth);
+    attr.x = warpSum(attr.x);
+    attr.y = warpSum(attr.y);
+    attr.z = warpSum(attr.z);
+    contrib = warpSum(contrib);
+
+    if (lane == 0 && contrib > 0.0f) {
+        depth_out(x,y) = adepth / contrib;
+        out(x,y) = make<T>(attr / contrib);
+    }
 }
 
-void ftl::cuda::dibr_attribute(
-        TextureObject<uchar4> &colour_in,    // Original colour image
-        TextureObject<float4> &points,       // Original 3D points
+template <typename T>
+void ftl::cuda::splat(
+        TextureObject<float4> &normals,
+        TextureObject<T> &colour_in,
         TextureObject<int> &depth_in,        // Virtual depth map
-        TextureObject<float4> &colour_out,   // Accumulated output
-        //TextureObject<float4> normal_out,
-        TextureObject<float> &contrib_out,
-        SplatParams &params, cudaStream_t stream) {
+        TextureObject<float> &depth_out,
+        TextureObject<T> &colour_out,
+        const SplatParams &params, cudaStream_t stream) {
     const dim3 gridSize((depth_in.width() + 2 - 1)/2, (depth_in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
-	const dim3 blockSize(2*WARP_SIZE, T_PER_BLOCK);
+    const dim3 blockSize(2*WARP_SIZE, T_PER_BLOCK);
 
-    dibr_attribute_contrib_kernel<<<gridSize, blockSize, 0, stream>>>(
+    splat_kernel<8,T><<<gridSize, blockSize, 0, stream>>>(
+        normals,
         colour_in,
-        points,
         depth_in,
+        depth_out,
         colour_out,
-        contrib_out,
         params
     );
     cudaSafeCall( cudaGetLastError() );
 }
 
-void ftl::cuda::dibr_attribute(
-        TextureObject<float> &colour_in,    // Original colour image
-        TextureObject<float4> &points,       // Original 3D points
+template void ftl::cuda::splat<uchar4>(
+        TextureObject<float4> &normals,
+        TextureObject<uchar4> &colour_in,
         TextureObject<int> &depth_in,        // Virtual depth map
-        TextureObject<float4> &colour_out,   // Accumulated output
-        //TextureObject<float4> normal_out,
-        TextureObject<float> &contrib_out,
-        SplatParams &params, cudaStream_t stream) {
-    const dim3 gridSize((depth_in.width() + 2 - 1)/2, (depth_in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
-    const dim3 blockSize(2*WARP_SIZE, T_PER_BLOCK);
+        TextureObject<float> &depth_out,
+        TextureObject<uchar4> &colour_out,
+		const SplatParams &params, cudaStream_t stream);
+		
+template void ftl::cuda::splat<float4>(
+	TextureObject<float4> &normals,
+	TextureObject<float4> &colour_in,
+	TextureObject<int> &depth_in,        // Virtual depth map
+	TextureObject<float> &depth_out,
+	TextureObject<float4> &colour_out,
+	const SplatParams &params, cudaStream_t stream);
+
+template void ftl::cuda::splat<float>(
+	TextureObject<float4> &normals,
+	TextureObject<float> &colour_in,
+	TextureObject<int> &depth_in,        // Virtual depth map
+	TextureObject<float> &depth_out,
+	TextureObject<float> &colour_out,
+	const SplatParams &params, cudaStream_t stream);
 
-    dibr_attribute_contrib_kernel<<<gridSize, blockSize, 0, stream>>>(
-        colour_in,
-        points,
-        depth_in,
-        colour_out,
-        contrib_out,
-        params
-    );
-    cudaSafeCall( cudaGetLastError() );
+//==============================================================================
+
+template <typename T>
+__device__ inline T generateInput(const T &in, const SplatParams &params, const float4 &worldPos) {
+	return in;
+}
+
+template <>
+__device__ inline uchar4 generateInput(const uchar4 &in, const SplatParams &params, const float4 &worldPos) {
+	return (params.m_flags & ftl::render::kShowDisconMask && worldPos.w < 0.0f) ?
+		make_uchar4(0,0,255,255) :  // Show discontinuity mask in red
+		in;
 }
 
+/*
+ * Pass 2: Accumulate attribute contributions if the points pass a visibility test.
+ */
+ template <typename T>
+__global__ void dibr_attribute_contrib_kernel(
+        TextureObject<T> in,				// Attribute input
+        TextureObject<float4> points,       // Original 3D points
+        TextureObject<int> depth_in,        // Virtual depth map
+        TextureObject<T> out,			// Accumulated output
+        SplatParams params) {
+        
+	const int x = (blockIdx.x*blockDim.x + threadIdx.x);
+	const int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	const float4 worldPos = points.tex2D(x, y);
+	if (worldPos.x == MINF || (!(params.m_flags & ftl::render::kShowDisconMask) && worldPos.w < 0.0f)) return;
+
+	const float3 camPos = params.m_viewMatrix * make_float3(worldPos);
+	if (camPos.z < params.camera.minDepth) return;
+	if (camPos.z > params.camera.maxDepth) return;
+	const uint2 screenPos = params.camera.camToScreen<uint2>(camPos);
+
+	// Not on screen so stop now...
+	if (screenPos.x >= depth_in.width() || screenPos.y >= depth_in.height()) return;
+            
+    // Is this point near the actual surface and therefore a contributor?
+    const int d = depth_in.tex2D((int)screenPos.x, (int)screenPos.y);
+
+	const T input = generateInput(in.tex2D(x, y), params, worldPos);
+
+	//const float3 nearest = params.camera.screenToCam((int)(screenPos.x),(int)(screenPos.y),d);
+
+	//const float l = length(nearest - camPos);
+	if (d == (int)(camPos.z*1000.0f)) {
+		out(screenPos.x, screenPos.y) = input;
+	}
+}
+
+
+template <typename T>
 void ftl::cuda::dibr_attribute(
-        TextureObject<float4> &colour_in,    // Original colour image
+        TextureObject<T> &in,
         TextureObject<float4> &points,       // Original 3D points
         TextureObject<int> &depth_in,        // Virtual depth map
-        TextureObject<float4> &colour_out,   // Accumulated output
-        //TextureObject<float4> normal_out,
-        TextureObject<float> &contrib_out,
+        TextureObject<T> &out,   // Accumulated output
         SplatParams &params, cudaStream_t stream) {
-    const dim3 gridSize((depth_in.width() + 2 - 1)/2, (depth_in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
-    const dim3 blockSize(2*WARP_SIZE, T_PER_BLOCK);
+	const dim3 gridSize((in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
 
     dibr_attribute_contrib_kernel<<<gridSize, blockSize, 0, stream>>>(
-        colour_in,
+        in,
         points,
         depth_in,
-        colour_out,
-        contrib_out,
+        out,
         params
     );
     cudaSafeCall( cudaGetLastError() );
 }
 
+template void ftl::cuda::dibr_attribute<uchar4>(
+	ftl::cuda::TextureObject<uchar4> &in,	// Original colour image
+	ftl::cuda::TextureObject<float4> &points,		// Original 3D points
+	ftl::cuda::TextureObject<int> &depth_in,		// Virtual depth map
+	ftl::cuda::TextureObject<uchar4> &out,	// Accumulated output
+	ftl::render::SplatParams &params, cudaStream_t stream);
+
+template void ftl::cuda::dibr_attribute<float>(
+	ftl::cuda::TextureObject<float> &in,	// Original colour image
+	ftl::cuda::TextureObject<float4> &points,		// Original 3D points
+	ftl::cuda::TextureObject<int> &depth_in,		// Virtual depth map
+	ftl::cuda::TextureObject<float> &out,	// Accumulated output
+	ftl::render::SplatParams &params, cudaStream_t stream);
+
+template void ftl::cuda::dibr_attribute<float4>(
+	ftl::cuda::TextureObject<float4> &in,	// Original colour image
+	ftl::cuda::TextureObject<float4> &points,		// Original 3D points
+	ftl::cuda::TextureObject<int> &depth_in,		// Virtual depth map
+	ftl::cuda::TextureObject<float4> &out,	// Accumulated output
+	ftl::render::SplatParams &params, cudaStream_t stream);
+
 //==============================================================================
 
-__global__ void dibr_normalise_kernel(
+/*__global__ void dibr_normalise_kernel(
         TextureObject<float4> colour_in,
         TextureObject<uchar4> colour_out,
         //TextureObject<float4> normals,
@@ -447,4 +471,4 @@ void ftl::cuda::dibr_normalise(TextureObject<float4> &colour_in, TextureObject<f
 
     dibr_normalise_kernel<<<gridSize, blockSize, 0, stream>>>(colour_in, colour_out, contribs);
     cudaSafeCall( cudaGetLastError() );
-}
+}*/
diff --git a/components/renderers/cpp/src/splatter_cuda.hpp b/components/renderers/cpp/src/splatter_cuda.hpp
index a90e1445b2bdd45235efff301b16652e5b496f16..1888a586720d388567a6c4d48f4a29b6b50848ac 100644
--- a/components/renderers/cpp/src/splatter_cuda.hpp
+++ b/components/renderers/cpp/src/splatter_cuda.hpp
@@ -14,50 +14,22 @@ namespace cuda {
 		bool culling,
 		cudaStream_t stream);
 
+	template <typename T>
+	void splat(
+        ftl::cuda::TextureObject<float4> &normals,
+		ftl::cuda::TextureObject<T> &colour_in,
+        ftl::cuda::TextureObject<int> &depth_in,        // Virtual depth map
+        ftl::cuda::TextureObject<float> &depth_out,
+		ftl::cuda::TextureObject<T> &colour_out,
+        const ftl::render::SplatParams &params, cudaStream_t stream);
+
+	template <typename T>
 	void dibr_attribute(
-		ftl::cuda::TextureObject<uchar4> &in,	// Original colour image
+		ftl::cuda::TextureObject<T> &in,	// Original colour image
 		ftl::cuda::TextureObject<float4> &points,		// Original 3D points
 		ftl::cuda::TextureObject<int> &depth_in,		// Virtual depth map
-		ftl::cuda::TextureObject<float4> &out,	// Accumulated output
-		//TextureObject<float4> normal_out,
-		ftl::cuda::TextureObject<float> &contrib_out,
+		ftl::cuda::TextureObject<T> &out,	// Accumulated output
 		ftl::render::SplatParams &params, cudaStream_t stream);
-
-	void dibr_attribute(
-		ftl::cuda::TextureObject<float> &in,	// Original colour image
-		ftl::cuda::TextureObject<float4> &points,		// Original 3D points
-		ftl::cuda::TextureObject<int> &depth_in,		// Virtual depth map
-		ftl::cuda::TextureObject<float4> &out,	// Accumulated output
-		//TextureObject<float4> normal_out,
-		ftl::cuda::TextureObject<float> &contrib_out,
-		ftl::render::SplatParams &params, cudaStream_t stream);
-
-	void dibr_attribute(
-		ftl::cuda::TextureObject<float4> &in,	// Original colour image
-		ftl::cuda::TextureObject<float4> &points,		// Original 3D points
-		ftl::cuda::TextureObject<int> &depth_in,		// Virtual depth map
-		ftl::cuda::TextureObject<float4> &out,	// Accumulated output
-		//TextureObject<float4> normal_out,
-		ftl::cuda::TextureObject<float> &contrib_out,
-		ftl::render::SplatParams &params, cudaStream_t stream);
-
-	void dibr_normalise(
-		ftl::cuda::TextureObject<float4> &in,
-		ftl::cuda::TextureObject<uchar4> &out,
-		ftl::cuda::TextureObject<float> &contribs,
-		cudaStream_t stream);
-
-	void dibr_normalise(
-		ftl::cuda::TextureObject<float4> &in,
-		ftl::cuda::TextureObject<float> &out,
-		ftl::cuda::TextureObject<float> &contribs,
-		cudaStream_t stream);
-
-	void dibr_normalise(
-		ftl::cuda::TextureObject<float4> &in,
-		ftl::cuda::TextureObject<float4> &out,
-		ftl::cuda::TextureObject<float> &contribs,
-		cudaStream_t stream);
 }
 }