From 90aec218a5b7d41e7ed8387928b024ade65a67e8 Mon Sep 17 00:00:00 2001
From: Nicolas Pope <nicolas.pope@utu.fi>
Date: Tue, 1 Oct 2019 12:24:24 +0300
Subject: [PATCH] Ongoing #133 improvements

---
 applications/reconstruct/CMakeLists.txt       |   4 +-
 applications/reconstruct/src/depth_camera.cpp |  55 ---
 applications/reconstruct/src/depth_camera.cu  | 355 -------------
 .../reconstruct/src/depth_camera_cuda.hpp     |  37 --
 applications/reconstruct/src/ilw.cpp          |   5 +
 applications/reconstruct/src/ilw.cu           |  14 +-
 applications/reconstruct/src/ilw/ilw.cpp      | 223 +++++++++
 applications/reconstruct/src/ilw/ilw.cu       | 205 ++++++++
 .../reconstruct/src/{ => ilw}/ilw.hpp         |  14 +-
 .../reconstruct/src/{ => ilw}/ilw_cuda.hpp    |  23 +
 applications/reconstruct/src/main.cpp         |  23 +-
 applications/reconstruct/src/{ => mls}/mls.cu |   0
 applications/reconstruct/src/point_cloud.hpp  |  36 --
 applications/reconstruct/src/ray_cast_sdf.cpp | 106 ----
 applications/reconstruct/src/ray_cast_sdf.cu  | 189 -------
 .../reconstruct/src/scene_rep_hash_sdf.cu     | 282 -----------
 .../reconstruct/src/virtual_source.cpp        |  89 ----
 applications/reconstruct/src/voxel_scene.cpp  | 466 ------------------
 components/codecs/src/nvpipe_encoder.cpp      |   4 +
 .../renderers/cpp/include/ftl/cuda/points.hpp |   6 +
 .../cpp/include/ftl/cuda/weighting.hpp        |  24 +-
 components/renderers/cpp/src/splat_render.cpp |  12 +-
 components/renderers/cpp/src/splatter.cu      |  14 +-
 components/rgbd-sources/src/streamer.cpp      |   2 +-
 components/rgbd-sources/src/virtual.cpp       |   2 -
 25 files changed, 552 insertions(+), 1638 deletions(-)
 delete mode 100644 applications/reconstruct/src/depth_camera.cpp
 delete mode 100644 applications/reconstruct/src/depth_camera.cu
 delete mode 100644 applications/reconstruct/src/depth_camera_cuda.hpp
 create mode 100644 applications/reconstruct/src/ilw/ilw.cpp
 create mode 100644 applications/reconstruct/src/ilw/ilw.cu
 rename applications/reconstruct/src/{ => ilw}/ilw.hpp (81%)
 rename applications/reconstruct/src/{ => ilw}/ilw_cuda.hpp (52%)
 rename applications/reconstruct/src/{ => mls}/mls.cu (100%)
 delete mode 100644 applications/reconstruct/src/point_cloud.hpp
 delete mode 100644 applications/reconstruct/src/ray_cast_sdf.cpp
 delete mode 100644 applications/reconstruct/src/ray_cast_sdf.cu
 delete mode 100644 applications/reconstruct/src/scene_rep_hash_sdf.cu
 delete mode 100644 applications/reconstruct/src/virtual_source.cpp
 delete mode 100644 applications/reconstruct/src/voxel_scene.cpp

diff --git a/applications/reconstruct/CMakeLists.txt b/applications/reconstruct/CMakeLists.txt
index dcee7afa0..a2d3b43e3 100644
--- a/applications/reconstruct/CMakeLists.txt
+++ b/applications/reconstruct/CMakeLists.txt
@@ -15,8 +15,8 @@ set(REPSRC
 	#src/mls.cu
 	#src/depth_camera.cu
 	#src/depth_camera.cpp
-	src/ilw.cpp
-	src/ilw.cu
+	src/ilw/ilw.cpp
+	src/ilw/ilw.cu
 )
 
 add_executable(ftl-reconstruct ${REPSRC})
diff --git a/applications/reconstruct/src/depth_camera.cpp b/applications/reconstruct/src/depth_camera.cpp
deleted file mode 100644
index de310f592..000000000
--- a/applications/reconstruct/src/depth_camera.cpp
+++ /dev/null
@@ -1,55 +0,0 @@
-#include <loguru.hpp>
-#include <ftl/depth_camera.hpp>
-#include "depth_camera_cuda.hpp"
-#include <opencv2/core/cuda_stream_accessor.hpp>
-
-using ftl::voxhash::DepthCamera;
-using ftl::voxhash::DepthCameraCUDA;
-
-DepthCamera::DepthCamera() {
-	depth_tex_ = nullptr;
-	depth2_tex_ = nullptr;
-	points_tex_ = nullptr;
-	colour_tex_ = nullptr;
-	normal_tex_ = nullptr;
-}
-
-void DepthCamera::alloc(const DepthCameraParams& params, bool withNormals) { //! todo resizing???
-	depth_tex_ = new ftl::cuda::TextureObject<float>(params.m_imageWidth, params.m_imageHeight);
-	depth2_tex_ = new ftl::cuda::TextureObject<int>(params.m_imageWidth, params.m_imageHeight);
-	points_tex_ = new ftl::cuda::TextureObject<float4>(params.m_imageWidth, params.m_imageHeight);
-	colour_tex_ = new ftl::cuda::TextureObject<uchar4>(params.m_imageWidth, params.m_imageHeight);
-	data.depth = depth_tex_->cudaTexture();
-	data.depth2 = depth2_tex_->cudaTexture();
-	data.points = points_tex_->cudaTexture();
-	data.colour = colour_tex_->cudaTexture();
-	data.params = params;
-
-	//if (withNormals) {
-		normal_tex_ = new ftl::cuda::TextureObject<float4>(params.m_imageWidth, params.m_imageHeight);
-		data.normal = normal_tex_->cudaTexture();
-	//} else {
-	//	data.normal = 0;
-	//}
-}
-
-void DepthCamera::free() {
-	delete depth_tex_;
-	delete colour_tex_;
-	delete depth2_tex_;
-	delete points_tex_;
-	if (normal_tex_) delete normal_tex_;
-}
-
-void DepthCamera::updateData(const cv::Mat &depth, const cv::Mat &rgb, cv::cuda::Stream &stream) {
-	depth_tex_->upload(depth, cv::cuda::StreamAccessor::getStream(stream));
-	colour_tex_->upload(rgb, cv::cuda::StreamAccessor::getStream(stream));
-	//if (normal_mat_) {
-		//computeNormals(cv::cuda::StreamAccessor::getStream(stream));
-	//}
-}
-
-void DepthCamera::computeNormals(cudaStream_t stream) {
-	//ftl::cuda::point_cloud(*points_tex_, data, stream);
-	ftl::cuda::compute_normals(*depth_tex_, *normal_tex_, data, stream);
-}
diff --git a/applications/reconstruct/src/depth_camera.cu b/applications/reconstruct/src/depth_camera.cu
deleted file mode 100644
index 7a322eaf7..000000000
--- a/applications/reconstruct/src/depth_camera.cu
+++ /dev/null
@@ -1,355 +0,0 @@
-#include <ftl/cuda_common.hpp>
-#include <ftl/cuda_util.hpp>
-#include <ftl/depth_camera.hpp>
-#include "depth_camera_cuda.hpp"
-
-#define T_PER_BLOCK 16
-#define MINF __int_as_float(0xff800000)
-
-using ftl::voxhash::DepthCameraCUDA;
-using ftl::voxhash::HashData;
-using ftl::voxhash::HashParams;
-using ftl::cuda::TextureObject;
-using ftl::render::SplatParams;
-
-extern __constant__ ftl::voxhash::DepthCameraCUDA c_cameras[MAX_CAMERAS];
-extern __constant__ HashParams c_hashParams;
-
-__global__ void clear_depth_kernel(ftl::cuda::TextureObject<float> depth) {
-	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
-	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
-
-	if (x < depth.width() && y < depth.height()) {
-		depth(x,y) = 1000.0f; //PINF;
-		//colour(x,y) = make_uchar4(76,76,82,0);
-	}
-}
-
-void ftl::cuda::clear_depth(const ftl::cuda::TextureObject<float> &depth, cudaStream_t stream) {
-	const dim3 clear_gridSize((depth.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
-	const dim3 clear_blockSize(T_PER_BLOCK, T_PER_BLOCK);
-	clear_depth_kernel<<<clear_gridSize, clear_blockSize, 0, stream>>>(depth);
-}
-
-__global__ void clear_depth_kernel(ftl::cuda::TextureObject<int> depth) {
-	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
-	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
-
-	if (x < depth.width() && y < depth.height()) {
-		depth(x,y) = 0x7FFFFFFF; //PINF;
-		//colour(x,y) = make_uchar4(76,76,82,0);
-	}
-}
-
-void ftl::cuda::clear_depth(const ftl::cuda::TextureObject<int> &depth, cudaStream_t stream) {
-	const dim3 clear_gridSize((depth.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
-	const dim3 clear_blockSize(T_PER_BLOCK, T_PER_BLOCK);
-	clear_depth_kernel<<<clear_gridSize, clear_blockSize, 0, stream>>>(depth);
-}
-
-__global__ void clear_to_zero_kernel(ftl::cuda::TextureObject<float> depth) {
-	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
-	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
-
-	if (x < depth.width() && y < depth.height()) {
-		depth(x,y) = 0.0f; //PINF;
-	}
-}
-
-void ftl::cuda::clear_to_zero(const ftl::cuda::TextureObject<float> &depth, cudaStream_t stream) {
-	const dim3 clear_gridSize((depth.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
-	const dim3 clear_blockSize(T_PER_BLOCK, T_PER_BLOCK);
-	clear_to_zero_kernel<<<clear_gridSize, clear_blockSize, 0, stream>>>(depth);
-}
-
-__global__ void clear_points_kernel(ftl::cuda::TextureObject<float4> depth) {
-	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
-	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
-
-	if (x < depth.width() && y < depth.height()) {
-		depth(x,y) = make_float4(MINF,MINF,MINF,MINF);
-		//colour(x,y) = make_uchar4(76,76,82,0);
-	}
-}
-
-void ftl::cuda::clear_points(const ftl::cuda::TextureObject<float4> &depth, cudaStream_t stream) {
-	const dim3 clear_gridSize((depth.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
-	const dim3 clear_blockSize(T_PER_BLOCK, T_PER_BLOCK);
-	clear_points_kernel<<<clear_gridSize, clear_blockSize, 0, stream>>>(depth);
-}
-
-__global__ void clear_colour_kernel(ftl::cuda::TextureObject<uchar4> depth) {
-	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
-	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
-
-	if (x < depth.width() && y < depth.height()) {
-		depth(x,y) = make_uchar4(76,76,76,76);
-		//colour(x,y) = make_uchar4(76,76,82,0);
-	}
-}
-
-void ftl::cuda::clear_colour(const ftl::cuda::TextureObject<uchar4> &depth, cudaStream_t stream) {
-	const dim3 clear_gridSize((depth.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
-	const dim3 clear_blockSize(T_PER_BLOCK, T_PER_BLOCK);
-	clear_colour_kernel<<<clear_gridSize, clear_blockSize, 0, stream>>>(depth);
-}
-
-__global__ void clear_colour_kernel(ftl::cuda::TextureObject<float4> depth) {
-	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
-	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
-
-	if (x < depth.width() && y < depth.height()) {
-		depth(x,y) = make_float4(0.0f);
-	}
-}
-
-void ftl::cuda::clear_colour(const ftl::cuda::TextureObject<float4> &depth, cudaStream_t stream) {
-	const dim3 clear_gridSize((depth.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
-	const dim3 clear_blockSize(T_PER_BLOCK, T_PER_BLOCK);
-	clear_colour_kernel<<<clear_gridSize, clear_blockSize, 0, stream>>>(depth);
-}
-
-// ===== Type convert =====
-
-template <typename A, typename B>
-__global__ void convert_kernel(const ftl::cuda::TextureObject<A> in, ftl::cuda::TextureObject<B> out, float scale) {
-	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
-	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
-
-	if (x < in.width() && y < in.height()) {
-		out(x,y) = ((float)in.tex2D((int)x,(int)y)) * scale;
-	}
-}
-
-void ftl::cuda::float_to_int(const ftl::cuda::TextureObject<float> &in, ftl::cuda::TextureObject<int> &out, float scale, cudaStream_t stream) {
-	const dim3 gridSize((in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
-	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
-	convert_kernel<float,int><<<gridSize, blockSize, 0, stream>>>(in, out, scale);
-}
-
-void ftl::cuda::int_to_float(const ftl::cuda::TextureObject<int> &in, ftl::cuda::TextureObject<float> &out, float scale, cudaStream_t stream) {
-	const dim3 gridSize((in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
-	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
-	convert_kernel<int,float><<<gridSize, blockSize, 0, stream>>>(in, out, scale);
-}
-
-/// ===== Median Filter ======
-
-#define WINDOW_SIZE 3
-#define MEDIAN_RADIUS 3
-#define MEDIAN_SIZE (((MEDIAN_RADIUS*2)+1)*((MEDIAN_RADIUS*2)+1))
-
-__global__ void medianFilterKernel(TextureObject<int> inputImageKernel, TextureObject<float> outputImagekernel)
-{
-	// Set row and colum for thread.
-	int row = blockIdx.y * blockDim.y + threadIdx.y;
-	int col = blockIdx.x * blockDim.x + threadIdx.x;
-	int filterVector[MEDIAN_SIZE] = {0};   //Take fiter window
-	if((row<=MEDIAN_RADIUS) || (col<=MEDIAN_RADIUS) || (row>=inputImageKernel.height()-MEDIAN_RADIUS) || (col>=inputImageKernel.width()-MEDIAN_RADIUS))
-				outputImagekernel(col, row) = 0.0f; //Deal with boundry conditions
-	else {
-		for (int v = -MEDIAN_RADIUS; v <= MEDIAN_RADIUS; v++) { 
-			for (int u = -MEDIAN_RADIUS; u <= MEDIAN_RADIUS; u++){
-				filterVector[(v+MEDIAN_RADIUS)*(2*MEDIAN_RADIUS+1)+u+MEDIAN_RADIUS] = inputImageKernel((col+u), (row+v));   // setup the filterign window.
-			}
-		}
-		for (int i = 0; i < MEDIAN_SIZE; i++) {
-			for (int j = i + 1; j < MEDIAN_SIZE; j++) {
-				if (filterVector[i] > filterVector[j]) { 
-					//Swap the variables.
-					char tmp = filterVector[i];
-					filterVector[i] = filterVector[j];
-					filterVector[j] = tmp;
-				}
-			}
-		}
-		outputImagekernel(col, row) = (float)filterVector[MEDIAN_SIZE/2+1] / 1000.0f;   //Set the output variables.
-	}
-}
-
-void ftl::cuda::median_filter(const ftl::cuda::TextureObject<int> &in, ftl::cuda::TextureObject<float> &out, cudaStream_t stream) {
-	const dim3 gridSize((in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
-	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
-	medianFilterKernel<<<gridSize, blockSize, 0, stream>>>(in, out);
-}
-
-
-/// ===== Hole Fill =====
-
-__device__ inline float distance2(float3 a, float3 b) {
-	const float x = a.x-b.x;
-	const float y = a.y-b.y;
-	const float z = a.z-b.z;
-	return x*x+y*y+z*z;
-}
-
-#define SPLAT_RADIUS 7
-#define SPLAT_BOUNDS (2*SPLAT_RADIUS+T_PER_BLOCK+1)
-#define SPLAT_BUFFER_SIZE (SPLAT_BOUNDS*SPLAT_BOUNDS)
-
-__global__ void hole_fill_kernel(
-	TextureObject<int> depth_in,
-	TextureObject<float> depth_out, DepthCameraParams params) {
-	// Read an NxN region and
-	// - interpolate a depth value for this pixel
-	// - interpolate an rgb value for this pixel
-	// Must respect depth discontinuities.
-	// How much influence a given neighbour has depends on its depth value
-
-	__shared__ float3 positions[SPLAT_BUFFER_SIZE];
-
-	const float voxelSize = c_hashParams.m_virtualVoxelSize;
-
-	const int i = threadIdx.y*blockDim.y + threadIdx.x;
-	const int bx = blockIdx.x*blockDim.x;
-	const int by = blockIdx.y*blockDim.y;
-	const int x = bx + threadIdx.x;
-	const int y = by + threadIdx.y;
-
-	// const float camMinDepth = params.camera.m_sensorDepthWorldMin;
-	// const float camMaxDepth = params.camera.m_sensorDepthWorldMax;
-
-	for (int j=i; j<SPLAT_BUFFER_SIZE; j+=T_PER_BLOCK) {
-		const unsigned int sx = (j % SPLAT_BOUNDS)+bx-SPLAT_RADIUS;
-		const unsigned int sy = (j / SPLAT_BOUNDS)+by-SPLAT_RADIUS;
-		if (sx >= depth_in.width() || sy >= depth_in.height()) {
-			positions[j] = make_float3(1000.0f,1000.0f, 1000.0f);
-		} else {
-			positions[j] = params.kinectDepthToSkeleton(sx, sy, (float)depth_in.tex2D((int)sx,(int)sy) / 1000.0f);
-		}
-	}
-
-	__syncthreads();
-
-	if (x >= depth_in.width() || y >= depth_in.height()) return;
-
-	const float voxelSquared = voxelSize*voxelSize;
-	float mindepth = 1000.0f;
-	//int minidx = -1;
-	// float3 minpos;
-
-	//float3 validPos[MAX_VALID];
-	//int validIndices[MAX_VALID];
-	//int validix = 0;
-
-	for (int v=-SPLAT_RADIUS; v<=SPLAT_RADIUS; ++v) {
-		for (int u=-SPLAT_RADIUS; u<=SPLAT_RADIUS; ++u) {
-			//const int idx = (threadIdx.y+v)*SPLAT_BOUNDS+threadIdx.x+u;
-			const int idx = (threadIdx.y+v+SPLAT_RADIUS)*SPLAT_BOUNDS+threadIdx.x+u+SPLAT_RADIUS;
-
-			float3 posp = positions[idx];
-			const float d = posp.z;
-			//if (d < camMinDepth || d > camMaxDepth) continue;
-
-			float3 pos = params.kinectDepthToSkeleton(x, y, d);
-			float dist = distance2(pos, posp);
-
-			if (dist < voxelSquared) {
-				// Valid so check for minimum
-				//validPos[validix] = pos;
-				//validIndices[validix++] = idx;
-				if (d < mindepth) {
-					mindepth = d;
-					//minidx = idx;
-					// minpos = pos;
-				}
-			}
-		}
-	}
-
-	depth_out(x,y) = mindepth;
-}
-
-void ftl::cuda::hole_fill(const TextureObject<int> &depth_in,
-	const TextureObject<float> &depth_out, const DepthCameraParams &params, cudaStream_t stream) 
-{
-
-	const dim3 gridSize((depth_in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
-	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
-
-	hole_fill_kernel<<<gridSize, blockSize, 0, stream>>>(depth_in, depth_out, params);
-	cudaSafeCall( cudaGetLastError() );
-
-	#ifdef _DEBUG
-	cudaSafeCall(cudaDeviceSynchronize());
-	#endif
-}
-
-
-/// ===== Point cloud from depth =====
-
-__global__ void point_cloud_kernel(ftl::cuda::TextureObject<float4> output, DepthCameraCUDA depthCameraData)
-{
-	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
-	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
-
-	const int width = depthCameraData.params.m_imageWidth;
-	const int height = depthCameraData.params.m_imageHeight;
-
-	if (x < width && y < height) {
-		float depth = tex2D<float>(depthCameraData.depth, x, y);
-
-		output(x,y) = (depth >= depthCameraData.params.m_sensorDepthWorldMin && depth <= depthCameraData.params.m_sensorDepthWorldMax) ?
-			make_float4(depthCameraData.pose * depthCameraData.params.kinectDepthToSkeleton(x, y, depth), 0.0f) :
-			make_float4(MINF, MINF, MINF, MINF);
-	}
-}
-
-void ftl::cuda::point_cloud(ftl::cuda::TextureObject<float4> &output, const DepthCameraCUDA &depthCameraData, cudaStream_t stream) {
-	const dim3 gridSize((depthCameraData.params.m_imageWidth + T_PER_BLOCK - 1)/T_PER_BLOCK, (depthCameraData.params.m_imageHeight + T_PER_BLOCK - 1)/T_PER_BLOCK);
-	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
-
-	point_cloud_kernel<<<gridSize, blockSize, 0, stream>>>(output, depthCameraData);
-
-#ifdef _DEBUG
-	cudaSafeCall(cudaDeviceSynchronize());
-#endif
-}
-
-/// ===== NORMALS =====
-
-
-__global__ void compute_normals_kernel(const ftl::cuda::TextureObject<float> input, ftl::cuda::TextureObject<float4> output, DepthCameraCUDA camera)
-{
-	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
-	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
-
-	const int width = output.width();
-
-	if(x >= output.width() || y >= output.height()) return;
-
-	output(x,y) = make_float4(MINF, MINF, MINF, MINF);
-
-	if(x > 0 && x < output.width()-1 && y > 0 && y < output.height()-1)
-	{
-		const float3 CC = camera.pose * camera.params.kinectDepthToSkeleton(x,y,input(x,y)); //input[(y+0)*width+(x+0)];
-		const float3 PC = camera.pose * camera.params.kinectDepthToSkeleton(x,y,input(x,y+1)); //input[(y+1)*width+(x+0)];
-		const float3 CP = camera.pose * camera.params.kinectDepthToSkeleton(x,y,input(x+1,y)); //input[(y+0)*width+(x+1)];
-		const float3 MC = camera.pose * camera.params.kinectDepthToSkeleton(x,y,input(x,y-1)); //input[(y-1)*width+(x+0)];
-		const float3 CM = camera.pose * camera.params.kinectDepthToSkeleton(x,y,input(x-1,y)); //input[(y+0)*width+(x-1)];
-
-		if(CC.x != MINF && PC.x != MINF && CP.x != MINF && MC.x != MINF && CM.x != MINF)
-		{
-			const float3 n = cross(PC-MC, CP-CM);
-			const float  l = length(n);
-
-			if(l > 0.0f)
-			{
-				//if (x == 400 && y == 200) printf("Cam NORMX: %f\n", (n/-l).x);
-				output(x,y) = make_float4(n.x/-l, n.y/-l, n.z/-l, 0.0f); //make_float4(n/-l, 1.0f);
-			}
-		}
-	}
-}
-
-void ftl::cuda::compute_normals(const ftl::cuda::TextureObject<float> &input, ftl::cuda::TextureObject<float4> &output, const DepthCameraCUDA &camera, cudaStream_t stream) {
-	const dim3 gridSize((output.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (output.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
-	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
-
-	compute_normals_kernel<<<gridSize, blockSize, 0, stream>>>(input, output, camera);
-
-#ifdef _DEBUG
-	cudaSafeCall(cudaDeviceSynchronize());
-	//cutilCheckMsg(__FUNCTION__);
-#endif
-}
\ No newline at end of file
diff --git a/applications/reconstruct/src/depth_camera_cuda.hpp b/applications/reconstruct/src/depth_camera_cuda.hpp
deleted file mode 100644
index 26bfcad73..000000000
--- a/applications/reconstruct/src/depth_camera_cuda.hpp
+++ /dev/null
@@ -1,37 +0,0 @@
-#ifndef _FTL_RECONSTRUCTION_CAMERA_CUDA_HPP_
-#define _FTL_RECONSTRUCTION_CAMERA_CUDA_HPP_
-
-#include <ftl/depth_camera.hpp>
-#include <ftl/voxel_hash.hpp>
-#include "splat_params.hpp"
-
-namespace ftl {
-namespace cuda {
-
-void clear_depth(const TextureObject<float> &depth, cudaStream_t stream);
-void clear_depth(const TextureObject<int> &depth, cudaStream_t stream);
-void clear_to_zero(const ftl::cuda::TextureObject<float> &depth, cudaStream_t stream);
-void clear_points(const ftl::cuda::TextureObject<float4> &depth, cudaStream_t stream);
-void clear_colour(const ftl::cuda::TextureObject<uchar4> &depth, cudaStream_t stream);
-void clear_colour(const ftl::cuda::TextureObject<float4> &depth, cudaStream_t stream);
-
-void median_filter(const ftl::cuda::TextureObject<int> &in, ftl::cuda::TextureObject<float> &out, cudaStream_t stream);
-
-void int_to_float(const ftl::cuda::TextureObject<int> &in, ftl::cuda::TextureObject<float> &out, float scale, cudaStream_t stream);
-
-void float_to_int(const ftl::cuda::TextureObject<float> &in, ftl::cuda::TextureObject<int> &out, float scale, cudaStream_t stream);
-
-void mls_smooth(TextureObject<float4> &output, const ftl::voxhash::HashParams &hashParams, int numcams, int cam, cudaStream_t stream);
-
-void mls_render_depth(const TextureObject<int> &input, TextureObject<int> &output, const ftl::render::SplatParams &params, int numcams, cudaStream_t stream);
-
-void hole_fill(const TextureObject<int> &depth_in, const TextureObject<float> &depth_out, const DepthCameraParams &params, cudaStream_t stream);
-
-void point_cloud(ftl::cuda::TextureObject<float4> &output, const ftl::voxhash::DepthCameraCUDA &depthCameraData, cudaStream_t stream);
-
-void compute_normals(const ftl::cuda::TextureObject<float> &depth, ftl::cuda::TextureObject<float4> &normals, const ftl::voxhash::DepthCameraCUDA &camera, cudaStream_t stream);
-
-}
-}
-
-#endif  // _FTL_RECONSTRUCTION_CAMERA_CUDA_HPP_
diff --git a/applications/reconstruct/src/ilw.cpp b/applications/reconstruct/src/ilw.cpp
index 86a4cca5e..c1bf8499a 100644
--- a/applications/reconstruct/src/ilw.cpp
+++ b/applications/reconstruct/src/ilw.cpp
@@ -66,6 +66,11 @@ bool ILW::_phase0(ftl::rgbd::FrameSet &fs, cudaStream_t stream) {
         f.createTexture<float4>(Channel::EnergyVector, Format<float4>(f.get<GpuMat>(Channel::Colour).size()));
         f.createTexture<float>(Channel::Energy, Format<float>(f.get<GpuMat>(Channel::Colour).size()));
         f.createTexture<uchar4>(Channel::Colour);
+
+		cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream);
+
+		f.get<GpuMat>(Channel::EnergyVector).setTo(cv::Scalar(0.0f,0.0f,0.0f,0.0f), cvstream);
+		f.get<GpuMat>(Channel::Energy).setTo(cv::Scalar(0.0f), cvstream);
     }
 
     return true;
diff --git a/applications/reconstruct/src/ilw.cu b/applications/reconstruct/src/ilw.cu
index 90133a3a5..999b5ec90 100644
--- a/applications/reconstruct/src/ilw.cu
+++ b/applications/reconstruct/src/ilw.cu
@@ -30,9 +30,14 @@ __global__ void correspondence_energy_vector_kernel(
 	const int x = (blockIdx.x*blockDim.x + threadIdx.x) / WARP_SIZE;
     const int y = blockIdx.y*blockDim.y + threadIdx.y;
     
-    const float3 world1 = make_float3(p1.tex2D(x, y));
+	const float3 world1 = make_float3(p1.tex2D(x, y));
+	if (world1.x == MINF) {
+        vout(x,y) = make_float4(0.0f);
+        eout(x,y) = 0.0f;
+        return;
+    }
     const float3 camPos2 = pose2 * world1;
-    const uint2 screen2 = cam2.camToScreen<uint2>(camPos2);
+	const uint2 screen2 = cam2.camToScreen<uint2>(camPos2);
 
     const int upsample = 8;
 
@@ -43,12 +48,11 @@ __global__ void correspondence_energy_vector_kernel(
 		const float u = (i % upsample) - (upsample / 2);
         const float v = (i / upsample) - (upsample / 2);
         
-        const float3 world2 = make_float3(p2.tex2D(screen2.x+u, screen2.y+v));
+		const float3 world2 = make_float3(p2.tex2D(screen2.x+u, screen2.y+v));
+		if (world2.x == MINF) continue;
 
         // Determine degree of correspondence
         const float confidence = 1.0f / length(world1 - world2);
-
-        printf("conf %f\n", confidence);
         const float maxconf = warpMax(confidence);
 
         // This thread has best confidence value
diff --git a/applications/reconstruct/src/ilw/ilw.cpp b/applications/reconstruct/src/ilw/ilw.cpp
new file mode 100644
index 000000000..65e1a19df
--- /dev/null
+++ b/applications/reconstruct/src/ilw/ilw.cpp
@@ -0,0 +1,223 @@
+#include "ilw.hpp"
+#include <ftl/utility/matrix_conversion.hpp>
+#include <ftl/rgbd/source.hpp>
+#include <ftl/cuda/points.hpp>
+#include <loguru.hpp>
+
+#include "ilw_cuda.hpp"
+
+using ftl::ILW;
+using ftl::detail::ILWData;
+using ftl::rgbd::Channel;
+using ftl::rgbd::Channels;
+using ftl::rgbd::Format;
+using cv::cuda::GpuMat;
+
+ILW::ILW(nlohmann::json &config) : ftl::Configurable(config) {
+    enabled_ = value("ilw_align", true);
+    iterations_ = value("iterations", 1);
+    motion_rate_ = value("motion_rate", 0.4f);
+    motion_window_ = value("motion_window", 3);
+    use_lab_ = value("use_Lab", false);
+    params_.colour_smooth = value("colour_smooth", 50.0f);
+    params_.spatial_smooth = value("spatial_smooth", 0.04f);
+    params_.cost_ratio = value("cost_ratio", 0.75f);
+
+    on("ilw_align", [this](const ftl::config::Event &e) {
+        enabled_ = value("ilw_align", true);
+    });
+
+    on("iterations", [this](const ftl::config::Event &e) {
+        iterations_ = value("iterations", 1);
+    });
+
+    on("motion_rate", [this](const ftl::config::Event &e) {
+        motion_rate_ = value("motion_rate", 0.4f);
+    });
+
+    on("motion_window", [this](const ftl::config::Event &e) {
+        motion_window_ = value("motion_window", 3);
+    });
+
+    on("use_Lab", [this](const ftl::config::Event &e) {
+        use_lab_ = value("use_Lab", false);
+    });
+
+    on("colour_smooth", [this](const ftl::config::Event &e) {
+        params_.colour_smooth = value("colour_smooth", 50.0f);
+    });
+
+    on("spatial_smooth", [this](const ftl::config::Event &e) {
+        params_.spatial_smooth = value("spatial_smooth", 0.04f);
+    });
+
+    on("cost_ratio", [this](const ftl::config::Event &e) {
+        params_.cost_ratio = value("cost_ratio", 75.0f);
+    });
+
+    params_.flags = 0;
+    if (value("ignore_bad", false)) params_.flags |= ftl::cuda::kILWFlag_IgnoreBad;
+    if (value("ignore_bad_colour", false)) params_.flags |= ftl::cuda::kILWFlag_SkipBadColour;
+    if (value("restrict_z", true)) params_.flags |= ftl::cuda::kILWFlag_RestrictZ;
+
+    on("ignore_bad", [this](const ftl::config::Event &e) {
+        if (value("ignore_bad", false)) params_.flags |= ftl::cuda::kILWFlag_IgnoreBad;
+        else params_.flags &= ~ftl::cuda::kILWFlag_IgnoreBad;
+    });
+
+    on("ignore_bad_colour", [this](const ftl::config::Event &e) {
+        if (value("ignore_bad_colour", false)) params_.flags |= ftl::cuda::kILWFlag_SkipBadColour;
+        else params_.flags &= ~ftl::cuda::kILWFlag_SkipBadColour;
+    });
+
+    on("restrict_z", [this](const ftl::config::Event &e) {
+        if (value("restrict_z", false)) params_.flags |= ftl::cuda::kILWFlag_RestrictZ;
+        else params_.flags &= ~ftl::cuda::kILWFlag_RestrictZ;
+    });
+}
+
+ILW::~ILW() {
+
+}
+
+bool ILW::process(ftl::rgbd::FrameSet &fs, cudaStream_t stream) {
+    if (!enabled_) return false;
+
+    _phase0(fs, stream);
+
+    for (int i=0; i<iterations_; ++i) {
+        int win;
+        switch (i) {
+        case 0: win = 17; break;
+        case 1: win = 9; break;
+        default: win = 5; break;
+        }
+        _phase1(fs, win, stream);
+        //for (int j=0; j<3; ++j) {
+            _phase2(fs, motion_rate_, stream);
+        //}
+
+		// TODO: Break if no time left
+    }
+
+    return true;
+}
+
+bool ILW::_phase0(ftl::rgbd::FrameSet &fs, cudaStream_t stream) {
+    // Make points channel...
+    for (size_t i=0; i<fs.frames.size(); ++i) {
+		auto &f = fs.frames[i];
+		auto *s = fs.sources[i];
+
+		if (f.empty(Channel::Depth + Channel::Colour)) {
+			LOG(ERROR) << "Missing required channel";
+			continue;
+		}
+			
+        auto &t = f.createTexture<float4>(Channel::Points, Format<float4>(f.get<GpuMat>(Channel::Colour).size()));
+        auto pose = MatrixConversion::toCUDA(s->getPose().cast<float>()); //.inverse());
+        ftl::cuda::point_cloud(t, f.createTexture<float>(Channel::Depth), s->parameters(), pose, stream);
+
+        // TODO: Create energy vector texture and clear it
+        // Create energy and clear it
+
+        // Convert colour from BGR to BGRA if needed
+		if (f.get<GpuMat>(Channel::Colour).type() == CV_8UC3) {
+            cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream);
+			// Convert to 4 channel colour
+			auto &col = f.get<GpuMat>(Channel::Colour);
+			GpuMat tmp(col.size(), CV_8UC4);
+			cv::cuda::swap(col, tmp);
+            if (use_lab_) cv::cuda::cvtColor(tmp,tmp, cv::COLOR_BGR2Lab, 0, cvstream);
+			cv::cuda::cvtColor(tmp,col, cv::COLOR_BGR2BGRA, 0, cvstream);
+		}
+
+        f.createTexture<float4>(Channel::EnergyVector, Format<float4>(f.get<GpuMat>(Channel::Colour).size()));
+        f.createTexture<float>(Channel::Energy, Format<float>(f.get<GpuMat>(Channel::Colour).size()));
+        f.createTexture<uchar4>(Channel::Colour);
+    }
+
+    return true;
+}
+
+bool ILW::_phase1(ftl::rgbd::FrameSet &fs, int win, cudaStream_t stream) {
+    // Run correspondence kernel to create an energy vector
+    cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream);
+
+	// For each camera combination
+    for (size_t i=0; i<fs.frames.size(); ++i) {
+        auto &f1 = fs.frames[i];
+        f1.get<GpuMat>(Channel::EnergyVector).setTo(cv::Scalar(0.0f,0.0f,0.0f,0.0f), cvstream);
+		f1.get<GpuMat>(Channel::Energy).setTo(cv::Scalar(0.0f), cvstream);
+
+		Eigen::Vector4d d1(0.0, 0.0, 1.0, 0.0);
+		d1 = fs.sources[i]->getPose() * d1;
+
+        for (size_t j=0; j<fs.frames.size(); ++j) {
+            if (i == j) continue;
+
+            //LOG(INFO) << "Running phase1";
+
+            auto &f2 = fs.frames[j];
+            auto s1 = fs.sources[i];
+            auto s2 = fs.sources[j];
+
+			// Are cameras facing similar enough direction?
+			Eigen::Vector4d d2(0.0, 0.0, 1.0, 0.0);
+			d2 = fs.sources[j]->getPose() * d2;
+			// No, so skip this combination
+			if (d1.dot(d2) <= 0.0) continue;
+
+            auto pose1 = MatrixConversion::toCUDA(s1->getPose().cast<float>().inverse());
+            auto pose2 = MatrixConversion::toCUDA(s2->getPose().cast<float>().inverse());
+
+            try {
+            //Calculate energy vector to best correspondence
+            ftl::cuda::correspondence_energy_vector(
+                f1.getTexture<float4>(Channel::Points),
+                f2.getTexture<float4>(Channel::Points),
+                f1.getTexture<uchar4>(Channel::Colour),
+                f2.getTexture<uchar4>(Channel::Colour),
+                // TODO: Add normals and other things...
+                f1.getTexture<float4>(Channel::EnergyVector),
+                f1.getTexture<float>(Channel::Energy),
+                pose1,
+                pose2,
+                s2->parameters(),
+                params_,
+                win,
+                stream
+            );
+            } catch (ftl::exception &e) {
+                LOG(ERROR) << "Exception in correspondence: " << e.what();
+            }
+
+            //LOG(INFO) << "Correspondences done... " << i;
+        }
+    }
+
+    return true;
+}
+
+bool ILW::_phase2(ftl::rgbd::FrameSet &fs, float rate, cudaStream_t stream) {
+    // Run energies and motion kernel
+
+	// Smooth vectors across a window and iteratively
+	// strongly disagreeing vectors should cancel out
+	// A weak vector is overriden by a stronger one.
+
+    for (size_t i=0; i<fs.frames.size(); ++i) {
+        auto &f = fs.frames[i];
+
+        ftl::cuda::move_points(
+             f.getTexture<float4>(Channel::Points),
+             f.getTexture<float4>(Channel::EnergyVector),
+             fs.sources[i]->parameters(),
+             rate,
+             motion_window_,
+             stream
+        );
+    }
+
+    return true;
+}
diff --git a/applications/reconstruct/src/ilw/ilw.cu b/applications/reconstruct/src/ilw/ilw.cu
new file mode 100644
index 000000000..67c1c9d8d
--- /dev/null
+++ b/applications/reconstruct/src/ilw/ilw.cu
@@ -0,0 +1,205 @@
+#include "ilw_cuda.hpp"
+#include <ftl/cuda/weighting.hpp>
+
+using ftl::cuda::TextureObject;
+using ftl::rgbd::Camera;
+
+#define WARP_SIZE 32
+#define T_PER_BLOCK 8
+#define FULL_MASK 0xffffffff
+
+__device__ inline float warpMin(float e) {
+	for (int i = WARP_SIZE/2; i > 0; i /= 2) {
+		const float other = __shfl_xor_sync(FULL_MASK, e, i, WARP_SIZE);
+		e = min(e, other);
+	}
+	return e;
+}
+
+__device__ inline float warpSum(float e) {
+	for (int i = WARP_SIZE/2; i > 0; i /= 2) {
+		const float other = __shfl_xor_sync(FULL_MASK, e, i, WARP_SIZE);
+		e += other;
+	}
+	return e;
+}
+
+//#define COR_WIN_RADIUS 17
+//#define COR_WIN_SIZE (COR_WIN_RADIUS * COR_WIN_RADIUS)
+
+template<int COR_WIN_RADIUS> 
+__global__ void correspondence_energy_vector_kernel(
+        TextureObject<float4> p1,
+        TextureObject<float4> p2,
+        TextureObject<uchar4> c1,
+        TextureObject<uchar4> c2,
+        TextureObject<float4> vout,
+        TextureObject<float> eout,
+        float4x4 pose1,  // Inverse
+        float4x4 pose2,  // Inverse
+        Camera cam2, ftl::cuda::ILWParams params) {
+
+    // Each warp picks point in p1
+    const int tid = (threadIdx.x + threadIdx.y * blockDim.x);
+	const int x = (blockIdx.x*blockDim.x + threadIdx.x) / WARP_SIZE;
+    const int y = blockIdx.y*blockDim.y + threadIdx.y;
+    
+    const float3 world1 = make_float3(p1.tex2D(x, y));
+    const uchar4 colour1 = c1.tex2D(x, y);
+	if (world1.x == MINF) return;
+    const float3 camPos2 = pose2 * world1;
+    const uint2 screen2 = cam2.camToScreen<uint2>(camPos2);
+    
+    float bestcost = 1.1f;
+    float avgcost = 0.0f;
+	float3 bestpoint;
+	int count = 0;
+
+    // Project to p2 using cam2
+    // Each thread takes a possible correspondence and calculates a weighting
+    const int lane = tid % WARP_SIZE;
+	for (int i=lane; i<COR_WIN_RADIUS*COR_WIN_RADIUS; i+=WARP_SIZE) {
+		const float u = (i % COR_WIN_RADIUS) - (COR_WIN_RADIUS / 2);
+        const float v = (i / COR_WIN_RADIUS) - (COR_WIN_RADIUS / 2);
+        
+        const float3 world2 = make_float3(p2.tex2D(screen2.x+u, screen2.y+v));
+        if ((params.flags & ftl::cuda::kILWFlag_IgnoreBad) && world2.x == MINF) continue;
+        const uchar4 colour2 = c2.tex2D(screen2.x+u, screen2.y+v);
+
+        // Determine degree of correspondence
+		float cost = 1.0f - ftl::cuda::spatialWeighting(world1, world2, params.spatial_smooth);
+		// Point is too far away to even count
+		if (world2.x != MINF && cost == 1.0f) continue;
+
+        // Mix ratio of colour and distance costs
+        const float ccost = 1.0f - ftl::cuda::colourWeighting(colour1, colour2, params.colour_smooth);
+        if ((params.flags & ftl::cuda::kILWFlag_SkipBadColour) && ccost == 1.0f) continue;
+        cost = params.cost_ratio * (ccost) + (1.0f - params.cost_ratio) * cost;
+        //cost /= 2.0f;
+
+		++count;
+		avgcost += cost;
+        if (world2.x != MINF && cost < bestcost) {
+            bestpoint = world2;
+            bestcost = cost;
+        }
+    }
+
+	count = warpSum(count);
+    const float mincost = warpMin(bestcost);
+	bool best = mincost == bestcost;
+	avgcost = warpSum(avgcost) / count;
+    const float confidence = (avgcost - mincost);
+
+    if (best && mincost < 1.0f) {
+        float3 tvecA = pose1 * bestpoint;
+        float3 tvecB = pose1 * world1;
+        if (params.flags & ftl::cuda::kILWFlag_RestrictZ) {
+            tvecA.x = tvecB.x;
+            tvecA.y = tvecB.y;
+        }
+        tvecA = (pose1.getInverse() * tvecA) - world1;
+        vout(x,y) = vout.tex2D(x, y) + make_float4(
+            tvecA.x, // * (1.0f - mincost) * confidence,
+            tvecA.y, // * (1.0f - mincost) * confidence,
+            tvecA.z, // * (1.0f - mincost) * confidence,
+			(1.0f - mincost) * confidence);
+			
+		//eout(x,y) = max(eout(x,y), (length(bestpoint-world1) / 0.04f) * 7.0f);
+		//eout(x,y) = max(eout(x,y), (1.0f - mincost) * 7.0f);
+		//eout(x,y) = max(eout(x, y), (1.0f - mincost) * confidence * (length(bestpoint-world1) / 0.04f) * 12.0f);
+		eout(x,y) = max(eout(x, y), (1.0f - mincost) * confidence * 12.0f);
+		//eout(x,y) = max(eout(x, y), confidence * 12.0f);
+    } else if (mincost >= 1.0f && lane == 0) {
+        //vout(x,y) = make_float4(0.0f);
+        //eout(x,y) = 0.0f;
+    }
+}
+
+void ftl::cuda::correspondence_energy_vector(
+        TextureObject<float4> &p1,
+        TextureObject<float4> &p2,
+        TextureObject<uchar4> &c1,
+        TextureObject<uchar4> &c2,
+        TextureObject<float4> &vout,
+        TextureObject<float> &eout,
+        float4x4 &pose1,
+        float4x4 &pose2,
+        const Camera &cam2, const ILWParams &params, int win,
+        cudaStream_t stream) {
+
+    const dim3 gridSize((p1.width() + 2 - 1)/2, (p1.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+    const dim3 blockSize(2*WARP_SIZE, T_PER_BLOCK);
+
+    //printf("COR SIZE %d,%d\n", p1.width(), p1.height());
+
+    switch (win) {
+    case 17     : correspondence_energy_vector_kernel<17><<<gridSize, blockSize, 0, stream>>>(p1, p2, c1, c2, vout, eout, pose1, pose2, cam2, params); break;
+    case 9      : correspondence_energy_vector_kernel<9><<<gridSize, blockSize, 0, stream>>>(p1, p2, c1, c2, vout, eout, pose1, pose2, cam2, params); break;
+    case 5      : correspondence_energy_vector_kernel<5><<<gridSize, blockSize, 0, stream>>>(p1, p2, c1, c2, vout, eout, pose1, pose2, cam2, params); break;
+    }
+    cudaSafeCall( cudaGetLastError() );
+}
+
+//==============================================================================
+
+//#define MOTION_RADIUS 9
+
+template <int MOTION_RADIUS>
+__global__ void move_points_kernel(
+    ftl::cuda::TextureObject<float4> p,
+    ftl::cuda::TextureObject<float4> ev,
+    ftl::rgbd::Camera camera,
+    float rate) {
+
+    const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
+    const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
+    
+    if (x < p.width() && y < p.height()) {
+		const float4 world = p(x,y);
+		if (world.x == MINF) return;
+
+		float4 vec = make_float4(0.0f, 0.0f, 0.0f, 0.0f); //ev.tex2D((int)x,(int)y);
+		float contrib = 0.0f;
+
+		// Calculate screen space distortion with neighbours
+		for (int v=-MOTION_RADIUS; v<=MOTION_RADIUS; ++v) {
+			for (int u=-MOTION_RADIUS; u<=MOTION_RADIUS; ++u) {
+				const float4 vecn = ev.tex2D((int)x+u,(int)y+v);
+				const float3 pn = make_float3(p.tex2D((int)x+u,(int)y+v));
+				if (pn.x == MINF) continue;
+
+				const float s = ftl::cuda::spatialWeighting(pn, make_float3(world), 0.01f);
+				contrib += vecn.w * s;
+				vec += vecn.w * s * vecn;
+			}
+		}
+
+        if (vec.w > 0.0f) {
+            p(x,y) = world + rate * (vec / contrib);
+        }
+    }
+}
+
+
+void ftl::cuda::move_points(
+        ftl::cuda::TextureObject<float4> &p,
+        ftl::cuda::TextureObject<float4> &v,
+        const ftl::rgbd::Camera &camera,
+        float rate,
+        int radius,
+        cudaStream_t stream) {
+
+    const dim3 gridSize((p.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (p.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+    const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
+
+    switch (radius) {
+    case 9 : move_points_kernel<9><<<gridSize, blockSize, 0, stream>>>(p,v,camera,rate); break;
+    case 5 : move_points_kernel<5><<<gridSize, blockSize, 0, stream>>>(p,v,camera,rate); break;
+    case 3 : move_points_kernel<3><<<gridSize, blockSize, 0, stream>>>(p,v,camera,rate); break;
+    case 1 : move_points_kernel<1><<<gridSize, blockSize, 0, stream>>>(p,v,camera,rate); break;
+    case 0 : move_points_kernel<0><<<gridSize, blockSize, 0, stream>>>(p,v,camera,rate); break;
+    }
+
+    cudaSafeCall( cudaGetLastError() );
+}
diff --git a/applications/reconstruct/src/ilw.hpp b/applications/reconstruct/src/ilw/ilw.hpp
similarity index 81%
rename from applications/reconstruct/src/ilw.hpp
rename to applications/reconstruct/src/ilw/ilw.hpp
index 0be45d015..66de927a2 100644
--- a/applications/reconstruct/src/ilw.hpp
+++ b/applications/reconstruct/src/ilw/ilw.hpp
@@ -6,6 +6,8 @@
 #include <ftl/configurable.hpp>
 #include <vector>
 
+#include "ilw_cuda.hpp"
+
 namespace ftl {
 
 namespace detail {
@@ -42,6 +44,8 @@ class ILW : public ftl::Configurable {
      */
     bool process(ftl::rgbd::FrameSet &fs, cudaStream_t stream=0);
 
+    inline bool isLabColour() const { return use_lab_; }
+
     private:
     /*
      * Initialise data.
@@ -51,14 +55,20 @@ class ILW : public ftl::Configurable {
     /*
      * Find possible correspondences and a confidence value.
      */
-    bool _phase1(ftl::rgbd::FrameSet &fs, cudaStream_t stream);
+    bool _phase1(ftl::rgbd::FrameSet &fs, int win, cudaStream_t stream);
 
     /*
      * Calculate energies and move the points.
      */
-    bool _phase2(ftl::rgbd::FrameSet &fs);
+    bool _phase2(ftl::rgbd::FrameSet &fs, float rate, cudaStream_t stream);
 
     std::vector<detail::ILWData> data_;
+    bool enabled_;
+    ftl::cuda::ILWParams params_;
+    int iterations_;
+    float motion_rate_;
+    int motion_window_;
+    bool use_lab_;
 };
 
 }
diff --git a/applications/reconstruct/src/ilw_cuda.hpp b/applications/reconstruct/src/ilw/ilw_cuda.hpp
similarity index 52%
rename from applications/reconstruct/src/ilw_cuda.hpp
rename to applications/reconstruct/src/ilw/ilw_cuda.hpp
index a01af7514..8c6c925b1 100644
--- a/applications/reconstruct/src/ilw_cuda.hpp
+++ b/applications/reconstruct/src/ilw/ilw_cuda.hpp
@@ -8,6 +8,18 @@
 namespace ftl {
 namespace cuda {
 
+struct ILWParams {
+    float spatial_smooth;
+    float colour_smooth;
+    float cost_ratio;
+    float threshold;
+    uint flags;
+};
+
+static const uint kILWFlag_IgnoreBad = 0x0001;
+static const uint kILWFlag_RestrictZ = 0x0002;
+static const uint kILWFlag_SkipBadColour = 0x0004;
+
 void correspondence_energy_vector(
     ftl::cuda::TextureObject<float4> &p1,
     ftl::cuda::TextureObject<float4> &p2,
@@ -15,8 +27,19 @@ void correspondence_energy_vector(
     ftl::cuda::TextureObject<uchar4> &c2,
     ftl::cuda::TextureObject<float4> &vout,
     ftl::cuda::TextureObject<float> &eout,
+    float4x4 &pose1,
     float4x4 &pose2,
     const ftl::rgbd::Camera &cam2,
+    const ILWParams &params, int win,
+    cudaStream_t stream
+);
+
+void move_points(
+    ftl::cuda::TextureObject<float4> &p,
+    ftl::cuda::TextureObject<float4> &v,
+    const ftl::rgbd::Camera &camera,
+    float rate,
+    int radius,
     cudaStream_t stream
 );
 
diff --git a/applications/reconstruct/src/main.cpp b/applications/reconstruct/src/main.cpp
index f6548cf32..92a65ea44 100644
--- a/applications/reconstruct/src/main.cpp
+++ b/applications/reconstruct/src/main.cpp
@@ -16,7 +16,7 @@
 #include <ftl/rgbd/group.hpp>
 #include <ftl/threads.hpp>
 
-#include "ilw.hpp"
+#include "ilw/ilw.hpp"
 #include <ftl/render/splat_render.hpp>
 
 #include <string>
@@ -29,6 +29,8 @@
 
 #include <ftl/registration.hpp>
 
+#include <cuda_profiler_api.h>
+
 #ifdef WIN32
 #pragma comment(lib, "Rpcrt4.lib")
 #endif
@@ -135,8 +137,15 @@ static void run(ftl::Configurable *root) {
 	ftl::ILW *align = ftl::create<ftl::ILW>(root, "merge");
 
 	// Generate virtual camera render when requested by streamer
-	virt->onRender([splat,virt,&scene_B](ftl::rgbd::Frame &out) {
+	virt->onRender([splat,virt,&scene_B,align](ftl::rgbd::Frame &out) {
 		virt->setTimestamp(scene_B.timestamp);
+		// Do we need to convert Lab to BGR?
+		if (align->isLabColour()) {
+			for (auto &f : scene_B.frames) {
+				auto &col = f.get<cv::cuda::GpuMat>(Channel::Colour);
+				cv::cuda::cvtColor(col,col, cv::COLOR_Lab2BGR);
+			}
+		}
 		splat->render(virt, out);
 	});
 	stream->add(virt);
@@ -177,7 +186,7 @@ static void run(ftl::Configurable *root) {
 
 			// Send all frames to GPU, block until done?
 			scene_A.upload(Channel::Colour + Channel::Depth);  // TODO: (Nick) Add scene stream.
-			//align->process(scene_A);
+			align->process(scene_A);
 
 			// TODO: To use second GPU, could do a download, swap, device change,
 			// then upload to other device. Or some direct device-2-device copy.
@@ -188,14 +197,22 @@ static void run(ftl::Configurable *root) {
 		return true;
 	});
 
+	LOG(INFO) << "Shutting down...";
 	ftl::timer::stop();
 	slave.stop();
 	net->shutdown();
+	ftl::pool.stop();
+
+	cudaProfilerStop();
+
+	LOG(INFO) << "Deleting...";
+
 	delete align;
 	delete splat;
 	delete stream;
 	delete virt;
 	delete net;
+	LOG(INFO) << "Done.";
 }
 
 int main(int argc, char **argv) {
diff --git a/applications/reconstruct/src/mls.cu b/applications/reconstruct/src/mls/mls.cu
similarity index 100%
rename from applications/reconstruct/src/mls.cu
rename to applications/reconstruct/src/mls/mls.cu
diff --git a/applications/reconstruct/src/point_cloud.hpp b/applications/reconstruct/src/point_cloud.hpp
deleted file mode 100644
index 7b5f49c07..000000000
--- a/applications/reconstruct/src/point_cloud.hpp
+++ /dev/null
@@ -1,36 +0,0 @@
-#ifndef _FTL_POINT_CLOUD_HPP_
-#define _FTL_POINT_CLOUD_HPP_
-
-namespace ftl {
-namespace pointcloud {
-
-
-struct VoxelPoint {
-	union {
-	uint64_t val;
-	struct {
-	uint16_t x : 12;  // Block X
-	uint16_t y : 12;  // Block Y
-	uint16_t z : 12;  // Block Z
-	uint16_t v : 9;   // Voxel offset in block 0-511
-	};
-	};
-};
-
-struct VoxelColour {
-	union {
-	uint32_t val;
-	struct {
-	uint8_t b;
-	uint8_t g;
-	uint8_t r;
-	uint8_t a;
-	};
-	};
-};
-
-
-}
-}
-
-#endif  // _FTL_POINT_CLOUD_HPP_
diff --git a/applications/reconstruct/src/ray_cast_sdf.cpp b/applications/reconstruct/src/ray_cast_sdf.cpp
deleted file mode 100644
index 1c12a762c..000000000
--- a/applications/reconstruct/src/ray_cast_sdf.cpp
+++ /dev/null
@@ -1,106 +0,0 @@
-//#include <stdafx.h>
-
-#include <ftl/voxel_hash.hpp>
-#include "compactors.hpp"
-#include "splat_render_cuda.hpp"
-
-//#include "Util.h"
-
-#include <ftl/ray_cast_sdf.hpp>
-
-
-extern "C" void renderCS(
-	const ftl::voxhash::HashData& hashData,
-	const RayCastData &rayCastData,
-	const RayCastParams &rayCastParams, cudaStream_t stream);
-
-extern "C" void computeNormals(float4* d_output, float3* d_input, unsigned int width, unsigned int height);
-extern "C" void convertDepthFloatToCameraSpaceFloat3(float3* d_output, float* d_input, float4x4 intrinsicsInv, unsigned int width, unsigned int height, const DepthCameraData& depthCameraData);
-
-extern "C" void resetRayIntervalSplatCUDA(RayCastData& data, const RayCastParams& params);
-extern "C" void rayIntervalSplatCUDA(const ftl::voxhash::HashData& hashData,
-								 const RayCastData &rayCastData, const RayCastParams &rayCastParams);
-
-extern "C" void nickRenderCUDA(const ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const RayCastData &rayCastData, const RayCastParams &params, cudaStream_t stream);
-
-
-
-void CUDARayCastSDF::create(const RayCastParams& params)
-{
-	m_params = params;
-	m_data.allocate(m_params);
-	//m_rayIntervalSplatting.OnD3D11CreateDevice(DXUTGetD3D11Device(), params.m_width, params.m_height);
-}
-
-void CUDARayCastSDF::destroy(void)
-{
-	m_data.free();
-	//m_rayIntervalSplatting.OnD3D11DestroyDevice();
-}
-
-//extern "C" unsigned int compactifyHashAllInOneCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams);
-
-
-void CUDARayCastSDF::compactifyHashEntries(ftl::voxhash::HashData& hashData, ftl::voxhash::HashParams& hashParams, cudaStream_t stream) { //const DepthCameraData& depthCameraData) {
-		
-	ftl::cuda::compactifyVisible(hashData, hashParams, m_params.camera, stream);		//this version uses atomics over prefix sums, which has a much better performance
-	//std::cout << "Ray blocks = " << hashParams.m_numOccupiedBlocks << std::endl;
-	//hashData.updateParams(hashParams);	//make sure numOccupiedBlocks is updated on the GPU
-}
-
-void CUDARayCastSDF::render(ftl::voxhash::HashData& hashData, ftl::voxhash::HashParams& hashParams, const DepthCameraParams& cameraParams, const Eigen::Matrix4f& lastRigidTransform, cudaStream_t stream)
-{
-	//updateConstantDepthCameraParams(cameraParams);
-	//rayIntervalSplatting(hashData, hashParams, lastRigidTransform);
-	//m_data.d_rayIntervalSplatMinArray = m_rayIntervalSplatting.mapMinToCuda();
-	//m_data.d_rayIntervalSplatMaxArray = m_rayIntervalSplatting.mapMaxToCuda();
-
-	m_params.camera = cameraParams;
-	//m_params.m_numOccupiedSDFBlocks = hashParams.m_numOccupiedBlocks;
-	m_params.m_viewMatrix = MatrixConversion::toCUDA(lastRigidTransform.inverse());
-	m_params.m_viewMatrixInverse = MatrixConversion::toCUDA(lastRigidTransform);
-	//m_data.updateParams(m_params);
-
-	compactifyHashEntries(hashData, hashParams, stream);
-
-	if (hash_render_) {
-		//ftl::cuda::isosurface_point_image(hashData, hashParams, m_data, m_params, stream);
-	} else renderCS(hashData, m_data, m_params, stream);
-
-	//convertToCameraSpace(cameraData);
-	//if (!m_params.m_useGradients)
-	//{
-	//	computeNormals(m_data.d_normals, m_data.d_depth3, m_params.m_width, m_params.m_height);
-	//}
-
-	//m_rayIntervalSplatting.unmapCuda();
-
-}
-
-
-/*void CUDARayCastSDF::convertToCameraSpace(const DepthCameraData& cameraData)
-{
-	convertDepthFloatToCameraSpaceFloat3(m_data.d_depth3, m_data.d_depth, m_params.m_intrinsicsInverse, m_params.m_width, m_params.m_height, cameraData);
-	
-	if(!m_params.m_useGradients) {
-		computeNormals(m_data.d_normals, m_data.d_depth3, m_params.m_width, m_params.m_height);
-	}
-}*/
-
-/*void CUDARayCastSDF::rayIntervalSplatting(const ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const Eigen::Matrix4f& lastRigidTransform)
-{
-	if (hashParams.m_numOccupiedBlocks == 0)	return;
-
-	if (m_params.m_maxNumVertices <= 6*hashParams.m_numOccupiedBlocks) { // 6 verts (2 triangles) per block
-		throw std::runtime_error("not enough space for vertex buffer for ray interval splatting");
-	}
-
-	m_params.m_numOccupiedSDFBlocks = hashParams.m_numOccupiedBlocks;
-	m_params.m_viewMatrix = MatrixConversion::toCUDA(lastRigidTransform.inverse());
-	m_params.m_viewMatrixInverse = MatrixConversion::toCUDA(lastRigidTransform);
-
-	m_data.updateParams(m_params); // !!! debugging
-
-	//don't use ray interval splatting (cf CUDARayCastSDF.cu -> line 40
-	//m_rayIntervalSplatting.rayIntervalSplatting(DXUTGetD3D11DeviceContext(), hashData, cameraData, m_data, m_params, m_params.m_numOccupiedSDFBlocks*6);
-}*/
\ No newline at end of file
diff --git a/applications/reconstruct/src/ray_cast_sdf.cu b/applications/reconstruct/src/ray_cast_sdf.cu
deleted file mode 100644
index 10fd3e0b7..000000000
--- a/applications/reconstruct/src/ray_cast_sdf.cu
+++ /dev/null
@@ -1,189 +0,0 @@
-
-//#include <cuda_runtime.h>
-
-#include <ftl/cuda_matrix_util.hpp>
-
-#include <ftl/depth_camera.hpp>
-#include <ftl/voxel_hash.hpp>
-#include <ftl/ray_cast_util.hpp>
-
-#define T_PER_BLOCK 8
-#define NUM_GROUPS_X 1024
-
-#define NUM_CUDA_BLOCKS  10000
-
-//texture<float, cudaTextureType2D, cudaReadModeElementType> rayMinTextureRef;
-//texture<float, cudaTextureType2D, cudaReadModeElementType> rayMaxTextureRef;
-
-__global__ void renderKernel(ftl::voxhash::HashData hashData, RayCastData rayCastData, RayCastParams rayCastParams) 
-{
-	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
-	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
-
-	//const RayCastParams& rayCastParams = c_rayCastParams;
-
-	if (x < rayCastParams.m_width && y < rayCastParams.m_height) {
-		rayCastData.d_depth[y*rayCastParams.m_width+x] = MINF;
-		rayCastData.d_depth3[y*rayCastParams.m_width+x] = make_float3(MINF,MINF,MINF);
-		rayCastData.d_normals[y*rayCastParams.m_width+x] = make_float4(MINF,MINF,MINF,MINF);
-		rayCastData.d_colors[y*rayCastParams.m_width+x] = make_uchar3(0,0,0);
-
-		float3 camDir = normalize(rayCastParams.camera.kinectProjToCamera(x, y, 1.0f));
-		float3 worldCamPos = rayCastParams.m_viewMatrixInverse * make_float3(0.0f, 0.0f, 0.0f);
-		float4 w = rayCastParams.m_viewMatrixInverse * make_float4(camDir, 0.0f);
-		float3 worldDir = normalize(make_float3(w.x, w.y, w.z));
-
-		////use ray interval splatting
-		//float minInterval = tex2D(rayMinTextureRef, x, y);
-		//float maxInterval = tex2D(rayMaxTextureRef, x, y);
-
-		//don't use ray interval splatting
-		float minInterval = rayCastParams.m_minDepth;
-		float maxInterval = rayCastParams.m_maxDepth;
-
-		//if (minInterval == 0 || minInterval == MINF) minInterval = rayCastParams.m_minDepth;
-		//if (maxInterval == 0 || maxInterval == MINF) maxInterval = rayCastParams.m_maxDepth;
-		//TODO MATTHIAS: shouldn't this return in the case no interval is found?
-		if (minInterval == 0 || minInterval == MINF) return;
-		if (maxInterval == 0 || maxInterval == MINF) return;
-
-		// debugging 
-		//if (maxInterval < minInterval) {
-		//	printf("ERROR (%d,%d): [ %f, %f ]\n", x, y, minInterval, maxInterval);
-		//}
-
-		rayCastData.traverseCoarseGridSimpleSampleAll(hashData, rayCastParams, worldCamPos, worldDir, camDir, make_int3(x,y,1), minInterval, maxInterval);
-	} 
-}
-
-extern "C" void renderCS(const ftl::voxhash::HashData& hashData, const RayCastData &rayCastData, const RayCastParams &rayCastParams, cudaStream_t stream) 
-{
-
-	const dim3 gridSize((rayCastParams.m_width + T_PER_BLOCK - 1)/T_PER_BLOCK, (rayCastParams.m_height + T_PER_BLOCK - 1)/T_PER_BLOCK);
-	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
-
-	//cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
-	//cudaBindTextureToArray(rayMinTextureRef, rayCastData.d_rayIntervalSplatMinArray, channelDesc);
-	//cudaBindTextureToArray(rayMaxTextureRef, rayCastData.d_rayIntervalSplatMaxArray, channelDesc);
-
-	//printf("Ray casting render...\n");
-
-	renderKernel<<<gridSize, blockSize, 0, stream>>>(hashData, rayCastData, rayCastParams);
-
-#ifdef _DEBUG
-	cudaSafeCall(cudaDeviceSynchronize());
-	//cutilCheckMsg(__FUNCTION__);
-#endif
-}
-
-
-/////////////////////////////////////////////////////////////////////////
-// ray interval splatting
-/////////////////////////////////////////////////////////////////////////
-
-/*__global__ void resetRayIntervalSplatKernel(RayCastData data) 
-{
-	uint idx = blockIdx.x + blockIdx.y * NUM_GROUPS_X;
-	data.point_cloud_[idx] = make_float3(MINF);
-}
-
-extern "C" void resetRayIntervalSplatCUDA(RayCastData& data, const RayCastParams& params)
-{
-	const dim3 gridSize(NUM_GROUPS_X, (params.m_maxNumVertices + NUM_GROUPS_X - 1) / NUM_GROUPS_X, 1); // ! todo check if need third dimension?
-	const dim3 blockSize(1, 1, 1);
-
-	resetRayIntervalSplatKernel<<<gridSize, blockSize>>>(data);
-
-#ifdef _DEBUG
-	cudaSafeCall(cudaDeviceSynchronize());
-	//cutilCheckMsg(__FUNCTION__);
-#endif
-}*/
-
-/*__global__ void rayIntervalSplatKernel(ftl::voxhash::HashData hashData, DepthCameraData depthCameraData, RayCastData rayCastData, DepthCameraData cameraData) 
-{
-	uint idx = blockIdx.x + blockIdx.y * NUM_GROUPS_X;
-
-	const ftl::voxhash::HashEntry& entry = hashData.d_hashCompactified[idx];
-	if (entry.ptr != ftl::voxhash::FREE_ENTRY) {
-		//if (!hashData.isSDFBlockInCameraFrustumApprox(entry.pos)) return;
-		const RayCastParams &params = c_rayCastParams;
-		const float4x4& viewMatrix = params.m_viewMatrix;
-
-		float3 worldCurrentVoxel = hashData.SDFBlockToWorld(entry.pos);
-
-		float3 MINV = worldCurrentVoxel - c_hashParams.m_virtualVoxelSize / 2.0f;
-
-		float3 maxv = MINV+SDF_BLOCK_SIZE*c_hashParams.m_virtualVoxelSize;
-
-		float3 proj000 = cameraData.cameraToKinectProj(viewMatrix * make_float3(MINV.x, MINV.y, MINV.z));
-		float3 proj100 = cameraData.cameraToKinectProj(viewMatrix * make_float3(maxv.x, MINV.y, MINV.z));
-		float3 proj010 = cameraData.cameraToKinectProj(viewMatrix * make_float3(MINV.x, maxv.y, MINV.z));
-		float3 proj001 = cameraData.cameraToKinectProj(viewMatrix * make_float3(MINV.x, MINV.y, maxv.z));
-		float3 proj110 = cameraData.cameraToKinectProj(viewMatrix * make_float3(maxv.x, maxv.y, MINV.z));
-		float3 proj011 = cameraData.cameraToKinectProj(viewMatrix * make_float3(MINV.x, maxv.y, maxv.z));
-		float3 proj101 = cameraData.cameraToKinectProj(viewMatrix * make_float3(maxv.x, MINV.y, maxv.z));
-		float3 proj111 = cameraData.cameraToKinectProj(viewMatrix * make_float3(maxv.x, maxv.y, maxv.z));
-
-		// Tree Reduction Min
-		float3 min00 = fminf(proj000, proj100);
-		float3 min01 = fminf(proj010, proj001);
-		float3 min10 = fminf(proj110, proj011);
-		float3 min11 = fminf(proj101, proj111);
-
-		float3 min0 = fminf(min00, min01);
-		float3 min1 = fminf(min10, min11);
-
-		float3 minFinal = fminf(min0, min1);
-
-		// Tree Reduction Max
-		float3 max00 = fmaxf(proj000, proj100);
-		float3 max01 = fmaxf(proj010, proj001);
-		float3 max10 = fmaxf(proj110, proj011);
-		float3 max11 = fmaxf(proj101, proj111);
-
-		float3 max0 = fmaxf(max00, max01);
-		float3 max1 = fmaxf(max10, max11);
-
-		float3 maxFinal = fmaxf(max0, max1);
-
-		float depth = maxFinal.z;
-		if(params.m_splatMinimum == 1) {
-			depth = minFinal.z;
-		}
-		float depthWorld = cameraData.kinectProjToCameraZ(depth);
-
-		//uint addr = idx*4;
-		//rayCastData.d_vertexBuffer[addr] = make_float4(maxFinal.x, minFinal.y, depth, depthWorld);
-		//rayCastData.d_vertexBuffer[addr+1] = make_float4(minFinal.x, minFinal.y, depth, depthWorld);
-		//rayCastData.d_vertexBuffer[addr+2] = make_float4(maxFinal.x, maxFinal.y, depth, depthWorld);
-		//rayCastData.d_vertexBuffer[addr+3] = make_float4(minFinal.x, maxFinal.y, depth, depthWorld);
-
-		// Note (Nick) : Changed to create point cloud instead of vertex.
-		uint addr = idx;
-		rayCastData.point_cloud_[addr] = make_float3(maxFinal.x, maxFinal.y, depth);
-		//printf("Ray: %f\n", depth);
-
-		uint addr = idx*6;
-		rayCastData.d_vertexBuffer[addr] = make_float4(maxFinal.x, minFinal.y, depth, depthWorld);
-		rayCastData.d_vertexBuffer[addr+1] = make_float4(minFinal.x, minFinal.y, depth, depthWorld);
-		rayCastData.d_vertexBuffer[addr+2] = make_float4(maxFinal.x, maxFinal.y, depth, depthWorld);
-		rayCastData.d_vertexBuffer[addr+3] = make_float4(minFinal.x, minFinal.y, depth, depthWorld);
-		rayCastData.d_vertexBuffer[addr+4] = make_float4(maxFinal.x, maxFinal.y, depth, depthWorld);
-		rayCastData.d_vertexBuffer[addr+5] = make_float4(minFinal.x, maxFinal.y, depth, depthWorld);
-	}
-}
-
-extern "C" void rayIntervalSplatCUDA(const ftl::voxhash::HashData& hashData, const DepthCameraData& cameraData, const RayCastData &rayCastData, const RayCastParams &rayCastParams) 
-{
-	//printf("Ray casting...\n");
-	const dim3 gridSize(NUM_GROUPS_X, (rayCastParams.m_numOccupiedSDFBlocks + NUM_GROUPS_X - 1) / NUM_GROUPS_X, 1);
-	const dim3 blockSize(1, 1, 1);
-
-	rayIntervalSplatKernel<<<gridSize, blockSize>>>(hashData, cameraData, rayCastData, cameraData);
-
-#ifdef _DEBUG
-	cudaSafeCall(cudaDeviceSynchronize());
-	//cutilCheckMsg(__FUNCTION__);
-#endif
-}  */
diff --git a/applications/reconstruct/src/scene_rep_hash_sdf.cu b/applications/reconstruct/src/scene_rep_hash_sdf.cu
deleted file mode 100644
index 4750d3e7e..000000000
--- a/applications/reconstruct/src/scene_rep_hash_sdf.cu
+++ /dev/null
@@ -1,282 +0,0 @@
-// From: https://github.com/niessner/VoxelHashing/blob/master/DepthSensingCUDA/Source/CUDASceneRepHashSDF.cu
-
-//#include <cutil_inline.h>
-//#include <cutil_math.h>
-//#include <vector_types.h>
-//#include <cuda_runtime.h>
-
-#include <ftl/cuda_matrix_util.hpp>
-
-#include <ftl/voxel_hash.hpp>
-#include <ftl/depth_camera.hpp>
-#include <ftl/ray_cast_params.hpp>
-
-#define T_PER_BLOCK 8
-
-using ftl::voxhash::HashData;
-using ftl::voxhash::HashParams;
-using ftl::voxhash::Voxel;
-using ftl::voxhash::HashEntry;
-using ftl::voxhash::FREE_ENTRY;
-
-// TODO (Nick) Use ftl::cuda::Texture (texture objects)
-//texture<float, cudaTextureType2D, cudaReadModeElementType> depthTextureRef;
-//texture<float4, cudaTextureType2D, cudaReadModeElementType> colorTextureRef;
-
-__device__ __constant__ HashParams c_hashParams;
-__device__ __constant__ RayCastParams c_rayCastParams;
-//__device__ __constant__ DepthCameraParams c_depthCameraParams;
-__device__ __constant__ ftl::voxhash::DepthCameraCUDA c_cameras[MAX_CAMERAS];
-
-extern "C" void updateConstantHashParams(const HashParams& params) {
-
-	size_t size;
-	cudaSafeCall(cudaGetSymbolSize(&size, c_hashParams));
-	cudaSafeCall(cudaMemcpyToSymbol(c_hashParams, &params, size, 0, cudaMemcpyHostToDevice));
-	
-#ifdef DEBUG
-	cudaSafeCall(cudaDeviceSynchronize());
-	//cutilCheckMsg(__FUNCTION__);
-#endif
-	}
-
-
-/*extern "C" void updateConstantRayCastParams(const RayCastParams& params) {
-	//printf("Update ray cast params\n");
-	size_t size;
-	cudaSafeCall(cudaGetSymbolSize(&size, c_rayCastParams));
-	cudaSafeCall(cudaMemcpyToSymbol(c_rayCastParams, &params, size, 0, cudaMemcpyHostToDevice));
-	
-#ifdef DEBUG
-	cudaSafeCall(cudaDeviceSynchronize());
-	//cutilCheckMsg(__FUNCTION__);
-#endif
-
-}*/
-
-extern "C" void updateCUDACameraConstant(ftl::voxhash::DepthCameraCUDA *data, int count) {
-	if (count == 0 || count >= MAX_CAMERAS) return;
-	//printf("Update depth camera params\n");
-	size_t size;
-	cudaSafeCall(cudaGetSymbolSize(&size, c_cameras));
-	cudaSafeCall(cudaMemcpyToSymbol(c_cameras, data, sizeof(ftl::voxhash::DepthCameraCUDA)*count, 0, cudaMemcpyHostToDevice));
-	
-#ifdef DEBUG
-	cudaSafeCall(cudaDeviceSynchronize());
-	//cutilCheckMsg(__FUNCTION__);
-#endif
-
-}
-
-/*__global__ void resetHeapKernel(HashData hashData) 
-{
-	const HashParams& hashParams = c_hashParams;
-	unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x;
-
-	if (idx == 0) {
-		hashData.d_heapCounter[0] = hashParams.m_numSDFBlocks - 1;	//points to the last element of the array
-	}
-	
-	if (idx < hashParams.m_numSDFBlocks) {
-
-		hashData.d_heap[idx] = hashParams.m_numSDFBlocks - idx - 1;
-		uint blockSize = SDF_BLOCK_SIZE * SDF_BLOCK_SIZE * SDF_BLOCK_SIZE;
-		uint base_idx = idx * blockSize;
-		for (uint i = 0; i < blockSize; i++) {
-			hashData.deleteVoxel(base_idx+i);
-		}
-	}
-}*/
-
-__global__ void resetHashKernel(HashData hashData) 
-{
-	const HashParams& hashParams = c_hashParams;
-	const unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x;
-	if (idx < hashParams.m_hashNumBuckets) {
-		hashData.deleteHashEntry(hashData.d_hash[idx]);
-		//hashData.deleteHashEntry(hashData.d_hashCompactified[idx]);
-	}
-}
-
-
-__global__ void resetHashBucketMutexKernel(HashData hashData) 
-{
-	const HashParams& hashParams = c_hashParams;
-	const unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x;
-	if (idx < hashParams.m_hashNumBuckets) {
-		hashData.d_hashBucketMutex[idx] = FREE_ENTRY;
-	}
-}
-
-extern "C" void resetCUDA(HashData& hashData, const HashParams& hashParams)
-{
-	//{
-		//resetting the heap and SDF blocks
-		//const dim3 gridSize((hashParams.m_numSDFBlocks + (T_PER_BLOCK*T_PER_BLOCK) - 1)/(T_PER_BLOCK*T_PER_BLOCK), 1);
-		//const dim3 blockSize((T_PER_BLOCK*T_PER_BLOCK), 1);
-
-		//resetHeapKernel<<<gridSize, blockSize>>>(hashData);
-
-
-		//#ifdef _DEBUG
-		//	cudaSafeCall(cudaDeviceSynchronize());
-			//cutilCheckMsg(__FUNCTION__);
-		//#endif
-
-	//}
-
-	{
-		//resetting the hash
-		const dim3 gridSize((hashParams.m_hashNumBuckets + (T_PER_BLOCK*T_PER_BLOCK) - 1)/(T_PER_BLOCK*T_PER_BLOCK), 1);
-		const dim3 blockSize((T_PER_BLOCK*T_PER_BLOCK), 1);
-
-		resetHashKernel<<<gridSize, blockSize>>>(hashData);
-
-		#ifdef _DEBUG
-			cudaSafeCall(cudaDeviceSynchronize());
-			//cutilCheckMsg(__FUNCTION__);
-		#endif
-	}
-
-	{
-		//resetting the mutex
-		const dim3 gridSize((hashParams.m_hashNumBuckets + (T_PER_BLOCK*T_PER_BLOCK) - 1)/(T_PER_BLOCK*T_PER_BLOCK), 1);
-		const dim3 blockSize((T_PER_BLOCK*T_PER_BLOCK), 1);
-
-		resetHashBucketMutexKernel<<<gridSize, blockSize>>>(hashData);
-
-		#ifdef _DEBUG
-			cudaSafeCall(cudaDeviceSynchronize());
-			//cutilCheckMsg(__FUNCTION__);
-		#endif
-	}
-
-
-}
-
-extern "C" void resetHashBucketMutexCUDA(HashData& hashData, const HashParams& hashParams, cudaStream_t stream)
-{
-	const dim3 gridSize((hashParams.m_hashNumBuckets + (T_PER_BLOCK*T_PER_BLOCK) - 1)/(T_PER_BLOCK*T_PER_BLOCK), 1);
-	const dim3 blockSize((T_PER_BLOCK*T_PER_BLOCK), 1);
-
-	resetHashBucketMutexKernel<<<gridSize, blockSize, 0, stream>>>(hashData);
-
-#ifdef _DEBUG
-	cudaSafeCall(cudaDeviceSynchronize());
-	//cutilCheckMsg(__FUNCTION__);
-#endif
-}
-
-
-// Note: bitMask used for Streaming out code... could be set to nullptr if not streaming out
-// Note: Allocations might need to be around fat rays since multiple voxels could correspond
-// to same depth map pixel at larger distances.
-__global__ void allocKernel(HashData hashData, HashParams hashParams, int camnum) 
-{
-	//const HashParams& hashParams = c_hashParams;
-	const ftl::voxhash::DepthCameraCUDA camera = c_cameras[camnum];
-	const DepthCameraParams &cameraParams = camera.params;
-
-	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
-	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
-
-	if (x < cameraParams.m_imageWidth && y < cameraParams.m_imageHeight)
-	{
-		float d = tex2D<float>(camera.depth, x, y);
-		//if (d == MINF || d < cameraParams.m_sensorDepthWorldMin || d > cameraParams.m_sensorDepthWorldMax)	return;
-		if (d == MINF || d == 0.0f)	return;
-
-		if (d >= hashParams.m_maxIntegrationDistance) return;
-
-		// TODO (Nick) Use covariance to include a frustrum of influence
-		float t = hashData.getTruncation(d);
-		float minDepth = min(hashParams.m_maxIntegrationDistance, d-t);
-		float maxDepth = min(hashParams.m_maxIntegrationDistance, d+t);
-		if (minDepth >= maxDepth) return;
-
-		// Convert ray from image coords to world
-		// Does kinectDepthToSkeleton convert pixel values to coordinates using
-		// camera intrinsics? Same as what reprojectTo3D does in OpenCV?
-		float3 rayMin = cameraParams.kinectDepthToSkeleton(x, y, minDepth);
-		// Is the rigid transform then the estimated camera pose?
-		rayMin = camera.pose * rayMin;
-		//printf("Ray min: %f,%f,%f\n", rayMin.x, rayMin.y, rayMin.z);
-		float3 rayMax = cameraParams.kinectDepthToSkeleton(x, y, maxDepth);
-		rayMax = camera.pose * rayMax;
-
-		float3 rayDir = normalize(rayMax - rayMin);
-	
-		// Only ray cast from min possible depth to max depth
-		int3 idCurrentVoxel = hashData.worldToSDFBlock(rayMin);
-		int3 idEnd = hashData.worldToSDFBlock(rayMax);
-		
-		float3 step = make_float3(sign(rayDir));
-		float3 boundaryPos = hashData.SDFBlockToWorld(idCurrentVoxel+make_int3(clamp(step, 0.0, 1.0f)))-0.5f*hashParams.m_virtualVoxelSize;
-		float3 tMax = (boundaryPos-rayMin)/rayDir;
-		float3 tDelta = (step*SDF_BLOCK_SIZE*hashParams.m_virtualVoxelSize)/rayDir;
-		int3 idBound = make_int3(make_float3(idEnd)+step);
-
-		//#pragma unroll
-		//for(int c = 0; c < 3; c++) {
-		//	if (rayDir[c] == 0.0f) { tMax[c] = PINF; tDelta[c] = PINF; }
-		//	if (boundaryPos[c] - rayMin[c] == 0.0f) { tMax[c] = PINF; tDelta[c] = PINF; }
-		//}
-		if (rayDir.x == 0.0f) { tMax.x = PINF; tDelta.x = PINF; }
-		if (boundaryPos.x - rayMin.x == 0.0f) { tMax.x = PINF; tDelta.x = PINF; }
-
-		if (rayDir.y == 0.0f) { tMax.y = PINF; tDelta.y = PINF; }
-		if (boundaryPos.y - rayMin.y == 0.0f) { tMax.y = PINF; tDelta.y = PINF; }
-
-		if (rayDir.z == 0.0f) { tMax.z = PINF; tDelta.z = PINF; }
-		if (boundaryPos.z - rayMin.z == 0.0f) { tMax.z = PINF; tDelta.z = PINF; }
-
-
-		unsigned int iter = 0; // iter < g_MaxLoopIterCount
-		unsigned int g_MaxLoopIterCount = 1024;
-#pragma unroll 1
-		while(iter < g_MaxLoopIterCount) {
-
-			//check if it's in the frustum and not checked out
-			if (hashData.isInBoundingBox(hashParams, idCurrentVoxel)) { //} && !isSDFBlockStreamedOut(idCurrentVoxel, hashData, d_bitMask)) {		
-				hashData.allocBlock(idCurrentVoxel);
-				//printf("Allocate block: %d\n",idCurrentVoxel.x);
-			}
-
-			// Traverse voxel grid
-			if(tMax.x < tMax.y && tMax.x < tMax.z)	{
-				idCurrentVoxel.x += step.x;
-				if(idCurrentVoxel.x == idBound.x) return;
-				tMax.x += tDelta.x;
-			}
-			else if(tMax.z < tMax.y) {
-				idCurrentVoxel.z += step.z;
-				if(idCurrentVoxel.z == idBound.z) return;
-				tMax.z += tDelta.z;
-			}
-			else	{
-				idCurrentVoxel.y += step.y;
-				if(idCurrentVoxel.y == idBound.y) return;
-				tMax.y += tDelta.y;
-			}
-
-			iter++;
-		}
-	}
-}
-
-extern "C" void allocCUDA(HashData& hashData, const HashParams& hashParams, int camnum, const DepthCameraParams &depthCameraParams, cudaStream_t stream) 
-{
-
-	//printf("Allocating: %d\n",depthCameraParams.m_imageWidth);
-
-	const dim3 gridSize((depthCameraParams.m_imageWidth + T_PER_BLOCK - 1)/T_PER_BLOCK, (depthCameraParams.m_imageHeight + T_PER_BLOCK - 1)/T_PER_BLOCK);
-	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
-
-	allocKernel<<<gridSize, blockSize, 0, stream>>>(hashData, hashParams, camnum);
-
-
-	#ifdef _DEBUG
-		cudaSafeCall(cudaDeviceSynchronize());
-		//cutilCheckMsg(__FUNCTION__);
-	#endif
-}
diff --git a/applications/reconstruct/src/virtual_source.cpp b/applications/reconstruct/src/virtual_source.cpp
deleted file mode 100644
index c8dc38074..000000000
--- a/applications/reconstruct/src/virtual_source.cpp
+++ /dev/null
@@ -1,89 +0,0 @@
-#include <ftl/virtual_source.hpp>
-#include <ftl/depth_camera.hpp>
-#include <ftl/voxel_scene.hpp>
-#include <ftl/ray_cast_sdf.hpp>
-
-#define LOGURU_WITH_STREAMS 1
-#include <loguru.hpp>
-
-using ftl::rgbd::VirtualSource;
-
-VirtualSource::VirtualSource(ftl::rgbd::Source *host)
-		: ftl::rgbd::detail::Source(host) {
-	rays_ = ftl::create<CUDARayCastSDF>(host, "raycaster"); //new CUDARayCastSDF(host->getConfig());
-	scene_ = nullptr;
-
-	params_.fx = rays_->value("focal", 430.0f);
-	params_.fy = params_.fx;
-	params_.width = rays_->value("width", 640);
-	params_.height = rays_->value("height", 480);
-	params_.cx =  -((double)params_.width / 2);
-	params_.cy = -((double)params_.height / 2);
-	params_.maxDepth = rays_->value("max_depth", 10.0f);
-	params_.minDepth = rays_->value("min_depth", 0.1f);
-
-	capabilities_ = kCapMovable | kCapVideo | kCapStereo;
-
-	rgb_ = cv::Mat(cv::Size(params_.width,params_.height), CV_8UC3);
-	idepth_ = cv::Mat(cv::Size(params_.width,params_.height), CV_32SC1);
-	depth_ = cv::Mat(cv::Size(params_.width,params_.height), CV_32FC1);
-
-	rays_->on("focal", [this](const ftl::config::Event &e) {
-		params_.fx = rays_->value("focal", 430.0f);
-		params_.fy = params_.fx;
-	});
-
-	rays_->on("width", [this](const ftl::config::Event &e) {
-		params_.width = rays_->value("width", 640);
-	});
-
-	rays_->on("height", [this](const ftl::config::Event &e) {
-		params_.height = rays_->value("height", 480);
-	});
-}
-
-VirtualSource::~VirtualSource() {
-	if (scene_) delete scene_;
-	if (rays_) delete rays_;
-}
-
-void VirtualSource::setScene(ftl::voxhash::SceneRep *scene) {
-	scene_ = scene;
-}
-
-bool VirtualSource::grab(int n, int b) {
-	if (scene_) {
-		// Ensure this host thread is using correct GPU.
-
-		cudaSafeCall(cudaSetDevice(scene_->getCUDADevice()));
-		DepthCameraParams params;
-		params.fx = params_.fx;
-		params.fy = params_.fy;
-		params.mx = -params_.cx;
-		params.my = -params_.cy;
-		params.m_imageWidth = params_.width;
-		params.m_imageHeight = params_.height;
-		params.m_sensorDepthWorldMin = params_.minDepth;
-		params.m_sensorDepthWorldMax = params_.maxDepth;
-
-		// TODO(Nick) Use double precision pose here
-		rays_->render(scene_->getHashData(), scene_->getHashParams(), params, host_->getPose().cast<float>(), scene_->getIntegrationStream());
-
-		//unique_lock<mutex> lk(mutex_);
-		if (rays_->isIntegerDepth()) {
-			rays_->getRayCastData().download((int*)idepth_.data, (uchar3*)rgb_.data, rays_->getRayCastParams(), scene_->getIntegrationStream());
-
-			cudaSafeCall(cudaStreamSynchronize(scene_->getIntegrationStream()));
-			idepth_.convertTo(depth_, CV_32FC1, 1.0f / 100.0f);
-		} else {
-			rays_->getRayCastData().download((int*)depth_.data, (uchar3*)rgb_.data, rays_->getRayCastParams(), scene_->getIntegrationStream());
-			cudaSafeCall(cudaStreamSynchronize(scene_->getIntegrationStream()));
-		}
-	}
-
-	return true;
-}
-
-bool VirtualSource::isReady() {
-	return true;
-}
diff --git a/applications/reconstruct/src/voxel_scene.cpp b/applications/reconstruct/src/voxel_scene.cpp
deleted file mode 100644
index 09067b1f2..000000000
--- a/applications/reconstruct/src/voxel_scene.cpp
+++ /dev/null
@@ -1,466 +0,0 @@
-#include <ftl/voxel_scene.hpp>
-#include "depth_camera_cuda.hpp"
-
-#include <opencv2/core/cuda_stream_accessor.hpp>
-
-#include <vector>
-
-using namespace ftl::voxhash;
-using ftl::rgbd::Source;
-using ftl::rgbd::Channel;
-using ftl::Configurable;
-using cv::Mat;
-using std::vector;
-
-#define 	SAFE_DELETE_ARRAY(a)   { delete [] (a); (a) = NULL; }
-
-//extern "C" void resetCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams);
-//extern "C" void resetHashBucketMutexCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, cudaStream_t);
-//extern "C" void allocCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, int camid, const DepthCameraParams &depthCameraParams, cudaStream_t);
-//extern "C" void fillDecisionArrayCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const DepthCameraData& depthCameraData);
-//extern "C" void compactifyHashCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams);
-//extern "C" unsigned int compactifyHashAllInOneCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams);
-//extern "C" void integrateDepthMapCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, cudaStream_t);
-//extern "C" void bindInputDepthColorTextures(const DepthCameraData& depthCameraData);
-
-
-SceneRep::SceneRep(nlohmann::json &config) : Configurable(config), m_frameCount(0), do_reset_(false) {
-	_initCUDA();
-
-	// Allocates voxel structure on GPU
-	_create(_parametersFromConfig());
-
-	on("SDFVoxelSize", [this](const ftl::config::Event &e) {
-		do_reset_ = true;
-	});
-	on("hashNumSDFBlocks", [this](const ftl::config::Event &e) {
-		do_reset_ = true;
-	});
-	on("hashNumBuckets", [this](const ftl::config::Event &e) {
-		do_reset_ = true;
-	});
-	on("hashMaxCollisionLinkedListSize", [this](const ftl::config::Event &e) {
-		do_reset_ = true;
-	});
-	on("SDFTruncation", [this](const ftl::config::Event &e) {
-		m_hashParams.m_truncation = value("SDFTruncation", 0.1f);
-	});
-	on("SDFTruncationScale", [this](const ftl::config::Event &e) {
-		m_hashParams.m_truncScale = value("SDFTruncationScale", 0.01f);
-	});
-	on("SDFMaxIntegrationDistance", [this](const ftl::config::Event &e) {
-		m_hashParams.m_maxIntegrationDistance = value("SDFMaxIntegrationDistance", 10.0f);
-	});
-	on("showRegistration", [this](const ftl::config::Event &e) {
-		reg_mode_ = value("showRegistration", false);
-	});
-
-	reg_mode_ = value("showRegistration", false);
-
-	cudaSafeCall(cudaStreamCreate(&integ_stream_));
-	//integ_stream_ = 0;
-}
-
-SceneRep::~SceneRep() {
-	_destroy();
-	cudaStreamDestroy(integ_stream_);
-}
-
-bool SceneRep::_initCUDA() {
-	// Do an initial CUDA check
-	int cuda_device_count = 0;
-	cudaSafeCall(cudaGetDeviceCount(&cuda_device_count));
-	CHECK_GE(cuda_device_count, 1) << "No CUDA devices found";
-
-	LOG(INFO) << "CUDA Devices (" << cuda_device_count << "):";
-
-	vector<cudaDeviceProp> properties(cuda_device_count);
-	for (int i=0; i<cuda_device_count; i++) {
-		cudaSafeCall(cudaGetDeviceProperties(&properties[i], i));
-		LOG(INFO) << " - " << properties[i].name;
-	}
-
-	int desired_device = value("cudaDevice", 0);
-	cuda_device_ = (desired_device < cuda_device_count) ? desired_device : cuda_device_count-1;
-	cudaSafeCall(cudaSetDevice(cuda_device_));
-
-	// TODO:(Nick) Check memory is sufficient
-	// TODO:(Nick) Find out what our compute capability should be.
-
-	LOG(INFO) << "CUDA Compute: " << properties[cuda_device_].major << "." << properties[cuda_device_].minor;
-
-	return true;
-}
-
-void SceneRep::addSource(ftl::rgbd::Source *src) {
-	auto &cam = cameras_.emplace_back();
-	cam.source = src;
-	cam.params.m_imageWidth = 0;
-	src->setChannel(Channel::Depth);
-}
-
-extern "C" void updateCUDACameraConstant(ftl::voxhash::DepthCameraCUDA *data, int count);
-
-void SceneRep::_updateCameraConstant() {
-	std::vector<ftl::voxhash::DepthCameraCUDA> cams(cameras_.size());
-	for (size_t i=0; i<cameras_.size(); ++i) {
-		cameras_[i].gpu.data.pose = MatrixConversion::toCUDA(cameras_[i].source->getPose().cast<float>());
-		cameras_[i].gpu.data.poseInverse = MatrixConversion::toCUDA(cameras_[i].source->getPose().cast<float>().inverse());
-		cams[i] = cameras_[i].gpu.data;
-	}
-	updateCUDACameraConstant(cams.data(), cams.size());
-}
-
-int SceneRep::upload() {
-	int active = 0;
-
-	for (size_t i=0; i<cameras_.size(); ++i) {
-		cameras_[i].source->grab();
-	}
-
-	for (size_t i=0; i<cameras_.size(); ++i) {
-		auto &cam = cameras_[i];
-
-		if (!cam.source->isReady()) {
-			cam.params.m_imageWidth = 0;
-			// TODO(Nick) : Free gpu allocs if was ready before
-			LOG(INFO) << "Source not ready: " << cam.source->getURI();
-			continue;
-		} else {
-			auto in = cam.source;
-
-			cam.params.fx = in->parameters().fx;
-			cam.params.fy = in->parameters().fy;
-			cam.params.mx = -in->parameters().cx;
-			cam.params.my = -in->parameters().cy;
-
-			// Only now do we have camera parameters for allocations...
-			if (cam.params.m_imageWidth == 0) {
-				cam.params.m_imageWidth = in->parameters().width;
-				cam.params.m_imageHeight = in->parameters().height;
-				cam.params.m_sensorDepthWorldMax = in->parameters().maxDepth;
-				cam.params.m_sensorDepthWorldMin = in->parameters().minDepth;
-				cam.gpu.alloc(cam.params, true);
-				LOG(INFO) << "GPU Allocated camera " << i;
-			}
-		}
-
-		cam.params.flags = m_frameCount;
-	}
-
-	_updateCameraConstant();
-	//cudaSafeCall(cudaDeviceSynchronize());
-
-	for (size_t i=0; i<cameras_.size(); ++i) {
-		auto &cam = cameras_[i];
-
-		// Get the RGB-Depth frame from input
-		Source *input = cam.source;
-		Mat rgb, depth;
-
-		// TODO(Nick) Direct GPU upload to save copy
-		input->getFrames(rgb,depth);
-		
-		active += 1;
-
-		if (depth.cols == 0) continue;
-
-		// Must be in RGBA for GPU
-		Mat rgbt, rgba;
-		cv::cvtColor(rgb,rgbt, cv::COLOR_BGR2Lab);
-		cv::cvtColor(rgbt,rgba, cv::COLOR_BGR2BGRA);
-
-		// Send to GPU and merge view into scene
-		//cam.gpu.updateParams(cam.params);
-		cam.gpu.updateData(depth, rgba, cam.stream);
-
-		//setLastRigidTransform(input->getPose().cast<float>());
-
-		//make the rigid transform available on the GPU
-		//m_hashData.updateParams(m_hashParams, cv::cuda::StreamAccessor::getStream(cam.stream));
-
-		//if (i > 0) cudaSafeCall(cudaStreamSynchronize(cv::cuda::StreamAccessor::getStream(cameras_[i-1].stream)));
-
-		//allocate all hash blocks which are corresponding to depth map entries
-		//if (value("voxels", false)) _alloc(i, cv::cuda::StreamAccessor::getStream(cam.stream));
-
-		// Calculate normals
-	}
-
-	// Must have finished all allocations and rendering before next integration
-	cudaSafeCall(cudaDeviceSynchronize());
-
-	return active;
-}
-
-int SceneRep::upload(ftl::rgbd::FrameSet &fs) {
-	int active = 0;
-
-	for (size_t i=0; i<cameras_.size(); ++i) {
-		auto &cam = cameras_[i];
-
-		if (!cam.source->isReady()) {
-			cam.params.m_imageWidth = 0;
-			// TODO(Nick) : Free gpu allocs if was ready before
-			LOG(INFO) << "Source not ready: " << cam.source->getURI();
-			continue;
-		} else {
-			auto in = cam.source;
-
-			cam.params.fx = in->parameters().fx;
-			cam.params.fy = in->parameters().fy;
-			cam.params.mx = -in->parameters().cx;
-			cam.params.my = -in->parameters().cy;
-
-			// Only now do we have camera parameters for allocations...
-			if (cam.params.m_imageWidth == 0) {
-				cam.params.m_imageWidth = in->parameters().width;
-				cam.params.m_imageHeight = in->parameters().height;
-				cam.params.m_sensorDepthWorldMax = in->parameters().maxDepth;
-				cam.params.m_sensorDepthWorldMin = in->parameters().minDepth;
-				cam.gpu.alloc(cam.params, true);
-				LOG(INFO) << "GPU Allocated camera " << i;
-			}
-		}
-
-		cam.params.flags = m_frameCount;
-	}
-
-	_updateCameraConstant();
-	//cudaSafeCall(cudaDeviceSynchronize());
-
-	for (size_t i=0; i<cameras_.size(); ++i) {
-		auto &cam = cameras_[i];
-		auto &chan1 = fs.frames[i].get<cv::Mat>(Channel::Colour);
-		auto &chan2 = fs.frames[i].get<cv::Mat>(fs.sources[i]->getChannel());
-
-		auto test = fs.frames[i].createTexture<uchar4>(Channel::Flow, ftl::rgbd::Format<uchar4>(100,100));
-
-		// Get the RGB-Depth frame from input
-		Source *input = cam.source;
-		//Mat rgb, depth;
-
-		// TODO(Nick) Direct GPU upload to save copy
-		//input->getFrames(rgb,depth);
-		
-		active += 1;
-
-		//if (depth.cols == 0) continue;
-
-		// Must be in RGBA for GPU
-		Mat rgbt, rgba;
-		cv::cvtColor(chan1,rgbt, cv::COLOR_BGR2Lab);
-		cv::cvtColor(rgbt,rgba, cv::COLOR_BGR2BGRA);
-
-		// Send to GPU and merge view into scene
-		//cam.gpu.updateParams(cam.params);
-		cam.gpu.updateData(chan2, rgba, cam.stream);
-
-		//setLastRigidTransform(input->getPose().cast<float>());
-
-		//make the rigid transform available on the GPU
-		//m_hashData.updateParams(m_hashParams, cv::cuda::StreamAccessor::getStream(cam.stream));
-
-		//if (i > 0) cudaSafeCall(cudaStreamSynchronize(cv::cuda::StreamAccessor::getStream(cameras_[i-1].stream)));
-
-		//allocate all hash blocks which are corresponding to depth map entries
-		//if (value("voxels", false)) _alloc(i, cv::cuda::StreamAccessor::getStream(cam.stream));
-
-		// Calculate normals
-	}
-
-	// Must have finished all allocations and rendering before next integration
-	cudaSafeCall(cudaDeviceSynchronize());
-
-	return active;
-}
-
-void SceneRep::integrate() {
-	/*for (size_t i=0; i<cameras_.size(); ++i) {
-		auto &cam = cameras_[i];
-
-		setLastRigidTransform(cam.source->getPose().cast<float>());
-		//m_hashData.updateParams(m_hashParams);
-
-		//generate a linear hash array with only occupied entries
-		_compactifyVisible(cam.params);
-
-		//volumetrically integrate the depth data into the depth SDFBlocks
-		//_integrateDepthMap(cam.gpu, cam.params);
-
-		//_garbageCollect();
-
-		m_numIntegratedFrames++;
-	}*/
-
-	_compactifyAllocated();
-	_integrateDepthMaps();
-}
-
-void SceneRep::garbage() {
-	//_compactifyAllocated();
-	//if (value("voxels", false)) _garbageCollect();
-
-	//cudaSafeCall(cudaStreamSynchronize(integ_stream_));
-}
-
-/*void SceneRep::integrate(const Eigen::Matrix4f& lastRigidTransform, const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, unsigned int* d_bitMask) {
-		
-	setLastRigidTransform(lastRigidTransform);
-
-	//make the rigid transform available on the GPU
-	m_hashData.updateParams(m_hashParams);
-
-	//allocate all hash blocks which are corresponding to depth map entries
-	_alloc(depthCameraData, depthCameraParams, d_bitMask);
-
-	//generate a linear hash array with only occupied entries
-	_compactifyHashEntries();
-
-	//volumetrically integrate the depth data into the depth SDFBlocks
-	_integrateDepthMap(depthCameraData, depthCameraParams);
-
-	_garbageCollect(depthCameraData);
-
-	m_numIntegratedFrames++;
-}*/
-
-/*void SceneRep::setLastRigidTransformAndCompactify(const Eigen::Matrix4f& lastRigidTransform, const DepthCameraData& depthCameraData) {
-	setLastRigidTransform(lastRigidTransform);
-	_compactifyHashEntries();
-}*/
-
-/* Nick: To reduce weights between frames */
-void SceneRep::nextFrame() {
-	if (do_reset_) {
-		do_reset_ = false;
-		_destroy();
-		_create(_parametersFromConfig());
-	} else {
-		//ftl::cuda::compactifyAllocated(m_hashData, m_hashParams, integ_stream_);
-		//if (reg_mode_) ftl::cuda::clearVoxels(m_hashData, m_hashParams); 
-		//else ftl::cuda::starveVoxels(m_hashData, m_hashParams, integ_stream_);
-		m_numIntegratedFrames = 0;
-	}
-}
-
-//! resets the hash to the initial state (i.e., clears all data)
-void SceneRep::reset() {
-	m_numIntegratedFrames = 0;
-	m_hashData.updateParams(m_hashParams);
-	resetCUDA(m_hashData, m_hashParams);
-}
-
-unsigned int SceneRep::getOccupiedCount() {
-	unsigned int count;
-	cudaSafeCall(cudaMemcpy(&count, m_hashData.d_hashCompactifiedCounter, sizeof(unsigned int), cudaMemcpyDeviceToHost));
-	return count+1;	//there is one more free than the address suggests (0 would be also a valid address)
-}
-
-HashParams SceneRep::_parametersFromConfig() {
-	//auto &cfg = ftl::config::resolve(config);
-	HashParams params;
-	// First camera view is set to identity pose to be at the centre of
-	// the virtual coordinate space.
-	params.m_hashNumBuckets = value("hashNumBuckets", 100000);
-	params.m_virtualVoxelSize = value("SDFVoxelSize", 0.006f);
-	params.m_maxIntegrationDistance = value("SDFMaxIntegrationDistance", 10.0f);
-	params.m_truncation = value("SDFTruncation", 0.1f);
-	params.m_truncScale = value("SDFTruncationScale", 0.01f);
-	params.m_integrationWeightSample = value("SDFIntegrationWeightSample", 10);
-	params.m_integrationWeightMax = value("SDFIntegrationWeightMax", 255);
-	params.m_spatialSmoothing = value("spatialSmoothing", 0.04f); // 4cm
-	params.m_colourSmoothing = value("colourSmoothing", 20.0f);
-	params.m_confidenceThresh = value("confidenceThreshold", 20.0f);
-	params.m_flags = 0;
-	params.m_flags |= (value("clipping", false)) ? ftl::voxhash::kFlagClipping : 0;
-	params.m_flags |= (value("mls", false)) ? ftl::voxhash::kFlagMLS : 0;
-	params.m_maxBounds = make_float3(
-		value("bbox_x_max", 2.0f),
-		value("bbox_y_max", 2.0f),
-		value("bbox_z_max", 2.0f));
-	params.m_minBounds = make_float3(
-		value("bbox_x_min", -2.0f),
-		value("bbox_y_min", -2.0f),
-		value("bbox_z_min", -2.0f));
-	return params;
-}
-
-void SceneRep::_create(const HashParams& params) {
-	m_hashParams = params;
-	m_hashData.allocate(m_hashParams);
-
-	reset();
-}
-
-void SceneRep::_destroy() {
-	m_hashData.free();
-}
-
-void SceneRep::_alloc(int camid, cudaStream_t stream) {
-	// NOTE (nick): We might want this later...
-	/*if (false) {
-		// TODO(Nick) Make this work without memcpy to host first
-		allocate until all blocks are allocated
-		unsigned int prevFree = 0; //getHeapFreeCount();
-		while (1) {
-			resetHashBucketMutexCUDA(m_hashData, m_hashParams, stream);
-			allocCUDA(m_hashData, m_hashParams, camid, cameras_[camid].params, stream);
-
-			unsigned int currFree = getHeapFreeCount();
-
-			if (prevFree != currFree) {
-				prevFree = currFree;
-			}
-			else {
-				break;
-			}
-		}
-	}
-	else {*/
-		//this version is faster, but it doesn't guarantee that all blocks are allocated (staggers alloc to the next frame)
-		//resetHashBucketMutexCUDA(m_hashData, m_hashParams, stream);
-		//allocCUDA(m_hashData, m_hashParams, camid, cameras_[camid].params, stream);
-	//}
-}
-
-
-void SceneRep::_compactifyVisible(const DepthCameraParams &camera) { //const DepthCameraData& depthCameraData) {
-	//ftl::cuda::compactifyOccupied(m_hashData, m_hashParams, integ_stream_);		//this version uses atomics over prefix sums, which has a much better performance
-	//m_hashData.updateParams(m_hashParams);	//make sure numOccupiedBlocks is updated on the GPU
-}
-
-void SceneRep::_compactifyAllocated() {
-	//ftl::cuda::compactifyAllocated(m_hashData, m_hashParams, integ_stream_);		//this version uses atomics over prefix sums, which has a much better performance
-	//std::cout << "Occ blocks = " << m_hashParams.m_numOccupiedBlocks << std::endl;
-	//m_hashData.updateParams(m_hashParams);	//make sure numOccupiedBlocks is updated on the GPU
-}
-
-/*void SceneRep::_integrateDepthMap(const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams) {
-	if (!reg_mode_) ftl::cuda::integrateDepthMap(m_hashData, m_hashParams, depthCameraData, depthCameraParams, integ_stream_);
-	else ftl::cuda::integrateRegistration(m_hashData, m_hashParams, depthCameraData, depthCameraParams, integ_stream_);
-}*/
-
-//extern "C" void bilateralFilterFloatMap(float* d_output, float* d_input, float sigmaD, float sigmaR, unsigned int width, unsigned int height);
-
-void SceneRep::_integrateDepthMaps() {
-	//cudaSafeCall(cudaDeviceSynchronize());
-
-	for (size_t i=0; i<cameras_.size(); ++i) {
-		if (!cameras_[i].source->isReady()) continue;
-		//ftl::cuda::clear_depth(*(cameras_[i].gpu.depth2_tex_), integ_stream_);
-		cameras_[i].gpu.computeNormals(integ_stream_);
-		ftl::cuda::clear_points(*(cameras_[i].gpu.points_tex_), integ_stream_);
-		ftl::cuda::mls_smooth(*(cameras_[i].gpu.points_tex_), m_hashParams, cameras_.size(), i, integ_stream_);
-		//ftl::cuda::int_to_float(*(cameras_[i].gpu.depth2_tex_), *(cameras_[i].gpu.depth_tex_), 1.0f / 1000.0f, integ_stream_);
-		//ftl::cuda::hole_fill(*(cameras_[i].gpu.depth2_tex_), *(cameras_[i].gpu.depth_tex_), cameras_[i].params, integ_stream_);
-		//bilateralFilterFloatMap(cameras_[i].gpu.depth_tex_->devicePtr(), cameras_[i].gpu.depth3_tex_->devicePtr(), 3, 7, cameras_[i].gpu.depth_tex_->width(), cameras_[i].gpu.depth_tex_->height());
-	}
-	//if (value("voxels", false)) ftl::cuda::integrateDepthMaps(m_hashData, m_hashParams, cameras_.size(), integ_stream_);
-}
-
-void SceneRep::_garbageCollect() {
-	//ftl::cuda::garbageCollectIdentify(m_hashData, m_hashParams, integ_stream_);
-	//resetHashBucketMutexCUDA(m_hashData, m_hashParams, integ_stream_);	//needed if linked lists are enabled -> for memeory deletion
-	//ftl::cuda::garbageCollectFree(m_hashData, m_hashParams, integ_stream_);
-}
diff --git a/components/codecs/src/nvpipe_encoder.cpp b/components/codecs/src/nvpipe_encoder.cpp
index 42374bede..f1b068d74 100644
--- a/components/codecs/src/nvpipe_encoder.cpp
+++ b/components/codecs/src/nvpipe_encoder.cpp
@@ -47,6 +47,10 @@ bool NvPipeEncoder::encode(const cv::Mat &in, definition_t odefinition, bitrate_
 
 	//LOG(INFO) << "Definition: " << ftl::codecs::getWidth(definition) << "x" << ftl::codecs::getHeight(definition);
 
+    if (in.empty()) {
+        LOG(ERROR) << "Missing data for Nvidia encoder";
+        return false;
+    }
     if (!_createEncoder(in, definition, bitrate)) return false;
 
     //LOG(INFO) << "NvPipe Encode: " << int(definition) << " " << in.cols;
diff --git a/components/renderers/cpp/include/ftl/cuda/points.hpp b/components/renderers/cpp/include/ftl/cuda/points.hpp
index ddbac8ba9..cb996c4ec 100644
--- a/components/renderers/cpp/include/ftl/cuda/points.hpp
+++ b/components/renderers/cpp/include/ftl/cuda/points.hpp
@@ -21,6 +21,12 @@ void point_cloud(ftl::cuda::TextureObject<float4> &output,
 void clipping(ftl::cuda::TextureObject<float4> &points,
 		const ClipSpace &clip, cudaStream_t stream);
 
+void point_cloud(ftl::cuda::TextureObject<float> &output, ftl::cuda::TextureObject<float4> &points, const ftl::rgbd::Camera &params, const float4x4 &poseinv, cudaStream_t stream);
+
+void world_to_cam(ftl::cuda::TextureObject<float4> &points, const float4x4 &poseinv, cudaStream_t stream);
+
+void cam_to_world(ftl::cuda::TextureObject<float4> &points, const float4x4 &pose, cudaStream_t stream);
+
 }
 }
 
diff --git a/components/renderers/cpp/include/ftl/cuda/weighting.hpp b/components/renderers/cpp/include/ftl/cuda/weighting.hpp
index 9498d0508..15d3dbcec 100644
--- a/components/renderers/cpp/include/ftl/cuda/weighting.hpp
+++ b/components/renderers/cpp/include/ftl/cuda/weighting.hpp
@@ -4,19 +4,41 @@
 namespace ftl {
 namespace cuda {
 
+__device__ inline float weighting(float r, float h) {
+	if (r >= h) return 0.0f;
+	float rh = r / h;
+	rh = 1.0f - rh*rh;
+	return rh*rh*rh*rh;
+}
+
 /*
  * Guennebaud, G.; Gross, M. Algebraic point set surfaces. ACMTransactions on Graphics Vol. 26, No. 3, Article No. 23, 2007.
  * Used in: FusionMLS: Highly dynamic 3D reconstruction with consumer-grade RGB-D cameras
  *     r = distance between points
  *     h = smoothing parameter in meters (default 4cm)
  */
-__device__ inline float spatialWeighting(float r, float h) {
+__device__ inline float spatialWeighting(const float3 &a, const float3 &b, float h) {
+	const float r = length(a-b);
 	if (r >= h) return 0.0f;
 	float rh = r / h;
 	rh = 1.0f - rh*rh;
 	return rh*rh*rh*rh;
 }
 
+/*
+ * Colour weighting as suggested in:
+ * C. Kuster et al. Spatio-Temporal Geometry Fusion for Multiple Hybrid Cameras using Moving Least Squares Surfaces. 2014.
+ * c = colour distance
+ */
+ __device__ inline float colourWeighting(uchar4 a, uchar4 b, float h) {
+	const float3 delta = make_float3((float)a.x - (float)b.x, (float)a.y - (float)b.y, (float)a.z - (float)b.z);
+	const float c = length(delta);
+	if (c >= h) return 0.0f;
+	float ch = c / h;
+	ch = 1.0f - ch*ch;
+	return ch*ch*ch*ch;
+}
+
 }
 }
 
diff --git a/components/renderers/cpp/src/splat_render.cpp b/components/renderers/cpp/src/splat_render.cpp
index aed04ad89..6a6bbb967 100644
--- a/components/renderers/cpp/src/splat_render.cpp
+++ b/components/renderers/cpp/src/splat_render.cpp
@@ -54,6 +54,7 @@ void Splatter::renderChannel(
 					ftl::render::SplatParams &params, ftl::rgbd::Frame &out,
 					const Channel &channel, cudaStream_t stream)
 {
+	if (channel == Channel::None) return;
 	cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream);
 	temp_.get<GpuMat>(Channel::Depth).setTo(cv::Scalar(0x7FFFFFFF), cvstream);
 	temp_.get<GpuMat>(Channel::Depth2).setTo(cv::Scalar(0x7FFFFFFF), cvstream);
@@ -105,7 +106,7 @@ void Splatter::renderChannel(
 	// Accumulate attribute contributions for each pixel
 	for (auto &f : scene_->frames) {
 		// Convert colour from BGR to BGRA if needed
-		if (f.get<GpuMat>(Channel::Colour).type() == CV_8UC3) {
+		if (f.get<GpuMat>(channel).type() == CV_8UC3) {
 			// Convert to 4 channel colour
 			auto &col = f.get<GpuMat>(Channel::Colour);
 			GpuMat tmp(col.size(), CV_8UC4);
@@ -247,6 +248,15 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cuda
 		out.create<GpuMat>(Channel::Right, Format<uchar4>(camera.width, camera.height));
 		out.get<GpuMat>(Channel::Right).setTo(cv::Scalar(76,76,76), cvstream);
 		renderChannel(params, out, Channel::Right, stream);
+	} else if (chan != Channel::None) {
+		if (ftl::rgbd::isFloatChannel(chan)) {
+			out.create<GpuMat>(chan, Format<float>(camera.width, camera.height));
+			out.get<GpuMat>(chan).setTo(cv::Scalar(0.0f), cvstream);
+		} else {
+			out.create<GpuMat>(chan, Format<uchar4>(camera.width, camera.height));
+			out.get<GpuMat>(chan).setTo(cv::Scalar(76,76,76,255), cvstream);
+		}
+		renderChannel(params, out, chan, stream);
 	}
 
 	return true;
diff --git a/components/renderers/cpp/src/splatter.cu b/components/renderers/cpp/src/splatter.cu
index 3b1ae4b47..c7456e57b 100644
--- a/components/renderers/cpp/src/splatter.cu
+++ b/components/renderers/cpp/src/splatter.cu
@@ -63,6 +63,8 @@ __device__ inline float4 make_float4(const uchar4 &c) {
 #define SMOOTHING_MULTIPLIER_B 4.0f		// For z contribution
 #define SMOOTHING_MULTIPLIER_C 4.0f		// For colour contribution
 
+#define ACCUM_DIAMETER 8
+
 /*
  * Pass 2: Accumulate attribute contributions if the points pass a visibility test.
  */
@@ -92,7 +94,7 @@ __global__ void dibr_attribute_contrib_kernel(
 	if (camPos.z > params.camera.maxDepth) return;
 	const uint2 screenPos = params.camera.camToScreen<uint2>(camPos);
 
-    const int upsample = 8; //min(UPSAMPLE_MAX, int((5.0f*r) * params.camera.fx / camPos.z));
+    //const int upsample = 8; //min(UPSAMPLE_MAX, int((5.0f*r) * params.camera.fx / camPos.z));
 
 	// Not on screen so stop now...
 	if (screenPos.x >= depth_in.width() || screenPos.y >= depth_in.height()) return;
@@ -107,16 +109,16 @@ __global__ void dibr_attribute_contrib_kernel(
 
 	// Each thread in warp takes an upsample point and updates corresponding depth buffer.
 	const int lane = tid % WARP_SIZE;
-	for (int i=lane; i<upsample*upsample; i+=WARP_SIZE) {
-		const float u = (i % upsample) - (upsample / 2);
-		const float v = (i / upsample) - (upsample / 2);
+	for (int i=lane; i<ACCUM_DIAMETER*ACCUM_DIAMETER; i+=WARP_SIZE) {
+		const float u = (i % ACCUM_DIAMETER) - (ACCUM_DIAMETER / 2);
+		const float v = (i / ACCUM_DIAMETER) - (ACCUM_DIAMETER / 2);
 
         // Use the depth buffer to determine this pixels 3D position in camera space
         const float d = ((float)depth_in.tex2D(screenPos.x+u, screenPos.y+v)/1000.0f);
 		const float3 nearest = params.camera.screenToCam((int)(screenPos.x+u),(int)(screenPos.y+v),d);
 
         // What is contribution of our current point at this pixel?
-        const float weight = ftl::cuda::spatialWeighting(length(nearest - camPos), SMOOTHING_MULTIPLIER_C*(nearest.z/params.camera.fx));
+        const float weight = ftl::cuda::spatialWeighting(nearest, camPos, SMOOTHING_MULTIPLIER_C*(nearest.z/params.camera.fx));
         if (screenPos.x+u < colour_out.width() && screenPos.y+v < colour_out.height() && weight > 0.0f) {  // TODO: Use confidence threshold here
             const float4 wcolour = colour * weight;
 			//const float4 wnormal = normal * weight;
@@ -187,7 +189,7 @@ __global__ void dibr_attribute_contrib_kernel(
         const float3 nearest = params.camera.screenToCam((int)(screenPos.x+u),(int)(screenPos.y+v),d);
 
         // What is contribution of our current point at this pixel?
-        const float weight = ftl::cuda::spatialWeighting(length(nearest - camPos), SMOOTHING_MULTIPLIER_C*(nearest.z/params.camera.fx));
+        const float weight = ftl::cuda::spatialWeighting(nearest, camPos, SMOOTHING_MULTIPLIER_C*(nearest.z/params.camera.fx));
         if (screenPos.x+u < colour_out.width() && screenPos.y+v < colour_out.height() && weight > 0.0f) {  // TODO: Use confidence threshold here
             const float wcolour = colour * weight;
             //const float4 wnormal = normal * weight;
diff --git a/components/rgbd-sources/src/streamer.cpp b/components/rgbd-sources/src/streamer.cpp
index 676cf58cf..7a9118c9f 100644
--- a/components/rgbd-sources/src/streamer.cpp
+++ b/components/rgbd-sources/src/streamer.cpp
@@ -496,7 +496,7 @@ void Streamer::_transmitPacket(StreamSource *src, const ftl::codecs::Packet &pkt
 		frame_no_,
 		static_cast<uint8_t>((chan & 0x1) | ((hasChan2) ? 0x2 : 0x0))
 	};
-	LOG(INFO) << "codec:" << (int) pkt.codec;
+
 	// Lock to prevent clients being added / removed
 	//SHARED_LOCK(src->mutex,lk);
 	auto c = src->clients.begin();
diff --git a/components/rgbd-sources/src/virtual.cpp b/components/rgbd-sources/src/virtual.cpp
index 0e6db9738..016662695 100644
--- a/components/rgbd-sources/src/virtual.cpp
+++ b/components/rgbd-sources/src/virtual.cpp
@@ -47,8 +47,6 @@ class VirtualImpl : public ftl::rgbd::detail::Source {
 					frame.hasChannel(host_->getChannel())) {
 				frame.download(host_->getChannel());
 				cv::swap(frame.get<cv::Mat>(host_->getChannel()), depth_);
-			} else {
-				LOG(ERROR) << "Channel 2 frame in rendering";
 			}
 
 			auto cb = host_->callback();
-- 
GitLab