diff --git a/CMakeLists.txt b/CMakeLists.txt
index 53dc9b883599a657c3c971487ac298bbdf0b46cf..305168889f4427fbfa26f5a33a7926f5f7c2dd98 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -276,7 +276,7 @@ else()
 	add_definitions(-DUNIX)
 	set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -msse3 -Werror=return-type")
 	set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -D_DEBUG -pg -Wall")
-	set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3 -mfpmath=sse")
+	set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3 -Wall -mfpmath=sse")
 	set(OS_LIBS "dl")
 endif()
 
diff --git a/applications/gui/src/camera.cpp b/applications/gui/src/camera.cpp
index 6db9462d6a3d9d31c8786dc97cef2e12df2b6cec..e33ec641ca9b8a0b90340bad56088e524aa03e90 100644
--- a/applications/gui/src/camera.cpp
+++ b/applications/gui/src/camera.cpp
@@ -237,52 +237,94 @@ void ftl::gui::Camera::draw(std::vector<ftl::rgbd::FrameSet*> &fss) {
 					if (data.size() > 0) {
 						auto &d = *data.rbegin();
 						
-						//cv::Mat over_depth;
-						//over_depth.create(im1_.size(), CV_32F);
+						cv::Mat over_depth;
+						over_depth.create(im1_.size(), CV_32F);
 
 						auto cam = ftl::rgbd::Camera::from(intrinsics_);
 
-						float screenWidth = intrinsics_->value("screen_size", 1.2f);  // In meters
+						float screenWidth = intrinsics_->value("screen_size", 0.6f);  // In meters
 						float screenHeight = (9.0f/16.0f) * screenWidth;
 
-						//Eigen::Vector3f pc(-screenWidth/2.0f,-screenHeight/2.0f,0.0);
-						Eigen::Vector3f pa(-screenWidth/2.0f,screenHeight/2.0f,0.0);
-						//Eigen::Vector3f pb(screenWidth/2.0f,screenHeight/2.0f,0.0);
+						float screenDistance = (d.depth > cam.minDepth && d.depth < cam.maxDepth) ? d.depth : intrinsics_->value("screen_dist_default", 0.5f);  // Face distance from screen in meters
 
-						float screenDistance = (d.depth > cam.minDepth && d.depth < cam.maxDepth) ? d.depth : intrinsics_->value("screen_dist_default", 1.2f);  // Face distance from screen in meters
-
-						auto pos = f.getLeft().screenToCam(d.box.x+(d.box.width/2), d.box.y+(d.box.height/2), -screenDistance);
+						auto pos = f.getLeft().screenToCam(d.box.x+(d.box.width/2), d.box.y+(d.box.height/2), screenDistance);
 						Eigen::Vector3f eye;
-						eye[0] = pos.x;
-						eye[1] = -pos.y;
-						eye[2] = pos.z;
+						eye[0] = -pos.x;
+						eye[1] = pos.y;
+						eye[2] = -pos.z;
+						//eye[3] = 0;
 
 						Eigen::Translation3f trans(eye);
 						Eigen::Affine3f t(trans);
 						Eigen::Matrix4f viewPose = t.matrix();
 
-						// Top Left norm dist
-						Eigen::Vector3f tl = (pa - eye);
-						Eigen::Vector3f princ = tl;
-
-						Eigen::Matrix4d windowPose;
-						windowPose.setIdentity();
-
-						Eigen::Matrix4d fakepose = viewPose.cast<double>().inverse() * state_.getPose();
-						ftl::rgbd::Camera fakecam = cam;
-						fakecam.fx = screenDistance; //eye.norm();
-						fakecam.fy = fakecam.fx;
-						fakecam.cx = (princ[0]) * fakecam.fx / princ[2] / screenWidth * cam.width;
-						fakecam.cy = ((princ[1]) * fakecam.fy / princ[2] / screenHeight * cam.height) - cam.height;
-						fakecam.fx = fakecam.fx / screenWidth * cam.width;
-						fakecam.fy = fakecam.fy / screenHeight * cam.height;
+						// Calculate where the screen is within current camera space
+						Eigen::Vector4f p1 = viewPose.cast<float>() * (Eigen::Vector4f(screenWidth/2.0, screenHeight/2.0, 0, 1));
+						Eigen::Vector4f p2 = viewPose.cast<float>() * (Eigen::Vector4f(screenWidth/2.0, -screenHeight/2.0, 0, 1));
+						Eigen::Vector4f p3 = viewPose.cast<float>() * (Eigen::Vector4f(-screenWidth/2.0, screenHeight/2.0, 0, 1));
+						Eigen::Vector4f p4 = viewPose.cast<float>() * (Eigen::Vector4f(-screenWidth/2.0, -screenHeight/2.0, 0, 1));
+						p1 = p1 / p1[3];
+						p2 = p2 / p2[3];
+						p3 = p3 / p3[3];
+						p4 = p4 / p4[3];
+						float2 p1screen = cam.camToScreen<float2>(make_float3(p1[0],p1[1],p1[2]));
+						float2 p2screen = cam.camToScreen<float2>(make_float3(p2[0],p2[1],p2[2]));
+						float2 p3screen = cam.camToScreen<float2>(make_float3(p3[0],p3[1],p3[2]));
+						float2 p4screen = cam.camToScreen<float2>(make_float3(p4[0],p4[1],p4[2]));
+
+						//cv::line(im1_, cv::Point2f(p1screen.x, p1screen.y), cv::Point2f(p2screen.x, p2screen.y), cv::Scalar(255,0,255,0));
+						//cv::line(im1_, cv::Point2f(p4screen.x, p4screen.y), cv::Point2f(p2screen.x, p2screen.y), cv::Scalar(255,0,255,0));
+						//cv::line(im1_, cv::Point2f(p1screen.x, p1screen.y), cv::Point2f(p3screen.x, p3screen.y), cv::Scalar(255,0,255,0));
+						//cv::line(im1_, cv::Point2f(p3screen.x, p3screen.y), cv::Point2f(p4screen.x, p4screen.y), cv::Scalar(255,0,255,0));
+						//LOG(INFO) << "DRAW LINE: " << p1screen.x << "," << p1screen.y << " -> " << p2screen.x << "," << p2screen.y;
+
+						std::vector<cv::Point2f> quad_pts;
+						std::vector<cv::Point2f> squre_pts;
+						quad_pts.push_back(cv::Point2f(p1screen.x,p1screen.y));
+						quad_pts.push_back(cv::Point2f(p2screen.x,p2screen.y));
+						quad_pts.push_back(cv::Point2f(p3screen.x,p3screen.y));
+						quad_pts.push_back(cv::Point2f(p4screen.x,p4screen.y));
+						squre_pts.push_back(cv::Point2f(0,0));
+						squre_pts.push_back(cv::Point2f(0,cam.height));
+						squre_pts.push_back(cv::Point2f(cam.width,0));
+						squre_pts.push_back(cv::Point2f(cam.width,cam.height));
+
+						cv::Mat transmtx = cv::getPerspectiveTransform(quad_pts,squre_pts);
+						cv::Mat transformed = cv::Mat::zeros(im1_.rows, im1_.cols, CV_8UC4);
+						//cv::warpPerspective(im1_, im1_, transmtx, im1_.size());
+
+						ftl::render::ViewPort vp;
+						vp.x = std::min(p4screen.x, std::min(p3screen.x, std::min(p1screen.x,p2screen.x)));
+						vp.y = std::min(p4screen.y, std::min(p3screen.y, std::min(p1screen.y,p2screen.y)));
+						vp.width = std::max(p4screen.x, std::max(p3screen.x, std::max(p1screen.x,p2screen.x))) - vp.x;
+						vp.height = std::max(p4screen.y, std::max(p3screen.y, std::max(p1screen.y,p2screen.y))) - vp.y;
+						renderer_->setViewPort(ftl::render::ViewPortMode::Warping, vp);
+
+						//Eigen::Matrix4d windowPose;
+						//windowPose.setIdentity();
+
+						//Eigen::Matrix4d fakepose = (viewPose.cast<double>()).inverse() * state_.getPose();
+						//ftl::rgbd::Camera fakecam = cam;
+
+						//eye[1] = -eye[1];
+						//eye[2] = -eye[2];
+						//Eigen::Translation3f trans2(eye);
+						//Eigen::Affine3f t2(trans2);
+						//Eigen::Matrix4f viewPose2 = t2.matrix();
 
 						// Use face tracked window pose for virtual camera
-						state_.getLeft() = fakecam;
+						//state_.getLeft() = fakecam;
 						transform_ix_ = fset->id;  // Disable keyboard/mouse pose setting
+
 						state_.setPose(transforms_[transform_ix_] * viewPose.cast<double>());
 
-						//ftl::overlay::draw3DLine(state_.getLeft(), im1_, over_depth, state_.getPose().inverse() * Eigen::Vector4d(eye[0],eye[1],eye[2],1), state_.getPose().inverse() * Eigen::Vector4d(0,0,0,1), cv::Scalar(0,0,255,0));
+						//Eigen::Vector4d pt1 = state_.getPose().inverse() * Eigen::Vector4d(eye[0],eye[1],eye[2],1);
+						//pt1 /= pt1[3];
+						//Eigen::Vector4d pt2 = state_.getPose().inverse() * Eigen::Vector4d(0,0,0,1);
+						//pt2 /= pt2[3];
+
+
+						//ftl::overlay::draw3DLine(state_.getLeft(), im1_, over_depth, pt1, pt2, cv::Scalar(0,0,255,0));
 						//ftl::overlay::drawRectangle(state_.getLeft(), im1_, over_depth, windowPose.inverse() * state_.getPose(), cv::Scalar(255,0,0,0), screenWidth, screenHeight);
 						//ftl::overlay::drawCamera(state_.getLeft(), im1_, over_depth, fakecam, fakepose, cv::Scalar(255,0,255,255), 1.0,screen_->root()->value("show_frustrum", false));
 					}
@@ -326,7 +368,7 @@ void ftl::gui::Camera::_draw(std::vector<ftl::rgbd::FrameSet*> &fss) {
 
 		for (auto *fs : fss) {
 			if (!usesFrameset(fs->id)) continue;
-			for (int i=0; i<fs->frames.size(); ++i) {
+			for (size_t i=0; i<fs->frames.size(); ++i) {
 				auto pose = fs->frames[i].getPose().inverse() * state_.getPose();
 				Eigen::Vector4d pos = pose.inverse() * Eigen::Vector4d(0,0,0,1);
 				pos /= pos[3];
@@ -396,7 +438,7 @@ void ftl::gui::Camera::update(std::vector<ftl::rgbd::FrameSet*> &fss) {
 
 			ftl::rgbd::Frame *frame = nullptr;
 
-			if (fid_ >= fs->frames.size()) return;
+			if ((size_t)fid_ >= fs->frames.size()) return;
 			frame = &fs->frames[fid_];
 			_downloadFrames(frame);
 			auto n = frame->get<std::string>("name");
@@ -599,6 +641,8 @@ cv::Mat ftl::gui::Camera::visualizeActiveChannel() {
 			break;
 		case Channel::Right:
 			result = im2_;
+			break;
+		default: break;
 	}
 	return result;
 }
diff --git a/applications/reconstruct/CMakeLists.txt b/applications/reconstruct/CMakeLists.txt
index f7c34113ad11973b6a5437591791e44fffc15f09..39e4d51f82be600103f91d23e4a0564fff7ca32f 100644
--- a/applications/reconstruct/CMakeLists.txt
+++ b/applications/reconstruct/CMakeLists.txt
@@ -4,21 +4,7 @@
 
 set(REPSRC
 	src/main.cpp
-	#src/voxel_scene.cpp
-	#src/ray_cast_sdf.cu
 	src/camera_util.cu
-	#src/ray_cast_sdf.cpp
-	#src/virtual_source.cpp
-	#src/splat_render.cpp
-	#src/dibr.cu
-	#src/mls.cu
-	#src/depth_camera.cu
-	#src/depth_camera.cpp
-	#src/ilw/ilw.cpp
-	#src/ilw/ilw.cu
-	#src/ilw/fill.cu
-	#src/ilw/discontinuity.cu
-	#src/ilw/correspondence.cu
 	src/reconstruction.cpp
 )
 
diff --git a/applications/reconstruct/src/ilw/correspondence.cu b/applications/reconstruct/src/ilw/correspondence.cu
deleted file mode 100644
index 9fb440ae41bf6e1515d72759207154fa8224a588..0000000000000000000000000000000000000000
--- a/applications/reconstruct/src/ilw/correspondence.cu
+++ /dev/null
@@ -1,198 +0,0 @@
-#include "ilw_cuda.hpp"
-#include <ftl/cuda/weighting.hpp>
-
-using ftl::cuda::TextureObject;
-using ftl::rgbd::Camera;
-using ftl::cuda::Mask;
-
-#define T_PER_BLOCK 8
-
-template<int FUNCTION>
-__device__ float weightFunction(const ftl::cuda::ILWParams &params, float dweight, float cweight);
-
-template <>
-__device__ inline float weightFunction<0>(const ftl::cuda::ILWParams &params, float dweight, float cweight) {
-	return (params.cost_ratio * (cweight) + (1.0f - params.cost_ratio) * dweight);
-}
-
-template <>
-__device__ inline float weightFunction<1>(const ftl::cuda::ILWParams &param, float dweight, float cweight) {
-	return (cweight * cweight * dweight);
-}
-
-template <>
-__device__ inline float weightFunction<2>(const ftl::cuda::ILWParams &param, float dweight, float cweight) {
-	return (dweight * dweight * cweight);
-}
-
-template <>
-__device__ inline float weightFunction<3>(const ftl::cuda::ILWParams &params, float dweight, float cweight) {
-	return (dweight == 0.0f) ? 0.0f : (params.cost_ratio * (cweight) + (1.0f - params.cost_ratio) * dweight);
-}
-
-template <>
-__device__ inline float weightFunction<4>(const ftl::cuda::ILWParams &params, float dweight, float cweight) {
-	return cweight;
-}
-
-template<int COR_STEPS, int FUNCTION> 
-__global__ void correspondence_energy_vector_kernel(
-        TextureObject<float> d1,
-        TextureObject<float> d2,
-        TextureObject<uchar4> c1,
-        TextureObject<uchar4> c2,
-        TextureObject<float> dout,
-		TextureObject<float> conf,
-		TextureObject<int> mask,
-        float4x4 pose1,
-        float4x4 pose1_inv,
-        float4x4 pose2,  // Inverse
-        Camera cam1,
-        Camera cam2, ftl::cuda::ILWParams params) {
-
-    // Each warp picks point in p1
-    //const int tid = (threadIdx.x + threadIdx.y * blockDim.x);
-	const int x = (blockIdx.x*blockDim.x + threadIdx.x); // / WARP_SIZE;
-    const int y = blockIdx.y*blockDim.y + threadIdx.y;
-    
-    //const float3 world1 = make_float3(p1.tex2D(x, y));
-    const float depth1 = d1.tex2D(x,y); //(pose1_inv * world1).z;  // Initial starting depth
-	if (depth1 < cam1.minDepth || depth1 > cam1.maxDepth) return;
-
-	// TODO: Temporary hack to ensure depth1 is present
-	//const float4 temp = vout.tex2D(x,y);
-	//vout(x,y) =  make_float4(depth1, 0.0f, temp.z, temp.w);
-	
-	const float3 world1 = pose1 * cam1.screenToCam(x,y,depth1);
-
-    const auto colour1 = c1.tex2D((float)x+0.5f, (float)y+0.5f);
-
-	float bestdepth = 0.0f;
-	float bestweight = 0.0f;
-	float bestcolour = 0.0f;
-	float bestdweight = 0.0f;
-	float totalcolour = 0.0f;
-	int count = 0;
-	float contrib = 0.0f;
-    
-	const float step_interval = params.range / (COR_STEPS / 2);
-	
-	const float3 rayStep_world = pose1.getFloat3x3() * cam1.screenToCam(x,y,step_interval);
-	const float3 rayStart_2 = pose2 * world1;
-	const float3 rayStep_2 = pose2.getFloat3x3() * rayStep_world;
-
-    // Project to p2 using cam2
-    // Each thread takes a possible correspondence and calculates a weighting
-    //const int lane = tid % WARP_SIZE;
-	for (int i=0; i<COR_STEPS; ++i) {
-		const int j = i - (COR_STEPS/2);
-		const float depth_adjust = (float)j * step_interval + depth1;
-
-        // Calculate adjusted depth 3D point in camera 2 space
-        const float3 worldPos = world1 + j * rayStep_world; //(pose1 * cam1.screenToCam(x, y, depth_adjust));
-        const float3 camPos = rayStart_2 + j * rayStep_2; //pose2 * worldPos;
-        const float2 screen = cam2.camToScreen<float2>(camPos);
-
-        if (screen.x >= cam2.width || screen.y >= cam2.height) continue;
-
-		// Generate a depth correspondence value
-		const float depth2 = d2.tex2D(int(screen.x+0.5f), int(screen.y+0.5f));
-		const float dweight = ftl::cuda::weighting(fabs(depth2 - camPos.z), params.spatial_smooth);
-		//const float dweight = ftl::cuda::weighting(fabs(depth_adjust - depth1), 2.0f*params.range);
-		
-		// Generate a colour correspondence value
-		const auto colour2 = c2.tex2D(screen.x, screen.y);
-		const float cweight = ftl::cuda::colourWeighting(colour1, colour2, params.colour_smooth);
-
-		const float weight = weightFunction<FUNCTION>(params, dweight, cweight);
-
-		++count;
-		contrib += weight;
-		bestcolour = max(cweight, bestcolour);
-		bestdweight = max(dweight, bestdweight);
-		totalcolour += cweight;
-		if (weight > bestweight) {
-			bestweight = weight;
-			bestdepth = depth_adjust;
-		}
-    }
-
-	const float avgcolour = totalcolour/(float)count;
-	const float confidence = bestcolour / totalcolour; //bestcolour - avgcolour;
-	
-	Mask m(mask.tex2D(x,y));
-
-    //if (bestweight > 0.0f) {
-        float old = conf.tex2D(x,y);
-
-        if (bestweight * confidence > old) {
-			dout(x,y) = bestdepth;
-			conf(x,y) = bestweight * confidence;
-		}
-	//}
-	
-	// If a good enough match is found, mark dodgy depth as solid
-	if ((m.isFilled() || m.isDiscontinuity()) && (bestweight > params.match_threshold)) mask(x,y) = 0;
-}
-
-void ftl::cuda::correspondence(
-        TextureObject<float> &d1,
-        TextureObject<float> &d2,
-        TextureObject<uchar4> &c1,
-        TextureObject<uchar4> &c2,
-        TextureObject<float> &dout,
-		TextureObject<float> &conf,
-		TextureObject<int> &mask,
-        float4x4 &pose1,
-        float4x4 &pose1_inv,
-        float4x4 &pose2,
-        const Camera &cam1,
-        const Camera &cam2, const ILWParams &params, int func,
-        cudaStream_t stream) {
-
-	const dim3 gridSize((d1.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (d1.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
-	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
-
-    //printf("COR SIZE %d,%d\n", p1.width(), p1.height());
-
-	switch (func) {
-    case 0: correspondence_energy_vector_kernel<16,0><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, dout, conf, mask, pose1, pose1_inv, pose2, cam1, cam2, params); break;
-	case 1: correspondence_energy_vector_kernel<16,1><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, dout, conf, mask, pose1, pose1_inv, pose2, cam1, cam2, params); break;
-	case 2: correspondence_energy_vector_kernel<16,2><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, dout, conf, mask, pose1, pose1_inv, pose2, cam1, cam2, params); break;
-	case 3: correspondence_energy_vector_kernel<16,3><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, dout, conf, mask, pose1, pose1_inv, pose2, cam1, cam2, params); break;
-	case 4: correspondence_energy_vector_kernel<16,4><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, dout, conf, mask, pose1, pose1_inv, pose2, cam1, cam2, params); break;
-	}
-
-    cudaSafeCall( cudaGetLastError() );
-}
-
-//==============================================================================
-
-__global__ void mask_filter_kernel(
-		TextureObject<float> depth,
-		TextureObject<int> mask) {
-	
-	const int x = (blockIdx.x*blockDim.x + threadIdx.x);
-	const int y = blockIdx.y*blockDim.y + threadIdx.y;
-
-	if (x >= 0 && x < depth.width() && y >=0 && y < depth.height()) {
-		Mask m(mask.tex2D(x,y));
-		if (m.isFilled()) {
-			depth(x,y) = MINF;
-		}
-	}
-}
-
-
-void ftl::cuda::mask_filter(
-		TextureObject<float> &depth,
-		TextureObject<int> &mask,
-		cudaStream_t stream) {
-	const dim3 gridSize((depth.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
-	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
-
-	mask_filter_kernel<<<gridSize, blockSize, 0, stream>>>(
-		depth, mask
-	);
-	cudaSafeCall( cudaGetLastError() );
-}
diff --git a/applications/reconstruct/src/ilw/discontinuity.cu b/applications/reconstruct/src/ilw/discontinuity.cu
deleted file mode 100644
index fcadde03efa6078c8041a244b717c99fbb24581f..0000000000000000000000000000000000000000
--- a/applications/reconstruct/src/ilw/discontinuity.cu
+++ /dev/null
@@ -1,56 +0,0 @@
-#include "ilw_cuda.hpp"
-
-#define T_PER_BLOCK 8
-
-using ftl::cuda::Mask;
-
-template <int RADIUS>
-__global__ void discontinuity_kernel(ftl::cuda::TextureObject<int> mask_out, ftl::cuda::TextureObject<float> depth,
-										const cv::Size size, const double minDepth, const double maxDepth) {
-	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
-	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
-
-	if (x < size.width && y < size.height) {
-		Mask mask(0);
-
-		const float d = depth.tex2D((int)x, (int)y);
-
-		// Calculate depth between 0.0 and 1.0
-		//float p = (d - params.minDepth) / (params.maxDepth - params.minDepth);
-
-		if (d >= minDepth && d <= maxDepth) {
-			/* Orts-Escolano S. et al. 2016. Holoportation: Virtual 3D teleportation in real-time. */
-			// Is there a discontinuity nearby?
-			for (int u=-RADIUS; u<=RADIUS; ++u) {
-				for (int v=-RADIUS; v<=RADIUS; ++v) {
-					// If yes, the flag using w = -1
-					if (fabs(depth.tex2D((int)x+u, (int)y+v) - d) > 0.1f) mask.isDiscontinuity(true);
-				}
-			}
-		}
-		
-		mask_out(x,y) = (int)mask;
-	}
-}
-
-void ftl::cuda::discontinuity(ftl::cuda::TextureObject<int> &mask_out, ftl::cuda::TextureObject<float> &depth,
-								const cv::Size size, const double minDepth, const double maxDepth,
-								uint discon, cudaStream_t stream) {
-			
-	const dim3 gridSize((size.width + T_PER_BLOCK - 1)/T_PER_BLOCK, (size.height + T_PER_BLOCK - 1)/T_PER_BLOCK);
-	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
-
-	switch (discon) {
-	case 5 :	discontinuity_kernel<5><<<gridSize, blockSize, 0, stream>>>(mask_out, depth, size, minDepth, maxDepth); break;
-	case 4 :	discontinuity_kernel<4><<<gridSize, blockSize, 0, stream>>>(mask_out, depth, size, minDepth, maxDepth); break;
-	case 3 :	discontinuity_kernel<3><<<gridSize, blockSize, 0, stream>>>(mask_out, depth, size, minDepth, maxDepth); break;
-	case 2 :	discontinuity_kernel<2><<<gridSize, blockSize, 0, stream>>>(mask_out, depth, size, minDepth, maxDepth); break;
-	case 1 :	discontinuity_kernel<1><<<gridSize, blockSize, 0, stream>>>(mask_out, depth, size, minDepth, maxDepth); break;
-	default:	break;
-	}
-	cudaSafeCall( cudaGetLastError() );
-
-#ifdef _DEBUG
-	cudaSafeCall(cudaDeviceSynchronize());
-#endif
-}
diff --git a/applications/reconstruct/src/ilw/fill.cu b/applications/reconstruct/src/ilw/fill.cu
deleted file mode 100644
index 4109c1034d045ecdede9e28582cd19df670f5a63..0000000000000000000000000000000000000000
--- a/applications/reconstruct/src/ilw/fill.cu
+++ /dev/null
@@ -1,71 +0,0 @@
-#include "ilw_cuda.hpp"
-#include <ftl/cuda/weighting.hpp>
-
-using ftl::cuda::Mask;
-
-#define T_PER_BLOCK 8
-
-template <int RADIUS>
-__global__ void preprocess_kernel(
-    	ftl::cuda::TextureObject<float> depth_in,
-		ftl::cuda::TextureObject<float> depth_out,
-		ftl::cuda::TextureObject<uchar4> colour,
-		ftl::cuda::TextureObject<int> mask,
-		ftl::rgbd::Camera camera,
-		ftl::cuda::ILWParams params) {
-
-    const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
-    const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
-
-    float d = depth_in.tex2D((int)x,(int)y);
-    Mask main_mask(mask.tex2D((int)x,(int)y));
-	uchar4 c = colour.tex2D((int)x,(int)y);
-
-	// Calculate discontinuity mask
-
-	// Fill missing depths
-	if (d < camera.minDepth || d > camera.maxDepth) {
-		float depth_accum = 0.0f;
-		float contrib = 0.0f;
-
-        int v=0;
-		//for (int v=-RADIUS; v<=RADIUS; ++v) {
-			for (int u=-RADIUS; u<=RADIUS; ++u) {
-				uchar4 c2 = colour.tex2D((int)x+u,(int)y+v);
-                float d2 = depth_in.tex2D((int)x+u,(int)y+v);
-                Mask m(mask.tex2D((int)x+u,(int)y+v));
-
-				if (!m.isDiscontinuity() && d2 >= camera.minDepth && d2 <= camera.maxDepth) {
-					float w = ftl::cuda::colourWeighting(c, c2, params.fill_match);
-					depth_accum += d2*w;
-					contrib += w;
-				}
-			}
-		//}
-
-		if (contrib > params.fill_threshold) {
-            d = depth_accum / contrib;
-            main_mask.isFilled(true);
-        }
-	}
-
-    mask(x,y) = (int)main_mask;
-	depth_out(x,y) = d;
-}
-
-void ftl::cuda::preprocess_depth(
-		ftl::cuda::TextureObject<float> &depth_in,
-		ftl::cuda::TextureObject<float> &depth_out,
-		ftl::cuda::TextureObject<uchar4> &colour,
-		ftl::cuda::TextureObject<int> &mask,
-		const ftl::rgbd::Camera &camera,
-		const ftl::cuda::ILWParams &params,
-		cudaStream_t stream) {
-
-	const dim3 gridSize((depth_in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
-	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
-
-	preprocess_kernel<10><<<gridSize, blockSize, 0, stream>>>(depth_in, depth_out, colour, mask, camera, params);
-
-	cudaSafeCall( cudaGetLastError() );
-}
diff --git a/applications/reconstruct/src/ilw/ilw.cpp b/applications/reconstruct/src/ilw/ilw.cpp
deleted file mode 100644
index a686f35c337573a04415b0f13a598ec8a3538057..0000000000000000000000000000000000000000
--- a/applications/reconstruct/src/ilw/ilw.cpp
+++ /dev/null
@@ -1,363 +0,0 @@
-#include "ilw.hpp"
-#include <ftl/utility/matrix_conversion.hpp>
-#include <ftl/rgbd/source.hpp>
-#include <ftl/cuda/points.hpp>
-#include <loguru.hpp>
-
-#include "ilw_cuda.hpp"
-
-using ftl::ILW;
-using ftl::detail::ILWData;
-using ftl::codecs::Channel;
-using ftl::codecs::Channels;
-using ftl::rgbd::Format;
-using cv::cuda::GpuMat;
-
-// TODO: Put in common place
-static Eigen::Affine3d create_rotation_matrix(float ax, float ay, float az) {
-  Eigen::Affine3d rx =
-      Eigen::Affine3d(Eigen::AngleAxisd(ax, Eigen::Vector3d(1, 0, 0)));
-  Eigen::Affine3d ry =
-      Eigen::Affine3d(Eigen::AngleAxisd(ay, Eigen::Vector3d(0, 1, 0)));
-  Eigen::Affine3d rz =
-      Eigen::Affine3d(Eigen::AngleAxisd(az, Eigen::Vector3d(0, 0, 1)));
-  return rz * rx * ry;
-}
-
-ILW::ILW(nlohmann::json &config) : ftl::Configurable(config) {
-    enabled_ = value("ilw_align", false);
-    iterations_ = value("iterations", 4);
-    motion_rate_ = value("motion_rate", 0.8f);
-    motion_window_ = value("motion_window", 3);
-    use_lab_ = value("use_Lab", false);
-    params_.colour_smooth = value("colour_smooth", 50.0f);
-    params_.spatial_smooth = value("spatial_smooth", 0.04f);
-    params_.cost_ratio = value("cost_ratio", 0.2f);
-	params_.cost_threshold = value("cost_threshold", 1.0f);
-	discon_mask_ = value("discontinuity_mask",2);
-	fill_depth_ = value("fill_depth", false);
-
-    on("fill_depth", [this](const ftl::config::Event &e) {
-        fill_depth_ = value("fill_depth", false);
-    });
-
-	on("ilw_align", [this](const ftl::config::Event &e) {
-        enabled_ = value("ilw_align", false);
-    });
-
-    on("iterations", [this](const ftl::config::Event &e) {
-        iterations_ = value("iterations", 4);
-    });
-
-    on("motion_rate", [this](const ftl::config::Event &e) {
-        motion_rate_ = value("motion_rate", 0.4f);
-    });
-
-    on("motion_window", [this](const ftl::config::Event &e) {
-        motion_window_ = value("motion_window", 3);
-    });
-
-	on("discontinuity_mask", [this](const ftl::config::Event &e) {
-        discon_mask_ = value("discontinuity_mask", 2);
-    });
-
-    on("use_Lab", [this](const ftl::config::Event &e) {
-        use_lab_ = value("use_Lab", false);
-    });
-
-    on("colour_smooth", [this](const ftl::config::Event &e) {
-        params_.colour_smooth = value("colour_smooth", 50.0f);
-    });
-
-    on("spatial_smooth", [this](const ftl::config::Event &e) {
-        params_.spatial_smooth = value("spatial_smooth", 0.04f);
-    });
-
-    on("cost_ratio", [this](const ftl::config::Event &e) {
-        params_.cost_ratio = value("cost_ratio", 0.2f);
-    });
-
-	on("cost_threshold", [this](const ftl::config::Event &e) {
-        params_.cost_threshold = value("cost_threshold", 1.0f);
-    });
-
-    params_.flags = 0;
-    if (value("ignore_bad", false)) params_.flags |= ftl::cuda::kILWFlag_IgnoreBad;
-    if (value("ignore_bad_colour", false)) params_.flags |= ftl::cuda::kILWFlag_SkipBadColour;
-    if (value("restrict_z", true)) params_.flags |= ftl::cuda::kILWFlag_RestrictZ;
-    if (value("colour_confidence_only", false)) params_.flags |= ftl::cuda::kILWFlag_ColourConfidenceOnly;
-
-    on("ignore_bad", [this](const ftl::config::Event &e) {
-        if (value("ignore_bad", false)) params_.flags |= ftl::cuda::kILWFlag_IgnoreBad;
-        else params_.flags &= ~ftl::cuda::kILWFlag_IgnoreBad;
-    });
-
-    on("ignore_bad_colour", [this](const ftl::config::Event &e) {
-        if (value("ignore_bad_colour", false)) params_.flags |= ftl::cuda::kILWFlag_SkipBadColour;
-        else params_.flags &= ~ftl::cuda::kILWFlag_SkipBadColour;
-    });
-
-    on("restrict_z", [this](const ftl::config::Event &e) {
-        if (value("restrict_z", false)) params_.flags |= ftl::cuda::kILWFlag_RestrictZ;
-        else params_.flags &= ~ftl::cuda::kILWFlag_RestrictZ;
-    });
-
-    on("colour_confidence_only", [this](const ftl::config::Event &e) {
-        if (value("colour_confidence_only", false)) params_.flags |= ftl::cuda::kILWFlag_ColourConfidenceOnly;
-        else params_.flags &= ~ftl::cuda::kILWFlag_ColourConfidenceOnly;
-    });
-
-	if (config["clipping"].is_object()) {
-		auto &c = config["clipping"];
-		float rx = c.value("pitch", 0.0f);
-		float ry = c.value("yaw", 0.0f);
-		float rz = c.value("roll", 0.0f);
-		float x = c.value("x", 0.0f);
-		float y = c.value("y", 0.0f);
-		float z = c.value("z", 0.0f);
-		float width = c.value("width", 1.0f);
-		float height = c.value("height", 1.0f);
-		float depth = c.value("depth", 1.0f);
-
-		Eigen::Affine3f r = create_rotation_matrix(rx, ry, rz).cast<float>();
-		Eigen::Translation3f trans(Eigen::Vector3f(x,y,z));
-		Eigen::Affine3f t(trans);
-
-		clip_.origin = MatrixConversion::toCUDA(r.matrix() * t.matrix());
-		clip_.size = make_float3(width, height, depth);
-		clipping_ = value("clipping_enabled", true);
-	} else {
-		clipping_ = false;
-	}
-
-	on("clipping_enabled", [this](const ftl::config::Event &e) {
-		clipping_ = value("clipping_enabled", true);
-	});
-
-	cudaSafeCall(cudaStreamCreate(&stream_));
-}
-
-ILW::~ILW() {
-
-}
-
-bool ILW::process(ftl::rgbd::FrameSet &fs) {
-    if (!enabled_) return false;
-
-	//fs.upload(Channel::Colour + Channel::Depth, stream_);
-    _phase0(fs, stream_);
-
-	params_.range = value("search_range", 0.05f);
-    params_.fill_match = value("fill_match", 50.0f);
-    params_.fill_threshold = value("fill_threshold", 0.0f);
-	params_.match_threshold = value("match_threshold", 0.3f);
-
-    for (int i=0; i<iterations_; ++i) {
-        _phase1(fs, value("cost_function",3), stream_);
-        //for (int j=0; j<3; ++j) {
-            _phase2(fs, motion_rate_, stream_);
-        //}
-
-		params_.range *= value("search_reduce", 0.9f);
-		// TODO: Break if no time left
-
-		//cudaSafeCall(cudaStreamSynchronize(stream_));
-    }
-
-	for (size_t i=0; i<fs.frames.size(); ++i) {
-		auto &f = fs.frames[i];
-		auto *s = fs.sources[i];
-			
-        auto &t = f.createTexture<float4>(Channel::Points, Format<float4>(f.get<GpuMat>(Channel::Colour).size()));
-        auto pose = MatrixConversion::toCUDA(s->getPose().cast<float>()); //.inverse());
-        ftl::cuda::point_cloud(t, f.createTexture<float>(Channel::Depth), s->parameters(), pose, discon_mask_, stream_);
-    }
-
-	cudaSafeCall(cudaStreamSynchronize(stream_));
-    return true;
-}
-
-bool ILW::_phase0(ftl::rgbd::FrameSet &fs, cudaStream_t stream) {
-    // Make points channel...
-    for (size_t i=0; i<fs.frames.size(); ++i) {
-		auto &f = fs.frames[i];
-		auto *s = fs.sources[i];
-
-		if (f.empty(Channel::Depth + Channel::Colour)) {
-			LOG(ERROR) << "Missing required channel";
-			continue;
-		}
-			
-        //auto &t = f.createTexture<float4>(Channel::Points, Format<float4>(f.get<GpuMat>(Channel::Colour).size()));
-        auto pose = MatrixConversion::toCUDA(s->getPose().cast<float>()); //.inverse());
-        //ftl::cuda::point_cloud(t, f.createTexture<float>(Channel::Depth), s->parameters(), pose, discon_mask_, stream);
-
-        // TODO: Create energy vector texture and clear it
-        // Create energy and clear it
-
-        // Convert colour from BGR to BGRA if needed
-		if (f.get<GpuMat>(Channel::Colour).type() == CV_8UC3) {
-            cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream);
-			// Convert to 4 channel colour
-			auto &col = f.get<GpuMat>(Channel::Colour);
-			GpuMat tmp(col.size(), CV_8UC4);
-			cv::cuda::swap(col, tmp);
-            if (use_lab_) cv::cuda::cvtColor(tmp,tmp, cv::COLOR_BGR2Lab, 0, cvstream);
-			cv::cuda::cvtColor(tmp,col, cv::COLOR_BGR2BGRA, 0, cvstream);
-		}
-
-		// Clip first?
-		if (clipping_) {
-			auto clip = clip_;
-			clip.origin = clip.origin * pose;
-			ftl::cuda::clipping(f.createTexture<float>(Channel::Depth), s->parameters(), clip, stream);
-		}
-
-        f.createTexture<float>(Channel::Depth2, Format<float>(f.get<GpuMat>(Channel::Colour).size()));
-        f.createTexture<float>(Channel::Confidence, Format<float>(f.get<GpuMat>(Channel::Colour).size()));
-		//f.createTexture<int>(Channel::Mask, Format<int>(f.get<GpuMat>(Channel::Colour).size()));
-        f.createTexture<uchar4>(Channel::Colour);
-		f.createTexture<float>(Channel::Depth);
-
-		//f.get<GpuMat>(Channel::Mask).setTo(cv::Scalar(0));
-    }
-
-	//cudaSafeCall(cudaStreamSynchronize(stream_));
-
-    return true;
-}
-
-bool ILW::_phase1(ftl::rgbd::FrameSet &fs, int win, cudaStream_t stream) {
-    // Run correspondence kernel to create an energy vector
-    cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream);
-
-    // Find discontinuity mask
-    /*for (size_t i=0; i<fs.frames.size(); ++i) {
-        auto &f = fs.frames[i];
-        auto s = fs.sources[i];
-
-        ftl::cuda::discontinuity(
-            f.getTexture<int>(Channel::Mask),
-            f.getTexture<float>(Channel::Depth),
-            s->parameters(),
-            discon_mask_,
-            stream
-        );
-    }*/
-
-	// First do any preprocessing
-	if (fill_depth_) {
-		for (size_t i=0; i<fs.frames.size(); ++i) {
-			auto &f = fs.frames[i];
-            auto s = fs.sources[i];
-
-			ftl::cuda::preprocess_depth(
-				f.getTexture<float>(Channel::Depth),
-				f.getTexture<float>(Channel::Depth2),
-				f.getTexture<uchar4>(Channel::Colour),
-				f.getTexture<int>(Channel::Mask),
-				s->parameters(),
-				params_,
-				stream
-			);
-
-			//cv::cuda::swap(f.get<GpuMat>(Channel::Depth),f.get<GpuMat>(Channel::Depth2)); 
-			f.swapChannels(Channel::Depth, Channel::Depth2);
-		}
-	}
-
-	//cudaSafeCall(cudaStreamSynchronize(stream_));
-
-	// For each camera combination
-    for (size_t i=0; i<fs.frames.size(); ++i) {
-        auto &f1 = fs.frames[i];
-        f1.get<GpuMat>(Channel::Depth2).setTo(cv::Scalar(0.0f), cvstream);
-		f1.get<GpuMat>(Channel::Confidence).setTo(cv::Scalar(0.0f), cvstream);
-
-		Eigen::Vector4d d1(0.0, 0.0, 1.0, 0.0);
-		d1 = fs.sources[i]->getPose() * d1;
-
-        for (size_t j=0; j<fs.frames.size(); ++j) {
-            if (i == j) continue;
-
-            //LOG(INFO) << "Running phase1";
-
-            auto &f2 = fs.frames[j];
-            auto s1 = fs.sources[i];
-            auto s2 = fs.sources[j];
-
-			// Are cameras facing similar enough direction?
-			Eigen::Vector4d d2(0.0, 0.0, 1.0, 0.0);
-			d2 = fs.sources[j]->getPose() * d2;
-			// No, so skip this combination
-			if (d1.dot(d2) <= 0.0) continue;
-
-            auto pose1 = MatrixConversion::toCUDA(s1->getPose().cast<float>());
-            auto pose1_inv = MatrixConversion::toCUDA(s1->getPose().cast<float>().inverse());
-            auto pose2 = MatrixConversion::toCUDA(s2->getPose().cast<float>().inverse());
-
-            try {
-            //Calculate energy vector to best correspondence
-            ftl::cuda::correspondence(
-                f1.getTexture<float>(Channel::Depth),
-                f2.getTexture<float>(Channel::Depth),
-                f1.getTexture<uchar4>(Channel::Colour),
-                f2.getTexture<uchar4>(Channel::Colour),
-                // TODO: Add normals and other things...
-                f1.getTexture<float>(Channel::Depth2),
-                f1.getTexture<float>(Channel::Confidence),
-				f1.getTexture<int>(Channel::Mask),
-                pose1,
-                pose1_inv,
-                pose2,
-                s1->parameters(),
-                s2->parameters(),
-                params_,
-                win,
-                stream
-            );
-            } catch (ftl::exception &e) {
-                LOG(ERROR) << "Exception in correspondence: " << e.what();
-            }
-
-            //LOG(INFO) << "Correspondences done... " << i;
-        }
-    }
-
-	//cudaSafeCall(cudaStreamSynchronize(stream_));
-
-    return true;
-}
-
-bool ILW::_phase2(ftl::rgbd::FrameSet &fs, float rate, cudaStream_t stream) {
-    // Run energies and motion kernel
-
-	// Smooth vectors across a window and iteratively
-	// strongly disagreeing vectors should cancel out
-	// A weak vector is overriden by a stronger one.
-
-    for (size_t i=0; i<fs.frames.size(); ++i) {
-        auto &f = fs.frames[i];
-
-        auto pose = MatrixConversion::toCUDA(fs.sources[i]->getPose().cast<float>()); //.inverse());
-
-        ftl::cuda::move_points(
-             f.getTexture<float>(Channel::Depth),
-             f.getTexture<float>(Channel::Depth2),
-			 f.getTexture<float>(Channel::Confidence),
-             fs.sources[i]->parameters(),
-             pose,
-			 params_,
-             rate,
-             motion_window_,
-             stream
-        );
-
-        if (f.hasChannel(Channel::Mask)) {
-			ftl::cuda::mask_filter(f.getTexture<float>(Channel::Depth),
-				f.getTexture<int>(Channel::Mask), stream_);
-		}
-    }
-
-    return true;
-}
diff --git a/applications/reconstruct/src/ilw/ilw.cu b/applications/reconstruct/src/ilw/ilw.cu
deleted file mode 100644
index 710f7521d4a508dd8fc968f4648a5402f8045fd7..0000000000000000000000000000000000000000
--- a/applications/reconstruct/src/ilw/ilw.cu
+++ /dev/null
@@ -1,112 +0,0 @@
-#include "ilw_cuda.hpp"
-#include <ftl/cuda/weighting.hpp>
-
-using ftl::cuda::TextureObject;
-using ftl::rgbd::Camera;
-
-#define WARP_SIZE 32
-#define T_PER_BLOCK 8
-#define FULL_MASK 0xffffffff
-
-__device__ inline float warpMin(float e) {
-	for (int i = WARP_SIZE/2; i > 0; i /= 2) {
-		const float other = __shfl_xor_sync(FULL_MASK, e, i, WARP_SIZE);
-		e = min(e, other);
-	}
-	return e;
-}
-
-__device__ inline float warpSum(float e) {
-	for (int i = WARP_SIZE/2; i > 0; i /= 2) {
-		const float other = __shfl_xor_sync(FULL_MASK, e, i, WARP_SIZE);
-		e += other;
-	}
-	return e;
-}
-
-//==============================================================================
-
-
-
-//==============================================================================
-
-
-
-//==============================================================================
-
-//#define MOTION_RADIUS 9
-
-template <int MOTION_RADIUS>
-__global__ void move_points_kernel(
-    ftl::cuda::TextureObject<float> d_old,
-	ftl::cuda::TextureObject<float> d_new,
-	ftl::cuda::TextureObject<float> conf,
-    ftl::rgbd::Camera camera,
-	float4x4 pose,
-	ftl::cuda::ILWParams params,
-    float rate) {
-
-    const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
-    const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
-
-	const float d0_new = d_new.tex2D((int)x,(int)y);
-	const float d0_old = d_old.tex2D((int)x,(int)y);
-	if (d0_new == 0.0f) return;  // No correspondence found
-    
-    if (x < d_old.width() && y < d_old.height()) {
-		//const float4 world = p(x,y);
-		//if (world.x == MINF) return;
-
-		float delta = 0.0f; //make_float4(0.0f, 0.0f, 0.0f, 0.0f); //ev.tex2D((int)x,(int)y);
-		float contrib = 0.0f;
-
-		// Calculate screen space distortion with neighbours
-		for (int v=-MOTION_RADIUS; v<=MOTION_RADIUS; ++v) {
-			for (int u=-MOTION_RADIUS; u<=MOTION_RADIUS; ++u) {
-				const float dn_new = d_new.tex2D((int)x+u,(int)y+v);
-				const float dn_old = d_old.tex2D((int)x+u,(int)y+v);
-				const float confn = conf.tex2D((int)x+u,(int)y+v);
-				//const float3 pn = make_float3(p.tex2D((int)x+u,(int)y+v));
-				//if (pn.x == MINF) continue;
-				if (dn_new == 0.0f) continue;  // Neighbour has no new correspondence
-
-				const float s = ftl::cuda::spatialWeighting(camera.screenToCam(x,y,d0_new), camera.screenToCam(x+u,y+v,dn_new), params.range); // ftl::cuda::weighting(fabs(d0_new - dn_new), params.range);
-				contrib += (confn+0.01f) * s;
-				delta += (confn+0.01f) * s * ((confn == 0.0f) ? dn_old : dn_new);
-			}
-		}
-
-        if (contrib > 0.0f) {
-            //const float3 newworld = pose * camera.screenToCam(x, y, vec0.x + rate * ((delta / contrib) - vec0.x));
-			//p(x,y) = make_float4(newworld, world.w); //world + rate * (vec / contrib);
-			
-			d_old(x,y) = d0_old + rate * ((delta / contrib) - d0_old);
-        }
-    }
-}
-
-
-void ftl::cuda::move_points(
-        ftl::cuda::TextureObject<float> &d_old,
-		ftl::cuda::TextureObject<float> &d_new,
-		ftl::cuda::TextureObject<float> &conf,
-        const ftl::rgbd::Camera &camera,
-		const float4x4 &pose,
-		const ftl::cuda::ILWParams &params,
-        float rate,
-        int radius,
-        cudaStream_t stream) {
-
-    const dim3 gridSize((d_old.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (d_old.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
-    const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
-
-    switch (radius) {
-    case 9 : move_points_kernel<9><<<gridSize, blockSize, 0, stream>>>(d_old,d_new,conf,camera, pose, params, rate); break;
-    case 5 : move_points_kernel<5><<<gridSize, blockSize, 0, stream>>>(d_old,d_new,conf,camera, pose, params, rate); break;
-    case 3 : move_points_kernel<3><<<gridSize, blockSize, 0, stream>>>(d_old,d_new,conf,camera, pose, params, rate); break;
-    case 1 : move_points_kernel<1><<<gridSize, blockSize, 0, stream>>>(d_old,d_new,conf,camera, pose, params, rate); break;
-    case 0 : move_points_kernel<0><<<gridSize, blockSize, 0, stream>>>(d_old,d_new,conf,camera, pose, params, rate); break;
-    }
-
-    cudaSafeCall( cudaGetLastError() );
-}
diff --git a/applications/reconstruct/src/ilw/ilw.hpp b/applications/reconstruct/src/ilw/ilw.hpp
deleted file mode 100644
index 78251832d5647233082c8c59072084c6b1d60559..0000000000000000000000000000000000000000
--- a/applications/reconstruct/src/ilw/ilw.hpp
+++ /dev/null
@@ -1,84 +0,0 @@
-#ifndef _FTL_RECONSTRUCT_ILW_HPP_
-#define _FTL_RECONSTRUCT_ILW_HPP_
-
-#include <ftl/cuda_common.hpp>
-#include <ftl/rgbd/frameset.hpp>
-#include <ftl/configurable.hpp>
-#include <vector>
-
-#include "ilw_cuda.hpp"
-
-#include <ftl/cuda/points.hpp>
-
-namespace ftl {
-
-namespace detail {
-struct ILWData{
-    // x,y,z + confidence
-    ftl::cuda::TextureObject<float4> correspondence;
-
-    ftl::cuda::TextureObject<float4> points;
-
-	// Residual potential energy
-	ftl::cuda::TextureObject<float> residual;
-
-	// Flow magnitude
-	ftl::cuda::TextureObject<float> flow;
-};
-}
-
-/**
- * For a set of sources, perform Iterative Lattice Warping to correct the
- * location of points between the cameras. The algorithm finds possible
- * correspondences and warps the original pixel lattice of points in each
- * camera towards the correspondences, iterating the process as many times as
- * possible. The result is that both local and global adjustment is made to the
- * point clouds to improve micro alignment that may have been incorrect due to
- * either inaccurate camera pose estimation or noise/errors in the depth maps.
- */
-class ILW : public ftl::Configurable {
-    public:
-    explicit ILW(nlohmann::json &config);
-    ~ILW();
-
-    /**
-     * Take a frameset and perform the iterative lattice warping.
-     */
-    bool process(ftl::rgbd::FrameSet &fs);
-
-    inline bool isLabColour() const { return use_lab_; }
-
-    private:
-    /*
-     * Initialise data.
-     */
-    bool _phase0(ftl::rgbd::FrameSet &fs, cudaStream_t stream);
-
-    /*
-     * Find possible correspondences and a confidence value.
-     */
-    bool _phase1(ftl::rgbd::FrameSet &fs, int win, cudaStream_t stream);
-
-    /*
-     * Calculate energies and move the points.
-     */
-    bool _phase2(ftl::rgbd::FrameSet &fs, float rate, cudaStream_t stream);
-
-    std::vector<detail::ILWData> data_;
-    bool enabled_;
-    ftl::cuda::ILWParams params_;
-    int iterations_;
-    float motion_rate_;
-    int motion_window_;
-    bool use_lab_;
-	int discon_mask_;
-	bool fill_depth_;
-	ftl::cuda::ClipSpace clip_;
-	bool clipping_;
-
-	cudaStream_t stream_;
-};
-
-}
-
-#endif  // _FTL_RECONSTRUCT_ILW_HPP_
diff --git a/applications/reconstruct/src/ilw/ilw_cuda.hpp b/applications/reconstruct/src/ilw/ilw_cuda.hpp
deleted file mode 100644
index 94e522347bb64d9b0b091c3f0cd4914b8abd91c2..0000000000000000000000000000000000000000
--- a/applications/reconstruct/src/ilw/ilw_cuda.hpp
+++ /dev/null
@@ -1,85 +0,0 @@
-#ifndef _FTL_ILW_CUDA_HPP_
-#define _FTL_ILW_CUDA_HPP_
-
-#include <ftl/cuda_common.hpp>
-#include <ftl/rgbd/camera.hpp>
-#include <ftl/cuda_matrix_util.hpp>
-#include <ftl/cuda/mask.hpp>
-
-namespace ftl {
-namespace cuda {
-
-struct ILWParams {
-	float spatial_smooth;
-	float colour_smooth;
-	float fill_match;
-	float fill_threshold;
-	float match_threshold;
-	float cost_ratio;
-	float cost_threshold;
-	float range;
-	uint flags;
-};
-
-static const uint kILWFlag_IgnoreBad = 0x0001;
-static const uint kILWFlag_RestrictZ = 0x0002;
-static const uint kILWFlag_SkipBadColour = 0x0004;
-static const uint kILWFlag_ColourConfidenceOnly = 0x0008;
-
-void discontinuity(
-	ftl::cuda::TextureObject<int> &mask_out,
-	ftl::cuda::TextureObject<float> &depth,
-	const cv::Size size,
-	const double minDepth,
-	const double maxDepth,
-	uint discon, cudaStream_t stream
-);
-
-void mask_filter(
-	ftl::cuda::TextureObject<float> &depth,
-	ftl::cuda::TextureObject<int> &mask,
-	cudaStream_t stream);
-
-void preprocess_depth(
-	ftl::cuda::TextureObject<float> &depth_in,
-	ftl::cuda::TextureObject<float> &depth_out,
-	ftl::cuda::TextureObject<uchar4> &colour,
-	ftl::cuda::TextureObject<int> &mask,
-	const ftl::rgbd::Camera &camera,
-	const ILWParams &params,
-	cudaStream_t stream
-);
-
-void correspondence(
-	ftl::cuda::TextureObject<float> &d1,
-	ftl::cuda::TextureObject<float> &d2,
-	ftl::cuda::TextureObject<uchar4> &c1,
-	ftl::cuda::TextureObject<uchar4> &c2,
-	ftl::cuda::TextureObject<float> &dout,
-	ftl::cuda::TextureObject<float> &conf,
-	ftl::cuda::TextureObject<int> &mask,
-	float4x4 &pose1,
-	float4x4 &pose1_inv,
-	float4x4 &pose2,
-	const ftl::rgbd::Camera &cam1,
-	const ftl::rgbd::Camera &cam2,
-	const ILWParams &params, int win,
-	cudaStream_t stream
-);
-
-void move_points(
-	ftl::cuda::TextureObject<float> &d_old,
-	ftl::cuda::TextureObject<float> &d_new,
-	ftl::cuda::TextureObject<float> &conf,
-	const ftl::rgbd::Camera &camera,
-	const float4x4 &pose,
-	const ILWParams &params,
-	float rate,
-	int radius,
-	cudaStream_t stream
-);
-
-}
-}
-
-#endif  // _FTL_ILW_CUDA_HPP_
diff --git a/applications/reconstruct/src/mls/mls.cu b/applications/reconstruct/src/mls/mls.cu
deleted file mode 100644
index 4de8a07db023d21620f605cd3556913aeafe06cb..0000000000000000000000000000000000000000
--- a/applications/reconstruct/src/mls/mls.cu
+++ /dev/null
@@ -1,273 +0,0 @@
-#include <ftl/cuda_common.hpp>
-#include <ftl/cuda_util.hpp>
-#include <ftl/depth_camera.hpp>
-#include "depth_camera_cuda.hpp"
-
-#include "mls_cuda.hpp"
-
-#define T_PER_BLOCK 16
-#define MINF __int_as_float(0xff800000)
-
-using ftl::voxhash::DepthCameraCUDA;
-using ftl::voxhash::HashData;
-using ftl::voxhash::HashParams;
-using ftl::cuda::TextureObject;
-using ftl::render::SplatParams;
-
-extern __constant__ ftl::voxhash::DepthCameraCUDA c_cameras[MAX_CAMERAS];
-extern __constant__ HashParams c_hashParams;
-
-/// ===== MLS Smooth
-
-/*
- * Kim, K., Chalidabhongse, T. H., Harwood, D., & Davis, L. (2005).
- * Real-time foreground-background segmentation using codebook model.
- * Real-Time Imaging. https://doi.org/10.1016/j.rti.2004.12.004
- */
- __device__ float colordiffFloat(const uchar4 &pa, const uchar4 &pb) {
-	const float x_2 = pb.x * pb.x + pb.y * pb.y + pb.z * pb.z;
-	const float v_2 = pa.x * pa.x + pa.y * pa.y + pa.z * pa.z;
-	const float xv_2 = powf(float(pb.x * pa.x + pb.y * pa.y + pb.z * pa.z), 2.0f);
-	const float p_2 = xv_2 / v_2;
-	return sqrt(x_2 - p_2);
-}
-
-__device__ float colordiffFloat2(const uchar4 &pa, const uchar4 &pb) {
-	float3 delta = make_float3((float)pa.x - (float)pb.x, (float)pa.y - (float)pb.y, (float)pa.z - (float)pb.z);
-	return length(delta);
-}
-
-/*
- * Colour weighting as suggested in:
- * C. Kuster et al. Spatio-Temporal Geometry Fusion for Multiple Hybrid Cameras using Moving Least Squares Surfaces. 2014.
- * c = colour distance
- */
- __device__ float colourWeighting(float c) {
-	const float h = c_hashParams.m_colourSmoothing;
-	if (c >= h) return 0.0f;
-	float ch = c / h;
-	ch = 1.0f - ch*ch;
-	return ch*ch*ch*ch;
-}
-
-#define WINDOW_RADIUS 5
-
-__device__ float mlsCamera(int cam, const float3 &mPos, uchar4 c1, float3 &wpos) {
-	const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam];
-
-	const float3 pf = camera.poseInverse * mPos;
-	float3 pos = make_float3(0.0f, 0.0f, 0.0f);
-	const uint2 screenPos = make_uint2(camera.params.cameraToKinectScreenInt(pf));
-	float weights = 0.0f;
-
-	//#pragma unroll
-	for (int v=-WINDOW_RADIUS; v<=WINDOW_RADIUS; ++v) {
-		for (int u=-WINDOW_RADIUS; u<=WINDOW_RADIUS; ++u) {
-			//if (screenPos.x+u < width && screenPos.y+v < height) {	//on screen
-				float depth = tex2D<float>(camera.depth, screenPos.x+u, screenPos.y+v);
-				const float3 camPos = camera.params.kinectDepthToSkeleton(screenPos.x+u, screenPos.y+v, depth);
-				float weight = ftl::cuda::spatialWeighting(length(pf - camPos), c_hashParams.m_spatialSmoothing);
-
-				if (weight > 0.0f) {
-					uchar4 c2 = tex2D<uchar4>(camera.colour, screenPos.x+u, screenPos.y+v);
-					weight *= colourWeighting(colordiffFloat2(c1,c2));
-
-					if (weight > 0.0f) {
-						wpos += weight* (camera.pose * camPos);
-						weights += weight;
-					}
-				}			
-			//}
-		}
-	}
-
-	//wpos += (camera.pose * pos);
-
-	return weights;
-}
-
-__device__ float mlsCameraNoColour(int cam, const float3 &mPos, uchar4 c1, const float4 &norm, float3 &wpos, float h) {
-	const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam];
-
-	const float3 pf = camera.poseInverse * mPos;
-	float3 pos = make_float3(0.0f, 0.0f, 0.0f);
-	const uint2 screenPos = make_uint2(camera.params.cameraToKinectScreenInt(pf));
-	float weights = 0.0f;
-
-	//#pragma unroll
-	for (int v=-WINDOW_RADIUS; v<=WINDOW_RADIUS; ++v) {
-		for (int u=-WINDOW_RADIUS; u<=WINDOW_RADIUS; ++u) {
-			//if (screenPos.x+u < width && screenPos.y+v < height) {	//on creen
-				float depth = tex2D<float>(camera.depth, screenPos.x+u, screenPos.y+v);
-				const float3 camPos = camera.params.kinectDepthToSkeleton(screenPos.x+u, screenPos.y+v, depth);
-
-				// TODO:(Nick) dot product of normals < 0 means the point
-				// should be ignored with a weight of 0 since it is facing the wrong direction
-				// May be good to simply weight using the dot product to give
-				// a stronger weight to those whose normals are closer
-
-				float weight = ftl::cuda::spatialWeighting(length(pf - camPos), h);
-
-				if (weight > 0.0f) {
-					float4 n2 = tex2D<float4>(camera.normal, screenPos.x+u, screenPos.y+v);
-					if (dot(make_float3(norm), make_float3(n2)) > 0.0f) {
-
-						uchar4 c2 = tex2D<uchar4>(camera.colour, screenPos.x+u, screenPos.y+v);
-
-						if (colourWeighting(colordiffFloat2(c1,c2)) > 0.0f) {
-							pos += weight*camPos; // (camera.pose * camPos);
-							weights += weight;
-						}
-					}
-				}			
-			//}
-		}
-	}
-
-	if (weights > 0.0f) wpos += (camera.pose * (pos / weights)) * weights;
-
-	return weights;
-}
-
-
-__global__ void mls_smooth_kernel(ftl::cuda::TextureObject<float4> output, HashParams hashParams, int numcams, int cam) {
-	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
-	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
-
-	const int width = output.width();
-	const int height = output.height();
-
-	const DepthCameraCUDA &mainCamera = c_cameras[cam];
-
-	if (x < width && y < height) {
-
-		const float depth = tex2D<float>(mainCamera.depth, x, y);
-		const uchar4 c1 = tex2D<uchar4>(mainCamera.colour, x, y);
-		const float4 norm = tex2D<float4>(mainCamera.normal, x, y);
-		//if (x == 400 && y == 200) printf("NORMX: %f\n", norm.x);
-
-		float3 wpos = make_float3(0.0f);
-		float3 wnorm = make_float3(0.0f);
-		float weights = 0.0f;
-
-		if (depth >= mainCamera.params.m_sensorDepthWorldMin && depth <= mainCamera.params.m_sensorDepthWorldMax) {
-			float3 mPos = mainCamera.pose * mainCamera.params.kinectDepthToSkeleton(x, y, depth);
-
-			if ((!(hashParams.m_flags & ftl::voxhash::kFlagClipping)) || (mPos.x > hashParams.m_minBounds.x && mPos.x < hashParams.m_maxBounds.x &&
-					mPos.y > hashParams.m_minBounds.y && mPos.y < hashParams.m_maxBounds.y &&
-					mPos.z > hashParams.m_minBounds.z && mPos.z < hashParams.m_maxBounds.z)) {
-
-				if (hashParams.m_flags & ftl::voxhash::kFlagMLS) {
-					for (uint cam2=0; cam2<numcams; ++cam2) {
-						//if (cam2 == cam) weights += mlsCameraNoColour(cam2, mPos, c1, wpos, c_hashParams.m_spatialSmoothing*0.1f); //weights += 0.5*mlsCamera(cam2, mPos, c1, wpos);
-						weights += mlsCameraNoColour(cam2, mPos, c1, norm, wpos, c_hashParams.m_spatialSmoothing); //*((cam == cam2)? 0.1f : 5.0f));
-
-						// Previous approach
-						//if (cam2 == cam) continue;
-						//weights += mlsCameraBest(cam2, mPos, c1, wpos);
-					}
-					wpos /= weights;
-				} else {
-					weights = 1000.0f;
-					wpos = mPos;
-				} 
-
-				//output(x,y) = (weights >= hashParams.m_confidenceThresh) ? make_float4(wpos, 0.0f) : make_float4(MINF,MINF,MINF,MINF);
-
-				if (weights >= hashParams.m_confidenceThresh) output(x,y) = make_float4(wpos, 0.0f);
-
-				//const uint2 screenPos = make_uint2(mainCamera.params.cameraToKinectScreenInt(mainCamera.poseInverse * wpos));
-				//if (screenPos.x < output.width() && screenPos.y < output.height()) {
-				//	output(screenPos.x,screenPos.y) = (weights >= hashParams.m_confidenceThresh) ? make_float4(wpos, 0.0f) : make_float4(MINF,MINF,MINF,MINF);
-				//}
-			}
-		}
-	}
-}
-
-void ftl::cuda::mls_smooth(TextureObject<float4> &output, const HashParams &hashParams, int numcams, int cam, cudaStream_t stream) {
-	const dim3 gridSize((output.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (output.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
-	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
-
-	mls_smooth_kernel<<<gridSize, blockSize, 0, stream>>>(output, hashParams, numcams, cam);
-
-#ifdef _DEBUG
-	cudaSafeCall(cudaDeviceSynchronize());
-#endif
-}
-
-
-// ===== Render Depth using MLS ================================================
-
-#define MAX_UPSAMPLE 5
-#define SAMPLE_BUFFER ((2*MAX_UPSAMPLE+1)*(2*MAX_UPSAMPLE+1))
-#define WARP_SIZE 32
-#define BLOCK_WIDTH 4
-#define MLS_RADIUS 5
-#define MLS_WIDTH (2*MLS_RADIUS+1)
-#define MLS_SAMPLES (MLS_WIDTH*MLS_WIDTH)
-
-__global__ void mls_render_depth_kernel(const TextureObject<int> input, TextureObject<int> output, SplatParams params, int numcams) {
-	/*const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
-	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
-
-	const int width = output.width();
-	const int height = output.height();
-
-	if (x < width && y < height) {
-
-		const float depth = tex2D<float>(mainCamera.depth, x, y);
-		const uchar4 c1 = tex2D<uchar4>(mainCamera.colour, x, y);
-		const float4 norm = tex2D<float4>(mainCamera.normal, x, y);
-		//if (x == 400 && y == 200) printf("NORMX: %f\n", norm.x);
-
-		float3 wpos = make_float3(0.0f);
-		float3 wnorm = make_float3(0.0f);
-		float weights = 0.0f;
-
-		if (depth >= mainCamera.params.m_sensorDepthWorldMin && depth <= mainCamera.params.m_sensorDepthWorldMax) {
-			float3 mPos = mainCamera.pose * mainCamera.params.kinectDepthToSkeleton(x, y, depth);
-
-			if ((!(hashParams.m_flags & ftl::voxhash::kFlagClipping)) || (mPos.x > hashParams.m_minBounds.x && mPos.x < hashParams.m_maxBounds.x &&
-					mPos.y > hashParams.m_minBounds.y && mPos.y < hashParams.m_maxBounds.y &&
-					mPos.z > hashParams.m_minBounds.z && mPos.z < hashParams.m_maxBounds.z)) {
-
-				if (hashParams.m_flags & ftl::voxhash::kFlagMLS) {
-					for (uint cam2=0; cam2<numcams; ++cam2) {
-						//if (cam2 == cam) weights += mlsCameraNoColour(cam2, mPos, c1, wpos, c_hashParams.m_spatialSmoothing*0.1f); //weights += 0.5*mlsCamera(cam2, mPos, c1, wpos);
-						weights += mlsCameraNoColour(cam2, mPos, c1, norm, wpos, c_hashParams.m_spatialSmoothing); //*((cam == cam2)? 0.1f : 5.0f));
-
-						// Previous approach
-						//if (cam2 == cam) continue;
-						//weights += mlsCameraBest(cam2, mPos, c1, wpos);
-					}
-					wpos /= weights;
-				} else {
-					weights = 1000.0f;
-					wpos = mPos;
-				} 
-
-				//output(x,y) = (weights >= hashParams.m_confidenceThresh) ? make_float4(wpos, 0.0f) : make_float4(MINF,MINF,MINF,MINF);
-
-				//if (weights >= hashParams.m_confidenceThresh) output(x,y) = make_float4(wpos, 0.0f);
-
-				const uint2 screenPos = make_uint2(mainCamera.params.cameraToKinectScreenInt(mainCamera.poseInverse * wpos));
-				if (screenPos.x < output.width() && screenPos.y < output.height()) {
-					output(screenPos.x,screenPos.y) = (weights >= hashParams.m_confidenceThresh) ? make_float4(wpos, 0.0f) : make_float4(MINF,MINF,MINF,MINF);
-				}
-			}
-		}
-	}*/
-}
-
-
-void ftl::cuda::mls_render_depth(const TextureObject<int> &input, TextureObject<int> &output, const SplatParams &params, int numcams, cudaStream_t stream) {
-	const dim3 gridSize((output.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (output.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
-	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
-
-	mls_render_depth_kernel<<<gridSize, blockSize, 0, stream>>>(input, output, params, numcams);
-
-#ifdef _DEBUG
-	cudaSafeCall(cudaDeviceSynchronize());
-#endif
-}
diff --git a/components/control/cpp/src/master.cpp b/components/control/cpp/src/master.cpp
index af61c64c1ccf1f20522473c0feaee79cef2f2f31..44361b2d557c31e72f9b824911aaefbde1d4cc80 100644
--- a/components/control/cpp/src/master.cpp
+++ b/components/control/cpp/src/master.cpp
@@ -99,6 +99,7 @@ Master::Master(Configurable *root, Universe *net)
 			delete nc;
 			delete configuration;
 		}
+		peerConfigurables_[peer].clear();
 	});
 }
 
diff --git a/components/operators/src/detectandtrack.cpp b/components/operators/src/detectandtrack.cpp
index 0f2e0dd069a9277648a7ee87a537000ae66cb123..195ae5ada002aae9995519f421aae987e25c22b6 100644
--- a/components/operators/src/detectandtrack.cpp
+++ b/components/operators/src/detectandtrack.cpp
@@ -32,7 +32,7 @@ bool DetectAndTrack::init() {
 		debug_ = config()->value<bool>("debug", false);
 	});
 
-	detect_n_frames_ = config()->value<int>("n_frames", 10);
+	detect_n_frames_ = config()->value<int>("n_frames", 20);
 	detect_n_frames_ = detect_n_frames_ < 0.0 ? 0.0 : detect_n_frames_;
 
 	max_distance_ = config()->value<double>("max_distance", 100.0);
@@ -239,7 +239,7 @@ bool DetectAndTrack::apply(Frame &in, Frame &out, cudaStream_t stream) {
 
 		track(im);
 
-		if (!detecting_) {  // && tracked_.size() < max_tracked_) {
+		if ((n_frame_++ % detect_n_frames_ == 0) && !detecting_) {  // && tracked_.size() < max_tracked_) {
 			//if (detect_job_.valid()) detect_job_.get();  // To throw any exceptions
 			detecting_ = true;
 
diff --git a/components/renderers/cpp/CMakeLists.txt b/components/renderers/cpp/CMakeLists.txt
index 66a930c2c515c818010ddae5557ad976a9ca2d70..71d9ad3d5df8065d11c372e19bda462cb4036f1f 100644
--- a/components/renderers/cpp/CMakeLists.txt
+++ b/components/renderers/cpp/CMakeLists.txt
@@ -1,9 +1,9 @@
 add_library(ftlrender
-	src/splatter.cu
-	src/points.cu
 	src/normals.cu
 	src/mask.cu
 	src/screen.cu
+	src/clipping.cu
+	src/dibr.cu
 	src/triangle_render.cu
 	src/reprojection.cu
 	src/CUDARender.cpp
diff --git a/components/renderers/cpp/include/ftl/render/CUDARender.hpp b/components/renderers/cpp/include/ftl/render/CUDARender.hpp
index 616f78a199a4fa0e36ced654e7c18b8a3452c450..3414f43ca244f50d2d2f3306f1ca4206dee382c3 100644
--- a/components/renderers/cpp/include/ftl/render/CUDARender.hpp
+++ b/components/renderers/cpp/include/ftl/render/CUDARender.hpp
@@ -3,7 +3,7 @@
 
 #include <ftl/render/renderer.hpp>
 #include <ftl/rgbd/frameset.hpp>
-#include <ftl/render/splat_params.hpp>
+#include <ftl/render/render_params.hpp>
 #include <ftl/cuda/points.hpp>
 #include <ftl/codecs/channels.hpp>
 //#include <ftl/filters/filter.hpp>
@@ -26,6 +26,11 @@ class CUDARender : public ftl::render::Renderer {
 	bool submit(ftl::rgbd::FrameSet *in, ftl::codecs::Channels<0>, const Eigen::Matrix4d &t) override;
 	//void setOutputDevice(int);
 
+	void setViewPort(ftl::render::ViewPortMode mode, const ftl::render::ViewPort &vp) {
+		params_.viewport = vp;
+		params_.viewPortMode = mode;
+	}
+
 	protected:
 	void _renderChannel(ftl::rgbd::Frame &out, ftl::codecs::Channel channel_in, ftl::codecs::Channel channel_out, const Eigen::Matrix4d &t, cudaStream_t stream);
 
@@ -44,10 +49,12 @@ class CUDARender : public ftl::render::Renderer {
 	float3 light_dir_;
 	uchar4 light_diffuse_;
 	uchar4 light_ambient_;
-	ftl::render::SplatParams params_;
+	ftl::render::Parameters params_;
 	cudaStream_t stream_;
 	float3 light_pos_;
 	Eigen::Matrix4d transform_;
+	float4x4 pose_;
+	float4x4 poseInverse_;
 	float scale_;
 	int64_t last_frame_;
 
diff --git a/components/renderers/cpp/include/ftl/render/render_params.hpp b/components/renderers/cpp/include/ftl/render/render_params.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..8a4da170e0d35f7a1cf1f2587fbdb77b0bd53e63
--- /dev/null
+++ b/components/renderers/cpp/include/ftl/render/render_params.hpp
@@ -0,0 +1,64 @@
+#ifndef _FTL_RENDER_PARAMS_HPP_
+#define _FTL_RENDER_PARAMS_HPP_
+
+#include <ftl/cuda_util.hpp>
+#include <ftl/cuda_matrix_util.hpp>
+#include <ftl/rgbd/camera.hpp>
+
+namespace ftl {
+namespace render {
+
+static const uint kShowDisconMask = 0x00000001;
+static const uint kNormalWeightColours = 0x00000002;
+
+struct ViewPort {
+	short x;
+	short y;
+	short width;
+	short height;
+
+	__device__ __host__ inline short x2() const { return x+width-1; }
+	__device__ __host__ inline short y2() const { return y+height-1; }
+
+	__device__ __host__ inline bool inside(short px, short py) const {
+		return px >= x && px <= x2() && py >= y && py <= y2();
+	}
+
+	__device__ __host__ inline uint2 map(const ftl::rgbd::Camera &cam, const float2 &pt) const {
+		return make_uint2(
+			(pt.x - static_cast<float>(x)) * (static_cast<float>(cam.width) / static_cast<float>(width)),
+			(pt.y - static_cast<float>(y)) * (static_cast<float>(cam.height) / static_cast<float>(height))
+		);
+	}
+
+	__device__ __host__ inline uint2 reverseMap(const ftl::rgbd::Camera &cam, const float2 &pt) const {
+		return make_uint2(
+			(pt.x * (static_cast<float>(width) / static_cast<float>(cam.width))) + static_cast<float>(x),
+			(pt.y * (static_cast<float>(height) / static_cast<float>(cam.height))) + static_cast<float>(y)
+		);
+	}
+};
+
+/**
+ * Control how the viewport should be used.
+ */
+enum class ViewPortMode : uint8_t {
+	Disabled = 0,		// Do not use the viewport data
+	Clipping = 1,		// Clip the rendering to within the viewport for stencil like effect
+	Warping = 2			// Stretch viewport region over entire frame
+};
+
+struct Parameters {
+	uint m_flags;
+	float depthThreshold;
+	int triangle_limit;
+
+	ftl::rgbd::Camera camera;  // Virtual camera intrinsics
+	ftl::render::ViewPort viewport;
+	ftl::render::ViewPortMode viewPortMode;
+};
+
+}
+}
+
+#endif
diff --git a/components/renderers/cpp/include/ftl/render/splat_params.hpp b/components/renderers/cpp/include/ftl/render/splat_params.hpp
deleted file mode 100644
index 7c6b8416896f10aae816565c52813108c80bf385..0000000000000000000000000000000000000000
--- a/components/renderers/cpp/include/ftl/render/splat_params.hpp
+++ /dev/null
@@ -1,29 +0,0 @@
-#ifndef _FTL_RENDER_SPLAT_PARAMS_HPP_
-#define _FTL_RENDER_SPLAT_PARAMS_HPP_
-
-#include <ftl/cuda_util.hpp>
-#include <ftl/cuda_matrix_util.hpp>
-#include <ftl/rgbd/camera.hpp>
-
-namespace ftl {
-namespace render {
-
-static const uint kShowDisconMask = 0x00000001;
-static const uint kNormalWeightColours = 0x00000002;
-
-struct __align__(16) SplatParams {
-	float4x4 m_viewMatrix;
-	float4x4 m_viewMatrixInverse;
-
-	uint m_flags;
-	//float voxelSize;
-	float depthThreshold;
-	int triangle_limit;
-
-	ftl::rgbd::Camera camera;
-};
-
-}
-}
-
-#endif
diff --git a/components/renderers/cpp/src/CUDARender.cpp b/components/renderers/cpp/src/CUDARender.cpp
index a40c792251dfce60f88ee0f6883307b7fa727cc7..8bc1fbd87adac1d9e7a00d8eeaa662caffedfab3 100644
--- a/components/renderers/cpp/src/CUDARender.cpp
+++ b/components/renderers/cpp/src/CUDARender.cpp
@@ -96,6 +96,8 @@ CUDARender::CUDARender(nlohmann::json &config) : ftl::render::Renderer(config),
 		clipping_ = false;
 	//}
 
+	params_.viewPortMode = ftl::render::ViewPortMode::Disabled;
+
 	on("clipping_enabled", [this](const ftl::config::Event &e) {
 		clipping_ = value("clipping_enabled", true);
 	});
@@ -185,7 +187,7 @@ void CUDARender::__reprojectChannel(ftl::rgbd::Frame &output, ftl::codecs::Chann
 			return;
 		}
 
-		auto transform = MatrixConversion::toCUDA(f.getPose().cast<float>().inverse() * t.cast<float>().inverse()) * params_.m_viewMatrixInverse;
+		auto transform = MatrixConversion::toCUDA(f.getPose().cast<float>().inverse() * t.cast<float>().inverse()) * poseInverse_;
 		auto transformR = MatrixConversion::toCUDA(f.getPose().cast<float>().inverse()).getFloat3x3();
 
 		if (mesh_) {
@@ -265,7 +267,7 @@ void CUDARender::_dibr(ftl::rgbd::Frame &out, const Eigen::Matrix4d &t, cudaStre
 			continue;
 		}
 
-		auto transform = params_.m_viewMatrix * MatrixConversion::toCUDA(t.cast<float>() * f.getPose().cast<float>());
+		auto transform = pose_ * MatrixConversion::toCUDA(t.cast<float>() * f.getPose().cast<float>());
 
 		if (f.hasChannel(Channel::Depth)) {
 			ftl::cuda::dibr_merge(
@@ -312,7 +314,7 @@ void CUDARender::_mesh(ftl::rgbd::Frame &out, const Eigen::Matrix4d &t, cudaStre
 		}
 
 		//auto pose = MatrixConversion::toCUDA(t.cast<float>() * f.getPose().cast<float>());
-		auto transform = params_.m_viewMatrix * MatrixConversion::toCUDA(t.cast<float>() * f.getPose().cast<float>());
+		auto transform = pose_ * MatrixConversion::toCUDA(t.cast<float>() * f.getPose().cast<float>());
 
 		// Calculate and save virtual view screen position of each source pixel
 		if (f.hasChannel(Channel::Depth)) {
@@ -367,7 +369,7 @@ void CUDARender::_mesh(ftl::rgbd::Frame &out, const Eigen::Matrix4d &t, cudaStre
 				temp_.createTexture<float4>(Channel::Normals),
 				out.createTexture<float>(Channel::Depth),
 				value("normal_radius", 1), value("normal_smoothing", 0.02f),
-				params_.camera, params_.m_viewMatrix.getFloat3x3(), params_.m_viewMatrixInverse.getFloat3x3(), stream_);
+				params_.camera, pose_.getFloat3x3(), poseInverse_.getFloat3x3(), stream_);
 }
 
 void CUDARender::_renderChannel(
@@ -476,9 +478,10 @@ void CUDARender::_updateParameters(ftl::rgbd::Frame &out) {
 	params_.depthThreshold = value("depth_threshold", 0.04f);
 	params_.m_flags = 0;
 	if (value("normal_weight_colours", true)) params_.m_flags |= ftl::render::kNormalWeightColours;
-	params_.m_viewMatrix = MatrixConversion::toCUDA(out.getPose().cast<float>().inverse());
-	params_.m_viewMatrixInverse = MatrixConversion::toCUDA(out.getPose().cast<float>());
 	params_.camera = camera;
+
+	poseInverse_ = MatrixConversion::toCUDA(out.getPose().cast<float>());
+	pose_ = MatrixConversion::toCUDA(out.getPose().cast<float>().inverse());
 }
 
 void CUDARender::_preprocessColours() {
@@ -513,7 +516,7 @@ void CUDARender::_preprocessColours() {
 
 void CUDARender::_postprocessColours(ftl::rgbd::Frame &out) {
 	if (value("cool_effect", false)) {
-		auto pose = params_.m_viewMatrixInverse.getFloat3x3();
+		auto pose = poseInverse_.getFloat3x3();
 		auto col = parseCUDAColour(value("cool_effect_colour", std::string("#2222ff")));
 
 		ftl::cuda::cool_blue(
@@ -573,8 +576,8 @@ void CUDARender::_renderRight(ftl::rgbd::Frame &out, const Eigen::Matrix4d &t) {
 	transform(0, 3) = baseline;
 	Eigen::Matrix4f matrix = out.getPose().cast<float>() * transform.inverse();
 	
-	params_.m_viewMatrix = MatrixConversion::toCUDA(matrix.inverse());
-	params_.m_viewMatrixInverse = MatrixConversion::toCUDA(matrix);
+	pose_ = MatrixConversion::toCUDA(matrix.inverse());
+	poseInverse_ = MatrixConversion::toCUDA(matrix);
 	params_.camera = out.getRightCamera();
 	
 	out.create<GpuMat>(Channel::Right, Format<uchar4>(params_.camera.width, params_.camera.height));
@@ -656,7 +659,7 @@ void CUDARender::begin(ftl::rgbd::Frame &out) {
 	if (env_image_.empty() || !value("environment_enabled", false)) {
 		out.get<GpuMat>(Channel::Colour).setTo(background_, cvstream);
 	} else {
-		auto pose = params_.m_viewMatrixInverse.getFloat3x3();
+		auto pose = poseInverse_.getFloat3x3();
 		ftl::cuda::equirectangular_reproject(
 			env_tex_,
 			out.createTexture<uchar4>(Channel::Colour, true),
diff --git a/components/renderers/cpp/src/OLD_dibr.cu b/components/renderers/cpp/src/OLD_dibr.cu
new file mode 100644
index 0000000000000000000000000000000000000000..f8d4490f6f6a3b2b186843a8c0a36871170508a2
--- /dev/null
+++ b/components/renderers/cpp/src/OLD_dibr.cu
@@ -0,0 +1,781 @@
+#include "splat_render_cuda.hpp"
+#include "depth_camera_cuda.hpp"
+//#include <cuda_runtime.h>
+
+#include <ftl/cuda_matrix_util.hpp>
+
+#include "splat_params.hpp"
+#include "mls_cuda.hpp"
+#include <ftl/depth_camera.hpp>
+
+#define T_PER_BLOCK 8
+#define UPSAMPLE_FACTOR 1.8f
+#define WARP_SIZE 32
+#define DEPTH_THRESHOLD 0.05f
+#define UPSAMPLE_MAX 60
+#define MAX_ITERATIONS 32  // Note: Must be multiple of 32
+#define SPATIAL_SMOOTHING 0.005f
+
+using ftl::cuda::TextureObject;
+using ftl::render::Parameters;
+
+extern __constant__ ftl::voxhash::DepthCameraCUDA c_cameras[MAX_CAMERAS];
+
+__global__ void clearColourKernel(TextureObject<uchar4> colour) {
+	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
+	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	if (x < colour.width() && y < colour.height()) {
+		//depth(x,y) = 0x7f800000; //PINF;
+		colour(x,y) = make_uchar4(76,76,82,0);
+	}
+}
+
+__device__ inline bool isStable(const float3 &previous, const float3 &estimate, const SplatParams &params, float d) {
+    const float psize = 2.0f * d / params.camera.fx;
+    //printf("PSIZE %f\n", psize);
+    return fabs(previous.x - estimate.x) <= psize &&
+        fabs(previous.y - estimate.y) <= psize &&
+        fabs(previous.z - estimate.z) <= psize;
+}
+
+// ===== PASS 1 : Gather & Upsample (Depth) ====================================
+
+/*
+ * Pass 1: Directly render raw points from all cameras, but upsample the points
+ * if their spacing is within smoothing threshold but greater than their pixel
+ * size in the original image.
+ */
+ __global__ void dibr_merge_upsample_kernel(TextureObject<int> depth, int cam, SplatParams params) {
+	const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam];
+
+	const int x = (blockIdx.x*blockDim.x + threadIdx.x) / WARP_SIZE;
+	const int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	const float3 worldPos = make_float3(tex2D<float4>(camera.points, x, y));
+	//const float3 normal = make_float3(tex2D<float4>(camera.normal, x, y));
+	if (worldPos.x == MINF) return;
+    const float r = (camera.poseInverse * worldPos).z / camera.params.fx;
+
+	// Get virtual camera ray for splat centre and backface cull if possible
+	//const float3 rayOrigin = params.m_viewMatrixInverse * make_float3(0.0f,0.0f,0.0f);
+	//const float3 rayDir = normalize(params.m_viewMatrixInverse * params.camera.kinectDepthToSkeleton(x,y,1.0f) - rayOrigin);
+	//if (dot(rayDir, normal) > 0.0f) return;
+
+    // Find the virtual screen position of current point
+	const float3 camPos = params.m_viewMatrix * worldPos;
+	if (camPos.z < params.camera.m_sensorDepthWorldMin) return;
+	if (camPos.z > params.camera.m_sensorDepthWorldMax) return;
+
+	// TODO: Don't upsample so much that only minimum depth makes it through
+	// Consider also using some SDF style approach to accumulate and smooth a
+	// depth value between points
+	const int upsample = min(UPSAMPLE_MAX-2, int(0.01 * params.camera.fx / camPos.z))+3;
+	const float interval = 1.0f / float(upsample / 2);
+
+            
+    // TODO:(Nick) Check depth buffer and don't do anything if already hidden?
+
+	// Each thread in warp takes an upsample point and updates corresponding depth buffer.
+	const int lane = threadIdx.x % WARP_SIZE;
+	for (int i=lane; i<upsample*upsample; i+=WARP_SIZE) {
+		const float u = (i % upsample) - (upsample / 2);
+		const float v = (i / upsample) - (upsample / 2);
+
+        // Make an initial estimate of the points location
+		// Use centroid depth as estimate...?
+		const float3 point = params.m_viewMatrix * ftl::cuda::upsampled_point(camera.points, make_float2(float(x)+float(u)*interval, float(y)+float(v)*interval));
+		const float d = point.z;
+
+		const uint2 screenPos = params.camera.cameraToKinectScreen(point);
+		const unsigned int cx = screenPos.x;//+u;
+        const unsigned int cy = screenPos.y;//+v;
+		if (d > params.camera.m_sensorDepthWorldMin && d < params.camera.m_sensorDepthWorldMax && cx < depth.width() && cy < depth.height()) {
+			// Transform estimated point to virtual cam space and output z
+			atomicMin(&depth(cx,cy), d * 1000.0f);
+		}
+	}
+}
+
+/*
+ * Pass 1: Directly render each camera into virtual view but with no upsampling
+ * for sparse points.
+ */
+ __global__ void dibr_merge_kernel(TextureObject<int> depth, int cam, SplatParams params) {
+	const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam];
+
+	const int x = blockIdx.x*blockDim.x + threadIdx.x;
+	const int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	const float3 worldPos = make_float3(tex2D<float4>(camera.points, x, y));
+	if (worldPos.x == MINF) return;
+
+    // Find the virtual screen position of current point
+	const float3 camPos = params.m_viewMatrix * worldPos;
+	if (camPos.z < params.camera.m_sensorDepthWorldMin) return;
+	if (camPos.z > params.camera.m_sensorDepthWorldMax) return;
+
+	const float d = camPos.z;
+
+	const uint2 screenPos = params.camera.cameraToKinectScreen(camPos);
+	const unsigned int cx = screenPos.x;
+	const unsigned int cy = screenPos.y;
+	if (d > params.camera.m_sensorDepthWorldMin && d < params.camera.m_sensorDepthWorldMax && cx < depth.width() && cy < depth.height()) {
+		// Transform estimated point to virtual cam space and output z
+		atomicMin(&depth(cx,cy), d * 1000.0f);
+	}
+}
+
+// ===== PASS 2 : Splat Visible Surface ========================================
+
+/*
+ * Pass 2: Determine depth buffer with enough accuracy for a visibility test in pass 2.
+ * These values are also used as the actual surface estimate during rendering so should
+ * at least be plane or sphere fitted if not MLS smoothed onto the actual surface.
+ */
+__global__ void OLD_dibr_visibility_kernel(TextureObject<int> depth, int cam, SplatParams params) {
+	const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam];
+
+	const int x = (blockIdx.x*blockDim.x + threadIdx.x) / WARP_SIZE;
+	const int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	const float3 worldPos = make_float3(tex2D<float4>(camera.points, x, y));
+	const float3 normal = make_float3(tex2D<float4>(camera.normal, x, y));
+	if (worldPos.x == MINF) return;
+    const float r = (camera.poseInverse * worldPos).z / camera.params.fx;
+
+	// Get virtual camera ray for splat centre and backface cull if possible
+	//const float3 rayOrigin = params.m_viewMatrixInverse * make_float3(0.0f,0.0f,0.0f);
+	//const float3 rayDir = normalize(params.m_viewMatrixInverse * params.camera.kinectDepthToSkeleton(x,y,1.0f) - rayOrigin);
+	//if (dot(rayDir, normal) > 0.0f) return;
+
+    // Find the virtual screen position of current point
+	const float3 camPos = params.m_viewMatrix * worldPos;
+	if (camPos.z < params.camera.m_sensorDepthWorldMin) return;
+	if (camPos.z > params.camera.m_sensorDepthWorldMax) return;
+	const uint2 screenPos = params.camera.cameraToKinectScreen(camPos);
+
+	const int upsample = min(UPSAMPLE_MAX, int((r) * params.camera.fx / camPos.z));
+
+	// Not on screen so stop now...
+	if (screenPos.x - upsample >= depth.width() || screenPos.y - upsample >= depth.height()) return;
+            
+    // TODO:(Nick) Check depth buffer and don't do anything if already hidden?
+
+	// Each thread in warp takes an upsample point and updates corresponding depth buffer.
+	const int lane = threadIdx.x % WARP_SIZE;
+	for (int i=lane; i<upsample*upsample; i+=WARP_SIZE) {
+		const float u = (i % upsample) - (upsample / 2);
+		const float v = (i / upsample) - (upsample / 2);
+
+        // Make an initial estimate of the points location
+		// Use centroid depth as estimate...?
+        float3 nearest = ftl::cuda::screen_centroid<1>(camera.points, make_float2(screenPos.x+u, screenPos.y+v), make_int2(x,y), params, upsample);
+
+		// Use current points z as estimate
+		//float3 nearest = params.camera.kinectDepthToSkeleton(screenPos.x+u,screenPos.y+v,camPos.z);
+		
+		// Or calculate upper and lower bounds for depth and do gradient
+		// descent until the gradient change is too small or max iter is reached
+		// and depth remains within the bounds.
+		// How to find min and max depths?
+
+        //float ld = nearest.z;
+
+		// TODO: (Nick) Estimate depth using points plane, but needs better normals.
+		//float t;
+		//if (ftl::cuda::intersectPlane(normal, worldPos, rayOrigin, rayDir, t)) {
+			// Plane based estimate of surface at this pixel
+			//const float3 nearest = rayOrigin + rayDir * camPos.z;
+			float3 output;
+
+            // Use MLS of camera neighbor points to get more exact estimate
+            // Iterate until pixel is stable on the surface.
+            for (int k=0; k<MAX_ITERATIONS; ++k) {
+
+                // TODO:(Nick) Should perhaps use points from all cameras?
+                // Instead of doing each camera separately...
+                // If the depth already is close then it has already been done and can skip this point
+                if (ftl::cuda::mls_point_surface<1>(camera.points, make_int2(x,y), params.m_viewMatrixInverse * nearest, output, SPATIAL_SMOOTHING) <= 0.0f) {
+                    /*const unsigned int cx = screenPos.x;
+                    const unsigned int cy = screenPos.y;
+                    if (cx < depth.width() && cy < depth.height()) {
+                        atomicMax(&depth(cx,cy), 10000.0f);
+                    }*/
+                    break;
+                }
+            
+				//ftl::cuda::render_depth(depth, params, output);
+
+				output = params.m_viewMatrix * output;
+
+                // This is essentially the SDF function f(x), only the normal should be estimated also from the weights
+                //const float d = nearest.z + (normal.x*output.x + normal.y*output.y + normal.z*output.z);
+
+				const float d = nearest.z + copysignf(0.5f*length(output - nearest), output.z - nearest.z);
+				nearest = params.camera.kinectDepthToSkeleton(screenPos.x+u,screenPos.y+v,d);
+
+                const float2 sp = params.camera.cameraToKinectScreenFloat(output);
+
+                //if (isStable(nearest, output, params, d)) {
+                //if (fabs(sp.x - float(screenPos.x+u)) < 2.0f && fabs(sp.y - float(screenPos.y+v)) < 2.0f) {
+				if (length(output - nearest) <= 2.0f * params.camera.fx / camPos.z) {
+                    const unsigned int cx = screenPos.x+u;
+                    const unsigned int cy = screenPos.y+v;
+
+                    if (d > params.camera.m_sensorDepthWorldMin && d < params.camera.m_sensorDepthWorldMax && cx < depth.width() && cy < depth.height()) {
+                        // Transform estimated point to virtual cam space and output z
+                        atomicMin(&depth(cx,cy), d * 1000.0f);
+                    }
+                    break;
+                }
+
+                /*if (k >= MAX_ITERATIONS-1 && length(output - nearest) <= SPATIAL_SMOOTHING) {
+                    const unsigned int cx = screenPos.x+u;
+                    const unsigned int cy = screenPos.y+v;
+                    if (d > params.camera.m_sensorDepthWorldMin && d < params.camera.m_sensorDepthWorldMax && cx < depth.width() && cy < depth.height()) {
+						//atomicMin(&depth(cx,cy), d * 1000.0f);
+						printf("ERR = %f, %f\n", fabs(sp.x - float(screenPos.x+u)), fabs(sp.y - float(screenPos.y+v)));
+                    }
+                }*/
+
+                //nearest = params.camera.kinectDepthToSkeleton(screenPos.x+u,screenPos.y+v,d);  // ld + (d - ld)*0.8f
+                //ld = d;
+			}
+		//}
+	}
+}
+
+// ------ Alternative for pass 2: principle surfaces ---------------------------
+
+#define NEIGHBOR_RADIUS 1
+#define MAX_NEIGHBORS ((NEIGHBOR_RADIUS*2+1)*(NEIGHBOR_RADIUS*2+1))
+
+/*
+ * Pass 2: Determine depth buffer with enough accuracy for a visibility test in pass 2.
+ * These values are also used as the actual surface estimate during rendering so should
+ * at least be plane or sphere fitted if not MLS smoothed onto the actual surface.
+ */
+ __global__ void dibr_visibility_principal_kernel(TextureObject<int> depth, int cam, SplatParams params) {
+	__shared__ float3 neighborhood_cache[2*T_PER_BLOCK][MAX_NEIGHBORS];
+	__shared__ int minimum[2*T_PER_BLOCK];
+	__shared__ int maximum[2*T_PER_BLOCK];
+
+	const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam];
+
+	const int warp = threadIdx.x / WARP_SIZE + threadIdx.y*2;
+	const int x = (blockIdx.x*blockDim.x + threadIdx.x) / WARP_SIZE;
+	const int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	const float3 worldPos = make_float3(tex2D<float4>(camera.points, x, y));
+	//const float3 normal = make_float3(tex2D<float4>(camera.normal, x, y));
+	if (worldPos.x == MINF) return;
+    const float r = (camera.poseInverse * worldPos).z / camera.params.fx;
+
+	// Get virtual camera ray for splat centre and backface cull if possible
+	//const float3 rayOrigin = params.m_viewMatrixInverse * make_float3(0.0f,0.0f,0.0f);
+	//const float3 rayDir = normalize(params.m_viewMatrixInverse * params.camera.kinectDepthToSkeleton(x,y,1.0f) - rayOrigin);
+	//if (dot(rayDir, normal) > 0.0f) return;
+
+    // Find the virtual screen position of current point
+	const float3 camPos = params.m_viewMatrix * worldPos;
+	if (camPos.z < params.camera.m_sensorDepthWorldMin) return;
+	if (camPos.z > params.camera.m_sensorDepthWorldMax) return;
+	const uint2 screenPos = params.camera.cameraToKinectScreen(camPos);
+
+	const int upsample = min(UPSAMPLE_MAX, int((4.0f*r) * params.camera.fx / camPos.z));
+
+	// Not on screen so stop now...
+	if (screenPos.x - upsample >= depth.width() || screenPos.y - upsample >= depth.height()) return;
+            
+	// TODO:(Nick) Check depth buffer and don't do anything if already hidden?
+	
+	// TODO:(Nick) Preload point neighbors and transform to eye
+	const int lane = threadIdx.x % WARP_SIZE;
+	if (lane == 0) {
+		minimum[warp] = 100000000;
+		maximum[warp] = -100000000;
+	}
+
+	__syncwarp();
+
+	for (int i=lane; i<MAX_NEIGHBORS; i+=WARP_SIZE) {
+		const int u = (i % (2*NEIGHBOR_RADIUS+1)) - NEIGHBOR_RADIUS;
+		const int v = (i / (2*NEIGHBOR_RADIUS+1)) - NEIGHBOR_RADIUS;
+		const float3 point = params.m_viewMatrix * make_float3(tex2D<float4>(camera.points, x+u, y+v));
+		neighborhood_cache[warp][i] = point;
+
+		if (length(point - camPos) <= 0.04f) {
+			atomicMin(&minimum[warp], point.z*1000.0f);
+			atomicMax(&maximum[warp], point.z*1000.0f);
+		}
+	}
+
+	__syncwarp();
+	
+	const float interval = (float(maximum[warp])/1000.0f - float(minimum[warp]) / 1000.0f) / float(MAX_ITERATIONS);
+	//if (y == 200) printf("interval: %f\n", interval);
+
+	// TODO:(Nick) Find min and max depths of neighbors to estimate z bounds
+
+	// Each thread in warp takes an upsample point and updates corresponding depth buffer.
+	for (int i=lane; i<upsample*upsample; i+=WARP_SIZE) {
+		const float u = (i % upsample) - (upsample / 2);
+		const float v = (i / upsample) - (upsample / 2);
+
+        // Make an initial estimate of the points location
+		// Use centroid depth as estimate...?
+        //float3 nearest = ftl::cuda::screen_centroid<1>(camera.points, make_float2(screenPos.x+u, screenPos.y+v), make_int2(x,y), params, upsample);
+
+		// Use current points z as estimate
+		// TODO: Use min point as estimate
+		float3 nearest = params.camera.kinectDepthToSkeleton(screenPos.x+u,screenPos.y+v,float(minimum[warp])/1000.0f);
+		
+		// Or calculate upper and lower bounds for depth and do gradient
+		// descent until the gradient change is too small or max iter is reached
+		// and depth remains within the bounds.
+		// How to find min and max depths?
+
+		// TODO: (Nick) Estimate depth using points plane, but needs better normals.
+		//float t;
+		//if (ftl::cuda::intersectPlane(normal, worldPos, rayOrigin, rayDir, t)) {
+			// Plane based estimate of surface at this pixel
+			//const float3 nearest = rayOrigin + rayDir * camPos.z;
+
+            // Use MLS of camera neighbor points to get more exact estimate
+            // Iterate until pixel is stable on the surface.
+            for (int k=0; k<MAX_ITERATIONS; ++k) {
+
+                // TODO:(Nick) Should perhaps use points from all cameras?
+                // Instead of doing each camera separately...
+                // If the depth already is close then it has already been done and can skip this point
+				const float energy = ftl::cuda::mls_point_energy<MAX_NEIGHBORS>(neighborhood_cache[warp], nearest, SPATIAL_SMOOTHING);
+				
+				if (energy <= 0.0f) break;
+            
+				//ftl::cuda::render_depth(depth, params, output);
+
+                // This is essentially the SDF function f(x), only the normal should be estimated also from the weights
+                //const float d = nearest.z + (normal.x*output.x + normal.y*output.y + normal.z*output.z);
+
+				const float d = nearest.z;
+				nearest = params.camera.kinectDepthToSkeleton(screenPos.x+u,screenPos.y+v,d+interval);
+				
+				if (energy >= 0.1f) {
+					const unsigned int cx = screenPos.x+u;
+                    const unsigned int cy = screenPos.y+v;
+					if (d > params.camera.m_sensorDepthWorldMin && d < params.camera.m_sensorDepthWorldMax && cx < depth.width() && cy < depth.height()) {
+                        // Transform estimated point to virtual cam space and output z
+                        atomicMin(&depth(cx,cy), d * 1000.0f);
+					}
+					break;
+				}
+			}
+		//}
+	}
+}
+
+#define NEIGHBOR_RADIUS_2 3
+#define NEIGHBOR_WINDOW ((NEIGHBOR_RADIUS_2*2+1)*(NEIGHBOR_RADIUS_2*2+1))
+#define MAX_NEIGHBORS_2 32
+
+#define FULL_MASK 0xffffffff
+
+__device__ inline float warpMax(float e) {
+	for (int i = WARP_SIZE/2; i > 0; i /= 2) {
+		const float other = __shfl_xor_sync(FULL_MASK, e, i, WARP_SIZE);
+		e = max(e, other);
+	}
+	return e;
+}
+
+__device__ inline float warpMin(float e) {
+	for (int i = WARP_SIZE/2; i > 0; i /= 2) {
+		const float other = __shfl_xor_sync(FULL_MASK, e, i, WARP_SIZE);
+		e = min(e, other);
+	}
+	return e;
+}
+
+#define ENERGY_THRESHOLD 0.1f
+#define SMOOTHING_MULTIPLIER_A 10.0f	// For surface search
+#define SMOOTHING_MULTIPLIER_B 4.0f		// For z contribution
+#define SMOOTHING_MULTIPLIER_C 4.0f		// For colour contribution
+
+
+/*
+ * Pass 2: Determine depth buffer with enough accuracy for a visibility test in pass 2.
+ * These values are also used as the actual surface estimate during rendering so should
+ * at least be plane or sphere fitted if not MLS smoothed onto the actual surface.
+ *
+ * This version uses a previous point render as neighbour source.
+ */
+ __global__ void dibr_visibility_principal_kernel2(TextureObject<int> point_in, TextureObject<int> depth, SplatParams params) {
+	__shared__ float3 neighborhood_cache[2*T_PER_BLOCK][MAX_NEIGHBORS_2];
+	__shared__ int minimum[2*T_PER_BLOCK];
+	__shared__ int maximum[2*T_PER_BLOCK];
+	__shared__ unsigned int nidx[2*T_PER_BLOCK];
+
+	const int tid = (threadIdx.x + threadIdx.y * blockDim.x);
+	const int warp = tid / WARP_SIZE; //threadIdx.x / WARP_SIZE + threadIdx.y*2;
+	const int x = (blockIdx.x*blockDim.x + threadIdx.x) / WARP_SIZE;
+	const int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	// Starting point for surface minimum
+	float clusterBase = params.camera.m_sensorDepthWorldMin;
+
+	// Loop to a deeper surface if not on the first one selected...
+	while (clusterBase < params.camera.m_sensorDepthWorldMax) {
+
+	const int lane = tid % WARP_SIZE;
+	if (lane == 0) {
+		minimum[warp] = 100000000;
+		maximum[warp] = -100000000;
+		nidx[warp] = 0;
+	}
+
+	__syncwarp();
+
+	// Search for a valid minimum neighbour
+	// TODO: Should this really be minimum or the median of a depth cluster?
+	// cluster median seems very hard to calculate...
+	for (int i=lane; i<NEIGHBOR_WINDOW; i+=WARP_SIZE) {
+		const int u = (i % (2*NEIGHBOR_RADIUS_2+1)) - NEIGHBOR_RADIUS_2;
+		const int v = (i / (2*NEIGHBOR_RADIUS_2+1)) - NEIGHBOR_RADIUS_2;
+		const float3 point = params.camera.kinectDepthToSkeleton(x+u, y+v, float(point_in.tex2D(x+u, y+v)) / 1000.0f);
+		const float3 camPos = params.camera.kinectDepthToSkeleton(x, y, point.z);
+
+		// If it is close enough...
+		// TODO: smoothing / strength should be determined by a number of factors including:
+		//     1) Depth from original source
+		//     2) Colour contrast in underlying RGB
+		//     3) Estimated noise levels in depth values
+		if (point.z > clusterBase && point.z < params.camera.m_sensorDepthWorldMax && length(point - camPos) <= SMOOTHING_MULTIPLIER_A*(point.z / params.camera.fx)) {
+			atomicMin(&minimum[warp], point.z*1000.0f);
+		}
+	}
+
+	__syncwarp();
+
+	const float minDepth = float(minimum[warp])/1000.0f;
+	
+	// Preload valid neighbour points from within a window. A point is valid
+	// if it is within a specific distance of the minimum.
+	// Also calculate the maximum at the same time.
+	// TODO: Could here do a small search in each camera? This would allow all
+	// points to be considered, even those masked in our depth input.
+	const float3 minPos = params.camera.kinectDepthToSkeleton(x, y, minDepth);
+
+	for (int i=lane; i<NEIGHBOR_WINDOW; i+=WARP_SIZE) {
+		const int u = (i % (2*NEIGHBOR_RADIUS_2+1)) - NEIGHBOR_RADIUS_2;
+		const int v = (i / (2*NEIGHBOR_RADIUS_2+1)) - NEIGHBOR_RADIUS_2;
+		const float3 point = params.camera.kinectDepthToSkeleton(x+u, y+v, float(point_in.tex2D(x+u, y+v)) / 1000.0f);
+
+		// If it is close enough...
+		if (point.z > params.camera.m_sensorDepthWorldMin && point.z < params.camera.m_sensorDepthWorldMax && length(point - minPos) <= SMOOTHING_MULTIPLIER_A*(point.z / params.camera.fx)) {
+			// Append to neighbour list
+			//unsigned int idx = atomicInc(&nidx[warp], MAX_NEIGHBORS_2-1);
+			unsigned int idx = atomicAdd(&nidx[warp], 1);
+			if (idx >= MAX_NEIGHBORS_2) break;
+			neighborhood_cache[warp][idx] = point;
+			atomicMax(&maximum[warp], point.z*1000.0f);
+		}
+	}
+
+	__syncwarp();
+
+	const float maxDepth = float(maximum[warp])/1000.0f;
+	const float interval = (maxDepth - minDepth) / float(MAX_ITERATIONS);
+
+	if (minDepth >= params.camera.m_sensorDepthWorldMax) return;
+	if (maxDepth <= params.camera.m_sensorDepthWorldMin) return;
+	//if (y == 200) printf("interval: %f\n", maxDepth);
+
+	// If all samples say same depth, then agree and return
+	// TODO: Check this is valid, since small energies should be removed...
+	/*if (fabs(minDepth - maxDepth) < 0.0001f) {
+		if (lane == 0) {
+			const unsigned int cx = x;
+			const unsigned int cy = y;
+			if (minDepth < params.camera.m_sensorDepthWorldMax && cx < depth.width() && cy < depth.height()) {
+				// Transform estimated point to virtual cam space and output z
+				atomicMin(&depth(cx,cy), minDepth * 1000.0f);
+			}
+		}
+		return;
+	}*/
+
+
+	float maxenergy = -1.0f;
+	float bestdepth = 0.0f;
+
+	// Search for best or threshold energy
+	for (int k=lane; k<MAX_ITERATIONS; k+=WARP_SIZE) {
+		const float3 nearest = params.camera.kinectDepthToSkeleton(x,y,minDepth+float(k)*interval);
+		const float myenergy = ftl::cuda::mls_point_energy<MAX_NEIGHBORS_2>(neighborhood_cache[warp], nearest, min(nidx[warp], MAX_NEIGHBORS_2), SMOOTHING_MULTIPLIER_B*(nearest.z/params.camera.fx));
+		const float newenergy = warpMax(max(myenergy, maxenergy));
+		bestdepth = (myenergy == newenergy) ? nearest.z : (newenergy > maxenergy) ? 0.0f : bestdepth;
+		maxenergy = newenergy;
+	}
+
+	// If enough energy was found and this thread was the one that found the best
+	// then output the depth that this energy occured at.
+	if (bestdepth > 0.0f && maxenergy >= ENERGY_THRESHOLD) {
+		//printf("E D %f %f\n", maxenergy, bestdepth);
+		const unsigned int cx = x;
+		const unsigned int cy = y;
+		if (bestdepth > params.camera.m_sensorDepthWorldMin && bestdepth < params.camera.m_sensorDepthWorldMax && cx < depth.width() && cy < depth.height()) {
+			// Transform estimated point to virtual cam space and output z
+			atomicMin(&depth(cx,cy), bestdepth * 1000.0f);
+			//depth(cx,cy) = bestdepth * 1000.0f;
+		}
+	}
+
+	// TODO: Could the threshold depend upon the number of points? Fewer points
+	// due to distance is incorrect since really there may not be fewer points
+	// Perhaps the best option is to make it depend on depth ... really close
+	// and really far both has lower thresholds due to point densities. Other
+	// option is smoothing factor and surface distances alter with distance to
+	// vary the number of points used ... smoothing factor could be a multiple
+	// of pixel size at given distance. Density from original source is also
+	// an influencer of smoothing factor and thresholds. Colour contrast also
+	// has a weighting influence, high contrast is high certainty in the
+	// disparity so such points should have a high influence over choice of
+	// surface location.
+	//
+	// Magnitude vs dispersion factor in the energy function ...
+	//   * Mag is certainty of surface location
+	//   * Dispersion is how far to propagate that certainty,
+	if (maxenergy >= ENERGY_THRESHOLD) return;
+
+	// Move to next possible surface...
+	clusterBase = minDepth + SMOOTHING_MULTIPLIER_B*(minDepth / params.camera.fx);
+
+	};
+}
+
+// ===== Pass 2 and 3 : Attribute contributions ================================
+
+__device__ inline float4 make_float4(const uchar4 &c) {
+    return make_float4(c.x,c.y,c.z,c.w);
+}
+
+/*
+ * Pass 2: Accumulate attribute contributions if the points pass a visibility test.
+ */
+__global__ void dibr_attribute_contrib_kernel(
+        TextureObject<int> depth_in,
+        TextureObject<float4> colour_out,
+        TextureObject<float4> normal_out,
+        TextureObject<float> contrib_out, int cam,
+        SplatParams params) {
+        
+	const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam];
+
+	const int tid = (threadIdx.x + threadIdx.y * blockDim.x);
+	//const int warp = tid / WARP_SIZE;
+	const int x = (blockIdx.x*blockDim.x + threadIdx.x) / WARP_SIZE;
+	const int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	const float3 worldPos = make_float3(tex2D<float4>(camera.points, x, y));
+	//const float3 normal = make_float3(tex2D<float4>(camera.normal, x, y));
+	if (worldPos.x == MINF) return;
+    const float r = (camera.poseInverse * worldPos).z / camera.params.fx;
+
+	const float3 camPos = params.m_viewMatrix * worldPos;
+	if (camPos.z < params.camera.m_sensorDepthWorldMin) return;
+	if (camPos.z > params.camera.m_sensorDepthWorldMax) return;
+	const uint2 screenPos = params.camera.cameraToKinectScreen(camPos);
+
+    const int upsample = 8; //min(UPSAMPLE_MAX, int((5.0f*r) * params.camera.fx / camPos.z));
+
+	// Not on screen so stop now...
+	if (screenPos.x >= depth_in.width() || screenPos.y >= depth_in.height()) return;
+            
+    // Is this point near the actual surface and therefore a contributor?
+    const float d = ((float)depth_in.tex2D((int)screenPos.x, (int)screenPos.y)/1000.0f);
+    //if (abs(d - camPos.z) > DEPTH_THRESHOLD) return;
+
+    // TODO:(Nick) Should just one thread load these to shared mem?
+    const float4 colour = make_float4(tex2D<uchar4>(camera.colour, x, y));
+    const float4 normal = tex2D<float4>(camera.normal, x, y);
+
+	// Each thread in warp takes an upsample point and updates corresponding depth buffer.
+	const int lane = tid % WARP_SIZE;
+	for (int i=lane; i<upsample*upsample; i+=WARP_SIZE) {
+		const float u = (i % upsample) - (upsample / 2);
+		const float v = (i / upsample) - (upsample / 2);
+
+        // Use the depth buffer to determine this pixels 3D position in camera space
+        const float d = ((float)depth_in.tex2D(screenPos.x+u, screenPos.y+v)/1000.0f);
+		const float3 nearest = params.camera.kinectDepthToSkeleton((int)(screenPos.x+u),(int)(screenPos.y+v),d);
+
+        // What is contribution of our current point at this pixel?
+        const float weight = ftl::cuda::spatialWeighting(length(nearest - camPos), SMOOTHING_MULTIPLIER_C*(nearest.z/params.camera.fx));
+        if (screenPos.x+u < colour_out.width() && screenPos.y+v < colour_out.height() && weight > 0.0f) {  // TODO: Use confidence threshold here
+            const float4 wcolour = colour * weight;
+			const float4 wnormal = normal * weight;
+			
+			//printf("Z %f\n", d);
+
+            // Add this points contribution to the pixel buffer
+            atomicAdd((float*)&colour_out(screenPos.x+u, screenPos.y+v), wcolour.x);
+            atomicAdd((float*)&colour_out(screenPos.x+u, screenPos.y+v)+1, wcolour.y);
+            atomicAdd((float*)&colour_out(screenPos.x+u, screenPos.y+v)+2, wcolour.z);
+            atomicAdd((float*)&colour_out(screenPos.x+u, screenPos.y+v)+3, wcolour.w);
+            atomicAdd((float*)&normal_out(screenPos.x+u, screenPos.y+v), wnormal.x);
+            atomicAdd((float*)&normal_out(screenPos.x+u, screenPos.y+v)+1, wnormal.y);
+            atomicAdd((float*)&normal_out(screenPos.x+u, screenPos.y+v)+2, wnormal.z);
+            atomicAdd((float*)&normal_out(screenPos.x+u, screenPos.y+v)+3, wnormal.w);
+            atomicAdd(&contrib_out(screenPos.x+u, screenPos.y+v), weight);
+        }
+	}
+}
+
+/*
+ * Pass 2: Accumulate attribute contributions if the points pass a visibility test.
+ */
+/*__global__ void dibr_attribute_contrib_kernel(
+    TextureObject<int> depth_in,
+	TextureObject<uchar4> colour_out,
+	TextureObject<float4> normal_out, int numcams, SplatParams params) {
+
+    const int i = threadIdx.y*blockDim.y + threadIdx.x;
+    const int bx = blockIdx.x*blockDim.x;
+    const int by = blockIdx.y*blockDim.y;
+    const int x = bx + threadIdx.x;
+    const int y = by + threadIdx.y;
+    
+    for (int j=0; j<numcams; ++j) {
+        const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[j];
+	
+        float3 worldPos = make_float3(tex2D<float4>(camera.points, x, y));
+        float r = (camera.poseInverse * worldPos).z;
+        //if (ftl::cuda::mls_point_surface<3>(camera.points, make_int2(x,y), worldPos, 0.02f) < 0.001f) continue;
+        if (worldPos.x == MINF) continue;
+        
+        const float3 camPos = params.m_viewMatrix * worldPos;
+
+        // Estimate upsample factor using ratio of source depth and output depth
+
+		const int upsample = min(15, (int)(UPSAMPLE_FACTOR * (r / camPos.z))+1);
+		const float upfactor = 2.0f / (float)(upsample);
+
+        for (int v=0; v<upsample; ++v) {
+            for (int u=0; u<upsample; ++u) {
+                float3 point;
+                const ftl::cuda::fragment nearest = ftl::cuda::upsampled_point(camera.points, camera.normal, camera.colour,
+                    make_float2((float)x-1.0f+u*upfactor,(float)y-1.0f+v*upfactor));
+                //if (ftl::cuda::mls_point_surface<3>(camera.points, make_int2(x,y), nearest, point, 0.02f) < 0.001f) continue;
+                ftl::cuda::render_fragment(depth_in, normal_out, colour_out, params, nearest);
+            }
+        }
+    }
+}*/
+
+
+
+__global__ void dibr_normalise_kernel(
+        TextureObject<float4> colour_in,
+        TextureObject<uchar4> colour_out,
+        TextureObject<float4> normals,
+        TextureObject<float> contribs) {
+	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
+	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	if (x < colour_in.width() && y < colour_in.height()) {
+        const float4 colour = colour_in.tex2D((int)x,(int)y);
+        const float4 normal = normals.tex2D((int)x,(int)y);
+        const float contrib = contribs.tex2D((int)x,(int)y);
+
+        if (contrib > 0.0f) {
+            colour_out(x,y) = make_uchar4(colour.x / contrib, colour.y / contrib, colour.z / contrib, 0);
+            normals(x,y) = normal / contrib;
+        }
+	}
+}
+
+void ftl::cuda::dibr(const TextureObject<int> &depth_out,
+        const TextureObject<uchar4> &colour_out,
+        const TextureObject<float4> &normal_out,
+        const TextureObject<float> &confidence_out,
+		const TextureObject<float4> &tmp_colour,
+		const TextureObject<int> &tmp_depth,
+        int numcams,
+        const SplatParams &params,
+        cudaStream_t stream) {
+
+	const dim3 sgridSize((depth_out.width() + 2 - 1)/2, (depth_out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+	const dim3 sblockSize(2*WARP_SIZE, T_PER_BLOCK);
+    const dim3 gridSize((depth_out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+    const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
+
+    clearColourKernel<<<gridSize, blockSize, 0, stream>>>(colour_out);
+    ftl::cuda::clear_to_zero(confidence_out, stream);
+    ftl::cuda::clear_colour(tmp_colour, stream);
+    ftl::cuda::clear_colour(normal_out, stream);
+	
+#ifdef _DEBUG
+	cudaSafeCall(cudaDeviceSynchronize());
+#endif
+
+	//int i=3;
+
+	bool noSplatting = params.m_flags & ftl::render::kNoSplatting;
+
+	// Pass 1, gather and upsample depth maps
+	if (params.m_flags & ftl::render::kNoUpsampling) {
+		for (int i=0; i<numcams; ++i)
+			dibr_merge_kernel<<<gridSize, blockSize, 0, stream>>>((noSplatting) ? depth_out : tmp_depth, i, params);
+	} else {
+		for (int i=0; i<numcams; ++i)
+			dibr_merge_upsample_kernel<<<sgridSize, sblockSize, 0, stream>>>((noSplatting) ? depth_out : tmp_depth, i, params);
+	}
+
+	if (noSplatting) {
+		// Pass 3, accumulate all point contributions to pixels
+		for (int i=0; i<numcams; ++i)
+        	dibr_attribute_contrib_kernel<<<sgridSize, sblockSize, 0, stream>>>(depth_out, tmp_colour, normal_out, confidence_out, i, params);
+	} else {
+		// Pass 2
+		dibr_visibility_principal_kernel2<<<sgridSize, sblockSize, 0, stream>>>(tmp_depth, depth_out, params);
+
+		// Pass 3, accumulate all point contributions to pixels
+		for (int i=0; i<numcams; ++i)
+        	dibr_attribute_contrib_kernel<<<sgridSize, sblockSize, 0, stream>>>(depth_out, tmp_colour, normal_out, confidence_out, i, params);
+	}
+	// Pass 2
+	//dibr_visibility_principal_kernel2<<<sgridSize, sblockSize, 0, stream>>>(tmp_depth, depth_out, params);
+
+    // Pass 2, merge a depth map from each camera.
+	//for (int i=0; i<numcams; ++i)
+    //    dibr_visibility_principal_kernel<<<sgridSize, sblockSize, 0, stream>>>(depth_out, i, params);
+
+    // Pass 4, normalise contributions
+    dibr_normalise_kernel<<<gridSize, blockSize, 0, stream>>>(tmp_colour, colour_out, normal_out, confidence_out);
+
+	cudaSafeCall( cudaGetLastError() );
+	
+#ifdef _DEBUG
+	cudaSafeCall(cudaDeviceSynchronize());
+#endif
+}
+
+void ftl::cuda::dibr_raw(const TextureObject<int> &depth_out,
+    	int numcams, const SplatParams &params, cudaStream_t stream) {
+
+    const dim3 gridSize((depth_out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+    const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
+	
+#ifdef _DEBUG
+	cudaSafeCall(cudaDeviceSynchronize());
+#endif
+
+	//dibr_depthmap_direct_kernel<<<gridSize, blockSize, 0, stream>>>(depth_out, numcams, params);
+	cudaSafeCall( cudaGetLastError() );
+	
+#ifdef _DEBUG
+	cudaSafeCall(cudaDeviceSynchronize());
+#endif
+}
+
diff --git a/components/renderers/cpp/src/clipping.cu b/components/renderers/cpp/src/clipping.cu
new file mode 100644
index 0000000000000000000000000000000000000000..354376365c5334e8d4bf0671f76f7aec07c441bb
--- /dev/null
+++ b/components/renderers/cpp/src/clipping.cu
@@ -0,0 +1,34 @@
+#include <ftl/cuda/points.hpp>
+
+#define T_PER_BLOCK 8
+
+__device__ bool isClipped(const float4 &p, const ftl::cuda::ClipSpace &clip) {
+	const float3 tp = clip.origin * make_float3(p);
+	return fabs(tp.x) > clip.size.x || fabs(tp.y) > clip.size.y || fabs(tp.z) > clip.size.z;
+}
+
+__global__ void clipping_kernel(ftl::cuda::TextureObject<float> depth, ftl::rgbd::Camera camera, ftl::cuda::ClipSpace clip)
+{
+	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
+	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	if (x < depth.width() && y < depth.height()) {
+		float d = depth(x,y);
+		float4 p = make_float4(camera.screenToCam(x,y,d), 0.0f);
+
+		if (isClipped(p, clip)) {
+			depth(x,y) = MINF;
+		}
+	}
+}
+
+void ftl::cuda::clipping(ftl::cuda::TextureObject<float> &depth,
+		const ftl::rgbd::Camera &camera,
+		const ClipSpace &clip, cudaStream_t stream) {
+
+	const dim3 gridSize((depth.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
+
+	clipping_kernel<<<gridSize, blockSize, 0, stream>>>(depth, camera, clip);
+	cudaSafeCall( cudaGetLastError() );
+}
\ No newline at end of file
diff --git a/components/renderers/cpp/src/dibr.cu b/components/renderers/cpp/src/dibr.cu
index 66428ce3086e42b6a3e81c667ae126314d910c98..90e37ff4fcae5b94274aedb43dfb306bff03e957 100644
--- a/components/renderers/cpp/src/dibr.cu
+++ b/components/renderers/cpp/src/dibr.cu
@@ -1,781 +1,117 @@
-#include "splat_render_cuda.hpp"
-#include "depth_camera_cuda.hpp"
-//#include <cuda_runtime.h>
+#include <ftl/render/render_params.hpp>
+#include "splatter_cuda.hpp"
+#include <ftl/rgbd/camera.hpp>
+#include <ftl/cuda_common.hpp>
 
-#include <ftl/cuda_matrix_util.hpp>
-
-#include "splat_params.hpp"
-#include "mls_cuda.hpp"
-#include <ftl/depth_camera.hpp>
+#include <ftl/cuda/makers.hpp>
 
 #define T_PER_BLOCK 8
-#define UPSAMPLE_FACTOR 1.8f
-#define WARP_SIZE 32
-#define DEPTH_THRESHOLD 0.05f
-#define UPSAMPLE_MAX 60
-#define MAX_ITERATIONS 32  // Note: Must be multiple of 32
-#define SPATIAL_SMOOTHING 0.005f
 
 using ftl::cuda::TextureObject;
-using ftl::render::SplatParams;
-
-extern __constant__ ftl::voxhash::DepthCameraCUDA c_cameras[MAX_CAMERAS];
-
-__global__ void clearColourKernel(TextureObject<uchar4> colour) {
-	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
-	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
-
-	if (x < colour.width() && y < colour.height()) {
-		//depth(x,y) = 0x7f800000; //PINF;
-		colour(x,y) = make_uchar4(76,76,82,0);
-	}
-}
-
-__device__ inline bool isStable(const float3 &previous, const float3 &estimate, const SplatParams &params, float d) {
-    const float psize = 2.0f * d / params.camera.fx;
-    //printf("PSIZE %f\n", psize);
-    return fabs(previous.x - estimate.x) <= psize &&
-        fabs(previous.y - estimate.y) <= psize &&
-        fabs(previous.z - estimate.z) <= psize;
-}
-
-// ===== PASS 1 : Gather & Upsample (Depth) ====================================
-
-/*
- * Pass 1: Directly render raw points from all cameras, but upsample the points
- * if their spacing is within smoothing threshold but greater than their pixel
- * size in the original image.
- */
- __global__ void dibr_merge_upsample_kernel(TextureObject<int> depth, int cam, SplatParams params) {
-	const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam];
-
-	const int x = (blockIdx.x*blockDim.x + threadIdx.x) / WARP_SIZE;
-	const int y = blockIdx.y*blockDim.y + threadIdx.y;
-
-	const float3 worldPos = make_float3(tex2D<float4>(camera.points, x, y));
-	//const float3 normal = make_float3(tex2D<float4>(camera.normal, x, y));
-	if (worldPos.x == MINF) return;
-    const float r = (camera.poseInverse * worldPos).z / camera.params.fx;
-
-	// Get virtual camera ray for splat centre and backface cull if possible
-	//const float3 rayOrigin = params.m_viewMatrixInverse * make_float3(0.0f,0.0f,0.0f);
-	//const float3 rayDir = normalize(params.m_viewMatrixInverse * params.camera.kinectDepthToSkeleton(x,y,1.0f) - rayOrigin);
-	//if (dot(rayDir, normal) > 0.0f) return;
-
-    // Find the virtual screen position of current point
-	const float3 camPos = params.m_viewMatrix * worldPos;
-	if (camPos.z < params.camera.m_sensorDepthWorldMin) return;
-	if (camPos.z > params.camera.m_sensorDepthWorldMax) return;
-
-	// TODO: Don't upsample so much that only minimum depth makes it through
-	// Consider also using some SDF style approach to accumulate and smooth a
-	// depth value between points
-	const int upsample = min(UPSAMPLE_MAX-2, int(0.01 * params.camera.fx / camPos.z))+3;
-	const float interval = 1.0f / float(upsample / 2);
-
-            
-    // TODO:(Nick) Check depth buffer and don't do anything if already hidden?
-
-	// Each thread in warp takes an upsample point and updates corresponding depth buffer.
-	const int lane = threadIdx.x % WARP_SIZE;
-	for (int i=lane; i<upsample*upsample; i+=WARP_SIZE) {
-		const float u = (i % upsample) - (upsample / 2);
-		const float v = (i / upsample) - (upsample / 2);
-
-        // Make an initial estimate of the points location
-		// Use centroid depth as estimate...?
-		const float3 point = params.m_viewMatrix * ftl::cuda::upsampled_point(camera.points, make_float2(float(x)+float(u)*interval, float(y)+float(v)*interval));
-		const float d = point.z;
-
-		const uint2 screenPos = params.camera.cameraToKinectScreen(point);
-		const unsigned int cx = screenPos.x;//+u;
-        const unsigned int cy = screenPos.y;//+v;
-		if (d > params.camera.m_sensorDepthWorldMin && d < params.camera.m_sensorDepthWorldMax && cx < depth.width() && cy < depth.height()) {
-			// Transform estimated point to virtual cam space and output z
-			atomicMin(&depth(cx,cy), d * 1000.0f);
-		}
-	}
-}
+using ftl::render::Parameters;
 
 /*
- * Pass 1: Directly render each camera into virtual view but with no upsampling
- * for sparse points.
+ * DIBR point cloud with a depth check
  */
- __global__ void dibr_merge_kernel(TextureObject<int> depth, int cam, SplatParams params) {
-	const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam];
-
+ __global__ void dibr_merge_kernel(TextureObject<float> depth,
+		TextureObject<int> depth_out,
+		float4x4 transform,
+		ftl::rgbd::Camera cam,
+		Parameters params) {
 	const int x = blockIdx.x*blockDim.x + threadIdx.x;
 	const int y = blockIdx.y*blockDim.y + threadIdx.y;
 
-	const float3 worldPos = make_float3(tex2D<float4>(camera.points, x, y));
-	if (worldPos.x == MINF) return;
+	const float d0 = depth.tex2D(x, y);
+	if (d0 <= cam.minDepth || d0 >= cam.maxDepth) return;
 
-    // Find the virtual screen position of current point
-	const float3 camPos = params.m_viewMatrix * worldPos;
-	if (camPos.z < params.camera.m_sensorDepthWorldMin) return;
-	if (camPos.z > params.camera.m_sensorDepthWorldMax) return;
+	const float3 camPos = transform * cam.screenToCam(x,y,d0);
 
 	const float d = camPos.z;
 
-	const uint2 screenPos = params.camera.cameraToKinectScreen(camPos);
+	const uint2 screenPos = params.camera.camToScreen<uint2>(camPos);
 	const unsigned int cx = screenPos.x;
 	const unsigned int cy = screenPos.y;
-	if (d > params.camera.m_sensorDepthWorldMin && d < params.camera.m_sensorDepthWorldMax && cx < depth.width() && cy < depth.height()) {
+	if (d > params.camera.minDepth && d < params.camera.maxDepth && cx < depth_out.width() && cy < depth_out.height()) {
 		// Transform estimated point to virtual cam space and output z
-		atomicMin(&depth(cx,cy), d * 1000.0f);
+		atomicMin(&depth_out(cx,cy), d * 100000.0f);
 	}
 }
 
-// ===== PASS 2 : Splat Visible Surface ========================================
-
 /*
- * Pass 2: Determine depth buffer with enough accuracy for a visibility test in pass 2.
- * These values are also used as the actual surface estimate during rendering so should
- * at least be plane or sphere fitted if not MLS smoothed onto the actual surface.
- */
-__global__ void OLD_dibr_visibility_kernel(TextureObject<int> depth, int cam, SplatParams params) {
-	const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam];
-
-	const int x = (blockIdx.x*blockDim.x + threadIdx.x) / WARP_SIZE;
-	const int y = blockIdx.y*blockDim.y + threadIdx.y;
-
-	const float3 worldPos = make_float3(tex2D<float4>(camera.points, x, y));
-	const float3 normal = make_float3(tex2D<float4>(camera.normal, x, y));
-	if (worldPos.x == MINF) return;
-    const float r = (camera.poseInverse * worldPos).z / camera.params.fx;
-
-	// Get virtual camera ray for splat centre and backface cull if possible
-	//const float3 rayOrigin = params.m_viewMatrixInverse * make_float3(0.0f,0.0f,0.0f);
-	//const float3 rayDir = normalize(params.m_viewMatrixInverse * params.camera.kinectDepthToSkeleton(x,y,1.0f) - rayOrigin);
-	//if (dot(rayDir, normal) > 0.0f) return;
-
-    // Find the virtual screen position of current point
-	const float3 camPos = params.m_viewMatrix * worldPos;
-	if (camPos.z < params.camera.m_sensorDepthWorldMin) return;
-	if (camPos.z > params.camera.m_sensorDepthWorldMax) return;
-	const uint2 screenPos = params.camera.cameraToKinectScreen(camPos);
-
-	const int upsample = min(UPSAMPLE_MAX, int((r) * params.camera.fx / camPos.z));
-
-	// Not on screen so stop now...
-	if (screenPos.x - upsample >= depth.width() || screenPos.y - upsample >= depth.height()) return;
-            
-    // TODO:(Nick) Check depth buffer and don't do anything if already hidden?
-
-	// Each thread in warp takes an upsample point and updates corresponding depth buffer.
-	const int lane = threadIdx.x % WARP_SIZE;
-	for (int i=lane; i<upsample*upsample; i+=WARP_SIZE) {
-		const float u = (i % upsample) - (upsample / 2);
-		const float v = (i / upsample) - (upsample / 2);
-
-        // Make an initial estimate of the points location
-		// Use centroid depth as estimate...?
-        float3 nearest = ftl::cuda::screen_centroid<1>(camera.points, make_float2(screenPos.x+u, screenPos.y+v), make_int2(x,y), params, upsample);
-
-		// Use current points z as estimate
-		//float3 nearest = params.camera.kinectDepthToSkeleton(screenPos.x+u,screenPos.y+v,camPos.z);
-		
-		// Or calculate upper and lower bounds for depth and do gradient
-		// descent until the gradient change is too small or max iter is reached
-		// and depth remains within the bounds.
-		// How to find min and max depths?
-
-        //float ld = nearest.z;
-
-		// TODO: (Nick) Estimate depth using points plane, but needs better normals.
-		//float t;
-		//if (ftl::cuda::intersectPlane(normal, worldPos, rayOrigin, rayDir, t)) {
-			// Plane based estimate of surface at this pixel
-			//const float3 nearest = rayOrigin + rayDir * camPos.z;
-			float3 output;
-
-            // Use MLS of camera neighbor points to get more exact estimate
-            // Iterate until pixel is stable on the surface.
-            for (int k=0; k<MAX_ITERATIONS; ++k) {
-
-                // TODO:(Nick) Should perhaps use points from all cameras?
-                // Instead of doing each camera separately...
-                // If the depth already is close then it has already been done and can skip this point
-                if (ftl::cuda::mls_point_surface<1>(camera.points, make_int2(x,y), params.m_viewMatrixInverse * nearest, output, SPATIAL_SMOOTHING) <= 0.0f) {
-                    /*const unsigned int cx = screenPos.x;
-                    const unsigned int cy = screenPos.y;
-                    if (cx < depth.width() && cy < depth.height()) {
-                        atomicMax(&depth(cx,cy), 10000.0f);
-                    }*/
-                    break;
-                }
-            
-				//ftl::cuda::render_depth(depth, params, output);
-
-				output = params.m_viewMatrix * output;
-
-                // This is essentially the SDF function f(x), only the normal should be estimated also from the weights
-                //const float d = nearest.z + (normal.x*output.x + normal.y*output.y + normal.z*output.z);
-
-				const float d = nearest.z + copysignf(0.5f*length(output - nearest), output.z - nearest.z);
-				nearest = params.camera.kinectDepthToSkeleton(screenPos.x+u,screenPos.y+v,d);
-
-                const float2 sp = params.camera.cameraToKinectScreenFloat(output);
-
-                //if (isStable(nearest, output, params, d)) {
-                //if (fabs(sp.x - float(screenPos.x+u)) < 2.0f && fabs(sp.y - float(screenPos.y+v)) < 2.0f) {
-				if (length(output - nearest) <= 2.0f * params.camera.fx / camPos.z) {
-                    const unsigned int cx = screenPos.x+u;
-                    const unsigned int cy = screenPos.y+v;
-
-                    if (d > params.camera.m_sensorDepthWorldMin && d < params.camera.m_sensorDepthWorldMax && cx < depth.width() && cy < depth.height()) {
-                        // Transform estimated point to virtual cam space and output z
-                        atomicMin(&depth(cx,cy), d * 1000.0f);
-                    }
-                    break;
-                }
-
-                /*if (k >= MAX_ITERATIONS-1 && length(output - nearest) <= SPATIAL_SMOOTHING) {
-                    const unsigned int cx = screenPos.x+u;
-                    const unsigned int cy = screenPos.y+v;
-                    if (d > params.camera.m_sensorDepthWorldMin && d < params.camera.m_sensorDepthWorldMax && cx < depth.width() && cy < depth.height()) {
-						//atomicMin(&depth(cx,cy), d * 1000.0f);
-						printf("ERR = %f, %f\n", fabs(sp.x - float(screenPos.x+u)), fabs(sp.y - float(screenPos.y+v)));
-                    }
-                }*/
-
-                //nearest = params.camera.kinectDepthToSkeleton(screenPos.x+u,screenPos.y+v,d);  // ld + (d - ld)*0.8f
-                //ld = d;
-			}
-		//}
-	}
-}
-
-// ------ Alternative for pass 2: principle surfaces ---------------------------
-
-#define NEIGHBOR_RADIUS 1
-#define MAX_NEIGHBORS ((NEIGHBOR_RADIUS*2+1)*(NEIGHBOR_RADIUS*2+1))
-
-/*
- * Pass 2: Determine depth buffer with enough accuracy for a visibility test in pass 2.
- * These values are also used as the actual surface estimate during rendering so should
- * at least be plane or sphere fitted if not MLS smoothed onto the actual surface.
- */
- __global__ void dibr_visibility_principal_kernel(TextureObject<int> depth, int cam, SplatParams params) {
-	__shared__ float3 neighborhood_cache[2*T_PER_BLOCK][MAX_NEIGHBORS];
-	__shared__ int minimum[2*T_PER_BLOCK];
-	__shared__ int maximum[2*T_PER_BLOCK];
-
-	const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam];
-
-	const int warp = threadIdx.x / WARP_SIZE + threadIdx.y*2;
-	const int x = (blockIdx.x*blockDim.x + threadIdx.x) / WARP_SIZE;
+* DIBR Point cloud with a constant depth assumption
+*/
+__global__ void dibr_merge_kernel(
+		TextureObject<int> depth_out,
+		float4x4 transform,
+		ftl::rgbd::Camera cam,
+		Parameters params) {
+	const int x = blockIdx.x*blockDim.x + threadIdx.x;
 	const int y = blockIdx.y*blockDim.y + threadIdx.y;
 
-	const float3 worldPos = make_float3(tex2D<float4>(camera.points, x, y));
-	//const float3 normal = make_float3(tex2D<float4>(camera.normal, x, y));
-	if (worldPos.x == MINF) return;
-    const float r = (camera.poseInverse * worldPos).z / camera.params.fx;
-
-	// Get virtual camera ray for splat centre and backface cull if possible
-	//const float3 rayOrigin = params.m_viewMatrixInverse * make_float3(0.0f,0.0f,0.0f);
-	//const float3 rayDir = normalize(params.m_viewMatrixInverse * params.camera.kinectDepthToSkeleton(x,y,1.0f) - rayOrigin);
-	//if (dot(rayDir, normal) > 0.0f) return;
-
-    // Find the virtual screen position of current point
-	const float3 camPos = params.m_viewMatrix * worldPos;
-	if (camPos.z < params.camera.m_sensorDepthWorldMin) return;
-	if (camPos.z > params.camera.m_sensorDepthWorldMax) return;
-	const uint2 screenPos = params.camera.cameraToKinectScreen(camPos);
+	const float d0 = 1.0f;
 
-	const int upsample = min(UPSAMPLE_MAX, int((4.0f*r) * params.camera.fx / camPos.z));
-
-	// Not on screen so stop now...
-	if (screenPos.x - upsample >= depth.width() || screenPos.y - upsample >= depth.height()) return;
-            
-	// TODO:(Nick) Check depth buffer and don't do anything if already hidden?
-	
-	// TODO:(Nick) Preload point neighbors and transform to eye
-	const int lane = threadIdx.x % WARP_SIZE;
-	if (lane == 0) {
-		minimum[warp] = 100000000;
-		maximum[warp] = -100000000;
-	}
-
-	__syncwarp();
-
-	for (int i=lane; i<MAX_NEIGHBORS; i+=WARP_SIZE) {
-		const int u = (i % (2*NEIGHBOR_RADIUS+1)) - NEIGHBOR_RADIUS;
-		const int v = (i / (2*NEIGHBOR_RADIUS+1)) - NEIGHBOR_RADIUS;
-		const float3 point = params.m_viewMatrix * make_float3(tex2D<float4>(camera.points, x+u, y+v));
-		neighborhood_cache[warp][i] = point;
-
-		if (length(point - camPos) <= 0.04f) {
-			atomicMin(&minimum[warp], point.z*1000.0f);
-			atomicMax(&maximum[warp], point.z*1000.0f);
-		}
-	}
-
-	__syncwarp();
-	
-	const float interval = (float(maximum[warp])/1000.0f - float(minimum[warp]) / 1000.0f) / float(MAX_ITERATIONS);
-	//if (y == 200) printf("interval: %f\n", interval);
-
-	// TODO:(Nick) Find min and max depths of neighbors to estimate z bounds
-
-	// Each thread in warp takes an upsample point and updates corresponding depth buffer.
-	for (int i=lane; i<upsample*upsample; i+=WARP_SIZE) {
-		const float u = (i % upsample) - (upsample / 2);
-		const float v = (i / upsample) - (upsample / 2);
-
-        // Make an initial estimate of the points location
-		// Use centroid depth as estimate...?
-        //float3 nearest = ftl::cuda::screen_centroid<1>(camera.points, make_float2(screenPos.x+u, screenPos.y+v), make_int2(x,y), params, upsample);
-
-		// Use current points z as estimate
-		// TODO: Use min point as estimate
-		float3 nearest = params.camera.kinectDepthToSkeleton(screenPos.x+u,screenPos.y+v,float(minimum[warp])/1000.0f);
-		
-		// Or calculate upper and lower bounds for depth and do gradient
-		// descent until the gradient change is too small or max iter is reached
-		// and depth remains within the bounds.
-		// How to find min and max depths?
-
-		// TODO: (Nick) Estimate depth using points plane, but needs better normals.
-		//float t;
-		//if (ftl::cuda::intersectPlane(normal, worldPos, rayOrigin, rayDir, t)) {
-			// Plane based estimate of surface at this pixel
-			//const float3 nearest = rayOrigin + rayDir * camPos.z;
-
-            // Use MLS of camera neighbor points to get more exact estimate
-            // Iterate until pixel is stable on the surface.
-            for (int k=0; k<MAX_ITERATIONS; ++k) {
-
-                // TODO:(Nick) Should perhaps use points from all cameras?
-                // Instead of doing each camera separately...
-                // If the depth already is close then it has already been done and can skip this point
-				const float energy = ftl::cuda::mls_point_energy<MAX_NEIGHBORS>(neighborhood_cache[warp], nearest, SPATIAL_SMOOTHING);
-				
-				if (energy <= 0.0f) break;
-            
-				//ftl::cuda::render_depth(depth, params, output);
-
-                // This is essentially the SDF function f(x), only the normal should be estimated also from the weights
-                //const float d = nearest.z + (normal.x*output.x + normal.y*output.y + normal.z*output.z);
-
-				const float d = nearest.z;
-				nearest = params.camera.kinectDepthToSkeleton(screenPos.x+u,screenPos.y+v,d+interval);
-				
-				if (energy >= 0.1f) {
-					const unsigned int cx = screenPos.x+u;
-                    const unsigned int cy = screenPos.y+v;
-					if (d > params.camera.m_sensorDepthWorldMin && d < params.camera.m_sensorDepthWorldMax && cx < depth.width() && cy < depth.height()) {
-                        // Transform estimated point to virtual cam space and output z
-                        atomicMin(&depth(cx,cy), d * 1000.0f);
-					}
-					break;
-				}
-			}
-		//}
-	}
-}
-
-#define NEIGHBOR_RADIUS_2 3
-#define NEIGHBOR_WINDOW ((NEIGHBOR_RADIUS_2*2+1)*(NEIGHBOR_RADIUS_2*2+1))
-#define MAX_NEIGHBORS_2 32
-
-#define FULL_MASK 0xffffffff
-
-__device__ inline float warpMax(float e) {
-	for (int i = WARP_SIZE/2; i > 0; i /= 2) {
-		const float other = __shfl_xor_sync(FULL_MASK, e, i, WARP_SIZE);
-		e = max(e, other);
-	}
-	return e;
-}
+	const float3 camPos = transform * cam.screenToCam(x,y,d0);
+	const float d = camPos.z;
 
-__device__ inline float warpMin(float e) {
-	for (int i = WARP_SIZE/2; i > 0; i /= 2) {
-		const float other = __shfl_xor_sync(FULL_MASK, e, i, WARP_SIZE);
-		e = min(e, other);
+	const uint2 screenPos = params.camera.camToScreen<uint2>(camPos);
+	const unsigned int cx = screenPos.x;
+	const unsigned int cy = screenPos.y;
+	if (d > params.camera.minDepth && d < params.camera.maxDepth && cx < depth_out.width() && cy < depth_out.height()) {
+		// Transform estimated point to virtual cam space and output z
+		atomicMin(&depth_out(cx,cy), d * 100000.0f);
 	}
-	return e;
 }
 
-#define ENERGY_THRESHOLD 0.1f
-#define SMOOTHING_MULTIPLIER_A 10.0f	// For surface search
-#define SMOOTHING_MULTIPLIER_B 4.0f		// For z contribution
-#define SMOOTHING_MULTIPLIER_C 4.0f		// For colour contribution
-
-
-/*
- * Pass 2: Determine depth buffer with enough accuracy for a visibility test in pass 2.
- * These values are also used as the actual surface estimate during rendering so should
- * at least be plane or sphere fitted if not MLS smoothed onto the actual surface.
- *
- * This version uses a previous point render as neighbour source.
- */
- __global__ void dibr_visibility_principal_kernel2(TextureObject<int> point_in, TextureObject<int> depth, SplatParams params) {
-	__shared__ float3 neighborhood_cache[2*T_PER_BLOCK][MAX_NEIGHBORS_2];
-	__shared__ int minimum[2*T_PER_BLOCK];
-	__shared__ int maximum[2*T_PER_BLOCK];
-	__shared__ unsigned int nidx[2*T_PER_BLOCK];
-
-	const int tid = (threadIdx.x + threadIdx.y * blockDim.x);
-	const int warp = tid / WARP_SIZE; //threadIdx.x / WARP_SIZE + threadIdx.y*2;
-	const int x = (blockIdx.x*blockDim.x + threadIdx.x) / WARP_SIZE;
-	const int y = blockIdx.y*blockDim.y + threadIdx.y;
-
-	// Starting point for surface minimum
-	float clusterBase = params.camera.m_sensorDepthWorldMin;
-
-	// Loop to a deeper surface if not on the first one selected...
-	while (clusterBase < params.camera.m_sensorDepthWorldMax) {
-
-	const int lane = tid % WARP_SIZE;
-	if (lane == 0) {
-		minimum[warp] = 100000000;
-		maximum[warp] = -100000000;
-		nidx[warp] = 0;
-	}
-
-	__syncwarp();
-
-	// Search for a valid minimum neighbour
-	// TODO: Should this really be minimum or the median of a depth cluster?
-	// cluster median seems very hard to calculate...
-	for (int i=lane; i<NEIGHBOR_WINDOW; i+=WARP_SIZE) {
-		const int u = (i % (2*NEIGHBOR_RADIUS_2+1)) - NEIGHBOR_RADIUS_2;
-		const int v = (i / (2*NEIGHBOR_RADIUS_2+1)) - NEIGHBOR_RADIUS_2;
-		const float3 point = params.camera.kinectDepthToSkeleton(x+u, y+v, float(point_in.tex2D(x+u, y+v)) / 1000.0f);
-		const float3 camPos = params.camera.kinectDepthToSkeleton(x, y, point.z);
-
-		// If it is close enough...
-		// TODO: smoothing / strength should be determined by a number of factors including:
-		//     1) Depth from original source
-		//     2) Colour contrast in underlying RGB
-		//     3) Estimated noise levels in depth values
-		if (point.z > clusterBase && point.z < params.camera.m_sensorDepthWorldMax && length(point - camPos) <= SMOOTHING_MULTIPLIER_A*(point.z / params.camera.fx)) {
-			atomicMin(&minimum[warp], point.z*1000.0f);
-		}
-	}
-
-	__syncwarp();
-
-	const float minDepth = float(minimum[warp])/1000.0f;
-	
-	// Preload valid neighbour points from within a window. A point is valid
-	// if it is within a specific distance of the minimum.
-	// Also calculate the maximum at the same time.
-	// TODO: Could here do a small search in each camera? This would allow all
-	// points to be considered, even those masked in our depth input.
-	const float3 minPos = params.camera.kinectDepthToSkeleton(x, y, minDepth);
 
-	for (int i=lane; i<NEIGHBOR_WINDOW; i+=WARP_SIZE) {
-		const int u = (i % (2*NEIGHBOR_RADIUS_2+1)) - NEIGHBOR_RADIUS_2;
-		const int v = (i / (2*NEIGHBOR_RADIUS_2+1)) - NEIGHBOR_RADIUS_2;
-		const float3 point = params.camera.kinectDepthToSkeleton(x+u, y+v, float(point_in.tex2D(x+u, y+v)) / 1000.0f);
-
-		// If it is close enough...
-		if (point.z > params.camera.m_sensorDepthWorldMin && point.z < params.camera.m_sensorDepthWorldMax && length(point - minPos) <= SMOOTHING_MULTIPLIER_A*(point.z / params.camera.fx)) {
-			// Append to neighbour list
-			//unsigned int idx = atomicInc(&nidx[warp], MAX_NEIGHBORS_2-1);
-			unsigned int idx = atomicAdd(&nidx[warp], 1);
-			if (idx >= MAX_NEIGHBORS_2) break;
-			neighborhood_cache[warp][idx] = point;
-			atomicMax(&maximum[warp], point.z*1000.0f);
-		}
-	}
-
-	__syncwarp();
-
-	const float maxDepth = float(maximum[warp])/1000.0f;
-	const float interval = (maxDepth - minDepth) / float(MAX_ITERATIONS);
-
-	if (minDepth >= params.camera.m_sensorDepthWorldMax) return;
-	if (maxDepth <= params.camera.m_sensorDepthWorldMin) return;
-	//if (y == 200) printf("interval: %f\n", maxDepth);
-
-	// If all samples say same depth, then agree and return
-	// TODO: Check this is valid, since small energies should be removed...
-	/*if (fabs(minDepth - maxDepth) < 0.0001f) {
-		if (lane == 0) {
-			const unsigned int cx = x;
-			const unsigned int cy = y;
-			if (minDepth < params.camera.m_sensorDepthWorldMax && cx < depth.width() && cy < depth.height()) {
-				// Transform estimated point to virtual cam space and output z
-				atomicMin(&depth(cx,cy), minDepth * 1000.0f);
-			}
-		}
-		return;
-	}*/
-
-
-	float maxenergy = -1.0f;
-	float bestdepth = 0.0f;
-
-	// Search for best or threshold energy
-	for (int k=lane; k<MAX_ITERATIONS; k+=WARP_SIZE) {
-		const float3 nearest = params.camera.kinectDepthToSkeleton(x,y,minDepth+float(k)*interval);
-		const float myenergy = ftl::cuda::mls_point_energy<MAX_NEIGHBORS_2>(neighborhood_cache[warp], nearest, min(nidx[warp], MAX_NEIGHBORS_2), SMOOTHING_MULTIPLIER_B*(nearest.z/params.camera.fx));
-		const float newenergy = warpMax(max(myenergy, maxenergy));
-		bestdepth = (myenergy == newenergy) ? nearest.z : (newenergy > maxenergy) ? 0.0f : bestdepth;
-		maxenergy = newenergy;
-	}
-
-	// If enough energy was found and this thread was the one that found the best
-	// then output the depth that this energy occured at.
-	if (bestdepth > 0.0f && maxenergy >= ENERGY_THRESHOLD) {
-		//printf("E D %f %f\n", maxenergy, bestdepth);
-		const unsigned int cx = x;
-		const unsigned int cy = y;
-		if (bestdepth > params.camera.m_sensorDepthWorldMin && bestdepth < params.camera.m_sensorDepthWorldMax && cx < depth.width() && cy < depth.height()) {
-			// Transform estimated point to virtual cam space and output z
-			atomicMin(&depth(cx,cy), bestdepth * 1000.0f);
-			//depth(cx,cy) = bestdepth * 1000.0f;
-		}
-	}
-
-	// TODO: Could the threshold depend upon the number of points? Fewer points
-	// due to distance is incorrect since really there may not be fewer points
-	// Perhaps the best option is to make it depend on depth ... really close
-	// and really far both has lower thresholds due to point densities. Other
-	// option is smoothing factor and surface distances alter with distance to
-	// vary the number of points used ... smoothing factor could be a multiple
-	// of pixel size at given distance. Density from original source is also
-	// an influencer of smoothing factor and thresholds. Colour contrast also
-	// has a weighting influence, high contrast is high certainty in the
-	// disparity so such points should have a high influence over choice of
-	// surface location.
-	//
-	// Magnitude vs dispersion factor in the energy function ...
-	//   * Mag is certainty of surface location
-	//   * Dispersion is how far to propagate that certainty,
-	if (maxenergy >= ENERGY_THRESHOLD) return;
-
-	// Move to next possible surface...
-	clusterBase = minDepth + SMOOTHING_MULTIPLIER_B*(minDepth / params.camera.fx);
-
-	};
-}
-
-// ===== Pass 2 and 3 : Attribute contributions ================================
+void ftl::cuda::dibr_merge(TextureObject<float> &depth, TextureObject<int> &depth_out, const float4x4 &transform, const ftl::rgbd::Camera &cam, Parameters params, cudaStream_t stream) {
+    const dim3 gridSize((depth.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+    const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
 
-__device__ inline float4 make_float4(const uchar4 &c) {
-    return make_float4(c.x,c.y,c.z,c.w);
+	dibr_merge_kernel<<<gridSize, blockSize, 0, stream>>>(depth, depth_out, transform, cam, params);
+    cudaSafeCall( cudaGetLastError() );
 }
 
-/*
- * Pass 2: Accumulate attribute contributions if the points pass a visibility test.
- */
-__global__ void dibr_attribute_contrib_kernel(
-        TextureObject<int> depth_in,
-        TextureObject<float4> colour_out,
-        TextureObject<float4> normal_out,
-        TextureObject<float> contrib_out, int cam,
-        SplatParams params) {
-        
-	const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam];
-
-	const int tid = (threadIdx.x + threadIdx.y * blockDim.x);
-	//const int warp = tid / WARP_SIZE;
-	const int x = (blockIdx.x*blockDim.x + threadIdx.x) / WARP_SIZE;
-	const int y = blockIdx.y*blockDim.y + threadIdx.y;
-
-	const float3 worldPos = make_float3(tex2D<float4>(camera.points, x, y));
-	//const float3 normal = make_float3(tex2D<float4>(camera.normal, x, y));
-	if (worldPos.x == MINF) return;
-    const float r = (camera.poseInverse * worldPos).z / camera.params.fx;
-
-	const float3 camPos = params.m_viewMatrix * worldPos;
-	if (camPos.z < params.camera.m_sensorDepthWorldMin) return;
-	if (camPos.z > params.camera.m_sensorDepthWorldMax) return;
-	const uint2 screenPos = params.camera.cameraToKinectScreen(camPos);
-
-    const int upsample = 8; //min(UPSAMPLE_MAX, int((5.0f*r) * params.camera.fx / camPos.z));
-
-	// Not on screen so stop now...
-	if (screenPos.x >= depth_in.width() || screenPos.y >= depth_in.height()) return;
-            
-    // Is this point near the actual surface and therefore a contributor?
-    const float d = ((float)depth_in.tex2D((int)screenPos.x, (int)screenPos.y)/1000.0f);
-    //if (abs(d - camPos.z) > DEPTH_THRESHOLD) return;
-
-    // TODO:(Nick) Should just one thread load these to shared mem?
-    const float4 colour = make_float4(tex2D<uchar4>(camera.colour, x, y));
-    const float4 normal = tex2D<float4>(camera.normal, x, y);
-
-	// Each thread in warp takes an upsample point and updates corresponding depth buffer.
-	const int lane = tid % WARP_SIZE;
-	for (int i=lane; i<upsample*upsample; i+=WARP_SIZE) {
-		const float u = (i % upsample) - (upsample / 2);
-		const float v = (i / upsample) - (upsample / 2);
-
-        // Use the depth buffer to determine this pixels 3D position in camera space
-        const float d = ((float)depth_in.tex2D(screenPos.x+u, screenPos.y+v)/1000.0f);
-		const float3 nearest = params.camera.kinectDepthToSkeleton((int)(screenPos.x+u),(int)(screenPos.y+v),d);
-
-        // What is contribution of our current point at this pixel?
-        const float weight = ftl::cuda::spatialWeighting(length(nearest - camPos), SMOOTHING_MULTIPLIER_C*(nearest.z/params.camera.fx));
-        if (screenPos.x+u < colour_out.width() && screenPos.y+v < colour_out.height() && weight > 0.0f) {  // TODO: Use confidence threshold here
-            const float4 wcolour = colour * weight;
-			const float4 wnormal = normal * weight;
-			
-			//printf("Z %f\n", d);
+void ftl::cuda::dibr_merge(TextureObject<int> &depth_out, const float4x4 &transform, const ftl::rgbd::Camera &cam, Parameters params, cudaStream_t stream) {
+    const dim3 gridSize((cam.width + T_PER_BLOCK - 1)/T_PER_BLOCK, (cam.height + T_PER_BLOCK - 1)/T_PER_BLOCK);
+    const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
 
-            // Add this points contribution to the pixel buffer
-            atomicAdd((float*)&colour_out(screenPos.x+u, screenPos.y+v), wcolour.x);
-            atomicAdd((float*)&colour_out(screenPos.x+u, screenPos.y+v)+1, wcolour.y);
-            atomicAdd((float*)&colour_out(screenPos.x+u, screenPos.y+v)+2, wcolour.z);
-            atomicAdd((float*)&colour_out(screenPos.x+u, screenPos.y+v)+3, wcolour.w);
-            atomicAdd((float*)&normal_out(screenPos.x+u, screenPos.y+v), wnormal.x);
-            atomicAdd((float*)&normal_out(screenPos.x+u, screenPos.y+v)+1, wnormal.y);
-            atomicAdd((float*)&normal_out(screenPos.x+u, screenPos.y+v)+2, wnormal.z);
-            atomicAdd((float*)&normal_out(screenPos.x+u, screenPos.y+v)+3, wnormal.w);
-            atomicAdd(&contrib_out(screenPos.x+u, screenPos.y+v), weight);
-        }
-	}
+	dibr_merge_kernel<<<gridSize, blockSize, 0, stream>>>(depth_out, transform, cam, params);
+    cudaSafeCall( cudaGetLastError() );
 }
 
-/*
- * Pass 2: Accumulate attribute contributions if the points pass a visibility test.
- */
-/*__global__ void dibr_attribute_contrib_kernel(
-    TextureObject<int> depth_in,
-	TextureObject<uchar4> colour_out,
-	TextureObject<float4> normal_out, int numcams, SplatParams params) {
-
-    const int i = threadIdx.y*blockDim.y + threadIdx.x;
-    const int bx = blockIdx.x*blockDim.x;
-    const int by = blockIdx.y*blockDim.y;
-    const int x = bx + threadIdx.x;
-    const int y = by + threadIdx.y;
-    
-    for (int j=0; j<numcams; ++j) {
-        const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[j];
-	
-        float3 worldPos = make_float3(tex2D<float4>(camera.points, x, y));
-        float r = (camera.poseInverse * worldPos).z;
-        //if (ftl::cuda::mls_point_surface<3>(camera.points, make_int2(x,y), worldPos, 0.02f) < 0.001f) continue;
-        if (worldPos.x == MINF) continue;
-        
-        const float3 camPos = params.m_viewMatrix * worldPos;
-
-        // Estimate upsample factor using ratio of source depth and output depth
-
-		const int upsample = min(15, (int)(UPSAMPLE_FACTOR * (r / camPos.z))+1);
-		const float upfactor = 2.0f / (float)(upsample);
-
-        for (int v=0; v<upsample; ++v) {
-            for (int u=0; u<upsample; ++u) {
-                float3 point;
-                const ftl::cuda::fragment nearest = ftl::cuda::upsampled_point(camera.points, camera.normal, camera.colour,
-                    make_float2((float)x-1.0f+u*upfactor,(float)y-1.0f+v*upfactor));
-                //if (ftl::cuda::mls_point_surface<3>(camera.points, make_int2(x,y), nearest, point, 0.02f) < 0.001f) continue;
-                ftl::cuda::render_fragment(depth_in, normal_out, colour_out, params, nearest);
-            }
-        }
-    }
-}*/
-
-
+// ==== Normalize ==============================================================
 
+template <typename A, typename B>
 __global__ void dibr_normalise_kernel(
-        TextureObject<float4> colour_in,
-        TextureObject<uchar4> colour_out,
-        TextureObject<float4> normals,
+        TextureObject<A> in,
+        TextureObject<B> out,
         TextureObject<float> contribs) {
-	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
-	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
+    const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
+    const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+    if (x < in.width() && y < in.height()) {
+		const float contrib = contribs.tex2D((int)x,(int)y);
+        const A a = in.tex2D((int)x,(int)y);
+        //const float4 normal = normals.tex2D((int)x,(int)y);
 
-	if (x < colour_in.width() && y < colour_in.height()) {
-        const float4 colour = colour_in.tex2D((int)x,(int)y);
-        const float4 normal = normals.tex2D((int)x,(int)y);
-        const float contrib = contribs.tex2D((int)x,(int)y);
+		//out(x,y) = (contrib == 0.0f) ? make<B>(a) : make<B>(a / contrib);
 
         if (contrib > 0.0f) {
-            colour_out(x,y) = make_uchar4(colour.x / contrib, colour.y / contrib, colour.z / contrib, 0);
-            normals(x,y) = normal / contrib;
+            out(x,y) = make<B>(a / contrib);
+            //normals(x,y) = normal / contrib;
         }
-	}
-}
-
-void ftl::cuda::dibr(const TextureObject<int> &depth_out,
-        const TextureObject<uchar4> &colour_out,
-        const TextureObject<float4> &normal_out,
-        const TextureObject<float> &confidence_out,
-		const TextureObject<float4> &tmp_colour,
-		const TextureObject<int> &tmp_depth,
-        int numcams,
-        const SplatParams &params,
-        cudaStream_t stream) {
-
-	const dim3 sgridSize((depth_out.width() + 2 - 1)/2, (depth_out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
-	const dim3 sblockSize(2*WARP_SIZE, T_PER_BLOCK);
-    const dim3 gridSize((depth_out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
-    const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
-
-    clearColourKernel<<<gridSize, blockSize, 0, stream>>>(colour_out);
-    ftl::cuda::clear_to_zero(confidence_out, stream);
-    ftl::cuda::clear_colour(tmp_colour, stream);
-    ftl::cuda::clear_colour(normal_out, stream);
-	
-#ifdef _DEBUG
-	cudaSafeCall(cudaDeviceSynchronize());
-#endif
-
-	//int i=3;
-
-	bool noSplatting = params.m_flags & ftl::render::kNoSplatting;
-
-	// Pass 1, gather and upsample depth maps
-	if (params.m_flags & ftl::render::kNoUpsampling) {
-		for (int i=0; i<numcams; ++i)
-			dibr_merge_kernel<<<gridSize, blockSize, 0, stream>>>((noSplatting) ? depth_out : tmp_depth, i, params);
-	} else {
-		for (int i=0; i<numcams; ++i)
-			dibr_merge_upsample_kernel<<<sgridSize, sblockSize, 0, stream>>>((noSplatting) ? depth_out : tmp_depth, i, params);
-	}
-
-	if (noSplatting) {
-		// Pass 3, accumulate all point contributions to pixels
-		for (int i=0; i<numcams; ++i)
-        	dibr_attribute_contrib_kernel<<<sgridSize, sblockSize, 0, stream>>>(depth_out, tmp_colour, normal_out, confidence_out, i, params);
-	} else {
-		// Pass 2
-		dibr_visibility_principal_kernel2<<<sgridSize, sblockSize, 0, stream>>>(tmp_depth, depth_out, params);
-
-		// Pass 3, accumulate all point contributions to pixels
-		for (int i=0; i<numcams; ++i)
-        	dibr_attribute_contrib_kernel<<<sgridSize, sblockSize, 0, stream>>>(depth_out, tmp_colour, normal_out, confidence_out, i, params);
-	}
-	// Pass 2
-	//dibr_visibility_principal_kernel2<<<sgridSize, sblockSize, 0, stream>>>(tmp_depth, depth_out, params);
-
-    // Pass 2, merge a depth map from each camera.
-	//for (int i=0; i<numcams; ++i)
-    //    dibr_visibility_principal_kernel<<<sgridSize, sblockSize, 0, stream>>>(depth_out, i, params);
-
-    // Pass 4, normalise contributions
-    dibr_normalise_kernel<<<gridSize, blockSize, 0, stream>>>(tmp_colour, colour_out, normal_out, confidence_out);
-
-	cudaSafeCall( cudaGetLastError() );
-	
-#ifdef _DEBUG
-	cudaSafeCall(cudaDeviceSynchronize());
-#endif
+    }
 }
 
-void ftl::cuda::dibr_raw(const TextureObject<int> &depth_out,
-    	int numcams, const SplatParams &params, cudaStream_t stream) {
-
-    const dim3 gridSize((depth_out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+template <typename A, typename B>
+void ftl::cuda::dibr_normalise(TextureObject<A> &in, TextureObject<B> &out, TextureObject<float> &contribs, cudaStream_t stream) {
+    const dim3 gridSize((in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
     const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
-	
-#ifdef _DEBUG
-	cudaSafeCall(cudaDeviceSynchronize());
-#endif
 
-	//dibr_depthmap_direct_kernel<<<gridSize, blockSize, 0, stream>>>(depth_out, numcams, params);
-	cudaSafeCall( cudaGetLastError() );
-	
-#ifdef _DEBUG
-	cudaSafeCall(cudaDeviceSynchronize());
-#endif
+    dibr_normalise_kernel<<<gridSize, blockSize, 0, stream>>>(in, out, contribs);
+    cudaSafeCall( cudaGetLastError() );
 }
 
+template void ftl::cuda::dibr_normalise<float4,uchar4>(TextureObject<float4> &in, TextureObject<uchar4> &out, TextureObject<float> &contribs, cudaStream_t stream);
+template void ftl::cuda::dibr_normalise<float,float>(TextureObject<float> &in, TextureObject<float> &out, TextureObject<float> &contribs, cudaStream_t stream);
+template void ftl::cuda::dibr_normalise<float4,float4>(TextureObject<float4> &in, TextureObject<float4> &out, TextureObject<float> &contribs, cudaStream_t stream);
diff --git a/components/renderers/cpp/src/mask.cu b/components/renderers/cpp/src/mask.cu
index 6e9621c8064359021de616c8662f3b4227ba1806..1adb4fb75c4c25d4ee70e8d1f378a43702142745 100644
--- a/components/renderers/cpp/src/mask.cu
+++ b/components/renderers/cpp/src/mask.cu
@@ -3,7 +3,6 @@
 
 using ftl::cuda::TextureObject;
 using ftl::cuda::Mask;
-using ftl::render::SplatParams;
 
 #define T_PER_BLOCK 16
 
diff --git a/components/renderers/cpp/src/reprojection.cu b/components/renderers/cpp/src/reprojection.cu
index 79b01ff534cf36d26c4d58c3e15d3b32fbd95020..808501a4ad66406e284aa7030a939e1e24038c85 100644
--- a/components/renderers/cpp/src/reprojection.cu
+++ b/components/renderers/cpp/src/reprojection.cu
@@ -1,4 +1,4 @@
-#include <ftl/render/splat_params.hpp>
+#include <ftl/render/render_params.hpp>
 #include "splatter_cuda.hpp"
 #include <ftl/rgbd/camera.hpp>
 #include <ftl/cuda_common.hpp>
@@ -10,8 +10,9 @@
 #define ACCUM_DIAMETER 8
 
 using ftl::cuda::TextureObject;
-using ftl::render::SplatParams;
+using ftl::render::Parameters;
 using ftl::rgbd::Camera;
+using ftl::render::ViewPortMode;
 
 /*template <typename T>
 __device__ inline T generateInput(const T &in, const SplatParams &params, const float4 &worldPos) {
@@ -54,10 +55,20 @@ __device__ inline void accumulateOutput(TextureObject<float4> &out, TextureObjec
 	atomicAdd(&contrib(pos.x, pos.y), w);
 } 
 
+template <ViewPortMode VPMODE>
+__device__ inline uint2 convertScreen(const Parameters &params, int x, int y) {
+	return make_uint2(x,y);
+}
+
+template <>
+__device__ inline uint2 convertScreen<ViewPortMode::Warping>(const Parameters &params, int x, int y) {
+	return params.viewport.reverseMap(params.camera, make_float2(x,y));
+}
+
 /*
- * Pass 2: Accumulate attribute contributions if the points pass a visibility test.
+ * Full reprojection with normals and depth
  */
- template <typename A, typename B>
+ template <typename A, typename B, ViewPortMode VPMODE>
 __global__ void reprojection_kernel(
         TextureObject<A> in,				// Attribute input
         TextureObject<float> depth_src,
@@ -65,7 +76,7 @@ __global__ void reprojection_kernel(
 		TextureObject<float4> normals,
 		TextureObject<B> out,			// Accumulated output
 		TextureObject<float> contrib,
-		SplatParams params,
+		Parameters params,
 		Camera camera, float4x4 transform, float3x3 transformR) {
         
 	const int x = (blockIdx.x*blockDim.x + threadIdx.x);
@@ -77,7 +88,9 @@ __global__ void reprojection_kernel(
 	//const float3 worldPos = params.m_viewMatrixInverse * params.camera.screenToCam(x, y, d);
 	//if (worldPos.x == MINF || (!(params.m_flags & ftl::render::kShowDisconMask) && worldPos.w < 0.0f)) return;
 
-	const float3 camPos = transform * params.camera.screenToCam(x, y, d);
+	//const uint2 rpt = make_uint2(x,y);
+	const uint2 rpt = convertScreen<VPMODE>(params, x, y);
+	const float3 camPos = transform * params.camera.screenToCam(rpt.x, rpt.y, d);
 	if (camPos.z < camera.minDepth) return;
 	if (camPos.z > camera.maxDepth) return;
 	const float2 screenPos = camera.camToScreen<float2>(camPos);
@@ -101,6 +114,7 @@ __global__ void reprojection_kernel(
 
 	// TODO: Z checks need to interpolate between neighbors if large triangles are used
 	//float weight = ftl::cuda::weighting(fabs(camPos.z - d2), params.depthThreshold);
+	// TODO: Depth threshold should relate to pixel size
 	float weight = (fabs(camPos.z - d2) <= params.depthThreshold) ? 1.0f : 0.0f;
 
 	/* Buehler C. et al. 2001. Unstructured Lumigraph Rendering. */
@@ -128,24 +142,18 @@ void ftl::cuda::reproject(
 		TextureObject<float4> &normals,
 		TextureObject<B> &out,   // Accumulated output
 		TextureObject<float> &contrib,
-		const SplatParams &params,
-		const Camera &camera, const float4x4 &transform, const float3x3 &transformR, cudaStream_t stream) {
+		const Parameters &params,
+		const Camera &camera, const float4x4 &transform, const float3x3 &transformR,
+		cudaStream_t stream) {
 	const dim3 gridSize((out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
 	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
 
-    reprojection_kernel<<<gridSize, blockSize, 0, stream>>>(
-        in,
-        depth_src,
-		depth_in,
-		normals,
-		out,
-		contrib,
-		params,
-		camera,
-		transform,
-		transformR
-    );
-    cudaSafeCall( cudaGetLastError() );
+	switch (params.viewPortMode) {
+    case ViewPortMode::Disabled: reprojection_kernel<A,B,ViewPortMode::Disabled><<<gridSize, blockSize, 0, stream>>>(in, depth_src, depth_in, normals, out, contrib, params, camera, transform, transformR); break;
+	case ViewPortMode::Clipping: reprojection_kernel<A,B,ViewPortMode::Clipping><<<gridSize, blockSize, 0, stream>>>(in, depth_src, depth_in, normals, out, contrib, params, camera, transform, transformR); break;
+	case ViewPortMode::Warping: reprojection_kernel<A,B,ViewPortMode::Warping><<<gridSize, blockSize, 0, stream>>>(in, depth_src, depth_in, normals, out, contrib, params, camera, transform, transformR); break;
+	}
+	cudaSafeCall( cudaGetLastError() );
 }
 
 template void ftl::cuda::reproject(
@@ -155,9 +163,10 @@ template void ftl::cuda::reproject(
 	ftl::cuda::TextureObject<float4> &normals,
 	ftl::cuda::TextureObject<float4> &out,	// Accumulated output
 	ftl::cuda::TextureObject<float> &contrib,
-	const ftl::render::SplatParams &params,
+	const ftl::render::Parameters &params,
 	const ftl::rgbd::Camera &camera,
-	const float4x4 &transform, const float3x3 &transformR, cudaStream_t stream);
+	const float4x4 &transform, const float3x3 &transformR,
+	cudaStream_t stream);
 
 template void ftl::cuda::reproject(
 		ftl::cuda::TextureObject<float> &in,	// Original colour image
@@ -166,9 +175,10 @@ template void ftl::cuda::reproject(
 		ftl::cuda::TextureObject<float4> &normals,
 		ftl::cuda::TextureObject<float> &out,	// Accumulated output
 		ftl::cuda::TextureObject<float> &contrib,
-		const ftl::render::SplatParams &params,
+		const ftl::render::Parameters &params,
 		const ftl::rgbd::Camera &camera,
-		const float4x4 &transform, const float3x3 &transformR, cudaStream_t stream);
+		const float4x4 &transform, const float3x3 &transformR,
+		cudaStream_t stream);
 
 template void ftl::cuda::reproject(
 		ftl::cuda::TextureObject<float4> &in,	// Original colour image
@@ -177,9 +187,10 @@ template void ftl::cuda::reproject(
 		ftl::cuda::TextureObject<float4> &normals,
 		ftl::cuda::TextureObject<float4> &out,	// Accumulated output
 		ftl::cuda::TextureObject<float> &contrib,
-		const ftl::render::SplatParams &params,
+		const ftl::render::Parameters &params,
 		const ftl::rgbd::Camera &camera,
-		const float4x4 &transform, const float3x3 &transformR, cudaStream_t stream);
+		const float4x4 &transform, const float3x3 &transformR,
+		cudaStream_t stream);
 
 //==============================================================================
 //  Without normals
@@ -195,7 +206,7 @@ __global__ void reprojection_kernel(
 		TextureObject<float> depth_in,        // Virtual depth map
 		TextureObject<B> out,			// Accumulated output
 		TextureObject<float> contrib,
-		SplatParams params,
+		Parameters params,
 		Camera camera, float4x4 poseInv) {
         
 	const int x = (blockIdx.x*blockDim.x + threadIdx.x);
@@ -238,7 +249,7 @@ void ftl::cuda::reproject(
 		TextureObject<float> &depth_in,        // Virtual depth map
 		TextureObject<B> &out,   // Accumulated output
 		TextureObject<float> &contrib,
-		const SplatParams &params,
+		const Parameters &params,
 		const Camera &camera, const float4x4 &poseInv, cudaStream_t stream) {
 	const dim3 gridSize((out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
 	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
@@ -262,7 +273,7 @@ template void ftl::cuda::reproject(
 	ftl::cuda::TextureObject<float> &depth_in,		// Virtual depth map
 	ftl::cuda::TextureObject<float4> &out,	// Accumulated output
 	ftl::cuda::TextureObject<float> &contrib,
-	const ftl::render::SplatParams &params,
+	const ftl::render::Parameters &params,
 	const ftl::rgbd::Camera &camera,
 	const float4x4 &poseInv, cudaStream_t stream);
 
@@ -272,7 +283,7 @@ template void ftl::cuda::reproject(
 		ftl::cuda::TextureObject<float> &depth_in,		// Virtual depth map
 		ftl::cuda::TextureObject<float> &out,	// Accumulated output
 		ftl::cuda::TextureObject<float> &contrib,
-		const ftl::render::SplatParams &params,
+		const ftl::render::Parameters &params,
 		const ftl::rgbd::Camera &camera,
 		const float4x4 &poseInv, cudaStream_t stream);
 
@@ -282,7 +293,7 @@ template void ftl::cuda::reproject(
 		ftl::cuda::TextureObject<float> &depth_in,		// Virtual depth map
 		ftl::cuda::TextureObject<float4> &out,	// Accumulated output
 		ftl::cuda::TextureObject<float> &contrib,
-		const ftl::render::SplatParams &params,
+		const ftl::render::Parameters &params,
 		const ftl::rgbd::Camera &camera,
 		const float4x4 &poseInv, cudaStream_t stream);
 
@@ -299,7 +310,7 @@ __global__ void reprojection_kernel(
 		TextureObject<float> depth_in,        // Virtual depth map
 		TextureObject<B> out,			// Accumulated output
 		TextureObject<float> contrib,
-		SplatParams params,
+		Parameters params,
 		Camera camera, float4x4 poseInv) {
         
 	const int x = (blockIdx.x*blockDim.x + threadIdx.x);
@@ -339,7 +350,7 @@ void ftl::cuda::reproject(
 		TextureObject<float> &depth_in,        // Virtual depth map
 		TextureObject<B> &out,   // Accumulated output
 		TextureObject<float> &contrib,
-		const SplatParams &params,
+		const Parameters &params,
 		const Camera &camera, const float4x4 &poseInv, cudaStream_t stream) {
 	const dim3 gridSize((out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
 	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
@@ -361,7 +372,7 @@ template void ftl::cuda::reproject(
 	ftl::cuda::TextureObject<float> &depth_in,		// Virtual depth map
 	ftl::cuda::TextureObject<float4> &out,	// Accumulated output
 	ftl::cuda::TextureObject<float> &contrib,
-	const ftl::render::SplatParams &params,
+	const ftl::render::Parameters &params,
 	const ftl::rgbd::Camera &camera,
 	const float4x4 &poseInv, cudaStream_t stream);
 
@@ -370,7 +381,7 @@ template void ftl::cuda::reproject(
 		ftl::cuda::TextureObject<float> &depth_in,		// Virtual depth map
 		ftl::cuda::TextureObject<float> &out,	// Accumulated output
 		ftl::cuda::TextureObject<float> &contrib,
-		const ftl::render::SplatParams &params,
+		const ftl::render::Parameters &params,
 		const ftl::rgbd::Camera &camera,
 		const float4x4 &poseInv, cudaStream_t stream);
 
@@ -379,7 +390,7 @@ template void ftl::cuda::reproject(
 		ftl::cuda::TextureObject<float> &depth_in,		// Virtual depth map
 		ftl::cuda::TextureObject<float4> &out,	// Accumulated output
 		ftl::cuda::TextureObject<float> &contrib,
-		const ftl::render::SplatParams &params,
+		const ftl::render::Parameters &params,
 		const ftl::rgbd::Camera &camera,
 		const float4x4 &poseInv, cudaStream_t stream);
 
@@ -484,3 +495,38 @@ void ftl::cuda::fix_bad_colour(
 	fix_colour_kernel<1><<<gridSize, blockSize, 0, stream>>>(depth, out, contribs, bad_colour, cam);
 	cudaSafeCall( cudaGetLastError() );
 }
+
+// ===== Show bad colour normalise =============================================
+
+__global__ void show_missing_colour_kernel(
+		TextureObject<float> depth,
+		TextureObject<uchar4> out,
+		TextureObject<float> contribs,
+		uchar4 bad_colour,
+		ftl::rgbd::Camera cam) {
+	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
+	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	if (x < out.width() && y < out.height()) {
+		const float contrib = contribs.tex2D((int)x,(int)y);
+		const float d = depth.tex2D(x,y);
+
+		if (contrib < 0.0000001f && d > cam.minDepth && d < cam.maxDepth) {
+			out(x,y) = bad_colour;
+		}
+	}
+}
+
+void ftl::cuda::show_missing_colour(
+		TextureObject<float> &depth,
+		TextureObject<uchar4> &out,
+		TextureObject<float> &contribs,
+		uchar4 bad_colour,
+		const ftl::rgbd::Camera &cam,
+		cudaStream_t stream) {
+	const dim3 gridSize((out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
+
+	show_missing_colour_kernel<<<gridSize, blockSize, 0, stream>>>(depth, out, contribs, bad_colour, cam);
+	cudaSafeCall( cudaGetLastError() );
+}
diff --git a/components/renderers/cpp/src/screen.cu b/components/renderers/cpp/src/screen.cu
index f12f255f0b9a3fbdd5f418864567f5688e489ba2..e9cd06f4145c7cb2fd4947387f711af376c68912 100644
--- a/components/renderers/cpp/src/screen.cu
+++ b/components/renderers/cpp/src/screen.cu
@@ -1,51 +1,76 @@
-#include <ftl/render/splat_params.hpp>
+#include <ftl/render/render_params.hpp>
 #include "splatter_cuda.hpp"
 #include <ftl/rgbd/camera.hpp>
 #include <ftl/cuda_common.hpp>
 
 using ftl::rgbd::Camera;
 using ftl::cuda::TextureObject;
-using ftl::render::SplatParams;
+using ftl::render::Parameters;
+using ftl::render::ViewPortMode;
 
 #define T_PER_BLOCK 8
 
+template <ViewPortMode VPMODE>
+__device__ inline uint2 convertToScreen(const Parameters &params, const float3 &camPos);
+
+template <>
+__device__ inline uint2 convertToScreen<ViewPortMode::Disabled>(const Parameters &params, const float3 &camPos) {
+	return  params.camera.camToScreen<uint2>(camPos);
+}
+
+template <>
+__device__ inline uint2 convertToScreen<ViewPortMode::Clipping>(const Parameters &params, const float3 &camPos) {
+	const uint2 r = params.camera.camToScreen<uint2>(camPos);
+	return (params.viewport.inside(r.x, r.y)) ? r : make_uint2(30000, 30000);
+}
+
+template <>
+__device__ inline uint2 convertToScreen<ViewPortMode::Warping>(const Parameters &params, const float3 &camPos) {
+	return params.viewport.map(params.camera, params.camera.camToScreen<float2>(camPos));
+}
+
 /*
  * Convert source screen position to output screen coordinates.
  */
+ template <ftl::render::ViewPortMode VPMODE>
  __global__ void screen_coord_kernel(TextureObject<float> depth,
         TextureObject<float> depth_out,
-		TextureObject<short2> screen_out, Camera vcamera, float4x4 pose, Camera camera) {
+		TextureObject<short2> screen_out, Parameters params, float4x4 pose, Camera camera) {
 	const int x = blockIdx.x*blockDim.x + threadIdx.x;
 	const int y = blockIdx.y*blockDim.y + threadIdx.y;
 
 	if (x >= 0 && y >= 0 && x < depth.width() && y < depth.height()) {
 		uint2 screenPos = make_uint2(30000,30000);
-		//screen_out(x,y) = make_short2(screenPos.x, screenPos.y);
 
 		const float d = depth.tex2D(x, y);
-		//const float3 worldPos = pose * camera.screenToCam(x,y,d);
-		//if (d < camera.minDepth || d > camera.maxDepth) return;
 
 		// Find the virtual screen position of current point
-		const float3 camPos =  (d >= camera.minDepth && d <= camera.maxDepth) ? pose * camera.screenToCam(x,y,d) : make_float3(0.0f,0.0f,0.0f); // worldPos;  // params.m_viewMatrix *
-		screenPos = vcamera.camToScreen<uint2>(camPos);
-
-		if (	camPos.z < vcamera.minDepth ||
-				camPos.z > vcamera.maxDepth ||
-				screenPos.x >= vcamera.width ||
-				screenPos.y >= vcamera.height)
+		const float3 camPos =  (d >= camera.minDepth && d <= camera.maxDepth) ? pose * camera.screenToCam(x,y,d) : make_float3(0.0f,0.0f,0.0f);
+		screenPos = convertToScreen<VPMODE>(params, camPos);
+
+		if (	camPos.z < params.camera.minDepth ||
+				camPos.z > params.camera.maxDepth ||
+				//!vp.inside(screenPos.x, screenPos.y))
+				screenPos.x >= params.camera.width ||
+				screenPos.y >= params.camera.height)
 			screenPos = make_uint2(30000,30000);
 		screen_out(x,y) = make_short2(screenPos.x, screenPos.y);
 		depth_out(x,y) = camPos.z;
 	}
 }
 
-void ftl::cuda::screen_coord(TextureObject<float> &depth, TextureObject<float> &depth_out, TextureObject<short2> &screen_out, const SplatParams &params, const float4x4 &pose, const Camera &camera, cudaStream_t stream) {
+void ftl::cuda::screen_coord(TextureObject<float> &depth, TextureObject<float> &depth_out,
+		TextureObject<short2> &screen_out, const Parameters &params,
+		const float4x4 &pose, const Camera &camera, cudaStream_t stream) {
     const dim3 gridSize((depth.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
     const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
 
-	screen_coord_kernel<<<gridSize, blockSize, 0, stream>>>(depth, depth_out, screen_out, params.camera, pose, camera);
-    cudaSafeCall( cudaGetLastError() );
+	switch (params.viewPortMode) {
+	case ViewPortMode::Disabled: screen_coord_kernel<ViewPortMode::Disabled><<<gridSize, blockSize, 0, stream>>>(depth, depth_out, screen_out, params, pose, camera); break;
+	case ViewPortMode::Clipping: screen_coord_kernel<ViewPortMode::Clipping><<<gridSize, blockSize, 0, stream>>>(depth, depth_out, screen_out, params, pose, camera); break;
+	case ViewPortMode::Warping: screen_coord_kernel<ViewPortMode::Warping><<<gridSize, blockSize, 0, stream>>>(depth, depth_out, screen_out, params, pose, camera); break;
+	}
+	cudaSafeCall( cudaGetLastError() );
 }
 
 
@@ -61,12 +86,12 @@ void ftl::cuda::screen_coord(TextureObject<float> &depth, TextureObject<float> &
 	const int y = blockIdx.y*blockDim.y + threadIdx.y;
 
 	if (x >= 0 && y >= 0 && x < depth_out.width() && y < depth_out.height()) {
-		uint2 screenPos = make_uint2(30000,30000);
+		//uint2 screenPos = make_uint2(30000,30000);
 		const float d = camera.maxDepth;
 
 		// Find the virtual screen position of current point
 		const float3 camPos = pose * camera.screenToCam(x,y,d);
-		screenPos = vcamera.camToScreen<uint2>(camPos);
+		uint2 screenPos = vcamera.camToScreen<uint2>(camPos);
 
 		if (	camPos.z < vcamera.minDepth ||
 				camPos.z > vcamera.maxDepth ||
@@ -79,7 +104,7 @@ void ftl::cuda::screen_coord(TextureObject<float> &depth, TextureObject<float> &
 	}
 }
 
-void ftl::cuda::screen_coord(TextureObject<float> &depth_out, TextureObject<short2> &screen_out, const SplatParams &params, const float4x4 &pose, const Camera &camera, cudaStream_t stream) {
+void ftl::cuda::screen_coord(TextureObject<float> &depth_out, TextureObject<short2> &screen_out, const Parameters &params, const float4x4 &pose, const Camera &camera, cudaStream_t stream) {
 	const dim3 gridSize((screen_out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (screen_out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
 	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
 
diff --git a/components/renderers/cpp/src/splatter_cuda.hpp b/components/renderers/cpp/src/splatter_cuda.hpp
index 33f0340f2a94007a87981dbf400e953869624392..fcdcaa121c6e52994f54ecf82603edeb88f50775 100644
--- a/components/renderers/cpp/src/splatter_cuda.hpp
+++ b/components/renderers/cpp/src/splatter_cuda.hpp
@@ -2,7 +2,7 @@
 #define _FTL_RECONSTRUCTION_SPLAT_CUDA_HPP_
 
 #include <ftl/cuda_common.hpp>
-#include <ftl/render/splat_params.hpp>
+#include <ftl/render/render_params.hpp>
 
 namespace ftl {
 namespace cuda {
@@ -10,7 +10,7 @@ namespace cuda {
 		ftl::cuda::TextureObject<float> &depth,
 		ftl::cuda::TextureObject<float> &depth_out,
 		ftl::cuda::TextureObject<short2> &screen_out,
-		const ftl::render::SplatParams &params,
+		const ftl::render::Parameters &params,
 		const float4x4 &pose,
 		const ftl::rgbd::Camera &camera,
 		cudaStream_t stream);
@@ -18,7 +18,7 @@ namespace cuda {
 	void screen_coord(
 		ftl::cuda::TextureObject<float> &depth_out,
 		ftl::cuda::TextureObject<short2> &screen_out,
-		const ftl::render::SplatParams &params,
+		const ftl::render::Parameters &params,
 		const float4x4 &pose,
 		const ftl::rgbd::Camera &camera,
 		cudaStream_t stream);
@@ -27,7 +27,7 @@ namespace cuda {
 		ftl::cuda::TextureObject<float> &depth_in,
 		ftl::cuda::TextureObject<int> &depth_out,
 		ftl::cuda::TextureObject<short2> &screen,
-		const ftl::render::SplatParams &params,
+		const ftl::render::Parameters &params,
 		cudaStream_t stream);
 
 	void mesh_blender(
@@ -41,14 +41,14 @@ namespace cuda {
 		ftl::cuda::TextureObject<float4> &points,
 		ftl::cuda::TextureObject<float4> &normals,
 		ftl::cuda::TextureObject<int> &depth,
-		ftl::render::SplatParams params,
+		ftl::render::Parameters params,
 		bool culling,
 		cudaStream_t stream);
 
 	void dibr_merge(
 		ftl::cuda::TextureObject<float4> &points,
 		ftl::cuda::TextureObject<int> &depth,
-		ftl::render::SplatParams params,
+		ftl::render::Parameters params,
 		cudaStream_t stream);
 
 	void dibr_merge(
@@ -56,14 +56,14 @@ namespace cuda {
 		ftl::cuda::TextureObject<int> &depth_out,
 		const float4x4 &transform,
 		const ftl::rgbd::Camera &cam,
-		ftl::render::SplatParams params,
+		ftl::render::Parameters params,
 		cudaStream_t stream);
 
 	void dibr_merge(
 		ftl::cuda::TextureObject<int> &depth_out,
 		const float4x4 &transform,
 		const ftl::rgbd::Camera &cam,
-		ftl::render::SplatParams params,
+		ftl::render::Parameters params,
 		cudaStream_t stream);
 
 	template <typename T>
@@ -74,7 +74,7 @@ namespace cuda {
         ftl::cuda::TextureObject<int> &depth_in,        // Virtual depth map
         ftl::cuda::TextureObject<float> &depth_out,
 		ftl::cuda::TextureObject<T> &colour_out,
-        const ftl::render::SplatParams &params, cudaStream_t stream);
+        const ftl::render::Parameters &params, cudaStream_t stream);
 
 	template <typename A, typename B>
 	void dibr_attribute(
@@ -83,7 +83,7 @@ namespace cuda {
 		ftl::cuda::TextureObject<int> &depth_in,		// Virtual depth map
 		ftl::cuda::TextureObject<B> &out,	// Accumulated output
 		ftl::cuda::TextureObject<float> &contrib,
-		ftl::render::SplatParams &params, cudaStream_t stream);
+		ftl::render::Parameters &params, cudaStream_t stream);
 
 	template <typename A, typename B>
 	void reproject(
@@ -93,9 +93,10 @@ namespace cuda {
 		ftl::cuda::TextureObject<float4> &normals,
 		ftl::cuda::TextureObject<B> &out,	// Accumulated output
 		ftl::cuda::TextureObject<float> &contrib,
-		const ftl::render::SplatParams &params,
+		const ftl::render::Parameters &params,
 		const ftl::rgbd::Camera &camera,
-		const float4x4 &transform, const float3x3 &transformR, cudaStream_t stream);
+		const float4x4 &transform, const float3x3 &transformR,
+		cudaStream_t stream);
 
 	template <typename A, typename B>
 	void reproject(
@@ -104,7 +105,7 @@ namespace cuda {
 		ftl::cuda::TextureObject<float> &depth_in,		// Virtual depth map
 		ftl::cuda::TextureObject<B> &out,	// Accumulated output
 		ftl::cuda::TextureObject<float> &contrib,
-		const ftl::render::SplatParams &params,
+		const ftl::render::Parameters &params,
 		const ftl::rgbd::Camera &camera,
 		const float4x4 &poseInv, cudaStream_t stream);
 
@@ -114,7 +115,7 @@ namespace cuda {
 		ftl::cuda::TextureObject<float> &depth_in,		// Virtual depth map
 		ftl::cuda::TextureObject<B> &out,	// Accumulated output
 		ftl::cuda::TextureObject<float> &contrib,
-		const ftl::render::SplatParams &params,
+		const ftl::render::Parameters &params,
 		const ftl::rgbd::Camera &camera,
 		const float4x4 &poseInv, cudaStream_t stream);
 
diff --git a/components/renderers/cpp/src/triangle_render.cu b/components/renderers/cpp/src/triangle_render.cu
index d183777c65447443540733a2ae8ee97a3e293b35..df7b5dea99a5f88050016302e5df984f212bc7b9 100644
--- a/components/renderers/cpp/src/triangle_render.cu
+++ b/components/renderers/cpp/src/triangle_render.cu
@@ -1,4 +1,4 @@
-#include <ftl/render/splat_params.hpp>
+#include <ftl/render/render_params.hpp>
 #include "splatter_cuda.hpp"
 #include <ftl/rgbd/camera.hpp>
 #include <ftl/cuda_common.hpp>
@@ -7,7 +7,7 @@
 
 using ftl::rgbd::Camera;
 using ftl::cuda::TextureObject;
-using ftl::render::SplatParams;
+using ftl::render::Parameters;
 
 #define T_PER_BLOCK 8
 
@@ -107,7 +107,7 @@ float getZAtCoordinate(const float3 &barycentricCoord, const float (&tri)[3]) {
  * being inside or outside (using bary centric coordinate method). If inside
  * then atomically write to the depth map.
  */
-__device__ void drawTriangle(const float (&d)[3], const short2 (&v)[3], const SplatParams &params, TextureObject<int> &depth_out) {
+__device__ void drawTriangle(const float (&d)[3], const short2 (&v)[3], const Parameters &params, TextureObject<int> &depth_out) {
 	const int minX = min(v[0].x, min(v[1].x, v[2].x));
 	const int minY = min(v[0].y, min(v[1].y, v[2].y));
 	const int maxX = max(v[0].x, max(v[1].x, v[2].x));
@@ -136,7 +136,7 @@ __device__ void drawTriangle(const float (&d)[3], const short2 (&v)[3], const Sp
  * hence that a triangle should not be draw between said verticies.
  * TODO: Use discontinuity mask or some better test here.
  */
-__device__ inline bool isValidTriangle(const float (&d)[3], const SplatParams &params) {
+__device__ inline bool isValidTriangle(const float (&d)[3], const Parameters &params) {
 	return !(fabs(d[0] - d[1]) > params.depthThreshold || fabs(d[0] - d[2]) > params.depthThreshold || d[0] < params.camera.minDepth || d[0] > params.camera.maxDepth);
 }
 
@@ -145,7 +145,7 @@ __device__ inline bool isValidTriangle(const float (&d)[3], const SplatParams &p
  * which verticies to load.
  */
 template <int A, int B>
-__device__ bool loadTriangle(int x, int y, float (&d)[3], short2 (&v)[3], const SplatParams &params, const TextureObject<float> &depth_in, const TextureObject<short2> &screen) {
+__device__ bool loadTriangle(int x, int y, float (&d)[3], short2 (&v)[3], const Parameters &params, const TextureObject<float> &depth_in, const TextureObject<short2> &screen) {
     d[1] = depth_in.tex2D(x+A,y);
     d[2] = depth_in.tex2D(x,y+B);
     v[1] = screen.tex2D(x+A,y);
@@ -159,7 +159,7 @@ __device__ bool loadTriangle(int x, int y, float (&d)[3], short2 (&v)[3], const
  __global__ void triangle_render_kernel(
         TextureObject<float> depth_in,
         TextureObject<int> depth_out,
-		TextureObject<short2> screen, SplatParams params) {
+		TextureObject<short2> screen, Parameters params) {
 	const int x = blockIdx.x*blockDim.x + threadIdx.x;
 	const int y = blockIdx.y*blockDim.y + threadIdx.y;
 
@@ -178,7 +178,7 @@ __device__ bool loadTriangle(int x, int y, float (&d)[3], short2 (&v)[3], const
 	}
 }
 
-void ftl::cuda::triangle_render1(TextureObject<float> &depth_in, TextureObject<int> &depth_out, TextureObject<short2> &screen, const SplatParams &params, cudaStream_t stream) {
+void ftl::cuda::triangle_render1(TextureObject<float> &depth_in, TextureObject<int> &depth_out, TextureObject<short2> &screen, const Parameters &params, cudaStream_t stream) {
     const dim3 gridSize((depth_in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
     const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
 
diff --git a/components/rgbd-sources/src/sources/stereovideo/stereovideo.cpp b/components/rgbd-sources/src/sources/stereovideo/stereovideo.cpp
index 683c6a98f4146230a3d2bb90e55fec457138ba60..bf0b01c8229931f7f80f7da8fa739e3bf121d1e1 100644
--- a/components/rgbd-sources/src/sources/stereovideo/stereovideo.cpp
+++ b/components/rgbd-sources/src/sources/stereovideo/stereovideo.cpp
@@ -106,10 +106,10 @@ void StereoVideoSource::init(const string &file) {
 		// set use config file/set (some) default values
 
 		cv::Mat K = cv::Mat::eye(cv::Size(3, 3), CV_64FC1);
-		K.at<double>(0,0) = host_->value("focal", 500.0);
-		K.at<double>(1,1) = host_->value("focal", 500.0);
-		K.at<double>(0,2) = host_->value("centre_x", -color_size_.width/2.0f);
-		K.at<double>(1,2) = host_->value("centre_y", -color_size_.height/2.0f);
+		K.at<double>(0,0) = host_->value("focal", 800.0);
+		K.at<double>(1,1) = host_->value("focal", 800.0);
+		K.at<double>(0,2) = host_->value("centre_x", color_size_.width/2.0f);
+		K.at<double>(1,2) = host_->value("centre_y", color_size_.height/2.0f);
 
 		calib_->setIntrinsics(color_size_, {K, K});
 	}
diff --git a/components/structures/include/ftl/data/frame.hpp b/components/structures/include/ftl/data/frame.hpp
index 5e8ab1a075b5c5dff7d38fc7862f5744408dd986..3a33dc086114ccf8fa288761170d5943bc9bb5fa 100644
--- a/components/structures/include/ftl/data/frame.hpp
+++ b/components/structures/include/ftl/data/frame.hpp
@@ -304,6 +304,7 @@ void ftl::data::Frame<BASE,N,STATE,DATA>::swapTo(ftl::codecs::Channels<BASE> cha
 		// Should we swap this channel?
 		if (channels.has(c)) {
 			f.channels_ += c;
+			// TODO: Make sure this does a move not copy
 			std::swap(f.getData(c),getData(c));
 		}
 	}