diff --git a/components/codecs/include/ftl/codecs/channels.hpp b/components/codecs/include/ftl/codecs/channels.hpp
index d1cf07a3aeb55863acb3374fa3c733b12e5ca240..11262579e73d6d62424f2515ce021265d94ea529 100644
--- a/components/codecs/include/ftl/codecs/channels.hpp
+++ b/components/codecs/include/ftl/codecs/channels.hpp
@@ -17,6 +17,7 @@ enum struct Channel : int {
     Disparity		= 3,
     Depth2			= 3,
     Deviation		= 4,
+    Screen          = 4,
     Normals			= 5,	// 32FC4
     Points			= 6,	// 32FC4
     Confidence		= 7,	// 32F
diff --git a/components/renderers/cpp/CMakeLists.txt b/components/renderers/cpp/CMakeLists.txt
index 8fc7996efcbad8502bcc3dce0fef4fee8ffd327d..45a6e30d890c405d45589effa6fbd1554472a56f 100644
--- a/components/renderers/cpp/CMakeLists.txt
+++ b/components/renderers/cpp/CMakeLists.txt
@@ -4,6 +4,8 @@ add_library(ftlrender
 	src/points.cu
 	src/normals.cu
 	src/mask.cu
+	src/screen.cu
+	src/triangle_render.cu
 )
 
 # These cause errors in CI build and are being removed from PCL in newer versions
diff --git a/components/renderers/cpp/src/screen.cu b/components/renderers/cpp/src/screen.cu
new file mode 100644
index 0000000000000000000000000000000000000000..91f24c840e031e4aef0487274f2ed05f8315cd51
--- /dev/null
+++ b/components/renderers/cpp/src/screen.cu
@@ -0,0 +1,42 @@
+#include <ftl/render/splat_params.hpp>
+#include "splatter_cuda.hpp"
+#include <ftl/rgbd/camera.hpp>
+#include <ftl/cuda_common.hpp>
+
+using ftl::rgbd::Camera;
+using ftl::cuda::TextureObject;
+using ftl::render::SplatParams;
+
+#define T_PER_BLOCK 8
+
+/*
+ * Project each source depth pixel into the virtual camera and store its
+ * output screen coordinate in screen_out. Pixels whose depth lies outside
+ * the source or virtual camera depth range, or which fall outside the
+ * virtual image, are stored as the sentinel (30000,30000).
+ * Expects a 2D launch with at least one thread per pixel of depth.
+ */
+ __global__ void screen_coord_kernel(TextureObject<float> depth,
+		TextureObject<short2> screen_out, SplatParams params, float4x4 pose, Camera camera) {
+	const int x = blockIdx.x*blockDim.x + threadIdx.x;
+	const int y = blockIdx.y*blockDim.y + threadIdx.y;
+	// The grid is rounded up to whole blocks, so surplus threads must not write.
+	if (x >= depth.width() || y >= depth.height()) return;
+	uint2 screenPos = make_uint2(30000,30000);
+	const float d = depth.tex2D(x, y);
+	if (!(d < camera.minDepth || d > camera.maxDepth)) {
+		// Source camera space -> world (pose) -> virtual camera (view matrix).
+		const float3 camPos = params.m_viewMatrix * (pose * camera.screenToCam(x,y,d));
+		const uint2 p = params.camera.camToScreen<uint2>(camPos);
+		if (!(camPos.z < params.camera.minDepth || camPos.z > params.camera.maxDepth || p.x >= params.camera.width || p.y >= params.camera.height)) screenPos = p;
+	}
+	screen_out(x,y) = make_short2(screenPos.x, screenPos.y);
+}
+
+// Launches screen_coord_kernel on `stream`; the grid is rounded up so partial tiles at the image edges are covered.
+void ftl::cuda::screen_coord(TextureObject<float> &depth, TextureObject<short2> &screen_out, const SplatParams &params, const float4x4 &pose, const Camera &camera, cudaStream_t stream) {
+	const dim3 block(T_PER_BLOCK, T_PER_BLOCK);
+	const dim3 grid((depth.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+	screen_coord_kernel<<<grid, block, 0, stream>>>(depth, screen_out, params, pose, camera);
+	cudaSafeCall( cudaGetLastError() );  // surfaces launch-configuration errors only
+}
diff --git a/components/renderers/cpp/src/splat_render.cpp b/components/renderers/cpp/src/splat_render.cpp
index 30c47dc1179018a552d8b083245eac71ba56dae2..67f7620d3f7c2a4ee11b42ba9a6fdeefeef2dcf5 100644
--- a/components/renderers/cpp/src/splat_render.cpp
+++ b/components/renderers/cpp/src/splat_render.cpp
@@ -211,11 +211,26 @@ void Splatter::_dibr(cudaStream_t stream) {
 			continue;
 		}
 
-		ftl::cuda::dibr_merge(
+		/*ftl::cuda::dibr_merge(
 			f.createTexture<float4>(Channel::Points),
 			f.createTexture<float4>(Channel::Normals),
 			temp_.createTexture<int>(Channel::Depth2),
 			params_, backcull_, stream
+		);*/
+
+		auto pose = MatrixConversion::toCUDA(s->getPose().cast<float>());
+
+		ftl::cuda::screen_coord(
+			f.createTexture<float>(Channel::Depth),
+			f.createTexture<short2>(Channel::Screen, Format<short2>(f.get<GpuMat>(Channel::Depth).size())),
+			params_, pose, s->parameters(), stream
+		);
+
+		ftl::cuda::triangle_render1(
+			f.getTexture<float>(Channel::Depth),
+			temp_.createTexture<int>(Channel::Depth2),
+			f.getTexture<short2>(Channel::Screen),
+			params_, stream
 		);
 
 		//LOG(INFO) << "DIBR DONE";
@@ -267,7 +282,7 @@ void Splatter::_renderChannel(
 
 	// Now splat the points
 	if (splat_) {
-		if (is_4chan) {
+		/*if (is_4chan) {
 			ftl::cuda::splat(
 				accum_.getTexture<float4>(Channel::Normals),
 				accum_.getTexture<float>(Channel::Density),
@@ -297,10 +312,12 @@ void Splatter::_renderChannel(
 				out.createTexture<uchar4>(channel_out),
 				params_, stream
 			);
-		}
+		}*/
+		temp_.get<GpuMat>(Channel::Depth2).convertTo(out.get<GpuMat>(Channel::Depth), CV_32F, 1.0f / 1000.0f);
 	} else {
 		// Swap accum frames directly to output.
 		accum_.swapTo(Channels(channel_out), out);
+		temp_.get<GpuMat>(Channel::Depth2).convertTo(out.get<GpuMat>(Channel::Depth), CV_32F, 1.0f / 1000.0f);
 	}
 }
 
@@ -428,7 +445,7 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out) {
 	
 	if (chan == Channel::Depth)
 	{
-		//temp_.get<GpuMat>(Channel::Depth).convertTo(out.get<GpuMat>(Channel::Depth), CV_32F, 1.0f / 1000.0f, cvstream);
+		temp_.get<GpuMat>(Channel::Depth2).convertTo(out.get<GpuMat>(Channel::Depth), CV_32F, 1.0f / 1000.0f, cvstream);
 	} else if (chan == Channel::Normals) {
 		out.create<GpuMat>(Channel::Normals, Format<float4>(camera.width, camera.height));
 
diff --git a/components/renderers/cpp/src/splatter_cuda.hpp b/components/renderers/cpp/src/splatter_cuda.hpp
index 42ec8a0c7dbca51b1c3fe7e22ba2c38eb2447169..fd88969931c3d37d3d47a6103f53bbebaf343ddd 100644
--- a/components/renderers/cpp/src/splatter_cuda.hpp
+++ b/components/renderers/cpp/src/splatter_cuda.hpp
@@ -6,6 +6,21 @@
 
 namespace ftl {
 namespace cuda {
+	void screen_coord(	// Computes, for each source depth pixel, its position on the virtual camera's screen.
+		ftl::cuda::TextureObject<float> &depth,	// source depth image; also defines the launch size
+		ftl::cuda::TextureObject<short2> &screen_out,	// per-pixel output screen position; (30000,30000) when there is no valid projection
+		const ftl::render::SplatParams &params,	// supplies m_viewMatrix and the virtual camera
+		const float4x4 &pose,	// applied to source camera-space points before the view matrix
+		const ftl::rgbd::Camera &camera,	// source camera (screenToCam and min/max depth)
+		cudaStream_t stream);	// stream the kernel is launched on
+
+	void triangle_render1(	// Rasterises triangles between neighbouring source pixels into depth_out.
+		ftl::cuda::TextureObject<float> &depth_in,	// source depth image; also defines the launch size
+		ftl::cuda::TextureObject<int> &depth_out,	// receives int(depth*1000) merged through atomicMin
+		ftl::cuda::TextureObject<short2> &screen,	// output screen position of each source pixel (see screen_coord)
+		const ftl::render::SplatParams &params,	// params.camera.width/height bound the written pixels
+		cudaStream_t stream);	// stream the kernel is launched on
+	
 	void dibr_merge(
 		ftl::cuda::TextureObject<float4> &points,
 		ftl::cuda::TextureObject<float4> &normals,
diff --git a/components/renderers/cpp/src/triangle_render.cu b/components/renderers/cpp/src/triangle_render.cu
new file mode 100644
index 0000000000000000000000000000000000000000..5c8d2dd42db4e96705508a9bd963b323e0c0b56f
--- /dev/null
+++ b/components/renderers/cpp/src/triangle_render.cu
@@ -0,0 +1,66 @@
+#include <ftl/render/splat_params.hpp>
+#include "splatter_cuda.hpp"
+#include <ftl/rgbd/camera.hpp>
+#include <ftl/cuda_common.hpp>
+
+using ftl::rgbd::Camera;
+using ftl::cuda::TextureObject;
+using ftl::render::SplatParams;
+
+#define T_PER_BLOCK 8
+
+__device__ inline float length2(int dx, int dy) { const int sq = dx*dx + dy*dy; return static_cast<float>(sq); }  // squared length of (dx,dy), summed in int
+
+/*
+ * Rasterise the triangle formed by each source pixel and its right (x+1)
+ * and lower (y+1) neighbours into depth_out. screen holds the output screen
+ * position of every source pixel, with (30000,30000) marking pixels that
+ * have no valid projection. Depths are merged as int(depth*1000) with atomicMin.
+ */
+ __global__ void triangle_render_1_kernel(
+        TextureObject<float> depth_in,
+        TextureObject<int> depth_out,
+		TextureObject<short2> screen, SplatParams params) {
+	const int x = blockIdx.x*blockDim.x + threadIdx.x;
+	const int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+    // The last row and column have no right/lower neighbour to form a triangle with.
+    if (x < 0 || x >= depth_in.width()-1 || y < 0 || y >= depth_in.height()-1) return;
+
+    const float d[3] = { depth_in.tex2D(x,y), depth_in.tex2D(x+1,y), depth_in.tex2D(x,y+1) };
+    short2 s[3] = { screen.tex2D(x,y), screen.tex2D(x+1,y), screen.tex2D(x,y+1) };
+
+    // Reject triangles with an unprojected vertex: the 30000 sentinel would
+    // otherwise give a huge extent (clamped to 10 below) and an invalid depth.
+    for (int i=0; i<3; ++i) if (s[i].x >= 30000 || s[i].y >= 30000) return;
+
+    // Express vertices 1 and 2 relative to vertex 0.
+    s[1].x -= s[0].x;
+    s[1].y -= s[0].y;
+    s[2].x -= s[0].x;
+    s[2].y -= s[0].y;
+
+    // Cap the rasterised extent at 10 pixels per axis.
+    s[1].x = min(s[1].x,10);
+    s[2].y = min(s[2].y,10);
+    for (int sx=0; sx < s[1].x; ++sx) {
+        for (int sy=0; sy < s[2].y; ++sy) {
+            if (sx < sy) continue;  // only the sx >= sy half of the bounding box is filled
+            if (sx+s[0].x >= params.camera.width || sy+s[0].y >= params.camera.height) continue;
+            // NOTE(review): each depth is weighted by the squared distance to its own vertex (d[0] gets weight 0 at sx=sy=0) - confirm intended.
+            const float dist1 = length2(sx,sy);
+            const float dist2 = length2(s[1].x-sx, s[1].y-sy);
+            const float dist3 = length2(s[2].x-sx, s[2].y-sy);
+            const float new_depth = (d[0]*dist1 + d[1]*dist2 + d[2] * dist3) / (dist1+dist2+dist3);
+            atomicMin(&depth_out(sx+s[0].x,sy+s[0].y), int(new_depth*1000.0f));
+        }
+    }
+}
+
+// Launches triangle_render_1_kernel on `stream`; the grid is rounded up so partial tiles at the image edges are covered.
+void ftl::cuda::triangle_render1(TextureObject<float> &depth_in, TextureObject<int> &depth_out, TextureObject<short2> &screen, const SplatParams &params, cudaStream_t stream) {
+	const dim3 block(T_PER_BLOCK, T_PER_BLOCK);
+	const dim3 grid((depth_in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+	triangle_render_1_kernel<<<grid, block, 0, stream>>>(depth_in, depth_out, screen, params);
+	cudaSafeCall( cudaGetLastError() );  // surfaces launch-configuration errors only
+}