diff --git a/components/renderers/cpp/CMakeLists.txt b/components/renderers/cpp/CMakeLists.txt
index b575721587262e2e468d6cb48cf8c44c6771e6fc..63cf8f1e75663c61e05f7c68d26217de30e964e6 100644
--- a/components/renderers/cpp/CMakeLists.txt
+++ b/components/renderers/cpp/CMakeLists.txt
@@ -2,6 +2,7 @@ add_library(ftlrender
 	src/splat_render.cpp
 	src/splatter.cu
 	src/points.cu
+	src/normals.cu
 )
 
 # These cause errors in CI build and are being removed from PCL in newer versions
diff --git a/components/renderers/cpp/include/ftl/cuda/normals.hpp b/components/renderers/cpp/include/ftl/cuda/normals.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..85620ff287c84ac9e8f5828784a34a8a917d48f6
--- /dev/null
+++ b/components/renderers/cpp/include/ftl/cuda/normals.hpp
@@ -0,0 +1,15 @@
+#ifndef _FTL_CUDA_NORMALS_HPP_
+#define _FTL_CUDA_NORMALS_HPP_
+
+#include <ftl/cuda_common.hpp>
+
+namespace ftl {
+namespace cuda {
+
+void ftl::cuda::normals(ftl::cuda::TextureObject<float4> &output,
+        ftl::cuda::TextureObject<float4> &input, cudaStream_t stream);
+
+}
+}
+
+#endif  // _FTL_CUDA_NORMALS_HPP_
diff --git a/components/renderers/cpp/src/normals.cu b/components/renderers/cpp/src/normals.cu
new file mode 100644
index 0000000000000000000000000000000000000000..813f1aab543b81fdbcb5bdde3471ab3b0c39b0ae
--- /dev/null
+++ b/components/renderers/cpp/src/normals.cu
@@ -0,0 +1,44 @@
+#include <ftl/cuda/normals.hpp>
+
+#define T_PER_BLOCK 16
+
+__global__ void computeNormals_kernel(ftl::cuda::TextureObject<float4> output,
+        ftl::cuda::TextureObject<float4> input) {
+	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
+	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	if(x >= input.width() || y >= input.height()) return;
+
+	output(x,y) = make_float4(MINF, MINF, MINF, MINF);
+
+	if(x > 0 && x < input.width()-1 && y > 0 && y < input.height()-1) {
+		const float3 CC = make_float3(input.tex2D(x+0, y+0)); //[(y+0)*width+(x+0)];
+		const float3 PC = make_float3(input.tex2D(x+0, y+1)); //[(y+1)*width+(x+0)];
+		const float3 CP = make_float3(input.tex2D(x+1, y+0)); //[(y+0)*width+(x+1)];
+		const float3 MC = make_float3(input.tex2D(x+0, y-1)); //[(y-1)*width+(x+0)];
+		const float3 CM = make_float3(input.tex2D(x-1, y+0)); //[(y+0)*width+(x-1)];
+
+		if(CC.x != MINF && PC.x != MINF && CP.x != MINF && MC.x != MINF && CM.x != MINF) {
+			const float3 n = cross(PC-MC, CP-CM);
+			const float  l = length(n);
+
+			if(l > 0.0f) {
+				output(x,y) = make_float4(n/-l, 1.0f);
+			}
+		}
+	}
+}
+
+void ftl::cuda::normals(ftl::cuda::TextureObject<float4> &output,
+        ftl::cuda::TextureObject<float4> &input, cudaStream_t stream) {
+	const dim3 gridSize((input.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (input.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
+
+	computeNormals_kernel<<<gridSize, blockSize, 0, stream>>>(output, input);
+
+    cudaSafeCall( cudaGetLastError() );
+#ifdef _DEBUG
+	cudaSafeCall(cudaDeviceSynchronize());
+	//cutilCheckMsg(__FUNCTION__);
+#endif
+}
\ No newline at end of file