diff --git a/applications/reconstruct/CMakeLists.txt b/applications/reconstruct/CMakeLists.txt
index 00e44a444ba45f6cb7b4f68ed2441a1c3ac4fd4c..ae06aab6d142ba8a2ad3a1e0970905e3a70468e9 100644
--- a/applications/reconstruct/CMakeLists.txt
+++ b/applications/reconstruct/CMakeLists.txt
@@ -19,6 +19,8 @@ set(REPSRC
 	#src/virtual_source.cpp
 	src/splat_render.cpp
 	src/dibr.cu
+	src/depth_camera.cu
+	src/depth_camera.cpp
 )
 
 add_executable(ftl-reconstruct ${REPSRC})
diff --git a/applications/reconstruct/src/depth_camera.cpp b/applications/reconstruct/src/depth_camera.cpp
index dd04da7738c60aaf44b208d03bf76272cc9a4c87..93d6ab2b44592fbfc859428443922cfcaa304454 100644
--- a/applications/reconstruct/src/depth_camera.cpp
+++ b/applications/reconstruct/src/depth_camera.cpp
@@ -1,9 +1,9 @@
 #include <ftl/depth_camera.hpp>
+#include "depth_camera_cuda.hpp"
+#include <opencv2/core/cuda_stream_accessor.hpp>
 
 using ftl::voxhash::DepthCamera;
-
-extern "C" void computeNormals(float4* d_output, float3* d_input, unsigned int width, unsigned int height);
-extern "C" void convertDepthFloatToCameraSpaceFloat3(float3* d_output, float* d_input, float4x4 intrinsicsInv, unsigned int width, unsigned int height, const DepthCameraData& depthCameraData);
+using ftl::voxhash::DepthCameraCUDA;
 
 DepthCamera::DepthCamera() {
 	depth_mat_ = nullptr;
@@ -15,7 +15,7 @@ DepthCamera::DepthCamera() {
 	normal_tex_ = nullptr;
 }
 
-void DepthCamera::alloc(const DepthCameraParams& params, bool withNormals=false) { //! todo resizing???
+void DepthCamera::alloc(const DepthCameraParams& params, bool withNormals) { //! todo resizing???
 	depth_mat_ = new cv::cuda::GpuMat(params.m_imageHeight, params.m_imageWidth, CV_32FC1);
 	colour_mat_ = new cv::cuda::GpuMat(params.m_imageHeight, params.m_imageWidth, CV_8UC4);
 	depth_tex_ = new ftl::cuda::TextureObject<float>((cv::cuda::PtrStepSz<float>)*depth_mat_);
@@ -53,6 +53,6 @@ void DepthCamera::updateData(const cv::Mat &depth, const cv::Mat &rgb, cv::cuda:
 }
 
 void DepthCamera::_computeNormals(cudaStream_t stream) {
-	convertDepthFloatToCameraSpaceFloat3(m_data.d_depth3, m_data.d_depth, m_params.m_intrinsicsInverse, m_params.m_width, m_params.m_height, cameraData);
-	computeNormals(m_data.d_normals, m_data.d_depth3, m_params.m_width, m_params.m_height);
+	ftl::cuda::point_cloud((float3*)point_mat_->data, data, stream);
+	ftl::cuda::compute_normals((float3*)point_mat_->data, normal_tex_, stream);
 }
diff --git a/applications/reconstruct/src/depth_camera.cu b/applications/reconstruct/src/depth_camera.cu
new file mode 100644
index 0000000000000000000000000000000000000000..9234504e2d78b1cbf75563693b119b7975b2481e
--- /dev/null
+++ b/applications/reconstruct/src/depth_camera.cu
@@ -0,0 +1,84 @@
+#include <ftl/cuda_common.hpp>
+#include <ftl/cuda_util.hpp>
+#include <ftl/depth_camera.hpp>
+#include "depth_camera_cuda.hpp"
+
+#define T_PER_BLOCK 16
+#define MINF __int_as_float(0xff800000)
+
+using ftl::voxhash::DepthCameraCUDA;
+
+__global__ void point_cloud_kernel(float3* output, DepthCameraCUDA depthCameraData)
+{
+	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
+	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	const int width = depthCameraData.params.m_imageWidth;
+	const int height = depthCameraData.params.m_imageHeight;
+
+	if (x < width && y < height) {
+		float depth = tex2D<float>(depthCameraData.depth, x, y);
+
+		output[y*width+x] = (depth >= depthCameraData.params.m_sensorDepthWorldMin && depth <= depthCameraData.params.m_sensorDepthWorldMax) ?
+			depthCameraData.params.kinectDepthToSkeleton(x, y, depth) :
+			make_float3(MINF, MINF, MINF);
+	}
+}
+
+void ftl::cuda::point_cloud(float3* output, const DepthCameraCUDA &depthCameraData, cudaStream_t stream) {
+	const dim3 gridSize((depthCameraData.params.m_imageWidth + T_PER_BLOCK - 1)/T_PER_BLOCK, (depthCameraData.params.m_imageHeight + T_PER_BLOCK - 1)/T_PER_BLOCK);
+	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
+
+	point_cloud_kernel<<<gridSize, blockSize, 0, stream>>>(output, depthCameraData);
+
+#ifdef _DEBUG
+	cudaSafeCall(cudaDeviceSynchronize());
+#endif
+}
+
+/// ===== NORMALS =====
+
+__global__ void compute_normals_kernel(const float3 *input, ftl::cuda::TextureObject<float4> output)
+{
+	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
+	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	const int width = output.width();
+
+	if(x >= output.width() || y >= output.height()) return;
+
+	output(x,y) = make_float4(MINF, MINF, MINF, MINF);
+
+	if(x > 0 && x < output.width()-1 && y > 0 && y < output.height()-1)
+	{
+		// TODO:(Nick) Should use a 7x7 window
+		const float3 CC = input[(y+0)*width+(x+0)];
+		const float3 PC = input[(y+1)*width+(x+0)];
+		const float3 CP = input[(y+0)*width+(x+1)];
+		const float3 MC = input[(y-1)*width+(x+0)];
+		const float3 CM = input[(y+0)*width+(x-1)];
+
+		if(CC.x != MINF && PC.x != MINF && CP.x != MINF && MC.x != MINF && CM.x != MINF)
+		{
+			const float3 n = cross(PC-MC, CP-CM);
+			const float  l = length(n);
+
+			if(l > 0.0f)
+			{
+				output(x,y) = make_float4(n/-l, 1.0f);
+			}
+		}
+	}
+}
+
+void ftl::cuda::compute_normals(const float3 *input, ftl::cuda::TextureObject<float4> *output, cudaStream_t stream) {
+	const dim3 gridSize((output->width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (output->height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
+
+	compute_normals_kernel<<<gridSize, blockSize, 0, stream>>>(input, *output);
+
+#ifdef _DEBUG
+	cudaSafeCall(cudaDeviceSynchronize());
+	//cutilCheckMsg(__FUNCTION__);
+#endif
+}
\ No newline at end of file
diff --git a/applications/reconstruct/src/depth_camera_cuda.hpp b/applications/reconstruct/src/depth_camera_cuda.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..05a83a473a7a36ccf6a16fc8c699d63046eea3b1
--- /dev/null
+++ b/applications/reconstruct/src/depth_camera_cuda.hpp
@@ -0,0 +1,17 @@
+#ifndef _FTL_RECONSTRUCTION_CAMERA_CUDA_HPP_
+#define _FTL_RECONSTRUCTION_CAMERA_CUDA_HPP_
+
+#include <ftl/depth_camera.hpp>
+#include <ftl/voxel_hash.hpp>
+
+namespace ftl {
+namespace cuda {
+
+void point_cloud(float3* output, const ftl::voxhash::DepthCameraCUDA &depthCameraData, cudaStream_t stream);
+
+void compute_normals(const float3 *points, ftl::cuda::TextureObject<float4> *normals, cudaStream_t stream);
+
+}
+}
+
+#endif  // _FTL_RECONSTRUCTION_CAMERA_CUDA_HPP_
diff --git a/applications/reconstruct/src/integrators.cu b/applications/reconstruct/src/integrators.cu
index ce51c185dc9a6ff105a69fc6818f8db421e95433..984fd29dce023bd1491c2637b73b98accaf303d9 100644
--- a/applications/reconstruct/src/integrators.cu
+++ b/applications/reconstruct/src/integrators.cu
@@ -42,6 +42,8 @@ __device__ bool colordiff(const uchar4 &pa, const uchar3 &pb, float epsilon) {
 /*
  * Guennebaud, G.; Gross, M. Algebraic point set surfaces. ACMTransactions on Graphics Vol. 26, No. 3, Article No. 23, 2007.
  * Used in: FusionMLS: Highly dynamic 3D reconstruction with consumer-grade RGB-D cameras
+ *     r = distance between points
+ *     h = smoothing parameter in meters (default 4cm)
  */
 __device__ float spatialWeighting(float r) {
 	const float h = c_hashParams.m_spatialSmoothing;
@@ -53,73 +55,6 @@ __device__ float spatialWeighting(float r) {
 
 
 
-/*__global__ void integrateRegistrationKernel(HashData hashData, HashParams hashParams, DepthCameraParams cameraParams, cudaTextureObject_t depthT, cudaTextureObject_t colourT) {
-
-	// Stride over all allocated blocks
-	for (int bi=blockIdx.x; bi<*hashData.d_hashCompactifiedCounter; bi+=NUM_CUDA_BLOCKS) {
-
-	//TODO check if we should load this in shared memory
-	HashEntry& entry = hashData.d_hashCompactified[bi];
-
-
-	int3 pi_base = hashData.SDFBlockToVirtualVoxelPos(entry.pos);
-
-	uint i = threadIdx.x;	//inside of an SDF block
-	int3 pi = pi_base + make_int3(hashData.delinearizeVoxelIndex(i));
-	float3 pf = hashData.virtualVoxelPosToWorld(pi);
-
-	pf = hashParams.m_rigidTransformInverse * pf;
-	uint2 screenPos = make_uint2(cameraParams.cameraToKinectScreenInt(pf));
-
-	// For this voxel in hash, get its screen position and check it is on screen
-	if (screenPos.x < cameraParams.m_imageWidth && screenPos.y < cameraParams.m_imageHeight) {	//on screen
-
-		//float depth = g_InputDepth[screenPos];
-		float depth = tex2D<float>(depthT, screenPos.x, screenPos.y);
-		//if (depth > 20.0f) return;
-
-		uchar4 color  = make_uchar4(0, 0, 0, 0);
-		color = tex2D<uchar4>(colourT, screenPos.x, screenPos.y);
-
-		// Depth is within accepted max distance from camera
-		if (depth > 0.01f && depth < hashParams.m_maxIntegrationDistance) { // valid depth and color (Nick: removed colour check)
-			float depthZeroOne = cameraParams.cameraToKinectProjZ(depth);
-
-			// Calculate SDF of this voxel wrt the depth map value
-			float sdf = depth - pf.z;
-			float truncation = hashData.getTruncation(depth);
-
-			if (sdf > -truncation) {
-				float weightUpdate = max(hashParams.m_integrationWeightSample * 1.5f * (1.0f-depthZeroOne), 1.0f);
-
-				Voxel curr;	//construct current voxel
-				curr.sdf = sdf;
-				curr.weight = weightUpdate;
-				curr.color = make_uchar3(color.x, color.y, color.z);
-
-				uint idx = entry.ptr + i;
-				
-				Voxel out;
-				const Voxel &v1 = curr;
-				const Voxel &v0 = hashData.d_SDFBlocks[idx];
-
-				float redshift = (v0.weight > 0) ? 1.0f - ((v1.sdf - v0.sdf) / hashParams.m_truncation)*0.5f : 1.0f;
-
-				out.color.x = min(max(v1.color.x*redshift,0.0f),255.0f);
-				out.color.y = min(max(v1.color.y*redshift,0.0f),255.0f);
-				out.color.z = min(max(v1.color.z*(1.0f / redshift),0.0f),255.0f);
-
-				out.sdf = (v0.sdf * (float)v0.weight + v1.sdf * (float)v1.weight) / ((float)v0.weight + (float)v1.weight);
-				out.weight = min(c_hashParams.m_integrationWeightMax, (unsigned int)v0.weight + (unsigned int)v1.weight);
-
-				hashData.d_SDFBlocks[idx] = out;
-
-			}
-		}
-	}
-
-	}  // Stride loop
-}*/
 
 __global__ void integrateDepthMapsKernel(HashData hashData, HashParams hashParams, int numcams) {
 	__shared__ uint all_warp_ballot;