Commit 879d5aff authored by Nicolas Pope

Feature/voxelhash

parent 7743fe4f
Showing with 7859 additions and 55 deletions
# Need to include staged files and libs
include_directories(${PROJECT_SOURCE_DIR}/applications/reconstruct/include)
#include_directories(${PROJECT_SOURCE_DIR}/reconstruct/include)
#include_directories(${PROJECT_BINARY_DIR})
set(REPSRC
src/main.cpp
src/scene_rep_hash_sdf.cu
src/ray_cast_sdf.cu
src/camera_util.cu
src/ray_cast_sdf.cpp
src/registration.cpp
)
@@ -11,6 +15,15 @@ add_executable(ftl-reconstruct ${REPSRC})
add_dependencies(ftl-reconstruct ftlnet)
add_dependencies(ftl-reconstruct ftlcommon)
target_include_directories(ftl-reconstruct PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
$<INSTALL_INTERFACE:include>
PRIVATE src)
if (CUDA_FOUND)
set_property(TARGET ftl-reconstruct PROPERTY CUDA_SEPARABLE_COMPILATION ON)
endif()
#target_include_directories(cv-node PUBLIC ${PROJECT_SOURCE_DIR}/include)
target_link_libraries(ftl-reconstruct ftlcommon ftlrgbd Threads::Threads ${OpenCV_LIBS} glog::glog ftlnet ftlrender)
This diff is collapsed.
/*
* Copyright 1993-2009 NVIDIA Corporation. All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation and
* any modifications thereto. Any use, reproduction, disclosure, or distribution
* of this software and related documentation without an express license
* agreement from NVIDIA Corporation is strictly prohibited.
*
*/
/*
This file implements common mathematical operations on vector types
(float3, float4 etc.) since these are not provided as standard by CUDA.
The syntax is modelled on the Cg standard library.
*/
#ifndef _FTL_CUDA_OPERATORS_HPP_
#define _FTL_CUDA_OPERATORS_HPP_
#include <cuda_runtime.h>
////////////////////////////////////////////////////////////////////////////////
typedef unsigned int uint;
typedef unsigned short ushort;
#ifndef __CUDACC__
#include <math.h>
inline float fminf(float a, float b)
{
return a < b ? a : b;
}
inline float fmaxf(float a, float b)
{
return a > b ? a : b;
}
inline int max(int a, int b)
{
return a > b ? a : b;
}
inline int min(int a, int b)
{
return a < b ? a : b;
}
inline float rsqrtf(float x)
{
return 1.0f / sqrtf(x);
}
#endif
// float functions
////////////////////////////////////////////////////////////////////////////////
// lerp
inline __device__ __host__ float lerp(float a, float b, float t)
{
return a + t*(b-a);
}
// clamp
inline __device__ __host__ float clamp(float f, float a, float b)
{
return fmaxf(a, fminf(f, b));
}
inline __device__ __host__ int sign(float x) {
int t = x<0 ? -1 : 0;
return x > 0 ? 1 : t;
}
// int2 functions
////////////////////////////////////////////////////////////////////////////////
inline __host__ __device__ int2 make_int2(float2 f)
{
int2 t; t.x = static_cast<int>(f.x); t.y = static_cast<int>(f.y); return t;
}
inline __host__ __device__ uint2 make_uint2(int2 i)
{
uint2 t; t.x = static_cast<uint>(i.x); t.y = static_cast<uint>(i.y); return t;
}
// negate
inline __host__ __device__ int2 operator-(int2 &a)
{
return make_int2(-a.x, -a.y);
}
// addition
inline __host__ __device__ int2 operator+(int2 a, int2 b)
{
return make_int2(a.x + b.x, a.y + b.y);
}
inline __host__ __device__ void operator+=(int2 &a, int2 b)
{
a.x += b.x; a.y += b.y;
}
// subtract
inline __host__ __device__ int2 operator-(int2 a, int2 b)
{
return make_int2(a.x - b.x, a.y - b.y);
}
inline __host__ __device__ void operator-=(int2 &a, int2 b)
{
a.x -= b.x; a.y -= b.y;
}
// multiply
inline __host__ __device__ int2 operator*(int2 a, int2 b)
{
return make_int2(a.x * b.x, a.y * b.y);
}
inline __host__ __device__ int2 operator*(int2 a, int s)
{
return make_int2(a.x * s, a.y * s);
}
inline __host__ __device__ int2 operator*(int s, int2 a)
{
return make_int2(a.x * s, a.y * s);
}
inline __host__ __device__ void operator*=(int2 &a, int s)
{
a.x *= s; a.y *= s;
}
// float2 functions
////////////////////////////////////////////////////////////////////////////////
// negate
inline __host__ __device__ float2 operator-(float2 &a)
{
return make_float2(-a.x, -a.y);
}
// addition
inline __host__ __device__ float2 operator+(float2 a, float2 b)
{
return make_float2(a.x + b.x, a.y + b.y);
}
inline __host__ __device__ void operator+=(float2 &a, float2 b)
{
a.x += b.x; a.y += b.y;
}
// subtract
inline __host__ __device__ float2 operator-(float2 a, float2 b)
{
return make_float2(a.x - b.x, a.y - b.y);
}
inline __host__ __device__ void operator-=(float2 &a, float2 b)
{
a.x -= b.x; a.y -= b.y;
}
// multiply
inline __host__ __device__ float2 operator*(float2 a, float2 b)
{
return make_float2(a.x * b.x, a.y * b.y);
}
inline __host__ __device__ float2 operator*(float2 a, float s)
{
return make_float2(a.x * s, a.y * s);
}
inline __host__ __device__ float2 operator*(float s, float2 a)
{
return make_float2(a.x * s, a.y * s);
}
inline __host__ __device__ void operator*=(float2 &a, float s)
{
a.x *= s; a.y *= s;
}
// divide
inline __host__ __device__ float2 operator/(float2 a, float2 b)
{
return make_float2(a.x / b.x, a.y / b.y);
}
inline __host__ __device__ float2 operator/(float2 a, float s)
{
float inv = 1.0f / s;
return a * inv;
}
inline __host__ __device__ float2 operator/(float s, float2 a)
{
float inv = 1.0f / s;
return a * inv;
}
inline __host__ __device__ void operator/=(float2 &a, float s)
{
float inv = 1.0f / s;
a *= inv;
}
// lerp
inline __device__ __host__ float2 lerp(float2 a, float2 b, float t)
{
return a + t*(b-a);
}
// clamp
inline __device__ __host__ float2 clamp(float2 v, float a, float b)
{
return make_float2(clamp(v.x, a, b), clamp(v.y, a, b));
}
inline __device__ __host__ float2 clamp(float2 v, float2 a, float2 b)
{
return make_float2(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y));
}
// dot product
inline __host__ __device__ float dot(float2 a, float2 b)
{
return a.x * b.x + a.y * b.y;
}
// length
inline __host__ __device__ float length(float2 v)
{
return sqrtf(dot(v, v));
}
// normalize
inline __host__ __device__ float2 normalize(float2 v)
{
float invLen = rsqrtf(dot(v, v));
return v * invLen;
}
// floor
inline __host__ __device__ float2 floor(const float2 v)
{
return make_float2(floor(v.x), floor(v.y));
}
// reflect
inline __host__ __device__ float2 reflect(float2 i, float2 n)
{
return i - 2.0f * n * dot(n,i);
}
// absolute value
inline __host__ __device__ float2 fabs(float2 v)
{
return make_float2(fabs(v.x), fabs(v.y));
}
inline __device__ __host__ int2 sign(float2 f) {
return make_int2(sign(f.x), sign(f.y));
}
// float3 functions
////////////////////////////////////////////////////////////////////////////////
inline __host__ __device__ float3 make_float3(int3 i)
{
float3 t; t.x = static_cast<float>(i.x); t.y = static_cast<float>(i.y); t.z = static_cast<float>(i.z); return t;
}
inline __host__ __device__ float3 make_float3(float4 f)
{
return make_float3(f.x,f.y,f.z);
}
inline __host__ __device__ float3 make_float3(uchar3 c)
{
return make_float3(static_cast<float>(c.x), static_cast<float>(c.y), static_cast<float>(c.z));
}
inline __host__ __device__ uchar3 make_uchar3(float3 f)
{
return make_uchar3(static_cast<unsigned char>(f.x), static_cast<unsigned char>(f.y), static_cast<unsigned char>(f.z));
}
inline __host__ __device__ float3 make_float3(float f)
{
return make_float3(f,f,f);
}
// negate
inline __host__ __device__ float3 operator-(const float3 &a)
{
return make_float3(-a.x, -a.y, -a.z);
}
// min
static __inline__ __host__ __device__ float3 fminf(float3 a, float3 b)
{
return make_float3(fminf(a.x,b.x), fminf(a.y,b.y), fminf(a.z,b.z));
}
// max
static __inline__ __host__ __device__ float3 fmaxf(float3 a, float3 b)
{
return make_float3(fmaxf(a.x,b.x), fmaxf(a.y,b.y), fmaxf(a.z,b.z));
}
// addition
inline __host__ __device__ float3 operator+(float3 a, float3 b)
{
return make_float3(a.x + b.x, a.y + b.y, a.z + b.z);
}
inline __host__ __device__ float3 operator+(float3 a, float b)
{
return make_float3(a.x + b, a.y + b, a.z + b);
}
inline __host__ __device__ void operator+=(float3 &a, float3 b)
{
a.x += b.x; a.y += b.y; a.z += b.z;
}
// subtract
inline __host__ __device__ float3 operator-(float3 a, float3 b)
{
return make_float3(a.x - b.x, a.y - b.y, a.z - b.z);
}
inline __host__ __device__ float3 operator-(float3 a, float b)
{
return make_float3(a.x - b, a.y - b, a.z - b);
}
inline __host__ __device__ void operator-=(float3 &a, float3 b)
{
a.x -= b.x; a.y -= b.y; a.z -= b.z;
}
// multiply
inline __host__ __device__ float3 operator*(float3 a, float3 b)
{
return make_float3(a.x * b.x, a.y * b.y, a.z * b.z);
}
inline __host__ __device__ float3 operator*(float3 a, float s)
{
return make_float3(a.x * s, a.y * s, a.z * s);
}
inline __host__ __device__ float3 operator*(float s, float3 a)
{
return make_float3(a.x * s, a.y * s, a.z * s);
}
inline __host__ __device__ void operator*=(float3 &a, float s)
{
a.x *= s; a.y *= s; a.z *= s;
}
// divide
inline __host__ __device__ float3 operator/(float3 a, float3 b)
{
return make_float3(a.x / b.x, a.y / b.y, a.z / b.z);
}
inline __host__ __device__ float3 operator/(float3 a, float s)
{
float inv = 1.0f / s;
return a * inv;
}
inline __host__ __device__ float3 operator/(float s, float3 a)
{
float inv = 1.0f / s;
return a * inv;
}
inline __host__ __device__ void operator/=(float3 &a, float s)
{
float inv = 1.0f / s;
a *= inv;
}
// lerp
inline __device__ __host__ float3 lerp(float3 a, float3 b, float t)
{
return a + t*(b-a);
}
// clamp
inline __device__ __host__ float3 clamp(float3 v, float a, float b)
{
return make_float3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b));
}
inline __device__ __host__ float3 clamp(float3 v, float3 a, float3 b)
{
return make_float3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z));
}
// dot product
inline __host__ __device__ float dot(float3 a, float3 b)
{
return a.x * b.x + a.y * b.y + a.z * b.z;
}
// cross product
inline __host__ __device__ float3 cross(float3 a, float3 b)
{
return make_float3(a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x);
}
// length
inline __host__ __device__ float length(float3 v)
{
return sqrtf(dot(v, v));
}
// normalize
inline __host__ __device__ float3 normalize(float3 v)
{
float invLen = rsqrtf(dot(v, v));
return v * invLen;
}
// floor
inline __host__ __device__ float3 floor(const float3 v)
{
return make_float3(floor(v.x), floor(v.y), floor(v.z));
}
// reflect
inline __host__ __device__ float3 reflect(float3 i, float3 n)
{
return i - 2.0f * n * dot(n,i);
}
// absolute value
inline __host__ __device__ float3 fabs(float3 v)
{
return make_float3(fabs(v.x), fabs(v.y), fabs(v.z));
}
inline __device__ __host__ int3 sign(float3 f) {
return make_int3(sign(f.x), sign(f.y), sign(f.z));
}
// float4 functions
////////////////////////////////////////////////////////////////////////////////
inline __host__ __device__ float4 make_float4(float a)
{
return make_float4(a,a,a,a);
}
inline __host__ __device__ float4 make_float4(float3 f, float a)
{
return make_float4(f.x,f.y,f.z,a);
}
// negate
inline __host__ __device__ float4 operator-(float4 &a)
{
return make_float4(-a.x, -a.y, -a.z, -a.w);
}
// min
static __inline__ __host__ __device__ float4 fminf(float4 a, float4 b)
{
return make_float4(fminf(a.x,b.x), fminf(a.y,b.y), fminf(a.z,b.z), fminf(a.w,b.w));
}
// max
static __inline__ __host__ __device__ float4 fmaxf(float4 a, float4 b)
{
return make_float4(fmaxf(a.x,b.x), fmaxf(a.y,b.y), fmaxf(a.z,b.z), fmaxf(a.w,b.w));
}
// addition
inline __host__ __device__ float4 operator+(float4 a, float4 b)
{
return make_float4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
}
inline __host__ __device__ void operator+=(float4 &a, float4 b)
{
a.x += b.x; a.y += b.y; a.z += b.z; a.w += b.w;
}
// subtract
inline __host__ __device__ float4 operator-(float4 a, float4 b)
{
return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
}
inline __host__ __device__ void operator-=(float4 &a, float4 b)
{
a.x -= b.x; a.y -= b.y; a.z -= b.z; a.w -= b.w;
}
// multiply
inline __host__ __device__ float4 operator*(float4 a, float s)
{
return make_float4(a.x * s, a.y * s, a.z * s, a.w * s);
}
inline __host__ __device__ float4 operator*(float s, float4 a)
{
return make_float4(a.x * s, a.y * s, a.z * s, a.w * s);
}
inline __host__ __device__ void operator*=(float4 &a, float s)
{
a.x *= s; a.y *= s; a.z *= s; a.w *= s;
}
// divide
inline __host__ __device__ float4 operator/(float4 a, float4 b)
{
return make_float4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w);
}
inline __host__ __device__ float4 operator/(float4 a, float s)
{
float inv = 1.0f / s;
return a * inv;
}
inline __host__ __device__ float4 operator/(float s, float4 a)
{
float inv = 1.0f / s;
return a * inv;
}
inline __host__ __device__ void operator/=(float4 &a, float s)
{
float inv = 1.0f / s;
a *= inv;
}
// lerp
inline __device__ __host__ float4 lerp(float4 a, float4 b, float t)
{
return a + t*(b-a);
}
// clamp
inline __device__ __host__ float4 clamp(float4 v, float a, float b)
{
return make_float4(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b), clamp(v.w, a, b));
}
inline __device__ __host__ float4 clamp(float4 v, float4 a, float4 b)
{
return make_float4(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z), clamp(v.w, a.w, b.w));
}
// dot product
inline __host__ __device__ float dot(float4 a, float4 b)
{
return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
}
// length
inline __host__ __device__ float length(float4 r)
{
return sqrtf(dot(r, r));
}
// normalize
inline __host__ __device__ float4 normalize(float4 v)
{
float invLen = rsqrtf(dot(v, v));
return v * invLen;
}
// floor
inline __host__ __device__ float4 floor(const float4 v)
{
return make_float4(floor(v.x), floor(v.y), floor(v.z), floor(v.w));
}
// absolute value
inline __host__ __device__ float4 fabs(float4 v)
{
return make_float4(fabs(v.x), fabs(v.y), fabs(v.z), fabs(v.w));
}
// int3 functions
////////////////////////////////////////////////////////////////////////////////
inline __host__ __device__ int3 make_int3(float3 f)
{
int3 t; t.x = static_cast<int>(f.x); t.y = static_cast<int>(f.y); t.z = static_cast<int>(f.z); return t;
}
inline __host__ __device__ int3 make_int3(uint3 i)
{
int3 t; t.x = static_cast<int>(i.x); t.y = static_cast<int>(i.y); t.z = static_cast<int>(i.z); return t;
}
inline __host__ __device__ int3 make_int3(int i)
{
int3 t; t.x = i; t.y = i; t.z = i; return t;
}
// negate
inline __host__ __device__ int3 operator-(int3 &a)
{
return make_int3(-a.x, -a.y, -a.z);
}
// min
inline __host__ __device__ int3 min(int3 a, int3 b)
{
return make_int3(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z));
}
// max
inline __host__ __device__ int3 max(int3 a, int3 b)
{
return make_int3(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z));
}
// addition
inline __host__ __device__ int3 operator+(int3 a, int3 b)
{
return make_int3(a.x + b.x, a.y + b.y, a.z + b.z);
}
inline __host__ __device__ void operator+=(int3 &a, int3 b)
{
a.x += b.x; a.y += b.y; a.z += b.z;
}
// subtract
inline __host__ __device__ int3 operator-(int3 a, int3 b)
{
return make_int3(a.x - b.x, a.y - b.y, a.z - b.z);
}
inline __host__ __device__ void operator-=(int3 &a, int3 b)
{
a.x -= b.x; a.y -= b.y; a.z -= b.z;
}
// multiply
inline __host__ __device__ int3 operator*(int3 a, int3 b)
{
return make_int3(a.x * b.x, a.y * b.y, a.z * b.z);
}
inline __host__ __device__ int3 operator*(int3 a, int s)
{
return make_int3(a.x * s, a.y * s, a.z * s);
}
inline __host__ __device__ int3 operator*(int s, int3 a)
{
return make_int3(a.x * s, a.y * s, a.z * s);
}
inline __host__ __device__ void operator*=(int3 &a, int s)
{
a.x *= s; a.y *= s; a.z *= s;
}
// divide
inline __host__ __device__ int3 operator/(int3 a, int3 b)
{
return make_int3(a.x / b.x, a.y / b.y, a.z / b.z);
}
inline __host__ __device__ int3 operator/(int3 a, int s)
{
return make_int3(a.x / s, a.y / s, a.z / s);
}
inline __host__ __device__ int3 operator/(int s, int3 a)
{
return make_int3(a.x / s, a.y / s, a.z / s);
}
inline __host__ __device__ void operator/=(int3 &a, int s)
{
a.x /= s; a.y /= s; a.z /= s;
}
// clamp
inline __device__ __host__ int clamp(int f, int a, int b)
{
return max(a, min(f, b));
}
inline __device__ __host__ int3 clamp(int3 v, int a, int b)
{
return make_int3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b));
}
inline __device__ __host__ int3 clamp(int3 v, int3 a, int3 b)
{
return make_int3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z));
}
// uint3 functions
////////////////////////////////////////////////////////////////////////////////
// min
inline __host__ __device__ uint3 min(uint3 a, uint3 b)
{
return make_uint3(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z));
}
// max
inline __host__ __device__ uint3 max(uint3 a, uint3 b)
{
return make_uint3(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z));
}
// addition
inline __host__ __device__ uint3 operator+(uint3 a, uint3 b)
{
return make_uint3(a.x + b.x, a.y + b.y, a.z + b.z);
}
inline __host__ __device__ void operator+=(uint3 &a, uint3 b)
{
a.x += b.x; a.y += b.y; a.z += b.z;
}
// subtract
inline __host__ __device__ uint3 operator-(uint3 a, uint3 b)
{
return make_uint3(a.x - b.x, a.y - b.y, a.z - b.z);
}
inline __host__ __device__ void operator-=(uint3 &a, uint3 b)
{
a.x -= b.x; a.y -= b.y; a.z -= b.z;
}
// multiply
inline __host__ __device__ uint3 operator*(uint3 a, uint3 b)
{
return make_uint3(a.x * b.x, a.y * b.y, a.z * b.z);
}
inline __host__ __device__ uint3 operator*(uint3 a, uint s)
{
return make_uint3(a.x * s, a.y * s, a.z * s);
}
inline __host__ __device__ uint3 operator*(uint s, uint3 a)
{
return make_uint3(a.x * s, a.y * s, a.z * s);
}
inline __host__ __device__ void operator*=(uint3 &a, uint s)
{
a.x *= s; a.y *= s; a.z *= s;
}
// divide
inline __host__ __device__ uint3 operator/(uint3 a, uint3 b)
{
return make_uint3(a.x / b.x, a.y / b.y, a.z / b.z);
}
inline __host__ __device__ uint3 operator/(uint3 a, uint s)
{
return make_uint3(a.x / s, a.y / s, a.z / s);
}
inline __host__ __device__ uint3 operator/(uint s, uint3 a)
{
return make_uint3(a.x / s, a.y / s, a.z / s);
}
inline __host__ __device__ void operator/=(uint3 &a, uint s)
{
a.x /= s; a.y /= s; a.z /= s;
}
// clamp
inline __device__ __host__ uint clamp(uint f, uint a, uint b)
{
return max(a, min(f, b));
}
inline __device__ __host__ uint3 clamp(uint3 v, uint a, uint b)
{
return make_uint3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b));
}
inline __device__ __host__ uint3 clamp(uint3 v, uint3 a, uint3 b)
{
return make_uint3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z));
}
#endif // _FTL_CUDA_OPERATORS_HPP_
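As a quick usage illustration of the operators above (not part of this commit), a hypothetical device helper composing normalize, reflect, dot, clamp and lerp on float3 values might look like this:

#include <ftl/cuda_operators.hpp>

// Illustrative sketch only: blend two colours based on a reflected direction.
__device__ float3 shade_sketch(float3 viewDir, float3 normal, float3 colourA, float3 colourB) {
	float3 r = reflect(normalize(viewDir), normalize(normal));   // float3 overloads defined above
	float t = clamp(0.5f * (dot(r, normal) + 1.0f), 0.0f, 1.0f); // map [-1,1] to [0,1]
	return lerp(colourA, colourB, t);
}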
#pragma once
#ifndef _CUDA_UTIL_
#define _CUDA_UTIL_
#undef max
#undef min
#include <cuda_runtime.h>
#include <ftl/cuda_operators.hpp>
// Enable run time assertion checking in kernel code
#define cudaAssert(condition) if (!(condition)) { printf("ASSERT: %s %s\n", #condition, __FILE__); }
//#define cudaAssert(condition)
#if defined(__CUDA_ARCH__)
// A macro cannot expand to a preprocessor directive; use the _Pragma operator instead.
#define __CONDITIONAL_UNROLL__ _Pragma("unroll")
#else
#define __CONDITIONAL_UNROLL__
#endif
#endif
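Note that cudaAssert only prints the failed condition and file name from device code; it does not abort the kernel. A minimal illustrative kernel (not part of this commit) using it:

#include <ftl/cuda_util.hpp>

// Illustrative sketch only.
__global__ void scale_kernel(float *data, int n, float s) {
	const int i = blockIdx.x * blockDim.x + threadIdx.x;
	if (i >= n) return;
	cudaAssert(data != nullptr);  // prints "ASSERT: ..." if it fails, then execution continues
	data[i] *= s;
}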
// From: https://github.com/niessner/VoxelHashing/blob/master/DepthSensingCUDA/Source/DepthCameraUtil.h
#pragma once
//#include <cutil_inline.h>
//#include <cutil_math.h>
#include <vector_types.h>
#include <cuda_runtime.h>
#include <ftl/cuda_matrix_util.hpp>
#include <ftl/cuda_common.hpp>
#include <ftl/depth_camera_params.hpp>
extern "C" void updateConstantDepthCameraParams(const DepthCameraParams& params);
extern __constant__ DepthCameraParams c_depthCameraParams;
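// Note: c_depthCameraParams lives in GPU constant memory and is defined in a single .cu
// translation unit; host code cannot assign to it directly, so updateConstantDepthCameraParams()
// (implemented in the corresponding .cu file, presumably via cudaMemcpyToSymbol) uploads new
// values. Device code in other translation units can reference it because the build enables
// CUDA separable compilation (see the CMake change above).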
struct DepthCameraData {
///////////////
// Host part //
///////////////
__device__ __host__
DepthCameraData() {
/*d_depthData = NULL;
d_colorData = NULL;
d_depthArray = NULL;
d_colorArray = NULL;*/
depth_mat_ = nullptr;
colour_mat_ = nullptr;
depth_tex_ = nullptr;
colour_tex_ = nullptr;
}
__host__
void alloc(const DepthCameraParams& params) { //! todo resizing???
/*cudaSafeCall(cudaMalloc(&d_depthData, sizeof(float) * params.m_imageWidth * params.m_imageHeight));
cudaSafeCall(cudaMalloc(&d_colorData, sizeof(float4) * params.m_imageWidth * params.m_imageHeight));
h_depthChannelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
cudaSafeCall(cudaMallocArray(&d_depthArray, &h_depthChannelDesc, params.m_imageWidth, params.m_imageHeight));
h_colorChannelDesc = cudaCreateChannelDesc(32, 32, 32, 32, cudaChannelFormatKindFloat);
cudaSafeCall(cudaMallocArray(&d_colorArray, &h_colorChannelDesc, params.m_imageWidth, params.m_imageHeight));*/
std::cout << "Create texture objects: " << params.m_imageWidth << "," << params.m_imageHeight << std::endl;
depth_mat_ = new cv::cuda::GpuMat(params.m_imageHeight, params.m_imageWidth, CV_32FC1);
colour_mat_ = new cv::cuda::GpuMat(params.m_imageHeight, params.m_imageWidth, CV_8UC4);
depth_tex_ = new ftl::cuda::TextureObject<float>((cv::cuda::PtrStepSz<float>)*depth_mat_);
colour_tex_ = new ftl::cuda::TextureObject<uchar4>((cv::cuda::PtrStepSz<uchar4>)*colour_mat_);
depth_obj_ = depth_tex_->cudaTexture();
colour_obj_ = colour_tex_->cudaTexture();
}
__host__
void updateParams(const DepthCameraParams& params) {
updateConstantDepthCameraParams(params);
}
__host__
void updateData(const cv::Mat &depth, const cv::Mat &rgb) {
depth_mat_->upload(depth);
colour_mat_->upload(rgb);
}
__host__
void free() {
/*if (d_depthData) cudaSafeCall(cudaFree(d_depthData));
if (d_colorData) cudaSafeCall(cudaFree(d_colorData));
if (d_depthArray) cudaSafeCall(cudaFreeArray(d_depthArray));
if (d_colorArray) cudaSafeCall(cudaFreeArray(d_colorArray));*/
/*d_depthData = NULL;
d_colorData = NULL;
d_depthArray = NULL;
d_colorArray = NULL;*/
if (depth_mat_) delete depth_mat_;
if (colour_mat_) delete colour_mat_;
delete depth_tex_;
delete colour_tex_;
}
/////////////////
// Device part //
/////////////////
static inline const DepthCameraParams& params() {
return c_depthCameraParams;
}
///////////////////////////////////////////////////////////////
// Camera to Screen
///////////////////////////////////////////////////////////////
__device__
static inline float2 cameraToKinectScreenFloat(const float3& pos) {
//return make_float2(pos.x*c_depthCameraParams.fx/pos.z + c_depthCameraParams.mx, c_depthCameraParams.my - pos.y*c_depthCameraParams.fy/pos.z);
return make_float2(
pos.x*c_depthCameraParams.fx/pos.z + c_depthCameraParams.mx,
pos.y*c_depthCameraParams.fy/pos.z + c_depthCameraParams.my);
}
__device__
static inline int2 cameraToKinectScreenInt(const float3& pos) {
float2 pImage = cameraToKinectScreenFloat(pos);
return make_int2(pImage + make_float2(0.5f, 0.5f));
}
__device__
static inline uint2 cameraToKinectScreen(const float3& pos) {
int2 p = cameraToKinectScreenInt(pos);
return make_uint2(p.x, p.y);
}
__device__
static inline float cameraToKinectProjZ(float z) {
return (z - c_depthCameraParams.m_sensorDepthWorldMin)/(c_depthCameraParams.m_sensorDepthWorldMax - c_depthCameraParams.m_sensorDepthWorldMin);
}
__device__
static inline float3 cameraToKinectProj(const float3& pos) {
float2 proj = cameraToKinectScreenFloat(pos);
float3 pImage = make_float3(proj.x, proj.y, pos.z);
pImage.x = (2.0f*pImage.x - (c_depthCameraParams.m_imageWidth- 1.0f))/(c_depthCameraParams.m_imageWidth- 1.0f);
//pImage.y = (2.0f*pImage.y - (c_depthCameraParams.m_imageHeight-1.0f))/(c_depthCameraParams.m_imageHeight-1.0f);
pImage.y = ((c_depthCameraParams.m_imageHeight-1.0f) - 2.0f*pImage.y)/(c_depthCameraParams.m_imageHeight-1.0f);
pImage.z = cameraToKinectProjZ(pImage.z);
return pImage;
}
///////////////////////////////////////////////////////////////
// Screen to Camera (depth in meters)
///////////////////////////////////////////////////////////////
__device__
static inline float3 kinectDepthToSkeleton(uint ux, uint uy, float depth) {
const float x = ((float)ux-c_depthCameraParams.mx) / c_depthCameraParams.fx;
const float y = ((float)uy-c_depthCameraParams.my) / c_depthCameraParams.fy;
//const float y = (c_depthCameraParams.my-(float)uy) / c_depthCameraParams.fy;
return make_float3(depth*x, depth*y, depth);
}
///////////////////////////////////////////////////////////////
// RenderScreen to Camera -- ATTENTION: assumes z in [0,1] range
///////////////////////////////////////////////////////////////
__device__
static inline float kinectProjToCameraZ(float z) {
return z * (c_depthCameraParams.m_sensorDepthWorldMax - c_depthCameraParams.m_sensorDepthWorldMin) + c_depthCameraParams.m_sensorDepthWorldMin;
}
// z has to be in [0, 1]
__device__
static inline float3 kinectProjToCamera(uint ux, uint uy, float z) {
float fSkeletonZ = kinectProjToCameraZ(z);
return kinectDepthToSkeleton(ux, uy, fSkeletonZ);
}
__device__
static inline bool isInCameraFrustumApprox(const float4x4& viewMatrixInverse, const float3& pos) {
float3 pCamera = viewMatrixInverse * pos;
float3 pProj = cameraToKinectProj(pCamera);
//pProj *= 1.5f; //TODO THIS IS A HACK FIX IT :)
pProj *= 0.95;
return !(pProj.x < -1.0f || pProj.x > 1.0f || pProj.y < -1.0f || pProj.y > 1.0f || pProj.z < 0.0f || pProj.z > 1.0f);
}
//float* d_depthData; //depth data of the current frame (in screen space):: TODO data allocation lives in RGBD Sensor
//float4* d_colorData;
//uchar4* d_colorData; //color data of the current frame (in screen space):: TODO data allocation lives in RGBD Sensor
cv::cuda::GpuMat *depth_mat_;
cv::cuda::GpuMat *colour_mat_;
ftl::cuda::TextureObject<float> *depth_tex_;
ftl::cuda::TextureObject<uchar4> *colour_tex_;
cudaTextureObject_t depth_obj_;
cudaTextureObject_t colour_obj_;
// cuda arrays for texture access
/*cudaArray* d_depthArray;
cudaArray* d_colorArray;
cudaChannelFormatDesc h_depthChannelDesc;
cudaChannelFormatDesc h_colorChannelDesc;*/
};
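The projection helpers above are static device functions driven entirely by c_depthCameraParams, so they can be exercised from any kernel once the constants are uploaded (again relying on separable compilation). A hypothetical consistency-check kernel, illustrative only:

#include <ftl/depth_camera.hpp>

// Illustrative sketch only: back-project each valid pixel and re-project it.
__global__ void reproject_check_kernel(const float *depth, int *errors, int width, int height) {
	const int x = blockIdx.x * blockDim.x + threadIdx.x;
	const int y = blockIdx.y * blockDim.y + threadIdx.y;
	if (x >= width || y >= height) return;
	const float d = depth[y * width + x];
	if (d <= 0.0f) return;
	const float3 p = DepthCameraData::kinectDepthToSkeleton(x, y, d);  // pixel + depth -> camera space
	const int2 s = DepthCameraData::cameraToKinectScreenInt(p);        // camera space -> pixel
	if (s.x != x || s.y != y) atomicAdd(errors, 1);  // should round-trip to the same pixel
}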
// From: https://github.com/niessner/VoxelHashing/blob/master/DepthSensingCUDA/Source/CUDADepthCameraParams.h
//#include <cutil_inline.h>
//#include <cutil_math.h>
#include <vector_types.h>
#include <cuda_runtime.h>
#include <ftl/cuda_matrix_util.hpp>
#include <ftl/camera_params.hpp>
struct __align__(16) DepthCameraParams {
float fx;
float fy;
float mx;
float my;
unsigned short m_imageWidth;
unsigned short m_imageHeight;
unsigned int flags;
float m_sensorDepthWorldMin;
float m_sensorDepthWorldMax;
};
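A host-side sketch of how these fields might be filled from pinhole intrinsics; all numeric values are placeholders, not taken from this commit:

#include <ftl/depth_camera.hpp>

// Illustrative sketch only.
void setup_camera_sketch() {
	DepthCameraParams p;
	p.fx = 525.0f;  p.fy = 525.0f;        // focal lengths in pixels
	p.mx = 319.5f;  p.my = 239.5f;        // principal point in pixels
	p.m_imageWidth = 640;
	p.m_imageHeight = 480;
	p.flags = 0;
	p.m_sensorDepthWorldMin = 0.1f;       // metres; used by cameraToKinectProjZ
	p.m_sensorDepthWorldMax = 8.0f;
	updateConstantDepthCameraParams(p);   // upload to c_depthCameraParams
}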
// From: https://github.com/niessner/VoxelHashing/blob/master/DepthSensingCUDA/Source/MatrixConversion.h
#pragma once
#include <eigen3/Eigen/Eigen>
// #include "d3dx9math.h"
#include <ftl/cuda_matrix_util.hpp>
namespace MatrixConversion
{
//static D3DXMATRIX EigToMat(const Eigen::Matrix4f& mat)
//{
// D3DXMATRIX m(mat.data());
// D3DXMATRIX res; D3DXMatrixTranspose(&res, &m);
// return res;
//}
//static Eigen::Matrix4f MatToEig(const D3DXMATRIX& mat)
//{
// return Eigen::Matrix4f((float*)mat.m).transpose();
//}
/*static mat4f EigToMat(const Eigen::Matrix4f& mat)
{
return mat4f(mat.data()).getTranspose();
}
static Eigen::Matrix4f MatToEig(const mat4f& mat)
{
return Eigen::Matrix4f(mat.ptr()).transpose();
}*/
static Eigen::Vector4f VecH(const Eigen::Vector3f& v)
{
return Eigen::Vector4f(v[0], v[1], v[2], 1.0);
}
static Eigen::Vector3f VecDH(const Eigen::Vector4f& v)
{
return Eigen::Vector3f(v[0]/v[3], v[1]/v[3], v[2]/v[3]);
}
/*static Eigen::Vector3f VecToEig(const vec3f& v)
{
return Eigen::Vector3f(v[0], v[1], v[2]);
}
static vec3f EigToVec(const Eigen::Vector3f& v)
{
return vec3f(v[0], v[1], v[2]);
}
static vec3f EigToVec(const D3DXMATRIX v)
{
return vec3f(v[0], v[1], v[2]);
}*/
// dx/cuda conversion
/*static vec3f toMlib(const D3DXVECTOR3& v) {
return vec3f(v.x, v.y, v.z);
}
static vec4f toMlib(const D3DXVECTOR4& v) {
return vec4f(v.x, v.y, v.z, v.w);
}
static mat4f toMlib(const D3DXMATRIX& m) {
mat4f c((const float*)&m);
return c.getTranspose();
}
static D3DXVECTOR3 toDX(const vec3f& v) {
return D3DXVECTOR3(v.x, v.y, v.z);
}
static D3DXVECTOR4 toDX(const vec4f& v) {
return D3DXVECTOR4(v.x, v.y, v.z, v.w);
}
static D3DXMATRIX toDX(const mat4f& m) {
D3DXMATRIX c((const float*)m.ptr());
D3DXMatrixTranspose(&c, &c);
return c;
}*/
/*static mat4f toMlib(const float4x4& m) {
return mat4f(m.ptr());
}
static vec4f toMlib(const float4& v) {
return vec4f(v.x, v.y, v.z, v.w);
}
static vec3f toMlib(const float3& v) {
return vec3f(v.x, v.y, v.z);
}
static vec4i toMlib(const int4& v) {
return vec4i(v.x, v.y, v.z, v.w);
}
static vec3i toMlib(const int3& v) {
return vec3i(v.x, v.y, v.z);
}
static float4x4 toCUDA(const mat4f& m) {
return float4x4(m.ptr());
}*/
static float4x4 toCUDA(const Eigen::Matrix4f& mat) {
return float4x4(mat.data()).getTranspose();
}
static Eigen::Matrix4f toEigen(const float4x4& m) {
return Eigen::Matrix4f(m.ptr()).transpose();
}
/*static float4 toCUDA(const vec4f& v) {
return make_float4(v.x, v.y, v.z, v.w);
}
static float3 toCUDA(const vec3f& v) {
return make_float3(v.x, v.y, v.z);
}
static int4 toCUDA(const vec4i& v) {
return make_int4(v.x, v.y, v.z, v.w);
}
static int3 toCUDA(const vec3i& v) {
return make_int3(v.x, v.y, v.z);
}*/
//doesnt really work
//template<class FloatType>
//point3d<FloatType> eulerAngles(const Matrix3x3<FloatType>& r) {
// point3d<FloatType> res;
// //check for gimbal lock
// if (r(0,2) == (FloatType)-1.0) {
// FloatType x = 0; //gimbal lock, value of x doesn't matter
// FloatType y = (FloatType)M_PI / 2;
// FloatType z = x + atan2(r(1,0), r(2,0));
// res = point3d<FloatType>(x, y, z);
// } else if (r(0,2) == (FloatType)1.0) {
// FloatType x = 0;
// FloatType y = -(FloatType)M_PI / 2;
// FloatType z = -x + atan2(-r(1,0), -r(2,0));
// res = point3d<FloatType>(x, y, z);
// } else { //two solutions exist
// FloatType x1 = -asin(r(0,2));
// FloatType x2 = (FloatType)M_PI - x1;
// FloatType y1 = atan2(r(1,2) / cos(x1), r(2,2) / cos(x1));
// FloatType y2 = atan2(r(1,2) / cos(x2), r(2,2) / cos(x2));
// FloatType z2 = atan2(r(0,1) / cos(x2), r(0,0) / cos(x2));
// FloatType z1 = atan2(r(0,1) / cos(x1), r(0,0) / cos(x1));
// //choose one solution to return
// //for example the "shortest" rotation
// if ((std::abs(x1) + std::abs(y1) + std::abs(z1)) <= (std::abs(x2) + std::abs(y2) + std::abs(z2))) {
// res = point3d<FloatType>(x1, y1, z1);
// } else {
// res = point3d<FloatType>(x2, y2, z2);
// }
// }
// res.x = math::radiansToDegrees(-res.x);
// res.y = math::radiansToDegrees(-res.y);
// res.z = math::radiansToDegrees(-res.z);
// return res;
//}
}
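Of the conversions above only toCUDA and toEigen remain active; a small host-side sketch of the round trip (illustrative only):

#include <ftl/matrix_conversion.hpp>
#include <cassert>

// Illustrative sketch only.
void matrix_round_trip_sketch() {
	Eigen::Matrix4f pose = Eigen::Matrix4f::Identity();
	pose(0, 3) = 1.5f;                                    // translate 1.5 m along x
	float4x4 gpuPose = MatrixConversion::toCUDA(pose);    // column-major Eigen -> row-major float4x4
	Eigen::Matrix4f back = MatrixConversion::toEigen(gpuPose);
	assert(back.isApprox(pose));                          // lossless up to float precision
}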
#include <ftl/cuda_util.hpp>
#include <ftl/cuda_matrix_util.hpp>
struct __align__(16) RayCastParams {
float4x4 m_viewMatrix;
float4x4 m_viewMatrixInverse;
float4x4 m_intrinsics;
float4x4 m_intrinsicsInverse;
unsigned int m_width;
unsigned int m_height;
unsigned int m_numOccupiedSDFBlocks;
unsigned int m_maxNumVertices;
int m_splatMinimum;
float m_minDepth;
float m_maxDepth;
float m_rayIncrement;
float m_thresSampleDist;
float m_thresDist;
bool m_useGradients;
uint dummy0;
};
#pragma once
#include <ftl/configurable.hpp>
#include <ftl/matrix_conversion.hpp>
#include <ftl/cuda_matrix_util.hpp>
#include <ftl/depth_camera.hpp>
#include <ftl/ray_cast_util.hpp>
#include <nlohmann/json.hpp>
// #include "DX11RayIntervalSplatting.h"
class CUDARayCastSDF : public ftl::Configurable
{
public:
CUDARayCastSDF(nlohmann::json& config) : ftl::Configurable(config) {
create(parametersFromConfig(config));
hash_render_ = config.value("hash_renderer", false);
}
~CUDARayCastSDF(void) {
destroy();
}
static RayCastParams parametersFromConfig(const nlohmann::json& gas) {
RayCastParams params;
params.m_width = gas["adapterWidth"].get<unsigned int>();
params.m_height = gas["adapterHeight"].get<unsigned int>();
params.m_intrinsics = MatrixConversion::toCUDA(Eigen::Matrix4f());
params.m_intrinsicsInverse = MatrixConversion::toCUDA(Eigen::Matrix4f());
params.m_minDepth = gas["sensorDepthMin"].get<float>();
params.m_maxDepth = gas["sensorDepthMax"].get<float>();
params.m_rayIncrement = gas["SDFRayIncrementFactor"].get<float>() * gas["SDFTruncation"].get<float>();
params.m_thresSampleDist = gas["SDFRayThresSampleDistFactor"].get<float>() * params.m_rayIncrement;
params.m_thresDist = gas["SDFRayThresDistFactor"].get<float>() * params.m_rayIncrement;
params.m_useGradients = gas["SDFUseGradients"].get<bool>();
params.m_maxNumVertices = gas["hashNumSDFBlocks"].get<unsigned int>() * 6;
return params;
}
void render(const ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const DepthCameraData& cameraData, const Eigen::Matrix4f& lastRigidTransform);
const RayCastData& getRayCastData(void) {
return m_data;
}
const RayCastParams& getRayCastParams() const {
return m_params;
}
// debugging
void convertToCameraSpace(const DepthCameraData& cameraData);
private:
void create(const RayCastParams& params);
void destroy(void);
void rayIntervalSplatting(const ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const DepthCameraData& cameraData, const Eigen::Matrix4f& lastRigidTransform); // rasterize
RayCastParams m_params;
RayCastData m_data;
bool hash_render_;
// DX11RayIntervalSplatting m_rayIntervalSplatting;
};
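A construction sketch showing the JSON keys that parametersFromConfig() reads; the numeric values are placeholders, and whether Configurable accepts a bare JSON object like this depends on code not shown in this commit:

#include <ftl/ray_cast_sdf.hpp>

// Illustrative sketch only.
void raycaster_setup_sketch() {
	nlohmann::json cfg = {
		{"adapterWidth", 640}, {"adapterHeight", 480},
		{"sensorDepthMin", 0.1}, {"sensorDepthMax", 8.0},
		{"SDFRayIncrementFactor", 0.8}, {"SDFTruncation", 0.1},
		{"SDFRayThresSampleDistFactor", 50.5}, {"SDFRayThresDistFactor", 50.0},
		{"SDFUseGradients", false},
		{"hashNumSDFBlocks", 100000},
		{"hash_renderer", false}
	};
	CUDARayCastSDF raycaster(cfg);
	// Per frame, after SceneRep::integrate():
	//   raycaster.render(scene.getHashData(), scene.getHashParams(), cameraData, lastPose);
	(void)raycaster;
}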
#pragma once
#include <ftl/cuda_matrix_util.hpp>
#include <ftl/depth_camera.hpp>
#include <ftl/voxel_hash.hpp>
#include <ftl/ray_cast_params.hpp>
struct RayCastSample
{
float sdf;
float alpha;
uint weight;
//uint3 color;
};
#ifndef MINF
#define MINF asfloat(0xff800000)
#endif
extern __constant__ RayCastParams c_rayCastParams;
extern "C" void updateConstantRayCastParams(const RayCastParams& params);
struct RayCastData {
///////////////
// Host part //
///////////////
__device__ __host__
RayCastData() {
d_depth = NULL;
d_depth3 = NULL;
d_normals = NULL;
d_colors = NULL;
//d_vertexBuffer = NULL;
point_cloud_ = nullptr;
d_rayIntervalSplatMinArray = NULL;
d_rayIntervalSplatMaxArray = NULL;
}
#ifndef __CUDACC__
__host__
void allocate(const RayCastParams& params) {
//point_cloud_ = new cv::cuda::GpuMat(params.m_width*params.m_height, 1, CV_32FC3);
cudaSafeCall(cudaMalloc(&d_depth, sizeof(float) * params.m_width * params.m_height));
cudaSafeCall(cudaMalloc(&d_depth3, sizeof(float3) * params.m_width * params.m_height));
cudaSafeCall(cudaMalloc(&d_normals, sizeof(float4) * params.m_width * params.m_height));
cudaSafeCall(cudaMalloc(&d_colors, sizeof(uchar3) * params.m_width * params.m_height));
//cudaSafeCall(cudaMalloc(&point_cloud_, sizeof(float3) * params.m_width * params.m_height));
//printf("Allocate ray cast data: %lld \n", (unsigned long long)point_cloud_);
}
__host__
void updateParams(const RayCastParams& params) {
updateConstantRayCastParams(params);
}
__host__ void download(float3 *points, uchar3 *colours, const RayCastParams& params) const {
//printf("Download: %d,%d\n", params.m_width, params.m_height);
//cudaSafeCall(cudaMemcpy(points, d_depth3, sizeof(float3) * params.m_width * params.m_height, cudaMemcpyDeviceToHost));
cudaSafeCall(cudaMemcpy(colours, d_colors, sizeof(uchar3) * params.m_width * params.m_height, cudaMemcpyDeviceToHost));
}
__host__
void free() {
//cudaFree(point_cloud_);
cudaFree(d_depth);
cudaFree(d_depth3);
cudaFree(d_normals);
cudaFree(d_colors);
}
#endif
/////////////////
// Device part //
/////////////////
#ifdef __CUDACC__
__device__
const RayCastParams& params() const {
return c_rayCastParams;
}
__device__
float frac(float val) const {
return (val - floorf(val));
}
__device__
float3 frac(const float3& val) const {
return make_float3(frac(val.x), frac(val.y), frac(val.z));
}
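// The next helper samples the SDF at an arbitrary world position by trilinear
// interpolation over the eight voxels surrounding it. The query is shifted back by
// half a voxel (posDual) so the eight neighbours on the dual grid enclose the point,
// and 'weight' holds the fractional position inside that cell. If any of the eight
// voxels has zero integration weight the sample is rejected, since its SDF value is
// undefined; otherwise both distance and colour are blended with the same weights.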
__device__
bool trilinearInterpolationSimpleFastFast(const ftl::voxhash::HashData& hash, const float3& pos, float& dist, uchar3& color) const {
const float oSet = c_hashParams.m_virtualVoxelSize;
const float3 posDual = pos-make_float3(oSet/2.0f, oSet/2.0f, oSet/2.0f);
float3 weight = frac(hash.worldToVirtualVoxelPosFloat(pos));
dist = 0.0f;
float3 colorFloat = make_float3(0.0f, 0.0f, 0.0f);
ftl::voxhash::Voxel v = hash.getVoxel(posDual+make_float3(0.0f, 0.0f, 0.0f)); if(v.weight == 0) return false; float3 vColor = make_float3(v.color.x, v.color.y, v.color.z); dist+= (1.0f-weight.x)*(1.0f-weight.y)*(1.0f-weight.z)*v.sdf; colorFloat+= (1.0f-weight.x)*(1.0f-weight.y)*(1.0f-weight.z)*vColor;
v = hash.getVoxel(posDual+make_float3(oSet, 0.0f, 0.0f)); if(v.weight == 0) return false; vColor = make_float3(v.color.x, v.color.y, v.color.z); dist+= weight.x *(1.0f-weight.y)*(1.0f-weight.z)*v.sdf; colorFloat+= weight.x *(1.0f-weight.y)*(1.0f-weight.z)*vColor;
v = hash.getVoxel(posDual+make_float3(0.0f, oSet, 0.0f)); if(v.weight == 0) return false; vColor = make_float3(v.color.x, v.color.y, v.color.z); dist+= (1.0f-weight.x)* weight.y *(1.0f-weight.z)*v.sdf; colorFloat+= (1.0f-weight.x)* weight.y *(1.0f-weight.z)*vColor;
v = hash.getVoxel(posDual+make_float3(0.0f, 0.0f, oSet)); if(v.weight == 0) return false; vColor = make_float3(v.color.x, v.color.y, v.color.z); dist+= (1.0f-weight.x)*(1.0f-weight.y)* weight.z *v.sdf; colorFloat+= (1.0f-weight.x)*(1.0f-weight.y)* weight.z *vColor;
v = hash.getVoxel(posDual+make_float3(oSet, oSet, 0.0f)); if(v.weight == 0) return false; vColor = make_float3(v.color.x, v.color.y, v.color.z); dist+= weight.x * weight.y *(1.0f-weight.z)*v.sdf; colorFloat+= weight.x * weight.y *(1.0f-weight.z)*vColor;
v = hash.getVoxel(posDual+make_float3(0.0f, oSet, oSet)); if(v.weight == 0) return false; vColor = make_float3(v.color.x, v.color.y, v.color.z); dist+= (1.0f-weight.x)* weight.y * weight.z *v.sdf; colorFloat+= (1.0f-weight.x)* weight.y * weight.z *vColor;
v = hash.getVoxel(posDual+make_float3(oSet, 0.0f, oSet)); if(v.weight == 0) return false; vColor = make_float3(v.color.x, v.color.y, v.color.z); dist+= weight.x *(1.0f-weight.y)* weight.z *v.sdf; colorFloat+= weight.x *(1.0f-weight.y)* weight.z *vColor;
v = hash.getVoxel(posDual+make_float3(oSet, oSet, oSet)); if(v.weight == 0) return false; vColor = make_float3(v.color.x, v.color.y, v.color.z); dist+= weight.x * weight.y * weight.z *v.sdf; colorFloat+= weight.x * weight.y * weight.z *vColor;
color = make_uchar3(colorFloat.x, colorFloat.y, colorFloat.z);//v.color;
return true;
}
//__device__
//bool trilinearInterpolationSimpleFastFast(const HashData& hash, const float3& pos, float& dist, uchar3& color) const {
// const float oSet = c_hashParams.m_virtualVoxelSize;
// const float3 posDual = pos-make_float3(oSet/2.0f, oSet/2.0f, oSet/2.0f);
// float3 weight = frac(hash.worldToVirtualVoxelPosFloat(pos));
// dist = 0.0f;
// Voxel v = hash.getVoxel(posDual+make_float3(0.0f, 0.0f, 0.0f)); if(v.weight == 0) return false; dist+= (1.0f-weight.x)*(1.0f-weight.y)*(1.0f-weight.z)*v.sdf;
// v = hash.getVoxel(posDual+make_float3(oSet, 0.0f, 0.0f)); if(v.weight == 0) return false; dist+= weight.x *(1.0f-weight.y)*(1.0f-weight.z)*v.sdf;
// v = hash.getVoxel(posDual+make_float3(0.0f, oSet, 0.0f)); if(v.weight == 0) return false; dist+= (1.0f-weight.x)* weight.y *(1.0f-weight.z)*v.sdf;
// v = hash.getVoxel(posDual+make_float3(0.0f, 0.0f, oSet)); if(v.weight == 0) return false; dist+= (1.0f-weight.x)*(1.0f-weight.y)* weight.z *v.sdf;
// v = hash.getVoxel(posDual+make_float3(oSet, oSet, 0.0f)); if(v.weight == 0) return false; dist+= weight.x * weight.y *(1.0f-weight.z)*v.sdf;
// v = hash.getVoxel(posDual+make_float3(0.0f, oSet, oSet)); if(v.weight == 0) return false; dist+= (1.0f-weight.x)* weight.y * weight.z *v.sdf;
// v = hash.getVoxel(posDual+make_float3(oSet, 0.0f, oSet)); if(v.weight == 0) return false; dist+= weight.x *(1.0f-weight.y)* weight.z *v.sdf;
// v = hash.getVoxel(posDual+make_float3(oSet, oSet, oSet)); if(v.weight == 0) return false; dist+= weight.x * weight.y * weight.z *v.sdf;
// color = v.color;
// return true;
//}
__device__
float findIntersectionLinear(float tNear, float tFar, float dNear, float dFar) const
{
return tNear+(dNear/(dNear-dFar))*(tFar-tNear);
}
static const unsigned int nIterationsBisection = 3;
// d0 near, d1 far
__device__
bool findIntersectionBisection(const ftl::voxhash::HashData& hash, const float3& worldCamPos, const float3& worldDir, float d0, float r0, float d1, float r1, float& alpha, uchar3& color) const
{
float a = r0; float aDist = d0;
float b = r1; float bDist = d1;
float c = 0.0f;
#pragma unroll 1
for(uint i = 0; i<nIterationsBisection; i++)
{
c = findIntersectionLinear(a, b, aDist, bDist);
float cDist;
if(!trilinearInterpolationSimpleFastFast(hash, worldCamPos+c*worldDir, cDist, color)) return false;
if(aDist*cDist > 0.0) { a = c; aDist = cDist; }
else { b = c; bDist = cDist; }
}
alpha = c;
return true;
}
__device__
float3 gradientForPoint(const ftl::voxhash::HashData& hash, const float3& pos) const
{
const float voxelSize = c_hashParams.m_virtualVoxelSize;
float3 offset = make_float3(voxelSize, voxelSize, voxelSize);
float distp00; uchar3 colorp00; trilinearInterpolationSimpleFastFast(hash, pos-make_float3(0.5f*offset.x, 0.0f, 0.0f), distp00, colorp00);
float dist0p0; uchar3 color0p0; trilinearInterpolationSimpleFastFast(hash, pos-make_float3(0.0f, 0.5f*offset.y, 0.0f), dist0p0, color0p0);
float dist00p; uchar3 color00p; trilinearInterpolationSimpleFastFast(hash, pos-make_float3(0.0f, 0.0f, 0.5f*offset.z), dist00p, color00p);
float dist100; uchar3 color100; trilinearInterpolationSimpleFastFast(hash, pos+make_float3(0.5f*offset.x, 0.0f, 0.0f), dist100, color100);
float dist010; uchar3 color010; trilinearInterpolationSimpleFastFast(hash, pos+make_float3(0.0f, 0.5f*offset.y, 0.0f), dist010, color010);
float dist001; uchar3 color001; trilinearInterpolationSimpleFastFast(hash, pos+make_float3(0.0f, 0.0f, 0.5f*offset.z), dist001, color001);
float3 grad = make_float3((distp00-dist100)/offset.x, (dist0p0-dist010)/offset.y, (dist00p-dist001)/offset.z);
float l = length(grad);
if(l == 0.0f) {
return make_float3(0.0f, 0.0f, 0.0f);
}
return -grad/l;
}
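// The ray marcher below steps along worldDir in increments of m_rayIncrement, sampling
// the SDF at each position. When the sign changes from positive to negative between two
// consecutive valid samples (the ray has crossed the zero level set from outside to
// inside), the crossing is refined with a few bisection steps and the resulting depth,
// camera-space point, colour and (optionally) gradient-based normal are written for this
// pixel before the thread returns.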
__device__
void traverseCoarseGridSimpleSampleAll(const ftl::voxhash::HashData& hash, const DepthCameraData& cameraData, const float3& worldCamPos, const float3& worldDir, const float3& camDir, const int3& dTid, float minInterval, float maxInterval) const
{
const RayCastParams& rayCastParams = c_rayCastParams;
// Last Sample
RayCastSample lastSample; lastSample.sdf = 0.0f; lastSample.alpha = 0.0f; lastSample.weight = 0; // lastSample.color = int3(0, 0, 0);
const float depthToRayLength = 1.0f/camDir.z; // scale factor to convert from depth to ray length
float rayCurrent = depthToRayLength * max(rayCastParams.m_minDepth, minInterval); // Convert depth to raylength
float rayEnd = depthToRayLength * min(rayCastParams.m_maxDepth, maxInterval); // Convert depth to raylength
//float rayCurrent = depthToRayLength * rayCastParams.m_minDepth; // Convert depth to raylength
//float rayEnd = depthToRayLength * rayCastParams.m_maxDepth; // Convert depth to raylength
//printf("MINMAX: %f,%f\n", minInterval, maxInterval);
#pragma unroll 1
while(rayCurrent < rayEnd)
{
float3 currentPosWorld = worldCamPos+rayCurrent*worldDir;
float dist; uchar3 color;
//printf("RAY LOOP: %f,%f,%f\n", currentPosWorld.x, currentPosWorld.y, currentPosWorld.z);
if(trilinearInterpolationSimpleFastFast(hash, currentPosWorld, dist, color))
{
if(lastSample.weight > 0 && lastSample.sdf > 0.0f && dist < 0.0f) // current sample is always valid here
{
float alpha; // = findIntersectionLinear(lastSample.alpha, rayCurrent, lastSample.sdf, dist);
uchar3 color2;
bool b = findIntersectionBisection(hash, worldCamPos, worldDir, lastSample.sdf, lastSample.alpha, dist, rayCurrent, alpha, color2);
float3 currentIso = worldCamPos+alpha*worldDir;
if(b && abs(lastSample.sdf - dist) < rayCastParams.m_thresSampleDist)
{
if(abs(dist) < rayCastParams.m_thresDist)
{
float depth = alpha / depthToRayLength; // Convert ray length to depth depthToRayLength
d_depth[dTid.y*rayCastParams.m_width+dTid.x] = depth;
d_depth3[dTid.y*rayCastParams.m_width+dTid.x] = cameraData.kinectDepthToSkeleton(dTid.x, dTid.y, depth);
d_colors[dTid.y*rayCastParams.m_width+dTid.x] = make_uchar3(color2.x, color2.y, color2.z);
if(rayCastParams.m_useGradients)
{
float3 normal = -gradientForPoint(hash, currentIso);
float4 n = rayCastParams.m_viewMatrix * make_float4(normal, 0.0f);
d_normals[dTid.y*rayCastParams.m_width+dTid.x] = make_float4(n.x, n.y, n.z, 1.0f);
}
return;
}
}
}
lastSample.sdf = dist;
lastSample.alpha = rayCurrent;
// lastSample.color = color;
lastSample.weight = 1;
rayCurrent += rayCastParams.m_rayIncrement;
} else {
lastSample.weight = 0;
rayCurrent += rayCastParams.m_rayIncrement;
}
}
}
#endif // __CUDACC__
union {
float* d_depth;
int* d_depth_i;
};
float3* d_depth3;
float4* d_normals;
uchar3* d_colors;
//float4* d_vertexBuffer; // ray interval splatting triangles, mapped from directx (memory lives there)
float3 *point_cloud_;
cudaArray* d_rayIntervalSplatMinArray;
cudaArray* d_rayIntervalSplatMaxArray;
};
\ No newline at end of file
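Host-side lifecycle sketch for RayCastData (illustrative only; the kernel launch that fills these buffers lives in ray_cast_sdf.cu and is not shown here):

#include <ftl/ray_cast_util.hpp>
#include <vector>

// Illustrative sketch only.
void raycast_buffers_sketch(const RayCastParams &params) {
	RayCastData data;
	data.allocate(params);       // device buffers sized m_width * m_height
	data.updateParams(params);   // upload constants to c_rayCastParams
	// ... launch the ray casting kernel here (see ray_cast_sdf.cu) ...
	std::vector<float3> points(params.m_width * params.m_height);
	std::vector<uchar3> colours(params.m_width * params.m_height);
	data.download(points.data(), colours.data(), params);  // note: only colours are copied at present
	data.free();
}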
// From: https://github.com/niessner/VoxelHashing/blob/master/DepthSensingCUDA/Source/CUDASceneRepHashSDF.h
#pragma once
#include <cuda_runtime.h>
#include <ftl/configurable.hpp>
#include <ftl/matrix_conversion.hpp>
#include <ftl/voxel_hash.hpp>
#include <ftl/depth_camera.hpp>
#include <unordered_set>
#include <list>
#include <vector>
#include <stdexcept>
#include <iostream>
//#include "CUDAScan.h"
// #include "CUDATimer.h"
// #include "GlobalAppState.h"
// #include "TimingLog.h"
#define SAFE_DELETE_ARRAY(a) { delete [] (a); (a) = NULL; }
extern "C" void resetCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams);
extern "C" void resetHashBucketMutexCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams);
extern "C" void allocCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, const unsigned int* d_bitMask);
extern "C" void fillDecisionArrayCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const DepthCameraData& depthCameraData);
extern "C" void compactifyHashCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams);
extern "C" unsigned int compactifyHashAllInOneCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams);
extern "C" void integrateDepthMapCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams);
extern "C" void bindInputDepthColorTextures(const DepthCameraData& depthCameraData);
extern "C" void starveVoxelsKernelCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams);
extern "C" void garbageCollectIdentifyCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams);
extern "C" void garbageCollectFreeCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams);
namespace ftl {
namespace voxhash {
class SceneRep : public ftl::Configurable {
public:
SceneRep(nlohmann::json &config) : Configurable(config) {
REQUIRED({
{"hashNumBuckets", "Desired hash entries divide bucket size", "number"},
{"hashMaxCollisionLinkedListSize", "", "number"},
{"hashNumSDFBlocks", "", "number"},
{"SDFVoxelSize", "Size in meters of one voxel", "number"},
{"SDFMaxIntegrationDistance", "", "number"},
{"SDFTruncation", "Base error size", "number"},
{"SDFTruncationScale", "Error size scale with depth", "number"},
{"SDFIntegrationWeightSample", "", "number"},
{"SDFIntegrationWeightMax", "", "number"}
});
create(parametersFromConfig(config));
}
~SceneRep() {
destroy();
}
static HashParams parametersFromConfig(nlohmann::json &config) {
HashParams params;
// First camera view is set to identity pose to be at the centre of
// the virtual coordinate space.
params.m_rigidTransform.setIdentity();
params.m_rigidTransformInverse.setIdentity();
params.m_hashNumBuckets = config["hashNumBuckets"];
params.m_hashBucketSize = HASH_BUCKET_SIZE;
params.m_hashMaxCollisionLinkedListSize = config["hashMaxCollisionLinkedListSize"];
params.m_SDFBlockSize = SDF_BLOCK_SIZE;
params.m_numSDFBlocks = config["hashNumSDFBlocks"];
params.m_virtualVoxelSize = config["SDFVoxelSize"];
params.m_maxIntegrationDistance = config["SDFMaxIntegrationDistance"];
params.m_truncation = config["SDFTruncation"];
params.m_truncScale = config["SDFTruncationScale"];
params.m_integrationWeightSample = config["SDFIntegrationWeightSample"];
params.m_integrationWeightMax = config["SDFIntegrationWeightMax"];
// Note (Nick): We are not streaming voxels in/out of GPU
//params.m_streamingVoxelExtents = MatrixConversion::toCUDA(gas.s_streamingVoxelExtents);
//params.m_streamingGridDimensions = MatrixConversion::toCUDA(gas.s_streamingGridDimensions);
//params.m_streamingMinGridPos = MatrixConversion::toCUDA(gas.s_streamingMinGridPos);
//params.m_streamingInitialChunkListSize = gas.s_streamingInitialChunkListSize;
return params;
}
void bindDepthCameraTextures(const DepthCameraData& depthCameraData) {
//bindInputDepthColorTextures(depthCameraData);
}
/**
* Note: lastRigidTransform appears to be the estimated camera pose.
* Note: bitMask can be nullptr if not streaming out voxels from GPU
*/
void integrate(const Eigen::Matrix4f& lastRigidTransform, const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, unsigned int* d_bitMask) {
setLastRigidTransform(lastRigidTransform);
//make the rigid transform available on the GPU
m_hashData.updateParams(m_hashParams);
//allocate all hash blocks which are corresponding to depth map entries
alloc(depthCameraData, depthCameraParams, d_bitMask);
//generate a linear hash array with only occupied entries
compactifyHashEntries(depthCameraData);
//volumetrically integrate the depth data into the depth SDFBlocks
integrateDepthMap(depthCameraData, depthCameraParams);
garbageCollect(depthCameraData);
m_numIntegratedFrames++;
}
void setLastRigidTransform(const Eigen::Matrix4f& lastRigidTransform) {
m_hashParams.m_rigidTransform = MatrixConversion::toCUDA(lastRigidTransform);
m_hashParams.m_rigidTransformInverse = m_hashParams.m_rigidTransform.getInverse();
}
void setLastRigidTransformAndCompactify(const Eigen::Matrix4f& lastRigidTransform, const DepthCameraData& depthCameraData) {
setLastRigidTransform(lastRigidTransform);
compactifyHashEntries(depthCameraData);
}
const Eigen::Matrix4f getLastRigidTransform() const {
return MatrixConversion::toEigen(m_hashParams.m_rigidTransform);
}
/* Nick: To reduce weights between frames */
void nextFrame() {
starveVoxelsKernelCUDA(m_hashData, m_hashParams);
m_numIntegratedFrames = 0;
}
//! resets the hash to the initial state (i.e., clears all data)
void reset() {
m_numIntegratedFrames = 0;
m_hashParams.m_rigidTransform.setIdentity();
m_hashParams.m_rigidTransformInverse.setIdentity();
m_hashParams.m_numOccupiedBlocks = 0;
m_hashData.updateParams(m_hashParams);
resetCUDA(m_hashData, m_hashParams);
}
ftl::voxhash::HashData& getHashData() {
return m_hashData;
}
const HashParams& getHashParams() const {
return m_hashParams;
}
//! debug only!
unsigned int getHeapFreeCount() {
unsigned int count;
cudaSafeCall(cudaMemcpy(&count, m_hashData.d_heapCounter, sizeof(unsigned int), cudaMemcpyDeviceToHost));
return count+1; //there is one more free than the address suggests (0 would be also a valid address)
}
//! debug only!
void debugHash() {
HashEntry* hashCPU = new HashEntry[m_hashParams.m_hashBucketSize*m_hashParams.m_hashNumBuckets];
unsigned int* heapCPU = new unsigned int[m_hashParams.m_numSDFBlocks];
unsigned int heapCounterCPU;
cudaSafeCall(cudaMemcpy(&heapCounterCPU, m_hashData.d_heapCounter, sizeof(unsigned int), cudaMemcpyDeviceToHost));
heapCounterCPU++; //points to the first free entry: number of blocks is one more
cudaSafeCall(cudaMemcpy(heapCPU, m_hashData.d_heap, sizeof(unsigned int)*m_hashParams.m_numSDFBlocks, cudaMemcpyDeviceToHost));
cudaSafeCall(cudaMemcpy(hashCPU, m_hashData.d_hash, sizeof(HashEntry)*m_hashParams.m_hashBucketSize*m_hashParams.m_hashNumBuckets, cudaMemcpyDeviceToHost));
//Check for duplicates
class myint3Voxel {
public:
myint3Voxel() {}
~myint3Voxel() {}
bool operator<(const myint3Voxel& other) const {
if (x == other.x) {
if (y == other.y) {
return z < other.z;
}
return y < other.y;
}
return x < other.x;
}
bool operator==(const myint3Voxel& other) const {
return x == other.x && y == other.y && z == other.z;
}
int x,y,z, i;
int offset;
int ptr;
};
std::unordered_set<unsigned int> pointersFreeHash;
std::vector<unsigned int> pointersFreeVec(m_hashParams.m_numSDFBlocks, 0);
for (unsigned int i = 0; i < heapCounterCPU; i++) {
pointersFreeHash.insert(heapCPU[i]);
pointersFreeVec[heapCPU[i]] = FREE_ENTRY;
}
if (pointersFreeHash.size() != heapCounterCPU) {
throw std::runtime_error("ERROR: duplicate free pointers in heap array");
}
unsigned int numOccupied = 0;
unsigned int numMinusOne = 0;
unsigned int listOverallFound = 0;
std::list<myint3Voxel> l;
//std::vector<myint3Voxel> v;
for (unsigned int i = 0; i < m_hashParams.m_hashBucketSize*m_hashParams.m_hashNumBuckets; i++) {
if (hashCPU[i].ptr == -1) {
numMinusOne++;
}
if (hashCPU[i].ptr != -2) {
numOccupied++; // != FREE_ENTRY
myint3Voxel a;
a.x = hashCPU[i].pos.x;
a.y = hashCPU[i].pos.y;
a.z = hashCPU[i].pos.z;
l.push_back(a);
//v.push_back(a);
unsigned int linearBlockSize = m_hashParams.m_SDFBlockSize*m_hashParams.m_SDFBlockSize*m_hashParams.m_SDFBlockSize;
if (pointersFreeHash.find(hashCPU[i].ptr / linearBlockSize) != pointersFreeHash.end()) {
throw std::runtime_error("ERROR: ptr is on free heap, but also marked as an allocated entry");
}
pointersFreeVec[hashCPU[i].ptr / linearBlockSize] = LOCK_ENTRY;
}
}
unsigned int numHeapFree = 0;
unsigned int numHeapOccupied = 0;
for (unsigned int i = 0; i < m_hashParams.m_numSDFBlocks; i++) {
if (pointersFreeVec[i] == FREE_ENTRY) numHeapFree++;
else if (pointersFreeVec[i] == LOCK_ENTRY) numHeapOccupied++;
else {
throw std::runtime_error("memory leak detected: neither free nor allocated");
}
}
if (numHeapFree + numHeapOccupied == m_hashParams.m_numSDFBlocks) std::cout << "HEAP OK!" << std::endl;
else throw std::runtime_error("HEAP CORRUPTED");
l.sort();
size_t sizeBefore = l.size();
l.unique();
size_t sizeAfter = l.size();
std::cout << "diff: " << sizeBefore - sizeAfter << std::endl;
std::cout << "minOne: " << numMinusOne << std::endl;
std::cout << "numOccupied: " << numOccupied << "\t numFree: " << getHeapFreeCount() << std::endl;
std::cout << "numOccupied + free: " << numOccupied + getHeapFreeCount() << std::endl;
std::cout << "numInFrustum: " << m_hashParams.m_numOccupiedBlocks << std::endl;
SAFE_DELETE_ARRAY(heapCPU);
SAFE_DELETE_ARRAY(hashCPU);
//getchar();
}
private:
void create(const HashParams& params) {
m_hashParams = params;
m_hashData.allocate(m_hashParams);
reset();
}
void destroy() {
m_hashData.free();
}
void alloc(const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, const unsigned int* d_bitMask) {
//Start Timing
//if (GlobalAppState::get().s_timingsDetailledEnabled) { cutilSafeCall(cudaDeviceSynchronize()); m_timer.start(); }
// NOTE (nick): We might want this later...
if (true) {
//allocate until all blocks are allocated
unsigned int prevFree = getHeapFreeCount();
while (1) {
resetHashBucketMutexCUDA(m_hashData, m_hashParams);
allocCUDA(m_hashData, m_hashParams, depthCameraData, depthCameraParams, d_bitMask);
unsigned int currFree = getHeapFreeCount();
if (prevFree != currFree) {
prevFree = currFree;
}
else {
break;
}
}
}
else {
//this version is faster, but it doesn't guarantee that all blocks are allocated (staggers alloc to the next frame)
resetHashBucketMutexCUDA(m_hashData, m_hashParams);
allocCUDA(m_hashData, m_hashParams, depthCameraData, depthCameraParams, d_bitMask);
}
// Stop Timing
//if(GlobalAppState::get().s_timingsDetailledEnabled) { cutilSafeCall(cudaDeviceSynchronize()); m_timer.stop(); TimingLog::totalTimeAlloc += m_timer.getElapsedTimeMS(); TimingLog::countTimeAlloc++; }
}
void compactifyHashEntries(const DepthCameraData& depthCameraData) {
//Start Timing
//if(GlobalAppState::get().s_timingsDetailledEnabled) { cutilSafeCall(cudaDeviceSynchronize()); m_timer.start(); }
//CUDATimer t;
//t.startEvent("fillDecisionArray");
//fillDecisionArrayCUDA(m_hashData, m_hashParams, depthCameraData);
//t.endEvent();
//t.startEvent("prefixSum");
//m_hashParams.m_numOccupiedBlocks =
// m_cudaScan.prefixSum(
// m_hashParams.m_hashNumBuckets*m_hashParams.m_hashBucketSize,
// m_hashData.d_hashDecision,
// m_hashData.d_hashDecisionPrefix);
//t.endEvent();
//t.startEvent("compactifyHash");
//m_hashData.updateParams(m_hashParams); //make sure numOccupiedBlocks is updated on the GPU
//compactifyHashCUDA(m_hashData, m_hashParams);
//t.endEvent();
//t.startEvent("compactifyAllInOne");
m_hashParams.m_numOccupiedBlocks = compactifyHashAllInOneCUDA(m_hashData, m_hashParams); //this version uses atomics over prefix sums, which has a much better performance
std::cout << "Occ blocks = " << m_hashParams.m_numOccupiedBlocks << std::endl;
m_hashData.updateParams(m_hashParams); //make sure numOccupiedBlocks is updated on the GPU
//t.endEvent();
//t.evaluate();
// Stop Timing
//if(GlobalAppState::get().s_timingsDetailledEnabled) { cutilSafeCall(cudaDeviceSynchronize()); m_timer.stop(); TimingLog::totalTimeCompactifyHash += m_timer.getElapsedTimeMS(); TimingLog::countTimeCompactifyHash++; }
//std::cout << "numOccupiedBlocks: " << m_hashParams.m_numOccupiedBlocks << std::endl;
}
void integrateDepthMap(const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams) {
//Start Timing
//if(GlobalAppState::get().s_timingsDetailledEnabled) { cutilSafeCall(cudaDeviceSynchronize()); m_timer.start(); }
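// Fuses the current depth (and colour) frame into the SDF values of the allocated, visible voxel blocks.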
integrateDepthMapCUDA(m_hashData, m_hashParams, depthCameraData, depthCameraParams);
// Stop Timing
//if(GlobalAppState::get().s_timingsDetailledEnabled) { cutilSafeCall(cudaDeviceSynchronize()); m_timer.stop(); TimingLog::totalTimeIntegrate += m_timer.getElapsedTimeMS(); TimingLog::countTimeIntegrate++; }
}
void garbageCollect(const DepthCameraData& depthCameraData) {
//only perform if enabled by global app state
//if (GlobalAppState::get().s_garbageCollectionEnabled) {
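// Two phases: first mark voxel blocks that no longer hold useful SDF data, then reset the bucket mutexes and return the marked blocks to the heap.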
garbageCollectIdentifyCUDA(m_hashData, m_hashParams);
resetHashBucketMutexCUDA(m_hashData, m_hashParams); //needed if linked lists are enabled -> for memory deletion
garbageCollectFreeCUDA(m_hashData, m_hashParams);
//}
}
HashParams m_hashParams;
ftl::voxhash::HashData m_hashData;
//CUDAScan m_cudaScan;
unsigned int m_numIntegratedFrames; //used for garbage collect
// static Timer m_timer;
};
}; // namespace voxhash
}; // namespace ftl
// From: https://github.com/niessner/VoxelHashing/blob/master/DepthSensingCUDA/Source/CUDAHashParams.h
#pragma once
//#include <cutil_inline.h>
//#include <cutil_math.h>
#include <vector_types.h>
#include <cuda_runtime.h>
#include <ftl/cuda_matrix_util.hpp>
namespace ftl {
namespace voxhash {
//TODO might have to be split into static and dynamic parts
struct __align__(16) HashParams {
HashParams() {
}
float4x4 m_rigidTransform;
float4x4 m_rigidTransformInverse;
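// Hash table layout: bucket count, entries per bucket, and the maximum length of the per-bucket collision linked list.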
unsigned int m_hashNumBuckets;
unsigned int m_hashBucketSize;
unsigned int m_hashMaxCollisionLinkedListSize;
unsigned int m_numSDFBlocks;
int m_SDFBlockSize;
float m_virtualVoxelSize;
unsigned int m_numOccupiedBlocks; //occupied blocks in the viewing frustum
float m_maxIntegrationDistance;
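// SDF truncation band: m_truncation is the base width and m_truncScale widens it with distance from the camera.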
float m_truncScale;
float m_truncation;
unsigned int m_integrationWeightSample;
unsigned int m_integrationWeightMax;
float3 m_streamingVoxelExtents;
int3 m_streamingGridDimensions;
int3 m_streamingMinGridPos;
unsigned int m_streamingInitialChunkListSize;
uint2 m_dummy;
};
} // namespace voxhash
} // namespace ftl
@@ -7,6 +9,9 @@
#include <glog/logging.h>
#include <ftl/config.h>
#include <ftl/configuration.hpp>
#include <ftl/depth_camera.hpp>
#include <ftl/scene_rep_hash_sdf.hpp>
#include <ftl/ray_cast_sdf.hpp>
#include <ftl/rgbd.hpp>
// #include <zlib.h>
@@ -88,14 +91,14 @@ PointCloud<PointXYZRGB>::Ptr ftl::rgbd::createPointCloud(RGBDSource *src) {
for(int i=0;i<rgb.rows;i++) {
const float *sptr = depth.ptr<float>(i);
for(int j=0;j<rgb.cols;j++) {
float d = sptr[j] * 1000.0f;
float d = sptr[j]; // * 1000.0f;
pcl::PointXYZRGB point;
point.x = (((double)j + CX) / FX) * d;
point.y = (((double)i + CY) / FY) * d;
point.z = d;
if (point.x == INFINITY || point.y == INFINITY || point.z > 20000.0f || point.z < 0.04f) {
if (point.x == INFINITY || point.y == INFINITY || point.z > 20.0f || point.z < 0.04f) {
point.x = 0.0f; point.y = 0.0f; point.z = 0.0f;
}
@@ -127,14 +130,14 @@ PointCloud<PointXYZRGB>::Ptr ftl::rgbd::createPointCloud(RGBDSource *src, const
for(int i=0;i<rgb.rows;i++) {
const float *sptr = depth.ptr<float>(i);
for(int j=0;j<rgb.cols;j++) {
float d = sptr[j] * 1000.0f;
float d = sptr[j]; // * 1000.0f;
pcl::PointXYZRGB point;
point.x = (((double)j + CX) / FX) * d;
point.y = (((double)i + CY) / FY) * d;
point.z = d;
if (point.x == INFINITY || point.y == INFINITY || point.z > 20000.0f || point.z < 0.04f) {
if (point.x == INFINITY || point.y == INFINITY || point.z > 20.0f || point.z < 0.04f) {
point.x = 0.0f; point.y = 0.0f; point.z = 0.0f;
}
@@ -220,8 +223,14 @@ void saveRegistration(std::map<string, Eigen::Matrix4f> registration) {
file << save;
}
struct Cameras {
RGBDSource *source;
DepthCameraData gpu;
DepthCameraParams params;
};
template <template<class> class Container>
std::map<string, Eigen::Matrix4f> runRegistration(ftl::net::Universe &net, Container<RGBDSource*> &inputs) {
std::map<string, Eigen::Matrix4f> runRegistration(ftl::net::Universe &net, Container<Cameras> &inputs) {
std::map<string, Eigen::Matrix4f> registration;
@@ -244,7 +253,7 @@ std::map<string, Eigen::Matrix4f> runRegistration(ftl::net::Universe &net, Conta
size_t ref_i;
bool found = false;
for (size_t i = 0; i < inputs.size(); i++) {
if (inputs[i]->getConfig()["uri"] == ref_uri) {
if (inputs[i].source->getConfig()["uri"] == ref_uri) {
ref_i = i;
found = true;
break;
@@ -254,7 +263,7 @@ std::map<string, Eigen::Matrix4f> runRegistration(ftl::net::Universe &net, Conta
if (!found) { LOG(ERROR) << "Reference input not found!"; return registration; }
for (auto &input : inputs) {
cv::namedWindow("Registration: " + (string)input->getConfig()["uri"], cv::WINDOW_KEEPRATIO|cv::WINDOW_NORMAL);
cv::namedWindow("Registration: " + (string)input.source->getConfig()["uri"], cv::WINDOW_KEEPRATIO|cv::WINDOW_NORMAL);
}
// vector for every input: vector of point clouds of patterns
@@ -266,7 +275,7 @@ std::map<string, Eigen::Matrix4f> runRegistration(ftl::net::Universe &net, Conta
vector<PointCloud<PointXYZ>::Ptr> patterns_iter(inputs.size());
for (size_t i = 0; i < inputs.size(); i++) {
RGBDSource *input = inputs[i];
RGBDSource *input = inputs[i].source;
Mat rgb, depth;
input->getRGBD(rgb, depth);
@@ -289,7 +298,7 @@ std::map<string, Eigen::Matrix4f> runRegistration(ftl::net::Universe &net, Conta
else { LOG(WARNING) << "Pattern not detected on all inputs";}
}
for (auto &input : inputs) { cv::destroyWindow("Registration: " + (string)input->getConfig()["uri"]); }
for (auto &input : inputs) { cv::destroyWindow("Registration: " + (string)input.source->getConfig()["uri"]); }
for (size_t i = 0; i < inputs.size(); i++) {
Eigen::Matrix4f T;
@@ -299,13 +308,48 @@ std::map<string, Eigen::Matrix4f> runRegistration(ftl::net::Universe &net, Conta
else {
T = ftl::registration::findTransformation(patterns[i], patterns[ref_i]);
}
registration[(string)inputs[i]->getConfig()["uri"]] = T;
registration[(string)inputs[i].source->getConfig()["uri"]] = T;
}
saveRegistration(registration);
return registration;
}
#endif
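// Builds a right-handed view matrix from eye, target and up vectors (same convention as gluLookAt).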
template<class T>
Eigen::Matrix<T,4,4> lookAt
(
Eigen::Matrix<T,3,1> const & eye,
Eigen::Matrix<T,3,1> const & center,
Eigen::Matrix<T,3,1> const & up
)
{
typedef Eigen::Matrix<T,4,4> Matrix4;
typedef Eigen::Matrix<T,3,1> Vector3;
Vector3 f = (center - eye).normalized();
Vector3 u = up.normalized();
Vector3 s = f.cross(u).normalized();
u = s.cross(f);
Matrix4 res;
res << s.x(),s.y(),s.z(),-s.dot(eye),
u.x(),u.y(),u.z(),-u.dot(eye),
-f.x(),-f.y(),-f.z(),f.dot(eye),
0,0,0,1;
return res;
}
using MouseAction = std::function<void(int, int, int, int)>;
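// Registers a capturing lambda as an OpenCV mouse callback; the action object is passed by address, so it must outlive the window.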
static void setMouseAction(const std::string& winName, const MouseAction &action)
{
cv::setMouseCallback(winName,
[] (int event, int x, int y, int flags, void* userdata) {
(*(MouseAction*)userdata)(event, x, y, flags);
}, (void*)&action);
}
static void run() {
Universe net(config["net"]);
@@ -317,8 +361,8 @@ static void run() {
return;
}
std::deque<RGBDSource*> inputs;
std::vector<Display> displays;
std::deque<Cameras> inputs;
//std::vector<Display> displays;
// TODO Allow for non-net source types
for (auto &src : config["sources"]) {
@@ -326,9 +370,20 @@ static void run() {
if (!in) {
LOG(ERROR) << "Unrecognised source: " << src;
} else {
inputs.push_back(in);
displays.emplace_back(config["display"], src["uri"]);
LOG(INFO) << (string)src["uri"] << " loaded";
auto &cam = inputs.emplace_back();
cam.source = in;
cam.params.fx = in->getParameters().fx;
cam.params.fy = in->getParameters().fy;
cam.params.mx = -in->getParameters().cx;
cam.params.my = -in->getParameters().cy;
cam.params.m_imageWidth = in->getParameters().width;
cam.params.m_imageHeight = in->getParameters().height;
cam.params.m_sensorDepthWorldMax = in->getParameters().maxDepth;
cam.params.m_sensorDepthWorldMin = in->getParameters().minDepth;
cam.gpu.alloc(cam.params);
//displays.emplace_back(config["display"], src["uri"]);
LOG(INFO) << (string)src["uri"] << " loaded " << cam.params.m_imageWidth;
}
}
@@ -336,63 +391,126 @@ static void run() {
// load point cloud transformations
#ifdef HAVE_PCL
std::map<string, Eigen::Matrix4f> registration;
if (config["registration"]["calibration"]["run"]) {
registration = runRegistration(net, inputs);
}
else {
LOG(INFO) << "LOAD REG";
registration = loadRegistration();
}
LOG(INFO) << "Assigning poses";
vector<Eigen::Matrix4f> T;
for (auto &input : inputs) {
Eigen::Matrix4f RT = (registration.count((string)input->getConfig()["uri"]) > 0) ? registration[(string)input->getConfig()["uri"]] : registration["default"];
LOG(INFO) << (unsigned long long)input.source;
Eigen::Matrix4f RT = (registration.count(input.source->getConfig()["uri"].get<string>()) > 0) ? registration[(string)input.source->getConfig()["uri"]] : registration["default"];
T.push_back(RT);
input.source->setPose(RT);
}
//
vector<PointCloud<PointXYZRGB>::Ptr> clouds(inputs.size());
//vector<PointCloud<PointXYZRGB>::Ptr> clouds(inputs.size());
Display display_merged(config["display"], "Merged"); // todo
CUDARayCastSDF rays(config["voxelhash"]);
ftl::voxhash::SceneRep scene(config["voxelhash"]);
cv::Mat colour_array(cv::Size(rays.getRayCastParams().m_width,rays.getRayCastParams().m_height), CV_8UC3);
// Force window creation
display_merged.render(colour_array);
display_merged.wait(1);
unsigned char frameCount = 0;
bool paused = false;
float cam_x = 0.0f;
float cam_z = 0.0f;
Eigen::Vector3f eye(0.0f, 0.0f, 0.0f);
Eigen::Vector3f centre(0.0f, 0.0f, -4.0f);
Eigen::Vector3f up(0,1.0f,0);
Eigen::Vector3f lookPoint(0.0f,1.0f,-4.0f);
Eigen::Matrix4f viewPose;
float lerpSpeed = 0.4f;
// Keyboard camera controls
display_merged.onKey([&paused,&eye,&centre](int key) {
LOG(INFO) << "Key = " << key;
if (key == 32) paused = !paused;
else if (key == 81) eye[0] += 0.02f;
else if (key == 83) eye[0] -= 0.02f;
else if (key == 84) eye[2] += 0.02f;
else if (key == 82) eye[2] -= 0.02f;
});
// TODO(Nick) Calculate "camera" properties of viewport.
MouseAction mouseact = [&inputs,&lookPoint,&viewPose]( int event, int ux, int uy, int) {
LOG(INFO) << "Mouse " << ux << "," << uy;
if (event == 1) { // click
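// Back-project the clicked pixel through the first camera's intrinsics to a point 4 m in front of the virtual view, then move it into world space to use as the new look target.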
const float x = ((float)ux-inputs[0].params.mx) / inputs[0].params.fx;
const float y = ((float)uy-inputs[0].params.my) / inputs[0].params.fy;
const float depth = -4.0f;
Eigen::Vector4f camPos(x*depth,y*depth,depth,1.0);
Eigen::Vector4f worldPos = viewPose * camPos;
lookPoint = Eigen::Vector3f(worldPos[0],worldPos[1],worldPos[2]);
}
};
::setMouseAction("Image", mouseact);
int active = displays.size();
int active = inputs.size();
while (active > 0 && display_merged.active()) {
active = 0;
if (!paused) {
net.broadcast("grab"); // To sync cameras
PointCloud<PointXYZRGB>::Ptr cloud(new PointCloud<PointXYZRGB>);
scene.nextFrame();
for (size_t i = 0; i < inputs.size(); i++) {
//if (i == 1) continue;
//Display &display = displays[i];
RGBDSource *input = inputs[i];
RGBDSource *input = inputs[i].source;
Mat rgb, depth;
//LOG(INFO) << "GetRGB";
input->getRGBD(rgb,depth);
//if (!display.active()) continue;
active += 1;
clouds[i] = ftl::rgbd::createPointCloud(input); //, (i==0) ? cv::Point3_<uchar>(255,0,0) : cv::Point3_<uchar>(0,0,255));
//clouds[i] = ftl::rgbd::createPointCloud(input);
//display.render(rgb, depth,input->getParameters());
//display.render(clouds[i]);
//display.wait(5);
}
for (size_t i = 0; i < clouds.size(); i++) {
pcl::transformPointCloud(*clouds[i], *clouds[i], T[i]);
*cloud += *clouds[i];
//LOG(INFO) << "Data size: " << depth.cols << "," << depth.rows;
if (depth.cols == 0) continue;
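// The GPU upload expects 4-channel colour, so convert BGR to BGRA before pushing the frame.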
Mat rgba;
cv::cvtColor(rgb,rgba, cv::COLOR_BGR2BGRA);
inputs[i].params.flags = frameCount;
// Send to GPU and merge view into scene
inputs[i].gpu.updateParams(inputs[i].params);
inputs[i].gpu.updateData(depth, rgba);
scene.integrate(inputs[i].source->getPose(), inputs[i].gpu, inputs[i].params, nullptr);
}
} else {
active = 1;
}
pcl::UniformSampling<PointXYZRGB> uniform_sampling;
uniform_sampling.setInputCloud(cloud);
uniform_sampling.setRadiusSearch(0.1f);
uniform_sampling.filter(*cloud);
frameCount++;
// Set virtual camera transformation matrix
//Eigen::Affine3f transform(Eigen::Translation3f(cam_x,0.0f,cam_z));
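// Ease the view centre towards the last clicked look point so the virtual camera pans smoothly.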
centre += (lookPoint - centre) * (lerpSpeed * 0.1f);
viewPose = lookAt<float>(eye,centre,up); // transform.matrix();
display_merged.render(cloud);
display_merged.wait(50);
// Call GPU renderer and download result into OpenCV mat
rays.render(scene.getHashData(), scene.getHashParams(), inputs[0].gpu, viewPose);
rays.getRayCastData().download(nullptr, (uchar3*)colour_array.data, rays.getRayCastParams());
display_merged.render(colour_array);
display_merged.wait(1);
}
#endif
// TODO non-PCL (?)
}
int main(int argc, char **argv) {
......
@@ -101,7 +101,7 @@ PointCloud<PointXYZ>::Ptr cornersToPointCloud(const vector<cv::Point2f> &corners
for (int i = 0; i < corners_len; i++) {
double x = corners[i].x;
double y = corners[i].y;
double d = disp.at<float>((int) y, (int) x) * 1000.0f; // todo: better estimation
double d = disp.at<float>((int) y, (int) x); // * 1000.0f; // todo: better estimation
//cv::Vec4d homg_pt = Q_ * cv::Vec4d(x, y, d, 1.0);
//cv::Vec3d p = cv::Vec3d(homg_pt.val) / homg_pt[3];
......