diff --git a/applications/reconstruct/CMakeLists.txt b/applications/reconstruct/CMakeLists.txt
index 4db3fbf4285e8022c7af9f908d87ef1966e9ecd4..00e44a444ba45f6cb7b4f68ed2441a1c3ac4fd4c 100644
--- a/applications/reconstruct/CMakeLists.txt
+++ b/applications/reconstruct/CMakeLists.txt
@@ -9,15 +9,16 @@ set(REPSRC
 	src/compactors.cu
 	src/garbage.cu
 	src/integrators.cu
-	src/ray_cast_sdf.cu
+	#src/ray_cast_sdf.cu
 	src/splat_render.cu
 	src/camera_util.cu
 	src/voxel_hash.cu
 	src/voxel_hash.cpp
-	src/ray_cast_sdf.cpp
+	#src/ray_cast_sdf.cpp
 	src/registration.cpp
-	src/virtual_source.cpp
+	#src/virtual_source.cpp
 	src/splat_render.cpp
+	src/dibr.cu
 )
 
 add_executable(ftl-reconstruct ${REPSRC})
diff --git a/applications/reconstruct/include/ftl/cuda_operators.hpp b/applications/reconstruct/include/ftl/cuda_operators.hpp
index 21e109b89bdc98f6c504648eee0536875270c806..b7a3b44426bf6fcf490af67f38461ca849ade428 100644
--- a/applications/reconstruct/include/ftl/cuda_operators.hpp
+++ b/applications/reconstruct/include/ftl/cuda_operators.hpp
@@ -587,6 +587,14 @@ inline __host__ __device__ int3 make_int3(int i)
   int3 t; t.x = i; t.y = i; t.z = i; return t;
 }
 
+__device__ inline int3 make_int3(short3 s) {
+	return make_int3(s.x,s.y,s.z);
+}
+
+__device__ inline int3 make_int3(short4 s) {
+	return make_int3(s.x,s.y,s.z);
+}
+
 // negate
 inline __host__ __device__ int3 operator-(int3 &a)
 {
diff --git a/applications/reconstruct/include/ftl/depth_camera.hpp b/applications/reconstruct/include/ftl/depth_camera.hpp
index 641a9c251c7518f8b8bbe486ca2af32e008557d8..f40581b65cd71b9f239b2c0e82dfd616deb7ff28 100644
--- a/applications/reconstruct/include/ftl/depth_camera.hpp
+++ b/applications/reconstruct/include/ftl/depth_camera.hpp
@@ -12,19 +12,31 @@
 
 #include <ftl/depth_camera_params.hpp>
 
+#define MAX_CAMERAS 20
+
 
 //extern "C" void updateConstantDepthCameraParams(const DepthCameraParams& params);
 //extern __constant__ DepthCameraParams c_depthCameraParams;
 
+namespace ftl {
+namespace voxhash {
+
+struct __align__(16) DepthCameraCUDA {
+	cudaTextureObject_t depth;
+	cudaTextureObject_t colour;
+	DepthCameraParams params;
+	float4x4 pose;
+	float4x4 poseInverse;
+};
 
-struct DepthCameraData {
+struct DepthCamera {
 
 	///////////////
 	// Host part //
 	///////////////
 
-	__device__ __host__
-	DepthCameraData() {
+	__host__
+	DepthCamera() {
 		/*d_depthData = NULL;
 		d_colorData = NULL;
 		d_depthArray = NULL;
@@ -42,8 +54,9 @@ struct DepthCameraData {
 		colour_mat_ = new cv::cuda::GpuMat(params.m_imageHeight, params.m_imageWidth, CV_8UC4);
 		depth_tex_ = new ftl::cuda::TextureObject<float>((cv::cuda::PtrStepSz<float>)*depth_mat_);
 		colour_tex_ = new ftl::cuda::TextureObject<uchar4>((cv::cuda::PtrStepSz<uchar4>)*colour_mat_);
-		depth_obj_ = depth_tex_->cudaTexture();
-		colour_obj_ = colour_tex_->cudaTexture();
+		data.depth = depth_tex_->cudaTexture();
+		data.colour = colour_tex_->cudaTexture();
+		data.params = params;
 	}
 
 	//__host__
@@ -71,6 +84,10 @@ struct DepthCameraData {
 	cv::cuda::GpuMat *colour_mat_;
 	ftl::cuda::TextureObject<float> *depth_tex_;
 	ftl::cuda::TextureObject<uchar4> *colour_tex_;
-	cudaTextureObject_t depth_obj_;
-	cudaTextureObject_t colour_obj_;
+	//cudaTextureObject_t depth_obj_;
+	//cudaTextureObject_t colour_obj_;
+
+	DepthCameraCUDA data;
 };
+}
+}
diff --git a/applications/reconstruct/include/ftl/voxel_hash.hpp b/applications/reconstruct/include/ftl/voxel_hash.hpp
index 4a8e8ee8051705546a94afc035ef57c0415d1246..c0a228834337e60f5807313735eedee25eb51dcb 100644
--- a/applications/reconstruct/include/ftl/voxel_hash.hpp
+++ b/applications/reconstruct/include/ftl/voxel_hash.hpp
@@ -37,6 +37,7 @@ typedef signed char schar;
 #include <ftl/depth_camera.hpp>
 
 #define SDF_BLOCK_SIZE 8
+#define SDF_BLOCK_SIZE_OLAP 7
 
 #ifndef MINF
 #define MINF __int_as_float(0xff800000)
@@ -54,22 +55,39 @@ namespace voxhash {
 
 //status flags for hash entries
 static const int LOCK_ENTRY = -1;
-static const int FREE_ENTRY = -2;
+static const int FREE_ENTRY = -2147483647-1;	// INT_MIN; the literal -2147483648 overflows int before negation
 static const int NO_OFFSET = 0;
 
+static const uint kFlagSurface = 0x00000001;
+
+struct __align__(16) HashEntryHead {
+	union {
+	short4 posXYZ;		// hash position (lower left corner of SDFBlock))
+	uint64_t pos;
+	};
+	int offset;	// offset for collisions
+	uint flags;
+};
+
 struct __align__(16) HashEntry 
 {
-	int3	pos;		//hash position (lower left corner of SDFBlock))
-	int		ptr;		//pointer into heap to SDFBlock
-	uint	offset;		//offset for collisions
-	uint	flags;		//for interframe checks (Nick)
+	HashEntryHead head;
+	uint voxels[16];  // 512 bits, 1 bit per voxel
 	
-	__device__ void operator=(const struct HashEntry& e) {
+	/*__device__ void operator=(const struct HashEntry& e) {
 		((long long*)this)[0] = ((const long long*)&e)[0];
 		((long long*)this)[1] = ((const long long*)&e)[1];
 		//((int*)this)[4] = ((const int*)&e)[4];
 		((long long*)this)[2] = ((const long long*)&e)[2];
-	}
+		((long long*)this)[2] = ((const long long*)&e)[3];
+		((long long*)this)[2] = ((const long long*)&e)[4];
+		((long long*)this)[2] = ((const long long*)&e)[5];
+		((long long*)this)[2] = ((const long long*)&e)[6];
+		((long long*)this)[2] = ((const long long*)&e)[7];
+		((long long*)this)[2] = ((const long long*)&e)[8];
+		((long long*)this)[2] = ((const long long*)&e)[9];
+		((long long*)this)[2] = ((const long long*)&e)[10];
+	}*/
 };
 
 struct __align__(8) Voxel {
@@ -95,14 +113,14 @@ struct HashData {
 
 	__device__ __host__
 	HashData() {
-		d_heap = NULL;
-		d_heapCounter = NULL;
+		//d_heap = NULL;
+		//d_heapCounter = NULL;
 		d_hash = NULL;
 		d_hashDecision = NULL;
 		d_hashDecisionPrefix = NULL;
 		d_hashCompactified = NULL;
 		d_hashCompactifiedCounter = NULL;
-		d_SDFBlocks = NULL;
+		//d_SDFBlocks = NULL;
 		d_hashBucketMutex = NULL;
 		m_bIsOnGPU = false;
 	}
@@ -166,18 +184,18 @@ struct HashData {
 		//out.color = 0.5f * (v0.color + v1.color);	//exponential running average 
 		
 
-		//float3 c0 = make_float3(v0.color.x, v0.color.y, v0.color.z);
-		//float3 c1 = make_float3(v1.color.x, v1.color.y, v1.color.z);
+		float3 c0 = make_float3(v0.color.x, v0.color.y, v0.color.z);
+		float3 c1 = make_float3(v1.color.x, v1.color.y, v1.color.z);
 
 		//float3 res = (c0.x+c0.y+c0.z == 0) ? c1 : 0.5f*c0 + 0.5f*c1;
 		//float3 res = (c0+c1)/2;
-		//float3 res = (c0 * (float)v0.weight + c1 * (float)v1.weight) / ((float)v0.weight + (float)v1.weight);
+		float3 res = (c0 * (float)v0.weight + c1 * (float)v1.weight) / ((float)v0.weight + (float)v1.weight);
 		//float3 res = c1;
 
-		//out.color.x = (uchar)(res.x+0.5f);	out.color.y = (uchar)(res.y+0.5f); out.color.z = (uchar)(res.z+0.5f);
+		out.color.x = (uchar)(res.x+0.5f);	out.color.y = (uchar)(res.y+0.5f); out.color.z = (uchar)(res.z+0.5f);
 		
 		// Nick: reduces colour flicker but not ideal..
-		out.color = v1.color;
+		//out.color = v1.color;
 
 		// Option 3 (Nick): Use colour with minimum SDF since it should be closest to surface.
 		// Results in stable but pixelated output
@@ -220,20 +238,20 @@ struct HashData {
 
 	__device__ 
 	int3 virtualVoxelPosToSDFBlock(int3 virtualVoxelPos) const {
-		if (virtualVoxelPos.x < 0) virtualVoxelPos.x -= SDF_BLOCK_SIZE-1;
-		if (virtualVoxelPos.y < 0) virtualVoxelPos.y -= SDF_BLOCK_SIZE-1;
-		if (virtualVoxelPos.z < 0) virtualVoxelPos.z -= SDF_BLOCK_SIZE-1;
+		if (virtualVoxelPos.x < 0) virtualVoxelPos.x -= SDF_BLOCK_SIZE_OLAP-1;
+		if (virtualVoxelPos.y < 0) virtualVoxelPos.y -= SDF_BLOCK_SIZE_OLAP-1;
+		if (virtualVoxelPos.z < 0) virtualVoxelPos.z -= SDF_BLOCK_SIZE_OLAP-1;
 
 		return make_int3(
-			virtualVoxelPos.x/SDF_BLOCK_SIZE,
-			virtualVoxelPos.y/SDF_BLOCK_SIZE,
-			virtualVoxelPos.z/SDF_BLOCK_SIZE);
+			virtualVoxelPos.x/SDF_BLOCK_SIZE_OLAP,
+			virtualVoxelPos.y/SDF_BLOCK_SIZE_OLAP,
+			virtualVoxelPos.z/SDF_BLOCK_SIZE_OLAP);
 	}
 
 	// Computes virtual voxel position of corner sample position
 	__device__ 
 	int3 SDFBlockToVirtualVoxelPos(const int3& sdfBlock) const	{
-		return sdfBlock*SDF_BLOCK_SIZE;
+		return sdfBlock*SDF_BLOCK_SIZE_OLAP;
 	}
 
 	__device__ 
@@ -300,7 +318,7 @@ struct HashData {
 
 		//! returns the hash entry for a given worldPos; if there was no hash entry the returned entry will have a ptr with FREE_ENTRY set
 	__device__ 
-	HashEntry getHashEntry(const float3& worldPos) const	{
+	int getHashEntry(const float3& worldPos) const	{
 		//int3 blockID = worldToSDFVirtualVoxelPos(worldPos)/SDF_BLOCK_SIZE;	//position of sdf block
 		int3 blockID = worldToSDFBlock(worldPos);
 		return getHashEntryForSDFBlockPos(blockID);
@@ -314,15 +332,15 @@ struct HashData {
 
 	__device__ 
 		void deleteHashEntry(HashEntry& hashEntry) {
-			hashEntry.pos = make_int3(0);
-			hashEntry.offset = 0;
-			hashEntry.ptr = FREE_ENTRY;
+			hashEntry.head.pos = 0;
+			hashEntry.head.offset = FREE_ENTRY;
+			for (int i=0; i<16; ++i) hashEntry.voxels[i] = 0;
 	}
 
 	__device__ 
 		bool voxelExists(const float3& worldPos) const	{
-			HashEntry hashEntry = getHashEntry(worldPos);
-			return (hashEntry.ptr != FREE_ENTRY);
+			int hashEntry = getHashEntry(worldPos);
+			return (hashEntry != -1);
 	}
 
 	__device__  
@@ -331,48 +349,44 @@ struct HashData {
 		v.weight = 0;
 		v.sdf = 0.0f;
 	}
-	__device__ 
-		void deleteVoxel(uint id) {
-			deleteVoxel(d_SDFBlocks[id]);
-	}
 
 
 	__device__ 
-	Voxel getVoxel(const float3& worldPos) const	{
-		HashEntry hashEntry = getHashEntry(worldPos);
-		Voxel v;
-		if (hashEntry.ptr == FREE_ENTRY) {
-			deleteVoxel(v);			
+	bool getVoxel(const float3& worldPos) const	{
+		int hashEntry = getHashEntry(worldPos);
+		if (hashEntry == -1) {
+			return false;		
 		} else {
 			int3 virtualVoxelPos = worldToVirtualVoxelPos(worldPos);
-			v = d_SDFBlocks[hashEntry.ptr + virtualVoxelPosToLocalSDFBlockIndex(virtualVoxelPos)];
+			int ix = virtualVoxelPosToLocalSDFBlockIndex(virtualVoxelPos);
+			return d_hash[hashEntry].voxels[ix/32] & (0x1 << (ix % 32));
 		}
-		return v;
 	}
 
 	__device__ 
-	Voxel getVoxel(const int3& virtualVoxelPos) const	{
-		HashEntry hashEntry = getHashEntryForSDFBlockPos(virtualVoxelPosToSDFBlock(virtualVoxelPos));
-		Voxel v;
-		if (hashEntry.ptr == FREE_ENTRY) {
-			deleteVoxel(v);			
+	bool getVoxel(const int3& virtualVoxelPos) const	{
+		int hashEntry = getHashEntryForSDFBlockPos(virtualVoxelPosToSDFBlock(virtualVoxelPos));
+		if (hashEntry == -1) {
+			return false;		
 		} else {
-			v = d_SDFBlocks[hashEntry.ptr + virtualVoxelPosToLocalSDFBlockIndex(virtualVoxelPos)];
+			int ix = virtualVoxelPosToLocalSDFBlockIndex(virtualVoxelPos);
+			return d_hash[hashEntry].voxels[ix >> 5] & (0x1 << (ix & 0x1F));
 		}
-		return v;
 	}
 	
-	__device__ 
-	void setVoxel(const int3& virtualVoxelPos, Voxel& voxelInput) const {
-		HashEntry hashEntry = getHashEntryForSDFBlockPos(virtualVoxelPosToSDFBlock(virtualVoxelPos));
-		if (hashEntry.ptr != FREE_ENTRY) {
+	/*__device__ 
+	void setVoxel(const int3& virtualVoxelPos, bool voxelInput) const {
+		int hashEntry = getHashEntryForSDFBlockPos(virtualVoxelPosToSDFBlock(virtualVoxelPos));
+		if (hashEntry == -1) {
 			d_SDFBlocks[hashEntry.ptr + virtualVoxelPosToLocalSDFBlockIndex(virtualVoxelPos)] = voxelInput;
+			int ix = virtualVoxelPosToLocalSDFBlockIndex(virtualVoxelPos);
+			d_hash[hashEntry].voxels[ix >> 5] |= (0x1 << (ix & 0x1F));
 		}
-	}
+	}*/
 
 	//! returns the hash entry for a given sdf block id; if there was no hash entry the returned entry will have a ptr with FREE_ENTRY set
 	__device__ 
-	HashEntry getHashEntryForSDFBlockPos(const int3& sdfBlock) const;
+	int getHashEntryForSDFBlockPos(const int3& sdfBlock) const;
 
 	//for histogram (no collision traversal)
 	__device__ 
@@ -383,22 +397,9 @@ struct HashData {
 	unsigned int getNumHashLinkedList(unsigned int bucketID);
 
 
-	__device__
-	uint consumeHeap() {
-		uint addr = atomicSub(&d_heapCounter[0], 1);
-		//TODO MATTHIAS check some error handling?
-		return d_heap[addr];
-	}
-	__device__
-	void appendHeap(uint ptr) {
-		uint addr = atomicAdd(&d_heapCounter[0], 1);
-		//TODO MATTHIAS check some error handling?
-		d_heap[addr+1] = ptr;
-	}
-
 	//pos in SDF block coordinates
 	__device__
-	void allocBlock(const int3& pos, const uchar frame);
+	void allocBlock(const int3& pos);
 
 	//!inserts a hash entry without allocating any memory: used by streaming: TODO MATTHIAS check the atomics in this function
 	__device__
@@ -410,14 +411,11 @@ struct HashData {
 
 #endif	//CUDACC
 
-	uint*		d_heap;						//heap that manages free memory
-	uint*		d_heapCounter;				//single element; used as an atomic counter (points to the next free block)
 	int*		d_hashDecision;				//
 	int*		d_hashDecisionPrefix;		//
 	HashEntry*	d_hash;						//hash that stores pointers to sdf blocks
-	HashEntry*	d_hashCompactified;			//same as before except that only valid pointers are there
+	HashEntry**	d_hashCompactified;			//same as before except that only valid pointers are there
 	int*		d_hashCompactifiedCounter;	//atomic counter to add compactified entries atomically 
-	Voxel*		d_SDFBlocks;				//sub-blocks that contain 8x8x8 voxels (linearized); are allocated by heap
 	int*		d_hashBucketMutex;			//binary flag per hash bucket; used for allocation to atomically lock a bucket
 
 	bool		m_bIsOnGPU;					//the class be be used on both cpu and gpu
diff --git a/applications/reconstruct/include/ftl/voxel_scene.hpp b/applications/reconstruct/include/ftl/voxel_scene.hpp
index e75a1b72429d642349281095712c83e3426bccbc..d594d479f1f35ffc9cece087b867be2f4b781ee0 100644
--- a/applications/reconstruct/include/ftl/voxel_scene.hpp
+++ b/applications/reconstruct/include/ftl/voxel_scene.hpp
@@ -17,7 +17,7 @@ namespace voxhash {
 
 struct Cameras {
 	ftl::rgbd::Source *source;
-	DepthCameraData gpu;
+	ftl::voxhash::DepthCamera gpu;
 	DepthCameraParams params;
 	cv::cuda::Stream stream;
 };
@@ -58,13 +58,14 @@ class SceneRep : public ftl::Configurable {
 	//! resets the hash to the initial state (i.e., clears all data)
 	void reset();
 
+	int cameraCount() const { return (int)cameras_.size(); }
+
 
 	ftl::voxhash::HashData& getHashData() { return m_hashData; } 
 
 	HashParams& getHashParams() { return m_hashParams; }
 
-	//! debug only!
-	unsigned int getHeapFreeCount();
+	unsigned int getOccupiedCount();
 
 	//! debug only!
 	void debugHash();
@@ -78,13 +79,14 @@ class SceneRep : public ftl::Configurable {
 	HashParams _parametersFromConfig();
 	void _create(const HashParams& params);
 	void _destroy();
-	void _alloc(const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, cudaStream_t);
+	void _alloc(int camid, cudaStream_t);
 	void _compactifyVisible(const DepthCameraParams &camera);
 	void _compactifyAllocated();
-	void _integrateDepthMap(const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams);
+	//void _integrateDepthMap(const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams);
+	void _integrateDepthMaps();
 	void _garbageCollect();
 
-
+	void _updateCameraConstant();
 
 	HashParams		m_hashParams;
 	ftl::voxhash::HashData		m_hashData;
diff --git a/applications/reconstruct/src/compactors.cu b/applications/reconstruct/src/compactors.cu
index 373a71fc241afab2925931e21f586ca5a75ed551..b7cdd5028f0f5ec78de47d8bf6f9099c5448a494 100644
--- a/applications/reconstruct/src/compactors.cu
+++ b/applications/reconstruct/src/compactors.cu
@@ -63,7 +63,7 @@ using ftl::voxhash::FREE_ENTRY;
 #endif
 }*/
 
-__global__ void compactifyVisibleKernel(HashData hashData, HashParams hashParams, DepthCameraParams camera)
+/*__global__ void compactifyVisibleKernel(HashData hashData, HashParams hashParams, DepthCameraParams camera)
 {
 	//const HashParams& hashParams = c_hashParams;
 	const unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x;
@@ -122,7 +122,7 @@ void ftl::cuda::compactifyVisible(HashData& hashData, const HashParams& hashPara
 	//cutilCheckMsg(__FUNCTION__);
 #endif
 	//return res;
-}
+}*/
 
 __global__ void compactifyAllocatedKernel(HashData hashData)
 {
@@ -130,9 +130,9 @@ __global__ void compactifyAllocatedKernel(HashData hashData)
 	const unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x;
 #ifdef COMPACTIFY_HASH_SIMPLE
 	if (idx < hashParams.m_hashNumBuckets) {
-		if (hashData.d_hash[idx].ptr != FREE_ENTRY) {
+		if (hashData.d_hash[idx].head.offset != FREE_ENTRY) {
 			int addr = atomicAdd(hashData.d_hashCompactifiedCounter, 1);
-			hashData.d_hashCompactified[addr] = hashData.d_hash[idx];
+			hashData.d_hashCompactified[addr] = &hashData.d_hash[idx];
 		}
 	}
 #else	
@@ -142,7 +142,7 @@ __global__ void compactifyAllocatedKernel(HashData hashData)
 
 	int addrLocal = -1;
 	if (idx < hashParams.m_hashNumBuckets) {
-		if (hashData.d_hash[idx].ptr != FREE_ENTRY) {
+		if (hashData.d_hash[idx].head.offset != FREE_ENTRY) {
 			addrLocal = atomicAdd(&localCounter, 1);
 		}
 	}
@@ -157,7 +157,7 @@ __global__ void compactifyAllocatedKernel(HashData hashData)
 
 	if (addrLocal != -1) {
 		const unsigned int addr = addrGlobal + addrLocal;
-		hashData.d_hashCompactified[addr] = hashData.d_hash[idx];
+		hashData.d_hashCompactified[addr] = &hashData.d_hash[idx];
 	}
 #endif
 }
@@ -178,3 +178,59 @@ void ftl::cuda::compactifyAllocated(HashData& hashData, const HashParams& hashPa
 #endif
 	//return res;
 }
+
+
+__global__ void compactifyOccupiedKernel(HashData hashData)
+{
+	const HashParams& hashParams = c_hashParams;
+	const unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x;
+#ifdef COMPACTIFY_HASH_SIMPLE
+	if (idx < hashParams.m_hashNumBuckets) {
+		if (hashData.d_hash[idx].head.offset != FREE_ENTRY && hashData.d_hash[idx].head.flags & ftl::voxhash::kFlagSurface) {
+			int addr = atomicAdd(hashData.d_hashCompactifiedCounter, 1);
+			hashData.d_hashCompactified[addr] = &hashData.d_hash[idx];
+		}
+	}
+#else	
+	__shared__ int localCounter;
+	if (threadIdx.x == 0) localCounter = 0;
+	__syncthreads();
+
+	int addrLocal = -1;
+	if (idx < hashParams.m_hashNumBuckets) {
+		if (hashData.d_hash[idx].head.offset != FREE_ENTRY && (hashData.d_hash[idx].head.flags & ftl::voxhash::kFlagSurface)) {  // TODO:(Nick) Check voxels for all 0 or all 1
+			addrLocal = atomicAdd(&localCounter, 1);
+		}
+	}
+
+	__syncthreads();
+
+	__shared__ int addrGlobal;
+	if (threadIdx.x == 0 && localCounter > 0) {
+		addrGlobal = atomicAdd(hashData.d_hashCompactifiedCounter, localCounter);
+	}
+	__syncthreads();
+
+	if (addrLocal != -1) {
+		const unsigned int addr = addrGlobal + addrLocal;
+		hashData.d_hashCompactified[addr] = &hashData.d_hash[idx];
+	}
+#endif
+}
+
+void ftl::cuda::compactifyOccupied(HashData& hashData, const HashParams& hashParams, cudaStream_t stream) {
+	const unsigned int threadsPerBlock = COMPACTIFY_HASH_THREADS_PER_BLOCK;
+	const dim3 gridSize((hashParams.m_hashNumBuckets + threadsPerBlock - 1) / threadsPerBlock, 1);
+	const dim3 blockSize(threadsPerBlock, 1);
+
+	cudaSafeCall(cudaMemsetAsync(hashData.d_hashCompactifiedCounter, 0, sizeof(int), stream));
+	compactifyOccupiedKernel << <gridSize, blockSize, 0, stream >> >(hashData);
+	//unsigned int res = 0;
+	//cudaSafeCall(cudaMemcpyAsync(&res, hashData.d_hashCompactifiedCounter, sizeof(unsigned int), cudaMemcpyDeviceToHost, stream));
+
+#ifdef _DEBUG
+	cudaSafeCall(cudaDeviceSynchronize());
+	//cutilCheckMsg(__FUNCTION__);
+#endif
+	//return res;
+}
diff --git a/applications/reconstruct/src/compactors.hpp b/applications/reconstruct/src/compactors.hpp
index a633cfd22d3ec593c6fb9e3be06977d514eacd84..6c61985eea8448a078b8abe3e821992519c9425f 100644
--- a/applications/reconstruct/src/compactors.hpp
+++ b/applications/reconstruct/src/compactors.hpp
@@ -7,12 +7,13 @@ namespace ftl {
 namespace cuda {
 
 // Compact visible
-void compactifyVisible(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const DepthCameraParams &camera, cudaStream_t);
+//void compactifyVisible(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const DepthCameraParams &camera, cudaStream_t);
 
 // Compact allocated
 void compactifyAllocated(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, cudaStream_t);
 
 // Compact visible surfaces
+void compactifyOccupied(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, cudaStream_t stream);
 
 }
 }
diff --git a/applications/reconstruct/src/dibr.cu b/applications/reconstruct/src/dibr.cu
new file mode 100644
index 0000000000000000000000000000000000000000..575e3c45f9348c37c8397683db36d87edda677e4
--- /dev/null
+++ b/applications/reconstruct/src/dibr.cu
@@ -0,0 +1,118 @@
+#include "splat_render_cuda.hpp"
+#include <cuda_runtime.h>
+
+#include <ftl/cuda_matrix_util.hpp>
+
+#include "splat_params.hpp"
+#include <ftl/depth_camera.hpp>
+
+#define T_PER_BLOCK 8
+
+using ftl::cuda::TextureObject;
+using ftl::render::SplatParams;
+
+extern __constant__ ftl::voxhash::DepthCameraCUDA c_cameras[MAX_CAMERAS];  // NOTE(review): extern __constant__ across .cu files requires separate compilation (-rdc=true) — confirm build flags
+
+__global__ void clearColourKernel(TextureObject<uchar4> colour) {
+	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
+	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	if (x < colour.width() && y < colour.height()) {
+		//depth(x,y) = 0x7f800000; //PINF;
+		colour(x,y) = make_uchar4(76,76,82,0);
+	}
+}
+
+__global__ void dibr_kernel(
+    TextureObject<float> depth_in,
+    TextureObject<uchar4> colour_out, int numcams, SplatParams params) {
+
+    const int i = threadIdx.y*blockDim.y + threadIdx.x;
+    const int bx = blockIdx.x*blockDim.x;
+    const int by = blockIdx.y*blockDim.y;
+    const int x = bx + threadIdx.x;
+    const int y = by + threadIdx.y;
+    
+    for (int j=0; j<numcams; ++j) {
+        const ftl::voxhash::DepthCameraCUDA camera = c_cameras[j];
+	
+		float d = tex2D<float>(camera.depth, x, y);
+		if (d < 0.01f)	continue;
+        if (d >= params.camera.m_sensorDepthWorldMax) continue;
+        
+        const float3 worldPos = camera.pose * camera.params.kinectDepthToSkeleton(x, y, d);
+        
+        const float3 camPos = params.m_viewMatrix * worldPos;
+	    const float2 screenPosf = params.camera.cameraToKinectScreenFloat(camPos);
+	    const uint2 screenPos = make_uint2(make_int2(screenPosf));
+
+        if (camPos.z < params.camera.m_sensorDepthWorldMin) continue;
+
+        const unsigned int cx = screenPos.x;
+        const unsigned int cy = screenPos.y;
+
+
+        if (cx < colour_out.width() && cy < colour_out.height()) {
+            float camd = depth_in.tex2D((int)cx,(int)cy);
+            //atomicMin(&depth(x,y), idepth);
+            float camdiff = fabs(camPos.z-camd);
+            if (camdiff < 0.1f) {
+           		colour_out(cx,cy) = tex2D<uchar4>(camera.colour,x,y);
+            } else {
+				//colour_out(cx,cy) = make_uchar4(camdiff * 100, 0, 0, 255);
+			}
+        }
+    }
+}
+
+__global__ void dibr_kernel_rev(
+    TextureObject<float> depth_in,
+    TextureObject<uchar4> colour_out, int numcams, SplatParams params) {
+
+    const int i = threadIdx.y*blockDim.y + threadIdx.x;
+    const int bx = blockIdx.x*blockDim.x;
+    const int by = blockIdx.y*blockDim.y;
+    const int x = bx + threadIdx.x;
+	const int y = by + threadIdx.y;
+	
+	if (x >= depth_in.width() || y >= depth_in.height()) return;	// grid is rounded up; guard against OOB write to colour_out
+	const float camd = depth_in.tex2D((int)x,(int)y);
+	if (camd < 0.01f || camd >= params.camera.m_sensorDepthWorldMax) return;
+	
+	const float3 worldPos = params.m_viewMatrixInverse * params.camera.kinectDepthToSkeleton(x, y, camd);
+    
+    for (int j=0; j<numcams; ++j) {
+		const ftl::voxhash::DepthCameraCUDA camera = c_cameras[j];
+		
+		const float3 camPos = camera.poseInverse * worldPos;
+	    const float2 screenPosf = camera.params.cameraToKinectScreenFloat(camPos);
+		const uint2 screenPos = make_uint2(make_int2(screenPosf));
+		
+		if (camPos.z < params.camera.m_sensorDepthWorldMin) continue;
+
+		const unsigned int cx = screenPos.x;
+        const unsigned int cy = screenPos.y;
+
+        if (cx < camera.params.m_imageWidth && cy < camera.params.m_imageHeight) {
+			float d = tex2D<float>(camera.depth, (int)cx, (int)cy);
+			float camdiff = fabs(camPos.z-d);
+            if (camdiff < 0.1f) {
+            	colour_out(x,y) = tex2D<uchar4>(camera.colour,cx,cy);
+            } else {
+				//colour_out(x,y) = make_uchar4(camdiff * 100, 0, 0, 255);
+			}
+		}
+    }
+}
+
+void ftl::cuda::dibr(const TextureObject<float> &depth_in,
+    const TextureObject<uchar4> &colour_out, int numcams, const SplatParams &params, cudaStream_t stream) {
+
+    const dim3 gridSize((depth_in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+    const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
+
+    clearColourKernel<<<gridSize, blockSize, 0, stream>>>(colour_out);
+
+    dibr_kernel_rev<<<gridSize, blockSize, 0, stream>>>(depth_in, colour_out, numcams, params);
+    cudaSafeCall( cudaGetLastError() );
+}
diff --git a/applications/reconstruct/src/garbage.cu b/applications/reconstruct/src/garbage.cu
index b6226e58826b4ba0ef594a87a8201562209df32d..b685e9e6b7d94434ff425eff268699a715261522 100644
--- a/applications/reconstruct/src/garbage.cu
+++ b/applications/reconstruct/src/garbage.cu
@@ -6,7 +6,7 @@ using namespace ftl::voxhash;
 #define T_PER_BLOCK 8
 #define NUM_CUDA_BLOCKS	10000
 
-__global__ void starveVoxelsKernel(HashData hashData) {
+/*__global__ void starveVoxelsKernel(HashData hashData) {
 	int ptr;
 
 	// Stride over all allocated blocks
@@ -32,21 +32,26 @@ void ftl::cuda::starveVoxels(HashData& hashData, const HashParams& hashParams, c
 	cudaSafeCall(cudaDeviceSynchronize());
 	//cutilCheckMsg(__FUNCTION__);
 #endif
-}
+}*/
+
+#define ENTRIES_PER_BLOCK 4
 
 __global__ void clearVoxelsKernel(HashData hashData) {
+	const int lane = threadIdx.x % 16;
+	const int halfWarp = threadIdx.x / 16;
 
 	// Stride over all allocated blocks
-	for (int bi=blockIdx.x; bi<*hashData.d_hashCompactifiedCounter; bi+=NUM_CUDA_BLOCKS) {
+	for (int bi=blockIdx.x*ENTRIES_PER_BLOCK+halfWarp; bi<*hashData.d_hashCompactifiedCounter; bi+=NUM_CUDA_BLOCKS*ENTRIES_PER_BLOCK) {
 
-	const HashEntry& entry = hashData.d_hashCompactified[bi];	
-	hashData.d_SDFBlocks[entry.ptr + threadIdx.x].weight = 0;
+	HashEntry *entry = hashData.d_hashCompactified[bi];	
+	//hashData.d_SDFBlocks[entry.ptr + threadIdx.x].weight = 0;
+	entry->voxels[lane] = 0;
 
 	}
 }
 
 void ftl::cuda::clearVoxels(HashData& hashData, const HashParams& hashParams) {
-	const unsigned int threadsPerBlock = SDF_BLOCK_SIZE*SDF_BLOCK_SIZE*SDF_BLOCK_SIZE;
+	const unsigned int threadsPerBlock = 16 * ENTRIES_PER_BLOCK;
 	const dim3 gridSize(NUM_CUDA_BLOCKS, 1);
 	const dim3 blockSize(threadsPerBlock, 1);
 
@@ -54,65 +59,20 @@ void ftl::cuda::clearVoxels(HashData& hashData, const HashParams& hashParams) {
 }
 
 
-__shared__ float	shared_MinSDF[SDF_BLOCK_SIZE * SDF_BLOCK_SIZE * SDF_BLOCK_SIZE / 2];
-__shared__ uint		shared_MaxWeight[SDF_BLOCK_SIZE * SDF_BLOCK_SIZE * SDF_BLOCK_SIZE / 2];
-
-
 __global__ void garbageCollectIdentifyKernel(HashData hashData) {
+	const int lane = threadIdx.x % 16;
+	const int halfWarp = threadIdx.x / 16;
 
 	// Stride over all allocated blocks
-	for (int bi=blockIdx.x; bi<*hashData.d_hashCompactifiedCounter; bi+=NUM_CUDA_BLOCKS) {
-
-	const HashEntry& entry = hashData.d_hashCompactified[bi];
-
-	// Entire block was not touched in this frame, so remove (Nick)
-	/*if (entry.flags != cameraParams.flags & 0xFF) {
-		hashData.d_hashDecision[hashIdx] = 1;
-		return;
-	}*/
-	
-	//uint h = hashData.computeHashPos(entry.pos);
-	//hashData.d_hashDecision[hashIdx] = 1;
-	//if (hashData.d_hashBucketMutex[h] == LOCK_ENTRY)	return;
-
-	//if (entry.ptr == FREE_ENTRY) return; //should never happen since we did compactify before
-	//const uint linBlockSize = SDF_BLOCK_SIZE * SDF_BLOCK_SIZE * SDF_BLOCK_SIZE;
+	for (int bi=blockIdx.x*ENTRIES_PER_BLOCK+halfWarp; bi<*hashData.d_hashCompactifiedCounter; bi+=NUM_CUDA_BLOCKS * ENTRIES_PER_BLOCK) {
 
-	const unsigned int idx0 = entry.ptr + 2*threadIdx.x+0;
-	const unsigned int idx1 = entry.ptr + 2*threadIdx.x+1;
+	const HashEntry *entry = hashData.d_hashCompactified[bi];
 
-	Voxel v0 = hashData.d_SDFBlocks[idx0];
-	Voxel v1 = hashData.d_SDFBlocks[idx1];
+	const uint v = entry->voxels[lane];
+	const uint mask = (halfWarp & 0x1) ? 0xFFFF0000 : 0x0000FFFF;
+	uint ballot_result = __ballot_sync(mask, v == 0 || v == 0xFFFFFFFF);
 
-	if (v0.weight == 0)	v0.sdf = PINF;
-	if (v1.weight == 0)	v1.sdf = PINF;
-
-	shared_MinSDF[threadIdx.x] = min(fabsf(v0.sdf), fabsf(v1.sdf));	//init shared memory
-	shared_MaxWeight[threadIdx.x] = max(v0.weight, v1.weight);
-		
-#pragma unroll 1
-	for (uint stride = 2; stride <= blockDim.x; stride <<= 1) {
-		__syncthreads();
-		if ((threadIdx.x  & (stride-1)) == (stride-1)) {
-			shared_MinSDF[threadIdx.x] = min(shared_MinSDF[threadIdx.x-stride/2], shared_MinSDF[threadIdx.x]);
-			shared_MaxWeight[threadIdx.x] = max(shared_MaxWeight[threadIdx.x-stride/2], shared_MaxWeight[threadIdx.x]);
-		}
-	}
-
-	__syncthreads();
-
-	if (threadIdx.x == blockDim.x - 1) {
-		float minSDF = shared_MinSDF[threadIdx.x];
-		uint maxWeight = shared_MaxWeight[threadIdx.x];
-
-		float t = hashData.getTruncation(5.0f); // NICK should not be hardcoded	//MATTHIAS TODO check whether this is a reasonable metric
-
-		if (minSDF >= t || maxWeight == 0) {
-			hashData.d_hashDecision[bi] = 1;
-		} else {
-			hashData.d_hashDecision[bi] = 0; 
-		}
-	}
+	if (lane == 0) hashData.d_hashDecision[bi] = (ballot_result == mask) ? 1 : 0;
 
 	}
 }
@@ -138,18 +98,20 @@ __global__ void garbageCollectFreeKernel(HashData hashData) {
 	// Stride over all allocated blocks
 	for (int bi=blockIdx.x*blockDim.x + threadIdx.x; bi<*hashData.d_hashCompactifiedCounter; bi+=NUM_CUDA_BLOCKS*blockDim.x) {
 
-	if (hashData.d_hashDecision[bi] != 0) {	//decision to delete the hash entry
+	HashEntry *entry = hashData.d_hashCompactified[bi];
 
-		const HashEntry& entry = hashData.d_hashCompactified[bi];
-		//if (entry.ptr == FREE_ENTRY) return; //should never happen since we did compactify before
+	if ((entry->head.flags & ftl::voxhash::kFlagSurface) == 0) {	//decision to delete the hash entry
+
+		
+		//if (entry->head.offset == FREE_ENTRY) return; //should never happen since we did compactify before
 
-		if (hashData.deleteHashEntryElement(entry.pos)) {	//delete hash entry from hash (and performs heap append)
-			const uint linBlockSize = SDF_BLOCK_SIZE * SDF_BLOCK_SIZE * SDF_BLOCK_SIZE;
+		int3 posI3 = make_int3(entry->head.posXYZ.x, entry->head.posXYZ.y, entry->head.posXYZ.z);
 
-			#pragma unroll 1
-			for (uint i = 0; i < linBlockSize; i++) {	//clear sdf block: CHECK TODO another kernel?
-				hashData.deleteVoxel(entry.ptr + i);
-			}
+		if (hashData.deleteHashEntryElement(posI3)) {	//delete hash entry from hash (and performs heap append)
+			//#pragma unroll
+			//for (uint i = 0; i < 16; i++) {	//clear sdf block: CHECK TODO another kernel?
+			//	entry->voxels[i] = 0;
+			//}
 		}
 	}
 
diff --git a/applications/reconstruct/src/integrators.cu b/applications/reconstruct/src/integrators.cu
index d34c021254a66b956ff612e4b646a5be7bd19d9e..1259210b4ea1b292f1adf00dfd0517bf04734c40 100644
--- a/applications/reconstruct/src/integrators.cu
+++ b/applications/reconstruct/src/integrators.cu
@@ -3,6 +3,8 @@
 #include <vector_types.h>
 #include <cuda_runtime.h>
 #include <ftl/cuda_matrix_util.hpp>
+#include <ftl/cuda_util.hpp>
+#include <ftl/cuda_common.hpp>
 
 #define T_PER_BLOCK 8
 
@@ -10,6 +12,7 @@ using ftl::voxhash::HashData;
 using ftl::voxhash::HashParams;
 using ftl::voxhash::Voxel;
 using ftl::voxhash::HashEntry;
+using ftl::voxhash::HashEntryHead;
 using ftl::voxhash::FREE_ENTRY;
 
 __device__ float4 make_float4(uchar4 c) {
@@ -58,21 +61,26 @@ __device__ float colourDistance(const uchar4 &c1, const uchar3 &c2) {
 	return x*x + y*y + z*z;
 }
 
+__device__ bool colordiff(const uchar4 &pa, const uchar3 &pb, float epsilon) {
+	float x_2 = pb.x * pb.x + pb.y * pb.y + pb.z * pb.z;
+	float v_2 = pa.x * pa.x + pa.y * pa.y + pa.z * pa.z;
+	float xv_2 = pow(pb.x * pa.x + pb.y * pa.y + pb.z * pa.z, 2);
+	float p_2 = xv_2 / v_2;
+	return sqrt(x_2 - p_2) < epsilon;
+}
+
 #define NUM_CUDA_BLOCKS		10000
 
-__global__ void integrateDepthMapKernel(HashData hashData, HashParams hashParams, DepthCameraParams cameraParams, cudaTextureObject_t depthT, cudaTextureObject_t colourT) {
-	//const HashParams& hashParams = c_hashParams;
-	//const DepthCameraParams& cameraParams = c_depthCameraParams;
+
+
+/*__global__ void integrateRegistrationKernel(HashData hashData, HashParams hashParams, DepthCameraParams cameraParams, cudaTextureObject_t depthT, cudaTextureObject_t colourT) {
 
 	// Stride over all allocated blocks
 	for (int bi=blockIdx.x; bi<*hashData.d_hashCompactifiedCounter; bi+=NUM_CUDA_BLOCKS) {
 
 	//TODO check if we should load this in shared memory
 	HashEntry& entry = hashData.d_hashCompactified[bi];
-	//if (entry.ptr == FREE_ENTRY) {
-	//	printf("invliad integrate");
-	//	return; //should never happen since we did the compactification before
-	//}
+
 
 	int3 pi_base = hashData.SDFBlockToVirtualVoxelPos(entry.pos);
 
@@ -91,13 +99,7 @@ __global__ void integrateDepthMapKernel(HashData hashData, HashParams hashParams
 		//if (depth > 20.0f) return;
 
 		uchar4 color  = make_uchar4(0, 0, 0, 0);
-		//if (cameraData.d_colorData) {
-			color = tex2D<uchar4>(colourT, screenPos.x, screenPos.y);
-			//color = bilinearFilterColor(cameraData.cameraToKinectScreenFloat(pf));
-		//}
-
-		//printf("screen pos %d\n", color.x);
-		//return;
+		color = tex2D<uchar4>(colourT, screenPos.x, screenPos.y);
 
 		// Depth is within accepted max distance from camera
 		if (depth > 0.01f && depth < hashParams.m_maxIntegrationDistance) { // valid depth and color (Nick: removed colour check)
@@ -107,165 +109,180 @@ __global__ void integrateDepthMapKernel(HashData hashData, HashParams hashParams
 			float sdf = depth - pf.z;
 			float truncation = hashData.getTruncation(depth);
 
-			// Is this voxel close enough to cam for depth map value
-			// CHECK Nick: If is too close then free space violation so remove?
-			// This isn't enough if the disparity has occlusions that don't cause violations
-			// Could RGB changes also cause removals if depth can't be confirmed?
-			/*if (sdf > truncation) {
-				uint idx = entry.ptr + i;
-				hashData.d_SDFBlocks[idx].weight = 0;
-				//hashData.d_SDFBlocks[idx].sdf = PINF;
-				hashData.d_SDFBlocks[idx].color = make_uchar3(0,0,0);
-			}*/
-			if (sdf > -truncation) // && depthZeroOne >= 0.0f && depthZeroOne <= 1.0f) //check if in truncation range should already be made in depth map computation
-			{
-				/*if (sdf >= 0.0f) {
-					sdf = fminf(truncation, sdf);
-				} else {
-					sdf = fmaxf(-truncation, sdf);
-				}*/
-
-
-				//printf("SDF: %f\n", sdf);
-				//float weightUpdate = g_WeightSample;
-				//weightUpdate = (1-depthZeroOne)*5.0f + depthZeroOne*0.05f;
-				//weightUpdate *= g_WeightSample;
+			if (sdf > -truncation) {
 				float weightUpdate = max(hashParams.m_integrationWeightSample * 1.5f * (1.0f-depthZeroOne), 1.0f);
 
 				Voxel curr;	//construct current voxel
 				curr.sdf = sdf;
 				curr.weight = weightUpdate;
 				curr.color = make_uchar3(color.x, color.y, color.z);
-				
 
 				uint idx = entry.ptr + i;
-
-				//if (entry.flags != cameraParams.flags & 0xFF) {
-				//	entry.flags = cameraParams.flags & 0xFF;
-					//hashData.d_SDFBlocks[idx].color = make_uchar3(0,0,0);
-				//}
 				
-				Voxel newVoxel;
-				//if (color.x == MINF) hashData.combineVoxelDepthOnly(hashData.d_SDFBlocks[idx], curr, newVoxel);
-				//else hashData.combineVoxel(hashData.d_SDFBlocks[idx], curr, newVoxel);
-				hashData.combineVoxel(hashData.d_SDFBlocks[idx], curr, newVoxel);
+				Voxel out;
+				const Voxel &v1 = curr;
+				const Voxel &v0 = hashData.d_SDFBlocks[idx];
 
-				hashData.d_SDFBlocks[idx] = newVoxel;
+				float redshift = (v0.weight > 0) ? 1.0f - ((v1.sdf - v0.sdf) / hashParams.m_truncation)*0.5f : 1.0f;
+
+				out.color.x = min(max(v1.color.x*redshift,0.0f),255.0f);
+				out.color.y = min(max(v1.color.y*redshift,0.0f),255.0f);
+				out.color.z = min(max(v1.color.z*(1.0f / redshift),0.0f),255.0f);
+
+				out.sdf = (v0.sdf * (float)v0.weight + v1.sdf * (float)v1.weight) / ((float)v0.weight + (float)v1.weight);
+				out.weight = min(c_hashParams.m_integrationWeightMax, (unsigned int)v0.weight + (unsigned int)v1.weight);
+
+				hashData.d_SDFBlocks[idx] = out;
 
-				//Voxel prev = getVoxel(g_SDFBlocksSDFUAV, g_SDFBlocksRGBWUAV, idx);
-				//Voxel newVoxel = combineVoxel(curr, prev);
-				//setVoxel(g_SDFBlocksSDFUAV, g_SDFBlocksRGBWUAV, idx, newVoxel);
 			}
-		} else {
-			// Depth is invalid so what to do here?
-			// TODO(Nick) Use past voxel if available (set weight from 0 to 1)
-
-			//uint idx = entry.ptr + i;
-			//float coldist = colourDistance(color, hashData.d_SDFBlocks[idx].color);
-			//if ((depth > 39.99f || depth < 0.01f) && coldist > 100.0f) {
-				//hashData.d_SDFBlocks[idx].color = make_uchar3(0,0,(uchar)(coldist));
-			//	hashData.d_SDFBlocks[idx].weight = hashData.d_SDFBlocks[idx].weight >> 1;
-			//}
 		}
 	}
 
-	}
-}
-
-
-void ftl::cuda::integrateDepthMap(HashData& hashData, const HashParams& hashParams,
-		const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, cudaStream_t stream) {
-	const unsigned int threadsPerBlock = SDF_BLOCK_SIZE*SDF_BLOCK_SIZE*SDF_BLOCK_SIZE;
-	const dim3 gridSize(NUM_CUDA_BLOCKS, 1);
-	const dim3 blockSize(threadsPerBlock, 1);
+	}  // Stride loop
+}*/
 
-	//if (hashParams.m_numOccupiedBlocks > 0) {	//this guard is important if there is no depth in the current frame (i.e., no blocks were allocated)
-		integrateDepthMapKernel << <gridSize, blockSize, 0, stream >> >(hashData, hashParams, depthCameraParams, depthCameraData.depth_obj_, depthCameraData.colour_obj_);
-	//}
+extern __constant__ ftl::voxhash::DepthCameraCUDA c_cameras[MAX_CAMERAS];
 
-	//cudaSafeCall( cudaGetLastError() );
-#ifdef _DEBUG
-	cudaSafeCall(cudaDeviceSynchronize());
-	//cutilCheckMsg(__FUNCTION__);
-#endif
-}
+#define WARP_SIZE 32
 
+__global__ void integrateDepthMapsKernel(HashData hashData, HashParams hashParams, int numcams) {
+	__shared__ uint all_warp_ballot;
+	__shared__ uint voxels[16];
 
-__global__ void integrateRegistrationKernel(HashData hashData, HashParams hashParams, DepthCameraParams cameraParams, cudaTextureObject_t depthT, cudaTextureObject_t colourT) {
+	const uint i = threadIdx.x;	//inside of an SDF block
+	const int3 po = make_int3(hashData.delinearizeVoxelIndex(i));
 
 	// Stride over all allocated blocks
 	for (int bi=blockIdx.x; bi<*hashData.d_hashCompactifiedCounter; bi+=NUM_CUDA_BLOCKS) {
 
 	//TODO check if we should load this in shared memory
-	HashEntry& entry = hashData.d_hashCompactified[bi];
+	//HashEntryHead entry = hashData.d_hashCompactified[bi]->head;
 
+	int3 pi_base = hashData.SDFBlockToVirtualVoxelPos(make_int3(hashData.d_hashCompactified[bi]->head.posXYZ));
 
-	int3 pi_base = hashData.SDFBlockToVirtualVoxelPos(entry.pos);
+	//uint idx = entry.offset + i;
+	int3 pi = pi_base + po;
+	float3 pfb = hashData.virtualVoxelPosToWorld(pi);
+	int count = 0;
+	//float camdepths[MAX_CAMERAS];
 
-	uint i = threadIdx.x;	//inside of an SDF block
-	int3 pi = pi_base + make_int3(hashData.delinearizeVoxelIndex(i));
-	float3 pf = hashData.virtualVoxelPosToWorld(pi);
+	Voxel oldVoxel; // = hashData.d_SDFBlocks[idx];
+	hashData.deleteVoxel(oldVoxel);
 
-	pf = hashParams.m_rigidTransformInverse * pf;
-	uint2 screenPos = make_uint2(cameraParams.cameraToKinectScreenInt(pf));
+	for (uint cam=0; cam<numcams; ++cam) {
+		const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam];
+	
+		float3 pf = camera.poseInverse * pfb;
+		uint2 screenPos = make_uint2(camera.params.cameraToKinectScreenInt(pf));
 
-	// For this voxel in hash, get its screen position and check it is on screen
-	if (screenPos.x < cameraParams.m_imageWidth && screenPos.y < cameraParams.m_imageHeight) {	//on screen
+		// For this voxel in hash, get its screen position and check it is on screen
+		if (screenPos.x < camera.params.m_imageWidth && screenPos.y < camera.params.m_imageHeight) {	//on screen
 
-		//float depth = g_InputDepth[screenPos];
-		float depth = tex2D<float>(depthT, screenPos.x, screenPos.y);
-		//if (depth > 20.0f) return;
-
-		uchar4 color  = make_uchar4(0, 0, 0, 0);
-		color = tex2D<uchar4>(colourT, screenPos.x, screenPos.y);
-
-		// Depth is within accepted max distance from camera
-		if (depth > 0.01f && depth < hashParams.m_maxIntegrationDistance) { // valid depth and color (Nick: removed colour check)
-			float depthZeroOne = cameraParams.cameraToKinectProjZ(depth);
-
-			// Calculate SDF of this voxel wrt the depth map value
-			float sdf = depth - pf.z;
-			float truncation = hashData.getTruncation(depth);
-
-			if (sdf > -truncation) {
-				float weightUpdate = max(hashParams.m_integrationWeightSample * 1.5f * (1.0f-depthZeroOne), 1.0f);
+			//float depth = g_InputDepth[screenPos];
+			float depth = tex2D<float>(camera.depth, screenPos.x, screenPos.y);
+			//if (depth > 20.0f) return;
 
-				Voxel curr;	//construct current voxel
-				curr.sdf = sdf;
-				curr.weight = weightUpdate;
-				curr.color = make_uchar3(color.x, color.y, color.z);
-
-				uint idx = entry.ptr + i;
-				
-				Voxel out;
-				const Voxel &v1 = curr;
-				const Voxel &v0 = hashData.d_SDFBlocks[idx];
+			//uchar4 color  = make_uchar4(0, 0, 0, 0);
+			//if (cameraData.d_colorData) {
+				//color = (cam == 0) ? make_uchar4(255,0,0,255) : make_uchar4(0,0,255,255);
+				//color = tex2D<uchar4>(camera.colour, screenPos.x, screenPos.y);
+				//color = bilinearFilterColor(cameraData.cameraToKinectScreenFloat(pf));
+			//}
 
-				float redshift = (v0.weight > 0) ? 1.0f - ((v1.sdf - v0.sdf) / hashParams.m_truncation)*0.5f : 1.0f;
+			//printf("screen pos %d\n", color.x);
+			//return;
+
+			// Depth is within accepted max distance from camera
+			if (depth > 0.01f && depth < hashParams.m_maxIntegrationDistance) { // valid depth and color (Nick: removed colour check)
+				//camdepths[count] = depth;
+				++count;
+
+				// Calculate SDF of this voxel wrt the depth map value
+				float sdf = depth - pf.z;
+				float truncation = hashData.getTruncation(depth);
+				float depthZeroOne = camera.params.cameraToKinectProjZ(depth);
+
+				// Is this voxel close enough to cam for depth map value
+				// CHECK Nick: If is too close then free space violation so remove?
+				if (sdf > -truncation) // && depthZeroOne >= 0.0f && depthZeroOne <= 1.0f) //check if in truncation range should already be made in depth map computation
+				{
+					float weightUpdate = max(hashParams.m_integrationWeightSample * 1.5f * (1.0f-depthZeroOne), 1.0f);
+
+					Voxel curr;	//construct current voxel
+					curr.sdf = sdf;
+					curr.weight = weightUpdate;
+					//curr.color = make_uchar3(color.x, color.y, color.z);
+
+
+					//if (entry.flags != cameraParams.flags & 0xFF) {
+					//	entry.flags = cameraParams.flags & 0xFF;
+						//hashData.d_SDFBlocks[idx].color = make_uchar3(0,0,0);
+					//}
+					
+					Voxel newVoxel;
+					//if (color.x == MINF) hashData.combineVoxelDepthOnly(hashData.d_SDFBlocks[idx], curr, newVoxel);
+					//else hashData.combineVoxel(hashData.d_SDFBlocks[idx], curr, newVoxel);
+					hashData.combineVoxel(oldVoxel, curr, newVoxel);
+
+					oldVoxel = newVoxel;
+
+					//Voxel prev = getVoxel(g_SDFBlocksSDFUAV, g_SDFBlocksRGBWUAV, idx);
+					//Voxel newVoxel = combineVoxel(curr, prev);
+					//setVoxel(g_SDFBlocksSDFUAV, g_SDFBlocksRGBWUAV, idx, newVoxel);
+				}
+			} else {
+				// Depth is invalid so what to do here?
+				// TODO(Nick) Use past voxel if available (set weight from 0 to 1)
+
+				// Naive: need to know if this is a foreground voxel
+				//bool coldist = colordiff(color, hashData.d_SDFBlocks[idx].color, 5.0f);
+				//if (!coldist) ++count;
 
-				out.color.x = min(max(v1.color.x*redshift,0.0f),255.0f);
-				out.color.y = min(max(v1.color.y*redshift,0.0f),255.0f);
-				out.color.z = min(max(v1.color.z*(1.0f / redshift),0.0f),255.0f);
+			}
+		}
+	}
 
-				out.sdf = (v0.sdf * (float)v0.weight + v1.sdf * (float)v1.weight) / ((float)v0.weight + (float)v1.weight);
-				out.weight = min(c_hashParams.m_integrationWeightMax, (unsigned int)v0.weight + (unsigned int)v1.weight);
+	// Calculate voxel sign values across a warp
+	int warpNum = i / WARP_SIZE;
+	uint ballot_result = __ballot_sync(0xFFFFFFFF, (oldVoxel.sdf >= 0.0f) ? 0 : 1);
 
-				hashData.d_SDFBlocks[idx] = out;
+	// Aggregate each warp result into voxel mask
+	if (i % WARP_SIZE == 0) {
+		voxels[warpNum] = ballot_result;
+	}
 
-			}
+	__syncthreads();
+
+	// Work out if block is occupied or not and save voxel masks
+	if (i < 16) {
+		const uint v = voxels[i];
+		hashData.d_hashCompactified[bi]->voxels[i] = v;
+		const uint mask = 0x0000FFFF;
+		uint b1 = __ballot_sync(mask, v == 0xFFFFFFFF);
+		uint b2 = __ballot_sync(mask, v == 0);
+		if (i == 0) {
+			if (b1 != mask && b2 != mask) hashData.d_hashCompactified[bi]->head.flags |= ftl::voxhash::kFlagSurface;
+			else hashData.d_hashCompactified[bi]->head.flags &= ~ftl::voxhash::kFlagSurface;
 		}
 	}
 
-	}  // Stride loop
+	}
 }
 
 
-void ftl::cuda::integrateRegistration(HashData& hashData, const HashParams& hashParams,
-		const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, cudaStream_t stream) {
-	const unsigned int threadsPerBlock = SDF_BLOCK_SIZE*SDF_BLOCK_SIZE*SDF_BLOCK_SIZE;
-	const dim3 gridSize(NUM_CUDA_BLOCKS, 1);
-	const dim3 blockSize(threadsPerBlock, 1);
 
-	integrateRegistrationKernel << <gridSize, blockSize, 0, stream >> >(hashData, hashParams, depthCameraParams, depthCameraData.depth_obj_, depthCameraData.colour_obj_);
+void ftl::cuda::integrateDepthMaps(HashData& hashData, const HashParams& hashParams, int numcams, cudaStream_t stream) {
+const unsigned int threadsPerBlock = SDF_BLOCK_SIZE*SDF_BLOCK_SIZE*SDF_BLOCK_SIZE;
+const dim3 gridSize(NUM_CUDA_BLOCKS, 1);
+const dim3 blockSize(threadsPerBlock, 1);
+
+//if (hashParams.m_numOccupiedBlocks > 0) {	//this guard is important if there is no depth in the current frame (i.e., no blocks were allocated)
+	integrateDepthMapsKernel << <gridSize, blockSize, 0, stream >> >(hashData, hashParams, numcams);
+//}
 
-}
\ No newline at end of file
+//cudaSafeCall( cudaGetLastError() );
+#ifdef _DEBUG
+cudaSafeCall(cudaDeviceSynchronize());
+//cutilCheckMsg(__FUNCTION__);
+#endif
+}
diff --git a/applications/reconstruct/src/integrators.hpp b/applications/reconstruct/src/integrators.hpp
index 756ff2ea45e556592714665af504191108880f3d..789551dd1fa7347bf02c518c8c5a73f6ae4269b4 100644
--- a/applications/reconstruct/src/integrators.hpp
+++ b/applications/reconstruct/src/integrators.hpp
@@ -7,11 +7,14 @@
 namespace ftl {
 namespace cuda {
 
-void integrateDepthMap(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams,
+/*void integrateDepthMap(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams,
 		const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, cudaStream_t stream);
 
 void integrateRegistration(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams,
 		const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, cudaStream_t stream);
+*/
+
+void integrateDepthMaps(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, int numcams, cudaStream_t stream);
 
 }
 }
diff --git a/applications/reconstruct/src/main.cpp b/applications/reconstruct/src/main.cpp
index 650c74b61f7a3ebdaf79d396ee425b43a6fffba2..4f05ef2d32644a728d9d31874de29c8e2a87087d 100644
--- a/applications/reconstruct/src/main.cpp
+++ b/applications/reconstruct/src/main.cpp
@@ -126,11 +126,12 @@ static void run(ftl::Configurable *root) {
 			//stream->wait();
 			cudaSafeCall(cudaStreamSynchronize(scene->getIntegrationStream()));
 
-			//LOG(INFO) << "Heap: " << scene->getHeapFreeCount();
 
 			// Merge new frames into the voxel structure
 			scene->integrate();
 
+			//LOG(INFO) << "Allocated: " << scene->getOccupiedCount();
+
 			// Remove any redundant voxels
 			scene->garbage();
 
diff --git a/applications/reconstruct/src/scene_rep_hash_sdf.cu b/applications/reconstruct/src/scene_rep_hash_sdf.cu
index 4f57bf370b355148662444b393d29622cb0c792b..5c5ebf6fdb73be59bf028d850ce4fa41e6986925 100644
--- a/applications/reconstruct/src/scene_rep_hash_sdf.cu
+++ b/applications/reconstruct/src/scene_rep_hash_sdf.cu
@@ -26,6 +26,7 @@ using ftl::voxhash::FREE_ENTRY;
 __device__ __constant__ HashParams c_hashParams;
 __device__ __constant__ RayCastParams c_rayCastParams;
 //__device__ __constant__ DepthCameraParams c_depthCameraParams;
+__device__ __constant__ ftl::voxhash::DepthCameraCUDA c_cameras[MAX_CAMERAS];
 
 extern "C" void updateConstantHashParams(const HashParams& params) {
 
@@ -40,7 +41,7 @@ extern "C" void updateConstantHashParams(const HashParams& params) {
 	}
 
 
-extern "C" void updateConstantRayCastParams(const RayCastParams& params) {
+/*extern "C" void updateConstantRayCastParams(const RayCastParams& params) {
 	//printf("Update ray cast params\n");
 	size_t size;
 	cudaSafeCall(cudaGetSymbolSize(&size, c_rayCastParams));
@@ -51,30 +52,23 @@ extern "C" void updateConstantRayCastParams(const RayCastParams& params) {
 	//cutilCheckMsg(__FUNCTION__);
 #endif
 
-}
+}*/
 
-/*extern "C" void updateConstantDepthCameraParams(const DepthCameraParams& params) {
+extern "C" void updateCUDACameraConstant(ftl::voxhash::DepthCameraCUDA *data, int count) {
+	if (count == 0 || count >= MAX_CAMERAS) return;
 	//printf("Update depth camera params\n");
 	size_t size;
-	cudaSafeCall(cudaGetSymbolSize(&size, c_depthCameraParams));
-	cudaSafeCall(cudaMemcpyToSymbol(c_depthCameraParams, &params, size, 0, cudaMemcpyHostToDevice));
+	cudaSafeCall(cudaGetSymbolSize(&size, c_cameras));
+	cudaSafeCall(cudaMemcpyToSymbol(c_cameras, data, sizeof(ftl::voxhash::DepthCameraCUDA)*count, 0, cudaMemcpyHostToDevice));
 	
 #ifdef DEBUG
 	cudaSafeCall(cudaDeviceSynchronize());
 	//cutilCheckMsg(__FUNCTION__);
 #endif
 
-}*/
-
-extern "C" void bindInputDepthColorTextures(const DepthCameraData& depthCameraData) 
-{
-	/*cudaSafeCall(cudaBindTextureToArray(depthTextureRef, depthCameraData.d_depthArray, depthCameraData.h_depthChannelDesc));
-	cudaSafeCall(cudaBindTextureToArray(colorTextureRef, depthCameraData.d_colorArray, depthCameraData.h_colorChannelDesc));
-	depthTextureRef.filterMode = cudaFilterModePoint;
-	colorTextureRef.filterMode = cudaFilterModePoint;*/
 }
 
-__global__ void resetHeapKernel(HashData hashData) 
+/*__global__ void resetHeapKernel(HashData hashData) 
 {
 	const HashParams& hashParams = c_hashParams;
 	unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x;
@@ -92,7 +86,7 @@ __global__ void resetHeapKernel(HashData hashData)
 			hashData.deleteVoxel(base_idx+i);
 		}
 	}
-}
+}*/
 
 __global__ void resetHashKernel(HashData hashData) 
 {
@@ -100,7 +94,7 @@ __global__ void resetHashKernel(HashData hashData)
 	const unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x;
 	if (idx < hashParams.m_hashNumBuckets) {
 		hashData.deleteHashEntry(hashData.d_hash[idx]);
-		hashData.deleteHashEntry(hashData.d_hashCompactified[idx]);
+		//hashData.deleteHashEntry(hashData.d_hashCompactified[idx]);
 	}
 }
 
@@ -116,20 +110,20 @@ __global__ void resetHashBucketMutexKernel(HashData hashData)
 
 extern "C" void resetCUDA(HashData& hashData, const HashParams& hashParams)
 {
-	{
+	//{
 		//resetting the heap and SDF blocks
-		const dim3 gridSize((hashParams.m_numSDFBlocks + (T_PER_BLOCK*T_PER_BLOCK) - 1)/(T_PER_BLOCK*T_PER_BLOCK), 1);
-		const dim3 blockSize((T_PER_BLOCK*T_PER_BLOCK), 1);
+		//const dim3 gridSize((hashParams.m_numSDFBlocks + (T_PER_BLOCK*T_PER_BLOCK) - 1)/(T_PER_BLOCK*T_PER_BLOCK), 1);
+		//const dim3 blockSize((T_PER_BLOCK*T_PER_BLOCK), 1);
 
-		resetHeapKernel<<<gridSize, blockSize>>>(hashData);
+		//resetHeapKernel<<<gridSize, blockSize>>>(hashData);
 
 
-		#ifdef _DEBUG
-			cudaSafeCall(cudaDeviceSynchronize());
+		//#ifdef _DEBUG
+		//	cudaSafeCall(cudaDeviceSynchronize());
 			//cutilCheckMsg(__FUNCTION__);
-		#endif
+		//#endif
 
-	}
+	//}
 
 	{
 		//resetting the hash
@@ -212,17 +206,18 @@ bool isSDFBlockStreamedOut(const int3& sdfBlock, const HashData& hashData, const
 // Note: bitMask used for Streaming out code... could be set to nullptr if not streaming out
 // Note: Allocations might need to be around fat rays since multiple voxels could correspond
 // to same depth map pixel at larger distances.
-__global__ void allocKernel(HashData hashData, DepthCameraData cameraData, HashParams hashParams, DepthCameraParams cameraParams) 
+__global__ void allocKernel(HashData hashData, HashParams hashParams, int camnum) 
 {
 	//const HashParams& hashParams = c_hashParams;
-	//const DepthCameraParams& cameraParams = c_depthCameraParams;
+	const ftl::voxhash::DepthCameraCUDA camera = c_cameras[camnum];
+	const DepthCameraParams &cameraParams = camera.params;
 
 	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
 	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
 
 	if (x < cameraParams.m_imageWidth && y < cameraParams.m_imageHeight)
 	{
-		float d = tex2D<float>(cameraData.depth_obj_, x, y);
+		float d = tex2D<float>(camera.depth, x, y);
 		//if (d == MINF || d < cameraParams.m_sensorDepthWorldMin || d > cameraParams.m_sensorDepthWorldMax)	return;
 		if (d == MINF || d == 0.0f)	return;
 
@@ -239,10 +234,10 @@ __global__ void allocKernel(HashData hashData, DepthCameraData cameraData, HashP
 		// camera intrinsics? Same as what reprojectTo3D does in OpenCV?
 		float3 rayMin = cameraParams.kinectDepthToSkeleton(x, y, minDepth);
 		// Is the rigid transform then the estimated camera pose?
-		rayMin = hashParams.m_rigidTransform * rayMin;
+		rayMin = camera.pose * rayMin;
 		//printf("Ray min: %f,%f,%f\n", rayMin.x, rayMin.y, rayMin.z);
 		float3 rayMax = cameraParams.kinectDepthToSkeleton(x, y, maxDepth);
-		rayMax = hashParams.m_rigidTransform * rayMax;
+		rayMax = camera.pose * rayMax;
 
 		float3 rayDir = normalize(rayMax - rayMin);
 	
@@ -278,7 +273,7 @@ __global__ void allocKernel(HashData hashData, DepthCameraData cameraData, HashP
 
 			//check if it's in the frustum and not checked out
 			if (hashData.isSDFBlockInCameraFrustumApprox(hashParams, cameraParams, idCurrentVoxel)) { //} && !isSDFBlockStreamedOut(idCurrentVoxel, hashData, d_bitMask)) {		
-				hashData.allocBlock(idCurrentVoxel, cameraParams.flags & 0xFF);
+				hashData.allocBlock(idCurrentVoxel);
 				//printf("Allocate block: %d\n",idCurrentVoxel.x);
 			}
 
@@ -304,7 +299,7 @@ __global__ void allocKernel(HashData hashData, DepthCameraData cameraData, HashP
 	}
 }
 
-extern "C" void allocCUDA(HashData& hashData, const HashParams& hashParams, const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, cudaStream_t stream) 
+extern "C" void allocCUDA(HashData& hashData, const HashParams& hashParams, int camnum, const DepthCameraParams &depthCameraParams, cudaStream_t stream) 
 {
 
 	//printf("Allocating: %d\n",depthCameraParams.m_imageWidth);
@@ -312,7 +307,7 @@ extern "C" void allocCUDA(HashData& hashData, const HashParams& hashParams, cons
 	const dim3 gridSize((depthCameraParams.m_imageWidth + T_PER_BLOCK - 1)/T_PER_BLOCK, (depthCameraParams.m_imageHeight + T_PER_BLOCK - 1)/T_PER_BLOCK);
 	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
 
-	allocKernel<<<gridSize, blockSize, 0, stream>>>(hashData, depthCameraData, hashParams, depthCameraParams);
+	allocKernel<<<gridSize, blockSize, 0, stream>>>(hashData, hashParams, camnum);
 
 
 	#ifdef _DEBUG
diff --git a/applications/reconstruct/src/splat_render.cpp b/applications/reconstruct/src/splat_render.cpp
index cb513c1da0b4b6952ffebb8a186a9d062070bbfe..a9d5e57dc712829b592f56e8226abe0f563dc665 100644
--- a/applications/reconstruct/src/splat_render.cpp
+++ b/applications/reconstruct/src/splat_render.cpp
@@ -47,13 +47,15 @@ void Splatter::render(ftl::rgbd::Source *src, cudaStream_t stream) {
 	params.camera.m_sensorDepthWorldMax = camera.maxDepth;
 	params.camera.m_sensorDepthWorldMin = camera.minDepth;
 
-	ftl::cuda::compactifyVisible(scene_->getHashData(), scene_->getHashParams(), params.camera, stream);
-	ftl::cuda::isosurface_point_image(scene_->getHashData(), depth1_, colour1_, params, stream);
-	ftl::cuda::splat_points(depth1_, colour1_, depth2_, colour2_, params, stream);
+	ftl::cuda::compactifyAllocated(scene_->getHashData(), scene_->getHashParams(), stream);
+	LOG(INFO) << "Occupied: " << scene_->getOccupiedCount();
+	ftl::cuda::isosurface_point_image(scene_->getHashData(), depth1_, params, stream);
+	ftl::cuda::splat_points(depth1_, depth2_, params, stream);
+	ftl::cuda::dibr(depth2_, colour1_, scene_->cameraCount(), params, stream);
 
 	// TODO: Second pass
 
-	src->writeFrames(colour2_, depth2_, stream);
+	src->writeFrames(colour1_, depth2_, stream);
 }
 
 void Splatter::setOutputDevice(int device) {
diff --git a/applications/reconstruct/src/splat_render.cu b/applications/reconstruct/src/splat_render.cu
index 12be18e048443ce05d3907311c0a2f0c43e9cda3..0e3598f129bf40c72294ab19501b1fd3c95bb1e9 100644
--- a/applications/reconstruct/src/splat_render.cu
+++ b/applications/reconstruct/src/splat_render.cu
@@ -13,13 +13,13 @@
 using ftl::cuda::TextureObject;
 using ftl::render::SplatParams;
 
-__global__ void clearDepthKernel(ftl::voxhash::HashData hashData, TextureObject<uint> depth, TextureObject<uchar4> colour) {
+__global__ void clearDepthKernel(ftl::voxhash::HashData hashData, TextureObject<uint> depth) {
 	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
 	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
 
 	if (x < depth.width() && y < depth.height()) {
 		depth(x,y) = 0x7f800000; //PINF;
-		colour(x,y) = make_uchar4(76,76,82,0);
+		//colour(x,y) = make_uchar4(76,76,82,0);
 	}
 }
 
@@ -72,10 +72,14 @@ __device__ inline uint blockLinear(int x, int y, int z) {
 	return x + (y << 1) + (z << 2);
 }
 
-__global__ void isosurface_image_kernel(ftl::voxhash::HashData hashData, TextureObject<uint> depth, TextureObject<uchar4> colour, SplatParams params) {
+__device__ inline bool getVoxel(uint *voxels, int ix) {
+	return voxels[ix/32] & (0x1 << (ix % 32));
+}
+
+__global__ void isosurface_image_kernel(ftl::voxhash::HashData hashData, TextureObject<uint> depth, SplatParams params) {
 	// TODO:(Nick) Reduce bank conflicts by aligning these
-	__shared__ ftl::voxhash::Voxel voxels[SDF_BLOCK_BUFFER];
-	__shared__ ftl::voxhash::HashEntry block;
+	__shared__ uint voxels[16];
+	__shared__ ftl::voxhash::HashEntryHead block;
 
 	// Stride over all allocated blocks
 	for (int bi=blockIdx.x; bi<*hashData.d_hashCompactifiedCounter; bi+=NUM_CUDA_BLOCKS) {
@@ -83,34 +87,38 @@ __global__ void isosurface_image_kernel(ftl::voxhash::HashData hashData, Texture
 
 	const uint i = threadIdx.x;	//inside of an SDF block
 
-	if (i == 0) block = hashData.d_hashCompactified[bi];
+	if (i == 0) block = hashData.d_hashCompactified[bi]->head;
+	if (i < 16) voxels[i] = hashData.d_hashCompactified[bi]->voxels[i];
 
 	// Make sure all hash entries are cached
 	__syncthreads();
 
-	const int3 pi_base = hashData.SDFBlockToVirtualVoxelPos(block.pos);
+	const int3 pi_base = hashData.SDFBlockToVirtualVoxelPos(make_int3(block.posXYZ));
 	const int3 vp = make_int3(hashData.delinearizeVoxelIndex(i));
 	const int3 pi = pi_base + vp;
-	const uint j = plinVoxelPos(vp);  // Padded linear index
+	//const uint j = plinVoxelPos(vp);  // Padded linear index
 	const float3 worldPos = hashData.virtualVoxelPosToWorld(pi);
 
 	// Load distances and colours into shared memory + padding
-	const ftl::voxhash::Voxel &v = hashData.d_SDFBlocks[block.ptr + i];
-	voxels[j] = v;
+	//const ftl::voxhash::Voxel &v = hashData.d_SDFBlocks[block.ptr + i];
+	//voxels[j] = v;
+	const bool v = getVoxel(voxels, i);
 
-	__syncthreads();
+	//__syncthreads();
 
-	if (voxels[j].weight == 0) continue;
+	//if (voxels[j].weight == 0) continue;
+	if (vp.x == 7 || vp.y == 7 || vp.z == 7) continue;
 
 
 	int edgeX = (vp.x == 0 ) ? 1 : 0;
 	int edgeY = (vp.y == 0 ) ? 1 : 0;
 	int edgeZ = (vp.z == 0 ) ? 1 : 0;
 
-	bool is_surface = ((params.m_flags & kShowBlockBorders) && edgeX + edgeY + edgeZ >= 2);
-	if (is_surface) voxels[j].color = make_uchar3(255,(vp.x == 0 && vp.y == 0 && vp.z == 0) ? 255 : 0,0);
+	uchar4 color = make_uchar4(255,0,0,255);
+	bool is_surface = false; //((params.m_flags & ftl::render::kShowBlockBorders) && edgeX + edgeY + edgeZ >= 2);
+	//if (is_surface) color = make_uchar4(255,(vp.x == 0 && vp.y == 0 && vp.z == 0) ? 255 : 0,0,255);
 
-	if (!is_surface && voxels[j].sdf <= 0.0f) continue;
+	if (!is_surface && v) continue;
 
 	//if (vp.z == 7) voxels[j].color = make_uchar3(0,255,(voxels[j].sdf < 0.0f) ? 255 : 0);
 
@@ -125,10 +133,10 @@ __global__ void isosurface_image_kernel(ftl::voxhash::HashData hashData, Texture
 				const int3 uvi = make_int3(vp.x+u,vp.y+v,vp.z+w);
 
 				// Skip these cases since we didn't load voxels properly
-				if (uvi.x == 8 || uvi.z == 8 || uvi.y == 8) continue;
+				//if (uvi.x == 8 || uvi.z == 8 || uvi.y == 8) continue;
 
-				const auto &vox = voxels[plinVoxelPos(uvi)];
-				if (vox.weight > 0 && vox.sdf <= 0.0f) {
+				const bool vox = getVoxel(voxels, hashData.linearizeVoxelPos(uvi));
+				if (vox) {
 					is_surface = true;
 					// Should break but is slower?
 				}
@@ -139,6 +147,8 @@ __global__ void isosurface_image_kernel(ftl::voxhash::HashData hashData, Texture
 	// Only for surface voxels, work out screen coordinates
 	if (!is_surface) continue;
 
+	// TODO: For each original camera, render a new depth map
+
 	const float3 camPos = params.m_viewMatrix * worldPos;
 	const float2 screenPosf = params.camera.cameraToKinectScreenFloat(camPos);
 	const uint2 screenPos = make_uint2(make_int2(screenPosf)); //  + make_float2(0.5f, 0.5f)
@@ -157,27 +167,7 @@ __global__ void isosurface_image_kernel(ftl::voxhash::HashData hashData, Texture
 
 	// See: Gunther et al. 2013. A GPGPU-based Pipeline for Accelerated Rendering of Point Clouds
 	if (x < depth.width() && y < depth.height()) {
-		// TODO:(Nick) Would warp and shared mem considerations improve things?
-		// Probably not given we now have thin surfaces...
-
-		// FIXME: Needs a lock here to ensure correct colour is written.
-		if (atomicMin(&depth(x,y), idepth) > idepth) {
-			colour(x,y) = make_uchar4(voxels[j].color.x, voxels[j].color.y, voxels[j].color.z, 255);
-		}
-
-		/*bool p = false;
-		while (!p) {
-			int ld = atomicExch(&depth(x,y), LOCKED);
-			if (ld != LOCKED) {
-				p = true;
-				if (ld > idepth) {
-					colour(x,y) = make_uchar4(voxels[j].color.x, voxels[j].color.y, voxels[j].color.z, 255);
-					depth(x,y) = idepth;
-				} else {
-					depth(x,y) = ld;
-				}
-			}
-		}*/
+		atomicMin(&depth(x,y), idepth);
 	}
 
 	}  // Stride
@@ -185,13 +175,12 @@ __global__ void isosurface_image_kernel(ftl::voxhash::HashData hashData, Texture
 
 void ftl::cuda::isosurface_point_image(const ftl::voxhash::HashData& hashData,
 			const TextureObject<uint> &depth,
-			const TextureObject<uchar4> &colour,
 			const SplatParams &params, cudaStream_t stream) {
 
 	const dim3 clear_gridSize((depth.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
 	const dim3 clear_blockSize(T_PER_BLOCK, T_PER_BLOCK);
 
-	clearDepthKernel<<<clear_gridSize, clear_blockSize, 0, stream>>>(hashData, depth, colour);
+	clearDepthKernel<<<clear_gridSize, clear_blockSize, 0, stream>>>(hashData, depth);
 
 	//cudaSafeCall( cudaDeviceSynchronize() );
 
@@ -199,7 +188,7 @@ void ftl::cuda::isosurface_point_image(const ftl::voxhash::HashData& hashData,
 	const dim3 gridSize(NUM_CUDA_BLOCKS, 1);
 	const dim3 blockSize(threadsPerBlock, 1);
 
-	isosurface_image_kernel<<<gridSize, blockSize, 0, stream>>>(hashData, depth, colour, params);
+	isosurface_image_kernel<<<gridSize, blockSize, 0, stream>>>(hashData, depth, params);
 
 	cudaSafeCall( cudaGetLastError() );
 	//cudaSafeCall( cudaDeviceSynchronize() );
@@ -221,9 +210,7 @@ __device__ float distance2(float3 a, float3 b) {
 
 __global__ void splatting_kernel(
 		TextureObject<uint> depth_in,
-		TextureObject<uchar4> colour_in,
-		TextureObject<float> depth_out,
-		TextureObject<uchar4> colour_out, SplatParams params) {
+		TextureObject<float> depth_out, SplatParams params) {
 	// Read an NxN region and
 	// - interpolate a depth value for this pixel
 	// - interpolate an rgb value for this pixel
@@ -291,11 +278,11 @@ __global__ void splatting_kernel(
 
 	if (minidx == -1) {
 		depth_out(x,y) = 0.0f;
-		colour_out(x,y) = make_uchar4(76,76,82,255);
+		//colour_out(x,y) = make_uchar4(76,76,82,255);
 		return;
 	}
 
-	float3 colour = make_float3(0.0f, 0.0f, 0.0f);
+	//float3 colour = make_float3(0.0f, 0.0f, 0.0f);
 	float depth = 0.0f;
 	float contrib = 0.0f;
 	float3 pos = params.camera.kinectDepthToSkeleton(x, y, mindepth);  // TODO:(Nick) Mindepth assumption is poor choice.
@@ -314,32 +301,32 @@ __global__ void splatting_kernel(
 
 			// Fast and simple trilinear interpolation
 			float c = fabs((1.0f - delta.x) * (1.0f - delta.y) * (1.0f - delta.z));
-			uchar4 col = colour_in.tex2D((int)sx, (int)sy);
-			colour.x += col.x*c;
-			colour.y += col.y*c;
-			colour.z += col.z*c;
+			//uchar4 col = colour_in.tex2D((int)sx, (int)sy);
+			//colour.x += col.x*c;
+			//colour.y += col.y*c;
+			//colour.z += col.z*c;
 			contrib += c;
 			depth += posp.z * c;
 		}
 	}
 
 	// Normalise
-	colour.x /= contrib;
-	colour.y /= contrib;
-	colour.z /= contrib;
+	//colour.x /= contrib;
+	//colour.y /= contrib;
+	//colour.z /= contrib;
 	depth /= contrib;
 
 	depth_out(x,y) = depth;
-	colour_out(x,y) = make_uchar4(colour.x, colour.y, colour.z, 255);
+	//colour_out(x,y) = make_uchar4(colour.x, colour.y, colour.z, 255);
 }
 
-void ftl::cuda::splat_points(const TextureObject<uint> &depth_in, const TextureObject<uchar4> &colour_in,
-		const TextureObject<float> &depth_out, const TextureObject<uchar4> &colour_out, const SplatParams &params, cudaStream_t stream) 
+void ftl::cuda::splat_points(const TextureObject<uint> &depth_in,
+		const TextureObject<float> &depth_out, const SplatParams &params, cudaStream_t stream) 
 {
 
 	const dim3 gridSize((depth_in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
 	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
 
-	splatting_kernel<<<gridSize, blockSize, 0, stream>>>(depth_in, colour_in, depth_out, colour_out, params);
+	splatting_kernel<<<gridSize, blockSize, 0, stream>>>(depth_in, depth_out, params);
 	cudaSafeCall( cudaGetLastError() );
 }
diff --git a/applications/reconstruct/src/splat_render.hpp b/applications/reconstruct/src/splat_render.hpp
index 1e03801349fe0049b5eab1000a1566541383e35f..a36e80ad833f258eced3acd63560e6b315995391 100644
--- a/applications/reconstruct/src/splat_render.hpp
+++ b/applications/reconstruct/src/splat_render.hpp
@@ -5,7 +5,7 @@
 #include <ftl/rgbd/source.hpp>
 #include <ftl/depth_camera.hpp>
 #include <ftl/voxel_scene.hpp>
-#include <ftl/ray_cast_util.hpp>
+//#include <ftl/ray_cast_util.hpp>
 #include <ftl/cuda_common.hpp>
 
 #include "splat_params.hpp"
diff --git a/applications/reconstruct/src/splat_render_cuda.hpp b/applications/reconstruct/src/splat_render_cuda.hpp
index c4bd724add53d72c478cd169c323edcc20ad3d9e..deb702e1661ca8f36fcea7aa6b4b6a115439cc7b 100644
--- a/applications/reconstruct/src/splat_render_cuda.hpp
+++ b/applications/reconstruct/src/splat_render_cuda.hpp
@@ -3,7 +3,7 @@
 
 #include <ftl/depth_camera.hpp>
 #include <ftl/voxel_hash.hpp>
-#include <ftl/ray_cast_util.hpp>
+//#include <ftl/ray_cast_util.hpp>
 
 #include "splat_params.hpp"
 
@@ -16,7 +16,6 @@ namespace cuda {
  */
 void isosurface_point_image(const ftl::voxhash::HashData& hashData,
 			const ftl::cuda::TextureObject<uint> &depth,
-			const ftl::cuda::TextureObject<uchar4> &colour,
 			const ftl::render::SplatParams &params, cudaStream_t stream);
 
 //void isosurface_point_image_stereo(const ftl::voxhash::HashData& hashData,
@@ -27,9 +26,11 @@ void isosurface_point_image(const ftl::voxhash::HashData& hashData,
 // TODO: isosurface_point_cloud
 
 void splat_points(const ftl::cuda::TextureObject<uint> &depth_in,
-		const ftl::cuda::TextureObject<uchar4> &colour_in,
 		const ftl::cuda::TextureObject<float> &depth_out,
-		const ftl::cuda::TextureObject<uchar4> &colour_out,
+		const ftl::render::SplatParams &params, cudaStream_t stream);
+
+void dibr(const ftl::cuda::TextureObject<float> &depth_in,
+		const ftl::cuda::TextureObject<uchar4> &colour_out, int numcams,
 		const ftl::render::SplatParams &params, cudaStream_t stream);
 
 }
diff --git a/applications/reconstruct/src/voxel_hash.cpp b/applications/reconstruct/src/voxel_hash.cpp
index d939556cfe364190597e3adc41295bed83ac41cf..14cb75078652001275404e87c09f77f0b9d19385 100644
--- a/applications/reconstruct/src/voxel_hash.cpp
+++ b/applications/reconstruct/src/voxel_hash.cpp
@@ -6,24 +6,18 @@ using ftl::voxhash::HashParams;
 void HashData::allocate(const HashParams& params, bool dataOnGPU) {
 	m_bIsOnGPU = dataOnGPU;
 	if (m_bIsOnGPU) {
-		cudaSafeCall(cudaMalloc(&d_heap, sizeof(unsigned int) * params.m_numSDFBlocks));
-		cudaSafeCall(cudaMalloc(&d_heapCounter, sizeof(unsigned int)));
 		cudaSafeCall(cudaMalloc(&d_hash, sizeof(HashEntry)* params.m_hashNumBuckets));
 		cudaSafeCall(cudaMalloc(&d_hashDecision, sizeof(int)* params.m_hashNumBuckets));
 		cudaSafeCall(cudaMalloc(&d_hashDecisionPrefix, sizeof(int)* params.m_hashNumBuckets));
-		cudaSafeCall(cudaMalloc(&d_hashCompactified, sizeof(HashEntry)* params.m_hashNumBuckets));
+		cudaSafeCall(cudaMalloc(&d_hashCompactified, sizeof(HashEntry*)* params.m_hashNumBuckets));
 		cudaSafeCall(cudaMalloc(&d_hashCompactifiedCounter, sizeof(int)));
-		cudaSafeCall(cudaMalloc(&d_SDFBlocks, sizeof(Voxel) * params.m_numSDFBlocks * params.m_SDFBlockSize*params.m_SDFBlockSize*params.m_SDFBlockSize));
 		cudaSafeCall(cudaMalloc(&d_hashBucketMutex, sizeof(int)* params.m_hashNumBuckets));
 	} else {
-		d_heap = new unsigned int[params.m_numSDFBlocks];
-		d_heapCounter = new unsigned int[1];
 		d_hash = new HashEntry[params.m_hashNumBuckets];
 		d_hashDecision = new int[params.m_hashNumBuckets];
 		d_hashDecisionPrefix = new int[params.m_hashNumBuckets];
-		d_hashCompactified = new HashEntry[params.m_hashNumBuckets];
+		d_hashCompactified = new HashEntry*[params.m_hashNumBuckets];
 		d_hashCompactifiedCounter = new int[1];
-		d_SDFBlocks = new Voxel[params.m_numSDFBlocks * params.m_SDFBlockSize*params.m_SDFBlockSize*params.m_SDFBlockSize];
 		d_hashBucketMutex = new int[params.m_hashNumBuckets];
 	}
 
@@ -38,35 +32,26 @@ void HashData::updateParams(const HashParams& params) {
 
 void HashData::free() {
 	if (m_bIsOnGPU) {
-		cudaSafeCall(cudaFree(d_heap));
-		cudaSafeCall(cudaFree(d_heapCounter));
 		cudaSafeCall(cudaFree(d_hash));
 		cudaSafeCall(cudaFree(d_hashDecision));
 		cudaSafeCall(cudaFree(d_hashDecisionPrefix));
 		cudaSafeCall(cudaFree(d_hashCompactified));
 		cudaSafeCall(cudaFree(d_hashCompactifiedCounter));
-		cudaSafeCall(cudaFree(d_SDFBlocks));
 		cudaSafeCall(cudaFree(d_hashBucketMutex));
 	} else {
-		if (d_heap) delete[] d_heap;
-		if (d_heapCounter) delete[] d_heapCounter;
 		if (d_hash) delete[] d_hash;
 		if (d_hashDecision) delete[] d_hashDecision;
 		if (d_hashDecisionPrefix) delete[] d_hashDecisionPrefix;
 		if (d_hashCompactified) delete[] d_hashCompactified;
 		if (d_hashCompactifiedCounter) delete[] d_hashCompactifiedCounter;
-		if (d_SDFBlocks) delete[] d_SDFBlocks;
 		if (d_hashBucketMutex) delete[] d_hashBucketMutex;
 	}
 
 	d_hash = NULL;
-	d_heap = NULL;
-	d_heapCounter = NULL;
 	d_hashDecision = NULL;
 	d_hashDecisionPrefix = NULL;
 	d_hashCompactified = NULL;
 	d_hashCompactifiedCounter = NULL;
-	d_SDFBlocks = NULL;
 	d_hashBucketMutex = NULL;
 }
 
@@ -76,14 +61,11 @@ HashData HashData::download() const {
 	
 	HashData hashData;
 	hashData.allocate(params, false);	//allocate the data on the CPU
-	cudaSafeCall(cudaMemcpy(hashData.d_heap, d_heap, sizeof(unsigned int) * params.m_numSDFBlocks, cudaMemcpyDeviceToHost));
-	cudaSafeCall(cudaMemcpy(hashData.d_heapCounter, d_heapCounter, sizeof(unsigned int), cudaMemcpyDeviceToHost));
 	cudaSafeCall(cudaMemcpy(hashData.d_hash, d_hash, sizeof(HashEntry)* params.m_hashNumBuckets, cudaMemcpyDeviceToHost));
 	cudaSafeCall(cudaMemcpy(hashData.d_hashDecision, d_hashDecision, sizeof(int)*params.m_hashNumBuckets, cudaMemcpyDeviceToHost));
 	cudaSafeCall(cudaMemcpy(hashData.d_hashDecisionPrefix, d_hashDecisionPrefix, sizeof(int)*params.m_hashNumBuckets, cudaMemcpyDeviceToHost));
-	cudaSafeCall(cudaMemcpy(hashData.d_hashCompactified, d_hashCompactified, sizeof(HashEntry)* params.m_hashNumBuckets, cudaMemcpyDeviceToHost));
+	cudaSafeCall(cudaMemcpy(hashData.d_hashCompactified, d_hashCompactified, sizeof(HashEntry*)* params.m_hashNumBuckets, cudaMemcpyDeviceToHost));
 	cudaSafeCall(cudaMemcpy(hashData.d_hashCompactifiedCounter, d_hashCompactifiedCounter, sizeof(unsigned int), cudaMemcpyDeviceToHost));
-	cudaSafeCall(cudaMemcpy(hashData.d_SDFBlocks, d_SDFBlocks, sizeof(Voxel) * params.m_numSDFBlocks * params.m_SDFBlockSize*params.m_SDFBlockSize*params.m_SDFBlockSize, cudaMemcpyDeviceToHost));
 	cudaSafeCall(cudaMemcpy(hashData.d_hashBucketMutex, d_hashBucketMutex, sizeof(int)* params.m_hashNumBuckets, cudaMemcpyDeviceToHost));
 	
 	return hashData;
@@ -95,21 +77,18 @@ HashData HashData::upload() const {
 	
 	HashData hashData;
 	hashData.allocate(params, false);	//allocate the data on the CPU
-	cudaSafeCall(cudaMemcpy(hashData.d_heap, d_heap, sizeof(unsigned int) * params.m_numSDFBlocks, cudaMemcpyHostToDevice));
-	cudaSafeCall(cudaMemcpy(hashData.d_heapCounter, d_heapCounter, sizeof(unsigned int), cudaMemcpyHostToDevice));
 	cudaSafeCall(cudaMemcpy(hashData.d_hash, d_hash, sizeof(HashEntry)* params.m_hashNumBuckets, cudaMemcpyHostToDevice));
 	cudaSafeCall(cudaMemcpy(hashData.d_hashDecision, d_hashDecision, sizeof(int)*params.m_hashNumBuckets, cudaMemcpyHostToDevice));
 	cudaSafeCall(cudaMemcpy(hashData.d_hashDecisionPrefix, d_hashDecisionPrefix, sizeof(int)*params.m_hashNumBuckets, cudaMemcpyHostToDevice));
 	cudaSafeCall(cudaMemcpy(hashData.d_hashCompactified, d_hashCompactified, sizeof(HashEntry)* params.m_hashNumBuckets, cudaMemcpyHostToDevice));
 	cudaSafeCall(cudaMemcpy(hashData.d_hashCompactifiedCounter, d_hashCompactifiedCounter, sizeof(unsigned int), cudaMemcpyHostToDevice));
-	cudaSafeCall(cudaMemcpy(hashData.d_SDFBlocks, d_SDFBlocks, sizeof(Voxel) * params.m_numSDFBlocks * params.m_SDFBlockSize*params.m_SDFBlockSize*params.m_SDFBlockSize, cudaMemcpyHostToDevice));
 	cudaSafeCall(cudaMemcpy(hashData.d_hashBucketMutex, d_hashBucketMutex, sizeof(int)* params.m_hashNumBuckets, cudaMemcpyHostToDevice));
 	
 	return hashData;
 }
 
-size_t HashData::getAllocatedBlocks() const {
+/*size_t HashData::getAllocatedBlocks() const {
 	unsigned int count;
 	cudaSafeCall(cudaMemcpy(d_heapCounter, &count, sizeof(unsigned int), cudaMemcpyDeviceToHost));
 	return count;
-}
+}*/
diff --git a/applications/reconstruct/src/voxel_hash.cu b/applications/reconstruct/src/voxel_hash.cu
index 7447a1c54eea72e7e8ee888a148fa3ee73a9473e..c2d07c391a6e48d2b45cc23dbf32b00878ffd5c9 100644
--- a/applications/reconstruct/src/voxel_hash.cu
+++ b/applications/reconstruct/src/voxel_hash.cu
@@ -2,35 +2,42 @@
 
 using namespace ftl::voxhash;
 
-#define COLLISION_LIST_SIZE 5
+#define COLLISION_LIST_SIZE 6
+
+__device__ inline uint64_t compactPosition(const int3 &pos) {
+	union __align__(8) {
+	short4 posXYZ;
+	uint64_t pos64;
+	};
+	posXYZ.x = pos.x; posXYZ.y = pos.y; posXYZ.z = pos.z; posXYZ.w = 0;
+	return pos64;
+}
 
 //! returns the hash entry for a given sdf block id; if there was no hash entry the returned entry will have a ptr with FREE_ENTRY set
 __device__ 
-HashEntry HashData::getHashEntryForSDFBlockPos(const int3& sdfBlock) const {
+int HashData::getHashEntryForSDFBlockPos(const int3& sdfBlock) const {
 	uint h = computeHashPos(sdfBlock); //hash
-	int3 pos = sdfBlock;
+	uint64_t pos = compactPosition(sdfBlock);
 
-	HashEntry curr;
+	HashEntryHead curr;
 
 	int i = h;
 	unsigned int maxIter = 0;
 
 	#pragma unroll 2
 	while (maxIter < COLLISION_LIST_SIZE) {
-		curr = d_hash[i];
+		curr = d_hash[i].head;
 
-		if (curr.pos == pos && curr.ptr != FREE_ENTRY) return curr;
-		if (curr.offset == 0) break;
+		if (curr.pos == pos && curr.offset != FREE_ENTRY) return i;
+		if (curr.offset == 0 || curr.offset == FREE_ENTRY) break;
 
 		i +=  curr.offset;  //go to next element in the list
 		i %= (params().m_hashNumBuckets);  //check for overflow
 		++maxIter;
 	}
 
-	// Could not find so return dummy
-	curr.pos = pos;
-	curr.ptr = FREE_ENTRY;
-	return curr;
+	// Could not find
+	return -1;
 }
 
 //for histogram (collisions traversal only)
@@ -39,15 +46,15 @@ unsigned int HashData::getNumHashLinkedList(unsigned int bucketID) {
 	unsigned int listLen = 0;
 
 	unsigned int i = bucketID;	//start with the last entry of the current bucket
-	HashEntry curr;	curr.offset = 0;
+	HashEntryHead curr;	curr.offset = 0;
 
 	unsigned int maxIter = 0;
 
 	#pragma unroll 2 
 	while (maxIter < COLLISION_LIST_SIZE) {
-		curr = d_hash[i];
+		curr = d_hash[i].head;
 
-		if (curr.offset == 0) break;
+		if (curr.offset == 0 || curr.offset == FREE_ENTRY) break;
 
 		i += curr.offset;		//go to next element in the list
 		i %= (params().m_hashNumBuckets);	//check for overflow
@@ -60,18 +67,19 @@ unsigned int HashData::getNumHashLinkedList(unsigned int bucketID) {
 
 //pos in SDF block coordinates
 __device__
-void HashData::allocBlock(const int3& pos, const uchar frame) {
+void HashData::allocBlock(const int3& pos) {
 	uint h = computeHashPos(pos);				//hash bucket
 	uint i = h;
-	HashEntry curr;	curr.offset = 0;
+	HashEntryHead curr;	//curr.offset = 0;
+	const uint64_t pos64 = compactPosition(pos);
 
 	unsigned int maxIter = 0;
 	#pragma  unroll 2
 	while (maxIter < COLLISION_LIST_SIZE) {
 		//offset = curr.offset;
-		curr = d_hash[i];	//TODO MATTHIAS do by reference
-		if (curr.pos == pos && curr.ptr != FREE_ENTRY) return;
-		if (curr.offset == 0) break;
+		curr = d_hash[i].head;	//TODO MATTHIAS do by reference
+		if (curr.pos == pos64 && curr.offset != FREE_ENTRY) return;
+		if (curr.offset == 0 || curr.offset == FREE_ENTRY) break;
 
 		i += curr.offset;		//go to next element in the list
 		i %= (params().m_hashNumBuckets);	//check for overflow
@@ -79,25 +87,32 @@ void HashData::allocBlock(const int3& pos, const uchar frame) {
 	}
 
 	// Limit reached...
-	if (curr.offset != 0) return;
+	//if (maxIter == COLLISION_LIST_SIZE) return;
 
-	int j = i+1;
+	int j = i;
 	while (maxIter < COLLISION_LIST_SIZE) {
 		//offset = curr.offset;
-		curr = d_hash[j];	//TODO MATTHIAS do by reference
-		if (curr.ptr == FREE_ENTRY) {
+
+		if (curr.offset == FREE_ENTRY) {
 			int prevValue = atomicExch(&d_hashBucketMutex[i], LOCK_ENTRY);
 			if (prevValue != LOCK_ENTRY) {
-				//InterlockedExchange(g_HashBucketMutex[h], LOCK_ENTRY, prevValue);	//lock the hash bucket where we have found a free entry
-				prevValue = atomicExch(&d_hashBucketMutex[j], LOCK_ENTRY);
-				if (prevValue != LOCK_ENTRY) {	//only proceed if the bucket has been locked
-					HashEntry& entry = d_hash[j];
-					entry.pos = pos;
+				if (i == j) {
+					HashEntryHead& entry = d_hash[j].head;
+					entry.pos = pos64;
 					entry.offset = 0;
-					entry.flags = 0;  // Flag block as valid in this frame (Nick)		
-					entry.ptr = consumeHeap() * SDF_BLOCK_SIZE*SDF_BLOCK_SIZE*SDF_BLOCK_SIZE;	//memory alloc
-					d_hash[i].offset = j-i;
-					//setHashEntry(g_Hash, idxLastEntryInBucket, lastEntryInBucket);
+					entry.flags = 0;
+				} else {
+					//InterlockedExchange(g_HashBucketMutex[h], LOCK_ENTRY, prevValue);	//lock the hash bucket where we have found a free entry
+					prevValue = atomicExch(&d_hashBucketMutex[j], LOCK_ENTRY);
+					if (prevValue != LOCK_ENTRY) {	//only proceed if the bucket has been locked
+						HashEntryHead& entry = d_hash[j].head;
+						entry.pos = pos64;
+						entry.offset = 0;
+						entry.flags = 0;  // Flag block as valid in this frame (Nick)		
+						//entry.ptr = consumeHeap() * SDF_BLOCK_SIZE*SDF_BLOCK_SIZE*SDF_BLOCK_SIZE;	//memory alloc
+						d_hash[i].head.offset = j-i;
+						//setHashEntry(g_Hash, idxLastEntryInBucket, lastEntryInBucket);
+					}
 				}
 			} 
 			return;	//bucket was already locked
@@ -105,6 +120,7 @@ void HashData::allocBlock(const int3& pos, const uchar frame) {
 
 		++j;
 		j %= (params().m_hashNumBuckets);	//check for overflow
+		curr = d_hash[j].head;	//TODO MATTHIAS do by reference
 		++maxIter;
 	}
 }
@@ -194,18 +210,19 @@ bool HashData::insertHashEntry(HashEntry entry)
 __device__
 bool HashData::deleteHashEntryElement(const int3& sdfBlock) {
 	uint h = computeHashPos(sdfBlock);	//hash bucket
+	const uint64_t pos = compactPosition(sdfBlock);
 
 	int i = h;
 	int prev = -1;
-	HashEntry curr;
+	HashEntryHead curr;
 	unsigned int maxIter = 0;
 
 	#pragma  unroll 2 
 	while (maxIter < COLLISION_LIST_SIZE) {
-		curr = d_hash[i];
+		curr = d_hash[i].head;
 	
 		//found that dude that we need/want to delete
-		if (curr.pos == sdfBlock && curr.ptr != FREE_ENTRY) {
+		if (curr.pos == pos && curr.offset != FREE_ENTRY) {
 			//int prevValue = 0;
 			//InterlockedExchange(bucketMutex[h], LOCK_ENTRY, prevValue);	//lock the hash bucket
 			int prevValue = atomicExch(&d_hashBucketMutex[i], LOCK_ENTRY);
@@ -214,19 +231,19 @@ bool HashData::deleteHashEntryElement(const int3& sdfBlock) {
 				prevValue = (prev >= 0) ? atomicExch(&d_hashBucketMutex[prev], LOCK_ENTRY) : 0;
 				if (prevValue == LOCK_ENTRY)	return false;
 				if (prevValue != LOCK_ENTRY) {
-					const uint linBlockSize = SDF_BLOCK_SIZE * SDF_BLOCK_SIZE * SDF_BLOCK_SIZE;
-					appendHeap(curr.ptr / linBlockSize);
+					//const uint linBlockSize = SDF_BLOCK_SIZE * SDF_BLOCK_SIZE * SDF_BLOCK_SIZE;
+					//appendHeap(curr.ptr / linBlockSize);
 					deleteHashEntry(i);
 
 					if (prev >= 0) {
-						d_hash[prev].offset = curr.offset;
+						d_hash[prev].head.offset = curr.offset;
 					}
 					return true;
 				}
 			}
 		}
 
-		if (curr.offset == 0) {	//we have found the end of the list
+		if (curr.offset == 0 || curr.offset == FREE_ENTRY) {	//we have found the end of the list
 			return false;	//should actually never happen because we need to find that guy before
 		}
 		prev = i;
diff --git a/applications/reconstruct/src/voxel_scene.cpp b/applications/reconstruct/src/voxel_scene.cpp
index e392f55ee5a7f2f23ad3046dcb7b3a83f61132d5..51f9bc75b88b0d8ae73d41d8ba05fb248cb5ae00 100644
--- a/applications/reconstruct/src/voxel_scene.cpp
+++ b/applications/reconstruct/src/voxel_scene.cpp
@@ -17,11 +17,11 @@ using std::vector;
 
 extern "C" void resetCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams);
 extern "C" void resetHashBucketMutexCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, cudaStream_t);
-extern "C" void allocCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, cudaStream_t);
+extern "C" void allocCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, int camid, const DepthCameraParams &depthCameraParams, cudaStream_t);
 //extern "C" void fillDecisionArrayCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const DepthCameraData& depthCameraData);
 //extern "C" void compactifyHashCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams);
 //extern "C" unsigned int compactifyHashAllInOneCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams);
-extern "C" void integrateDepthMapCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, cudaStream_t);
+//extern "C" void integrateDepthMapCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, cudaStream_t);
 //extern "C" void bindInputDepthColorTextures(const DepthCameraData& depthCameraData);
 
 
@@ -97,6 +97,18 @@ void SceneRep::addSource(ftl::rgbd::Source *src) {
 	cam.params.m_imageWidth = 0;
 }
 
+extern "C" void updateCUDACameraConstant(ftl::voxhash::DepthCameraCUDA *data, int count);
+
+void SceneRep::_updateCameraConstant() {
+	std::vector<ftl::voxhash::DepthCameraCUDA> cams(cameras_.size());
+	for (size_t i=0; i<cameras_.size(); ++i) {
+		cams[i] = cameras_[i].gpu.data;
+		cams[i].pose = MatrixConversion::toCUDA(cameras_[i].source->getPose().cast<float>());
+		cams[i].poseInverse = MatrixConversion::toCUDA(cameras_[i].source->getPose().cast<float>().inverse());
+	}
+	updateCUDACameraConstant(cams.data(), cams.size());
+}
+
 int SceneRep::upload() {
 	int active = 0;
 
@@ -130,6 +142,15 @@ int SceneRep::upload() {
 			}
 		}
 
+		cam.params.flags = m_frameCount;
+	}
+
+	_updateCameraConstant();
+	//cudaSafeCall(cudaDeviceSynchronize());
+
+	for (size_t i=0; i<cameras_.size(); ++i) {
+		auto &cam = cameras_[i];
+
 		// Get the RGB-Depth frame from input
 		Source *input = cam.source;
 		Mat rgb, depth;
@@ -145,13 +166,11 @@ int SceneRep::upload() {
 		Mat rgba;
 		cv::cvtColor(rgb,rgba, cv::COLOR_BGR2BGRA);
 
-		cam.params.flags = m_frameCount;
-
 		// Send to GPU and merge view into scene
 		//cam.gpu.updateParams(cam.params);
 		cam.gpu.updateData(depth, rgba, cam.stream);
 
-		setLastRigidTransform(input->getPose().cast<float>());
+		//setLastRigidTransform(input->getPose().cast<float>());
 
 		//make the rigid transform available on the GPU
 		//m_hashData.updateParams(m_hashParams, cv::cuda::StreamAccessor::getStream(cam.stream));
@@ -159,7 +178,7 @@ int SceneRep::upload() {
 		//if (i > 0) cudaSafeCall(cudaStreamSynchronize(cv::cuda::StreamAccessor::getStream(cameras_[i-1].stream)));
 
 		//allocate all hash blocks which are corresponding to depth map entries
-		_alloc(cam.gpu, cam.params, cv::cuda::StreamAccessor::getStream(cam.stream));
+		_alloc(i, cv::cuda::StreamAccessor::getStream(cam.stream));
 	}
 
 	// Must have finished all allocations and rendering before next integration
@@ -169,7 +188,7 @@ int SceneRep::upload() {
 }
 
 void SceneRep::integrate() {
-	for (size_t i=0; i<cameras_.size(); ++i) {
+	/*for (size_t i=0; i<cameras_.size(); ++i) {
 		auto &cam = cameras_[i];
 
 		setLastRigidTransform(cam.source->getPose().cast<float>());
@@ -179,16 +198,19 @@ void SceneRep::integrate() {
 		_compactifyVisible(cam.params);
 
 		//volumetrically integrate the depth data into the depth SDFBlocks
-		_integrateDepthMap(cam.gpu, cam.params);
+		//_integrateDepthMap(cam.gpu, cam.params);
 
 		//_garbageCollect();
 
 		m_numIntegratedFrames++;
-	}
+	}*/
+
+	_compactifyAllocated();
+	_integrateDepthMaps();
 }
 
 void SceneRep::garbage() {
-	_compactifyAllocated();
+	//_compactifyAllocated();
 	_garbageCollect();
 
 	//cudaSafeCall(cudaStreamSynchronize(integ_stream_));
@@ -248,127 +270,19 @@ void SceneRep::nextFrame() {
 void SceneRep::reset() {
 	m_numIntegratedFrames = 0;
 
-	m_hashParams.m_rigidTransform.setIdentity();
-	m_hashParams.m_rigidTransformInverse.setIdentity();
+	//m_hashParams.m_rigidTransform.setIdentity();
+	//m_hashParams.m_rigidTransformInverse.setIdentity();
 	m_hashParams.m_numOccupiedBlocks = 0;
 	m_hashData.updateParams(m_hashParams);
 	resetCUDA(m_hashData, m_hashParams);
 }
 
-//! debug only!
-unsigned int SceneRep::getHeapFreeCount() {
+unsigned int SceneRep::getOccupiedCount() {
 	unsigned int count;
-	cudaSafeCall(cudaMemcpy(&count, m_hashData.d_heapCounter, sizeof(unsigned int), cudaMemcpyDeviceToHost));
+	cudaSafeCall(cudaMemcpy(&count, m_hashData.d_hashCompactifiedCounter, sizeof(unsigned int), cudaMemcpyDeviceToHost));
 	return count+1;	//there is one more free than the address suggests (0 would be also a valid address)
 }
 
-//! debug only!
-void SceneRep::debugHash() {
-	HashEntry* hashCPU = new HashEntry[m_hashParams.m_hashNumBuckets];
-	unsigned int* heapCPU = new unsigned int[m_hashParams.m_numSDFBlocks];
-	unsigned int heapCounterCPU;
-
-	cudaSafeCall(cudaMemcpy(&heapCounterCPU, m_hashData.d_heapCounter, sizeof(unsigned int), cudaMemcpyDeviceToHost));
-	heapCounterCPU++;	//points to the first free entry: number of blocks is one more
-
-	cudaSafeCall(cudaMemcpy(heapCPU, m_hashData.d_heap, sizeof(unsigned int)*m_hashParams.m_numSDFBlocks, cudaMemcpyDeviceToHost));
-	cudaSafeCall(cudaMemcpy(hashCPU, m_hashData.d_hash, sizeof(HashEntry)*m_hashParams.m_hashNumBuckets, cudaMemcpyDeviceToHost));
-
-	//Check for duplicates
-	class myint3Voxel {
-	public:
-		myint3Voxel() {}
-		~myint3Voxel() {}
-		bool operator<(const myint3Voxel& other) const {
-			if (x == other.x) {
-				if (y == other.y) {
-					return z < other.z;
-				}
-				return y < other.y;
-			}
-			return x < other.x;
-		}
-
-		bool operator==(const myint3Voxel& other) const {
-			return x == other.x && y == other.y && z == other.z;
-		}
-
-		int x,y,z, i;
-		int offset;
-		int ptr;
-	}; 
-
-
-	std::unordered_set<unsigned int> pointersFreeHash;
-	std::vector<int> pointersFreeVec(m_hashParams.m_numSDFBlocks, 0);  // CHECK Nick Changed to int from unsigned in
-	for (unsigned int i = 0; i < heapCounterCPU; i++) {
-		pointersFreeHash.insert(heapCPU[i]);
-		pointersFreeVec[heapCPU[i]] = FREE_ENTRY;
-	}
-	if (pointersFreeHash.size() != heapCounterCPU) {
-		throw std::runtime_error("ERROR: duplicate free pointers in heap array");
-	}
-		
-
-	unsigned int numOccupied = 0;
-	unsigned int numMinusOne = 0;
-	//unsigned int listOverallFound = 0;
-
-	std::list<myint3Voxel> l;
-	//std::vector<myint3Voxel> v;
-	
-	for (unsigned int i = 0; i < m_hashParams.m_hashNumBuckets; i++) {
-		if (hashCPU[i].ptr == -1) {
-			numMinusOne++;
-		}
-
-		if (hashCPU[i].ptr != -2) {
-			numOccupied++;	// != FREE_ENTRY
-			myint3Voxel a;	
-			a.x = hashCPU[i].pos.x;
-			a.y = hashCPU[i].pos.y;
-			a.z = hashCPU[i].pos.z;
-			l.push_back(a);
-			//v.push_back(a);
-
-			unsigned int linearBlockSize = m_hashParams.m_SDFBlockSize*m_hashParams.m_SDFBlockSize*m_hashParams.m_SDFBlockSize;
-			if (pointersFreeHash.find(hashCPU[i].ptr / linearBlockSize) != pointersFreeHash.end()) {
-				throw std::runtime_error("ERROR: ptr is on free heap, but also marked as an allocated entry");
-			}
-			pointersFreeVec[hashCPU[i].ptr / linearBlockSize] = LOCK_ENTRY;
-		}
-	}
-
-	unsigned int numHeapFree = 0;
-	unsigned int numHeapOccupied = 0;
-	for (unsigned int i = 0; i < m_hashParams.m_numSDFBlocks; i++) {
-		if		(pointersFreeVec[i] == FREE_ENTRY) numHeapFree++;
-		else if (pointersFreeVec[i] == LOCK_ENTRY) numHeapOccupied++;
-		else {
-			throw std::runtime_error("memory leak detected: neither free nor allocated");
-		}
-	}
-	if (numHeapFree + numHeapOccupied == m_hashParams.m_numSDFBlocks) std::cout << "HEAP OK!" << std::endl;
-	else throw std::runtime_error("HEAP CORRUPTED");
-
-	l.sort();
-	size_t sizeBefore = l.size();
-	l.unique();
-	size_t sizeAfter = l.size();
-
-
-	std::cout << "diff: " << sizeBefore - sizeAfter << std::endl;
-	std::cout << "minOne: " << numMinusOne << std::endl;
-	std::cout << "numOccupied: " << numOccupied << "\t numFree: " << getHeapFreeCount() << std::endl;
-	std::cout << "numOccupied + free: " << numOccupied + getHeapFreeCount() << std::endl;
-	std::cout << "numInFrustum: " << m_hashParams.m_numOccupiedBlocks << std::endl;
-
-	SAFE_DELETE_ARRAY(heapCPU);
-	SAFE_DELETE_ARRAY(hashCPU);
-
-	//getchar();
-}
-
 HashParams SceneRep::_parametersFromConfig() {
 	//auto &cfg = ftl::config::resolve(config);
 	HashParams params;
@@ -404,15 +318,15 @@ void SceneRep::_destroy() {
 	m_hashData.free();
 }
 
-void SceneRep::_alloc(const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, cudaStream_t stream) {
+void SceneRep::_alloc(int camid, cudaStream_t stream) {
 	// NOTE (nick): We might want this later...
-	if (false) {
+	/*if (false) {
 		// TODO(Nick) Make this work without memcpy to host first
-		//allocate until all blocks are allocated
-		unsigned int prevFree = getHeapFreeCount();
+		allocate until all blocks are allocated
+		unsigned int prevFree = 0; //getHeapFreeCount();
 		while (1) {
 			resetHashBucketMutexCUDA(m_hashData, m_hashParams, stream);
-			allocCUDA(m_hashData, m_hashParams, depthCameraData, depthCameraParams, stream);
+			allocCUDA(m_hashData, m_hashParams, camid, cameras_[camid].params, stream);
 
 			unsigned int currFree = getHeapFreeCount();
 
@@ -424,16 +338,16 @@ void SceneRep::_alloc(const DepthCameraData& depthCameraData, const DepthCameraP
 			}
 		}
 	}
-	else {
+	else {*/
 		//this version is faster, but it doesn't guarantee that all blocks are allocated (staggers alloc to the next frame)
 		resetHashBucketMutexCUDA(m_hashData, m_hashParams, stream);
-		allocCUDA(m_hashData, m_hashParams, depthCameraData, depthCameraParams, stream);
-	}
+		allocCUDA(m_hashData, m_hashParams, camid, cameras_[camid].params, stream);
+	//}
 }
 
 
 void SceneRep::_compactifyVisible(const DepthCameraParams &camera) { //const DepthCameraData& depthCameraData) {
-	ftl::cuda::compactifyVisible(m_hashData, m_hashParams, camera, integ_stream_);		//this version uses atomics over prefix sums, which has a much better performance
+	ftl::cuda::compactifyOccupied(m_hashData, m_hashParams, integ_stream_);		//this version uses atomics over prefix sums, which has a much better performance
 	//m_hashData.updateParams(m_hashParams);	//make sure numOccupiedBlocks is updated on the GPU
 }
 
@@ -443,13 +357,17 @@ void SceneRep::_compactifyAllocated() {
 	//m_hashData.updateParams(m_hashParams);	//make sure numOccupiedBlocks is updated on the GPU
 }
 
-void SceneRep::_integrateDepthMap(const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams) {
+/*void SceneRep::_integrateDepthMap(const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams) {
 	if (!reg_mode_) ftl::cuda::integrateDepthMap(m_hashData, m_hashParams, depthCameraData, depthCameraParams, integ_stream_);
 	else ftl::cuda::integrateRegistration(m_hashData, m_hashParams, depthCameraData, depthCameraParams, integ_stream_);
+}*/
+
+void SceneRep::_integrateDepthMaps() {
+	ftl::cuda::integrateDepthMaps(m_hashData, m_hashParams, cameras_.size(), integ_stream_);
 }
 
 void SceneRep::_garbageCollect() {
-	ftl::cuda::garbageCollectIdentify(m_hashData, m_hashParams, integ_stream_);
+	//ftl::cuda::garbageCollectIdentify(m_hashData, m_hashParams, integ_stream_);
 	resetHashBucketMutexCUDA(m_hashData, m_hashParams, integ_stream_);	//needed if linked lists are enabled -> for memeory deletion
 	ftl::cuda::garbageCollectFree(m_hashData, m_hashParams, integ_stream_);
 }
diff --git a/components/rgbd-sources/src/realsense_source.cpp b/components/rgbd-sources/src/realsense_source.cpp
index a4f34c2360c3d3cf9c528277f1cc5f21278af905..64b0d328274a61b8baeca4c2706b38de8f521fe4 100644
--- a/components/rgbd-sources/src/realsense_source.cpp
+++ b/components/rgbd-sources/src/realsense_source.cpp
@@ -1,5 +1,7 @@
 #include "realsense_source.hpp"
 #include <loguru.hpp>
+#include <ftl/threads.hpp>
+#include <ftl/rgbd/source.hpp>
 
 using ftl::rgbd::detail::RealsenseSource;
 using std::string;
@@ -41,7 +43,7 @@ RealsenseSource::~RealsenseSource() {
 bool RealsenseSource::grab(int n, int b) {
     rs2::frameset frames = pipe_.wait_for_frames();
     //rs2::align align(RS2_STREAM_DEPTH);
-    frames = align_to_depth_.process(frames); //align_to_depth_.process(frames);
+    //frames = align_to_depth_.process(frames); //align_to_depth_.process(frames);
 
     rs2::depth_frame depth = frames.get_depth_frame();
     float w = depth.get_width();