Merge branch 'feature/voxelhash-altrender' into 'feature/voxelhash'

Feature/voxelhash altrender See merge request nicolas.pope/ftl!11

Merge branch 'feature/voxelhash-altrender' into 'feature/voxelhash'
901b66d8 · Nicolas Pope · 18627aa3 · 1313392f · 901b66d8 · 901b66d8
Commit 901b66d8 authored 6 years ago by Nicolas Pope
--- a/applications/reconstruct/include/ftl/cuda_operators.hpp
+++ b/applications/reconstruct/include/ftl/cuda_operators.hpp
@@ -271,6 +271,16 @@ inline __host__ __device__ float3 make_float3(float4 f)
    return make_float3(f.x,f.y,f.z);
 }

+inline __host__ __device__ float3 make_float3(uchar3 c)
+{
+    return make_float3(static_cast<float>(c.x), static_cast<float>(c.y), static_cast<float>(c.z));
+}
+
+inline __host__ __device__ uchar3 make_uchar3(float3 f)
+{
+    return make_uchar3(static_cast<unsigned char>(f.x), static_cast<unsigned char>(f.y), static_cast<unsigned char>(f.z));
+}
+
 inline __host__ __device__ float3 make_float3(float f)
 {
    return make_float3(f,f,f);

--- a/applications/reconstruct/include/ftl/ray_cast_sdf.hpp
+++ b/applications/reconstruct/include/ftl/ray_cast_sdf.hpp
 #pragma once

+#include <ftl/configurable.hpp>
 #include <ftl/matrix_conversion.hpp>
 #include <ftl/cuda_matrix_util.hpp>
 #include <ftl/depth_camera.hpp>
@@ -8,23 +9,24 @@

 // #include "DX11RayIntervalSplatting.h"

-class CUDARayCastSDF
+class CUDARayCastSDF : public ftl::Configurable
 {
 public:
-	CUDARayCastSDF(const RayCastParams& params) {
-		create(params);
+	CUDARayCastSDF(nlohmann::json& config) : ftl::Configurable(config) {
+		create(parametersFromConfig(config));
+		hash_render_ = config.value("hash_renderer", false);
 	}

 	~CUDARayCastSDF(void) {
 		destroy();
 	}

-	static RayCastParams parametersFromConfig(const nlohmann::json& gas, const Eigen::Matrix4f& intrinsics, const Eigen::Matrix4f& intrinsicsInv) {
+	static RayCastParams parametersFromConfig(const nlohmann::json& gas) {
 		RayCastParams params;
 		params.m_width = gas["adapterWidth"].get<unsigned int>();
 		params.m_height = gas["adapterHeight"].get<unsigned int>();
-		params.m_intrinsics = MatrixConversion::toCUDA(intrinsics);
-		params.m_intrinsicsInverse = MatrixConversion::toCUDA(intrinsicsInv);
+		params.m_intrinsics = MatrixConversion::toCUDA(Eigen::Matrix4f());
+		params.m_intrinsicsInverse = MatrixConversion::toCUDA(Eigen::Matrix4f());
 		params.m_minDepth = gas["sensorDepthMin"].get<float>();
 		params.m_maxDepth = gas["sensorDepthMax"].get<float>();
 		params.m_rayIncrement = gas["SDFRayIncrementFactor"].get<float>() * gas["SDFTruncation"].get<float>();
@@ -59,6 +61,7 @@ private:

 	RayCastParams m_params;
 	RayCastData m_data;
+	bool hash_render_;

 	// DX11RayIntervalSplatting m_rayIntervalSplatting;
 };
--- a/applications/reconstruct/src/main.cpp
+++ b/applications/reconstruct/src/main.cpp
@@ -388,16 +388,15 @@ static void run() {
 	vector<PointCloud<PointXYZRGB>::Ptr> clouds(inputs.size());
 	Display display_merged(config["display"], "Merged"); // todo

-	auto rayparams = CUDARayCastSDF::parametersFromConfig(config["voxelhash"], Eigen::Matrix4f(), Eigen::Matrix4f());
-	CUDARayCastSDF rays(rayparams);
+	CUDARayCastSDF rays(config["voxelhash"]);

 	LOG(INFO) << "About to create scene";
 	ftl::voxhash::SceneRep scene(config["voxelhash"]);
 	LOG(INFO) << "Scene created";

-	float3 *point_array = new float3[rayparams.m_width*rayparams.m_height];
+	float3 *point_array = new float3[rays.getRayCastParams().m_width*rays.getRayCastParams().m_height];
 	//uchar3 *colour_array = new uchar3[rayparams.m_width*rayparams.m_height];
-	cv::Mat colour_array(cv::Size(rayparams.m_width,rayparams.m_height), CV_8UC3);
+	cv::Mat colour_array(cv::Size(rays.getRayCastParams().m_width,rays.getRayCastParams().m_height), CV_8UC3);

 	float bounce = 0.0;
 	int bounce_dir = 1;
@@ -414,16 +413,16 @@ static void run() {
 		PointCloud<PointXYZRGB>::Ptr cloud(new PointCloud<PointXYZRGB>);

 		// TODO (Nick) Should be able to only partially clear...
-		LOG(INFO) << "RESET";
+		//LOG(INFO) << "RESET";
 		//scene.reset();
-		//scene.nextFrame();
+		scene.nextFrame();
 		
 		for (size_t i = 0; i < inputs.size(); i++) {
 			//if (i == 1) continue;
 			//Display &display = displays[i];
 			RGBDSource *input = inputs[i].source;
 			Mat rgb, depth;
-			LOG(INFO) << "GetRGB";
+			//LOG(INFO) << "GetRGB";
 			input->getRGBD(rgb,depth);
 			
 			//if (!display.active()) continue;
@@ -443,12 +442,12 @@ static void run() {

 			inputs[i].params.flags = frameCount;

-			LOG(INFO) << "Upload params";
+			//LOG(INFO) << "Upload params";
 			inputs[i].gpu.updateParams(inputs[i].params);
-			LOG(INFO) << "Upload data";
+			//LOG(INFO) << "Upload data";
 			inputs[i].gpu.updateData(depth, rgba);

-			LOG(INFO) << "Scene integration: " << i;
+			//LOG(INFO) << "Scene integration: " << i;
 			scene.integrate(inputs[i].source->getPose(), inputs[i].gpu, inputs[i].params, nullptr);
 			//LOG(INFO) << "Scene integration complete";
 		}
@@ -474,11 +473,11 @@ static void run() {

 		//viewPose = inputs[0].source->getPose();

-		LOG(INFO) << "Render vertex data";
+		//LOG(INFO) << "Render vertex data";
 		rays.render(scene.getHashData(), scene.getHashParams(), inputs[0].gpu, viewPose);
-		LOG(INFO) << "Download points";
+		//LOG(INFO) << "Download points";
 		
-		rays.getRayCastData().download(point_array, (uchar3*)colour_array.data, rayparams);
+		rays.getRayCastData().download(point_array, (uchar3*)colour_array.data, rays.getRayCastParams());

 		int pc = 0;


--- a/applications/reconstruct/src/ray_cast_sdf.cpp
+++ b/applications/reconstruct/src/ray_cast_sdf.cpp
@@ -20,6 +20,9 @@ extern "C" void resetRayIntervalSplatCUDA(RayCastData& data, const RayCastParams
 extern "C" void rayIntervalSplatCUDA(const ftl::voxhash::HashData& hashData, const DepthCameraData& cameraData,
 								 const RayCastData &rayCastData, const RayCastParams &rayCastParams);

+extern "C" void nickRenderCUDA(const ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const RayCastData &rayCastData, const DepthCameraData &cameraData, const RayCastParams &params);
+
+

 void CUDARayCastSDF::create(const RayCastParams& params)
 {
@@ -40,7 +43,8 @@ void CUDARayCastSDF::render(const ftl::voxhash::HashData& hashData, const ftl::v
 	//m_data.d_rayIntervalSplatMinArray = m_rayIntervalSplatting.mapMinToCuda();
 	//m_data.d_rayIntervalSplatMaxArray = m_rayIntervalSplatting.mapMaxToCuda();

-	renderCS(hashData, m_data, cameraData, m_params);
+	if (hash_render_) nickRenderCUDA(hashData, hashParams, m_data, cameraData, m_params);
+	else renderCS(hashData, m_data, cameraData, m_params);

 	//convertToCameraSpace(cameraData);
 	if (!m_params.m_useGradients)

--- a/applications/reconstruct/src/ray_cast_sdf.cu
+++ b/applications/reconstruct/src/ray_cast_sdf.cu
@@ -74,6 +74,278 @@ extern "C" void renderCS(const ftl::voxhash::HashData& hashData, const RayCastDa
 #endif
 }

+////////////////////////////////////////////////////////////////////////////////
+//  Nicks render approach
+////////////////////////////////////////////////////////////////////////////////
+
+__global__ void clearDepthKernel(ftl::voxhash::HashData hashData, RayCastData rayCastData, DepthCameraData cameraData) 
+{
+	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
+	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	const RayCastParams& rayCastParams = c_rayCastParams;
+
+	if (x < rayCastParams.m_width && y < rayCastParams.m_height) {
+		rayCastData.d_depth[y*rayCastParams.m_width+x] = PINF;
+		rayCastData.d_colors[y*rayCastParams.m_width+x] = make_uchar3(0,0,0);
+	}
+}
+
+#define SDF_BLOCK_SIZE_PAD 9
+#define SDF_BLOCK_BUFFER 1024  // > 9x9x9
+#define SDF_DX 1
+#define SDF_DY SDF_BLOCK_SIZE_PAD
+#define SDF_DZ (SDF_BLOCK_SIZE_PAD*SDF_BLOCK_SIZE_PAD)
+
+__device__
+float frac(float val) {
+	return (val - floorf(val));
+}
+__device__
+float3 frac(const float3& val) {
+		return make_float3(frac(val.x), frac(val.y), frac(val.z));
+}
+
+__host__ size_t nickSharedMem() {
+	return sizeof(float)*SDF_BLOCK_BUFFER +
+		sizeof(uchar)*SDF_BLOCK_BUFFER +
+		sizeof(float)*SDF_BLOCK_BUFFER +
+		sizeof(float3)*SDF_BLOCK_BUFFER;
+}
+
+/*__device__ void loadVoxel(const ftl::voxhash::HashData &hash, const int3 &vox, float *sdf, uint *weight, float3 *colour) {
+	ftl::voxhash::Voxel &v = hashData.getVoxel(vox);
+	*sdf = v.sdf;
+	*weight = v.weight;
+	*colour = v.color;
+}*/
+
+//! computes the (local) virtual voxel pos of an index; idx in [0;511]
+__device__ 
+int3 pdelinVoxelIndex(uint idx)	{
+	int x = idx % SDF_BLOCK_SIZE_PAD;
+	int y = (idx % (SDF_BLOCK_SIZE_PAD * SDF_BLOCK_SIZE_PAD)) / SDF_BLOCK_SIZE_PAD;
+	int z = idx / (SDF_BLOCK_SIZE_PAD * SDF_BLOCK_SIZE_PAD);	
+	return make_int3(x,y,z);
+}
+
+//! computes the linearized index of a local virtual voxel pos; pos in [0;7]^3
+__device__ 
+uint plinVoxelPos(const int3& virtualVoxelPos) {
+	return  
+		virtualVoxelPos.z * SDF_BLOCK_SIZE_PAD * SDF_BLOCK_SIZE_PAD +
+		virtualVoxelPos.y * SDF_BLOCK_SIZE_PAD +
+		virtualVoxelPos.x;
+}
+
+__device__  
+void deleteVoxel(ftl::voxhash::Voxel& v) {
+	v.color = make_uchar3(0,0,0);
+	v.weight = 0;
+	v.sdf = PINF;
+}
+
+__device__ inline int3 blockDelinear(const int3 &base, uint i) {
+	return make_int3(base.x + (i & 0x1), base.y + (i & 0x2), base.z + (i & 0x4));
+}
+
+__device__ inline uint blockLinear(int x, int y, int z) {
+	return x + (y << 1) + (z << 2);
+}
+
+__device__ inline void trilinearInterp(const ftl::voxhash::HashData &hashData, const ftl::voxhash::Voxel *voxels, const uint *ix, const float3 &pos, float &depth, uchar3 &colour) {
+	float3 colorFloat = make_float3(0.0f, 0.0f, 0.0f);
+	const float3 weight = frac(hashData.worldToVirtualVoxelPosFloat(pos));  // Should be world position of ray, not voxel??
+	float dist = 0.0f;
+	dist+= (1.0f-weight.x)*(1.0f-weight.y)*(1.0f-weight.z)*voxels[ix[0]].sdf; colorFloat+= (1.0f-weight.x)*(1.0f-weight.y)*(1.0f-weight.z)*make_float3(voxels[ix[0]].color);
+	dist+=	   weight.x *(1.0f-weight.y)*(1.0f-weight.z)*voxels[ix[1]].sdf; colorFloat+=	   weight.x *(1.0f-weight.y)*(1.0f-weight.z)*make_float3(voxels[ix[1]].color);
+	dist+= (1.0f-weight.x)*	   weight.y *(1.0f-weight.z)*voxels[ix[2]].sdf; colorFloat+= (1.0f-weight.x)*	   weight.y *(1.0f-weight.z)*make_float3(voxels[ix[2]].color);
+	dist+= (1.0f-weight.x)*(1.0f-weight.y)*	   weight.z *voxels[ix[3]].sdf; colorFloat+= (1.0f-weight.x)*(1.0f-weight.y)*	   weight.z *make_float3(voxels[ix[3]].color);
+	dist+=	   weight.x *	   weight.y *(1.0f-weight.z)*voxels[ix[4]].sdf; colorFloat+=	   weight.x *	   weight.y *(1.0f-weight.z)*make_float3(voxels[ix[4]].color);
+	dist+= (1.0f-weight.x)*	   weight.y *	   weight.z *voxels[ix[5]].sdf; colorFloat+= (1.0f-weight.x)*	   weight.y *	   weight.z *make_float3(voxels[ix[5]].color);
+	dist+=	   weight.x *(1.0f-weight.y)*	   weight.z *voxels[ix[6]].sdf; colorFloat+=	   weight.x *(1.0f-weight.y)*	   weight.z *make_float3(voxels[ix[6]].color);
+	dist+=	   weight.x *	   weight.y *	   weight.z *voxels[ix[7]].sdf; colorFloat+=	   weight.x *	   weight.y *	   weight.z *make_float3(voxels[ix[7]].color);
+	depth = dist;
+	colour = make_uchar3(colorFloat);
+}
+
+__global__ void nickRenderKernel(ftl::voxhash::HashData hashData, RayCastData rayCastData, DepthCameraData cameraData, RayCastParams params) {
+	// TODO(Nick) Reduce bank conflicts by aligning these
+	__shared__ ftl::voxhash::Voxel voxels[SDF_BLOCK_BUFFER];
+	__shared__ ftl::voxhash::HashEntry blocks[8];
+
+	const uint i = threadIdx.x;	//inside of an SDF block
+
+	//TODO (Nick) Either don't use compactified or re-run compacitification using render cam frustrum
+	if (i == 0) blocks[0] = hashData.d_hashCompactified[blockIdx.x];
+	if (i >= 1 && i <= 7) blocks[i] = hashData.getHashEntryForSDFBlockPos(blockDelinear(blocks[0].pos, i));
+
+	// Make sure all hash entries are cached
+	__syncthreads();
+
+	const int3 pi_base = hashData.SDFBlockToVirtualVoxelPos(blocks[0].pos);
+	const int3 vp = make_int3(hashData.delinearizeVoxelIndex(i));
+	const int3 pi = pi_base + vp;
+	const uint j = plinVoxelPos(vp);  // Padded linear index
+	const float3 worldPos = hashData.virtualVoxelPosToWorld(pi);
+
+	// Load distances and colours into shared memory + padding
+	const ftl::voxhash::Voxel &v = hashData.d_SDFBlocks[blocks[0].ptr + i];
+	voxels[j] = v;
+
+	// TODO (Nick) Load padding also
+	// Currently does not load corners and some threads load two or three
+	if (vp.x == 7) {
+		ftl::voxhash::Voxel &padVox = voxels[plinVoxelPos(make_int3(vp.x+1,vp.y,vp.z))];
+		const uint ii = hashData.linearizeVoxelPos(make_int3(0,vp.y,vp.z));
+		//padVox = hashData.getVoxel(make_int3(pi.x+1,pi.y,pi.z));
+		if (blocks[blockLinear(1,0,0)].ptr != ftl::voxhash::FREE_ENTRY) padVox = hashData.d_SDFBlocks[blocks[blockLinear(1,0,0)].ptr + ii];
+		else deleteVoxel(padVox);
+	}
+	if (vp.y == 7) {
+		ftl::voxhash::Voxel &padVox = voxels[plinVoxelPos(make_int3(vp.x,vp.y+1,vp.z))];
+		const uint ii = hashData.linearizeVoxelPos(make_int3(vp.x,0,vp.z));
+		//padVox = hashData.getVoxel(make_int3(pi.x,pi.y+1,pi.z));
+		if (blocks[blockLinear(0,1,0)].ptr != ftl::voxhash::FREE_ENTRY) padVox = hashData.d_SDFBlocks[blocks[blockLinear(0,1,0)].ptr + ii];
+		else deleteVoxel(padVox);
+	}
+	if (vp.z == 7) {
+		ftl::voxhash::Voxel &padVox = voxels[plinVoxelPos(make_int3(vp.x,vp.y,vp.z+1))];
+		const uint ii = hashData.linearizeVoxelPos(make_int3(vp.x,vp.y,0));
+		//padVox = hashData.getVoxel(make_int3(pi.x,pi.y,pi.z+1));
+		if (blocks[blockLinear(0,0,1)].ptr != ftl::voxhash::FREE_ENTRY) padVox = hashData.d_SDFBlocks[blocks[blockLinear(0,0,1)].ptr + ii];
+		else deleteVoxel(padVox);
+	}
+
+	// Indexes of the 8 neighbor voxels in one direction
+	const uint ix[8] = {
+		j, j+SDF_DX, j+SDF_DY, j+SDF_DZ, j+SDF_DX+SDF_DY, j+SDF_DY+SDF_DZ,
+		j+SDF_DX+SDF_DZ, j+SDF_DX+SDF_DY+SDF_DZ
+	};
+
+	__syncthreads();
+
+	// If any weight is 0, skip this voxel
+	const bool missweight = voxels[ix[0]].weight == 0 || voxels[ix[1]].weight == 0 || voxels[ix[2]].weight == 0 ||
+			voxels[ix[3]].weight == 0 || voxels[ix[4]].weight == 0 || voxels[ix[5]].weight == 0 ||
+			voxels[ix[6]].weight == 0 || voxels[ix[7]].weight == 0;
+	if (missweight) return;
+
+	// Trilinear Interpolation (simple and fast)
+	/*float3 colorFloat = make_float3(0.0f, 0.0f, 0.0f);
+	const float3 weight = frac(hashData.worldToVirtualVoxelPosFloat(worldPos));  // Should be world position of ray, not voxel??
+	float dist = 0.0f;
+	dist+= (1.0f-weight.x)*(1.0f-weight.y)*(1.0f-weight.z)*voxels[ix[0]].sdf; colorFloat+= (1.0f-weight.x)*(1.0f-weight.y)*(1.0f-weight.z)*make_float3(voxels[ix[0]].color);
+	dist+=	   weight.x *(1.0f-weight.y)*(1.0f-weight.z)*voxels[ix[1]].sdf; colorFloat+=	   weight.x *(1.0f-weight.y)*(1.0f-weight.z)*make_float3(voxels[ix[1]].color);
+	dist+= (1.0f-weight.x)*	   weight.y *(1.0f-weight.z)*voxels[ix[2]].sdf; colorFloat+= (1.0f-weight.x)*	   weight.y *(1.0f-weight.z)*make_float3(voxels[ix[2]].color);
+	dist+= (1.0f-weight.x)*(1.0f-weight.y)*	   weight.z *voxels[ix[3]].sdf; colorFloat+= (1.0f-weight.x)*(1.0f-weight.y)*	   weight.z *make_float3(voxels[ix[3]].color);
+	dist+=	   weight.x *	   weight.y *(1.0f-weight.z)*voxels[ix[4]].sdf; colorFloat+=	   weight.x *	   weight.y *(1.0f-weight.z)*make_float3(voxels[ix[4]].color);
+	dist+= (1.0f-weight.x)*	   weight.y *	   weight.z *voxels[ix[5]].sdf; colorFloat+= (1.0f-weight.x)*	   weight.y *	   weight.z *make_float3(voxels[ix[5]].color);
+	dist+=	   weight.x *(1.0f-weight.y)*	   weight.z *voxels[ix[6]].sdf; colorFloat+=	   weight.x *(1.0f-weight.y)*	   weight.z *make_float3(voxels[ix[6]].color);
+	dist+=	   weight.x *	   weight.y *	   weight.z *voxels[ix[7]].sdf; colorFloat+=	   weight.x *	   weight.y *	   weight.z *make_float3(voxels[ix[7]].color);
+
+	// Must finish using colours before updating colours
+	__syncthreads();
+
+	//voxels[j].color = make_uchar3(colorFloat);
+	//voxels[j].sdf = dist;
+
+	// What happens if fitlered voxel is put back?
+	//hashData.d_SDFBlocks[blocks[0].ptr + i] = voxels[j];
+
+	//return;*/
+
+	bool is_surface = false;
+	// Identify surfaces through sign change. Since we only check in one direction
+	// it is fine to check for any sign change?
+#pragma unroll
+	for (int u=0; u<=1; u++) {
+		for (int v=0; v<=1; v++) {
+			for (int w=0; w<=1; w++) {
+				const int3 uvi = make_int3(vp.x+u,vp.y+v,vp.z+w);
+
+				// Skip these cases since we didn't load voxels properly
+				if (uvi.x == 8 && uvi.y == 8 || uvi.x == 8 && uvi.z == 8 || uvi.y == 8 && uvi.z == 8) continue;
+
+				if (signbit(voxels[j].sdf) != signbit(voxels[plinVoxelPos(uvi)].sdf)) {
+					is_surface = true;
+					break;
+				}
+			}
+		}
+	}
+
+	// Only for surface voxels, work out screen coordinates
+	// TODO Could adjust weights, strengthen on surface, weaken otherwise??
+	if (!is_surface) return;
+
+	const float3 camPos = params.m_viewMatrix * worldPos;
+	const float2 screenPosf = cameraData.cameraToKinectScreenFloat(camPos);
+	const uint2 screenPos = make_uint2(make_int2(screenPosf)); //  + make_float2(0.5f, 0.5f)
+
+	/*if (screenPos.x < params.m_width && screenPos.y < params.m_height && 
+			rayCastData.d_depth[(screenPos.y)*params.m_width+screenPos.x] > camPos.z) {
+		rayCastData.d_depth[(screenPos.y)*params.m_width+screenPos.x] = camPos.z;
+		rayCastData.d_colors[(screenPos.y)*params.m_width+screenPos.x] = voxels[j].color;
+	}*/
+
+	//return;
+
+	// For this voxel in hash, get its screen position and check it is on screen
+	// Convert depth map to int by x1000 and use atomicMin
+	//const int pixsize = static_cast<int>(c_hashParams.m_virtualVoxelSize*c_depthCameraParams.fx/camPos.z)+1;
+	int pixsizeX = 10;  // Max voxel pixels
+	int pixsizeY = 10;
+
+	for (int y=0; y<pixsizeY; y++) {
+		for (int x=0; x<pixsizeX; x++) {
+			// TODO(Nick) Within a window, check pixels that have same voxel id
+			// Then trilinear interpolate between current voxel and neighbors.
+			const float3 pixelWorldPos = params.m_viewMatrixInverse * cameraData.kinectDepthToSkeleton(screenPos.x+x,screenPos.y+y, camPos.z);
+			const float3 posInVoxel = (pixelWorldPos - worldPos) / make_float3(c_hashParams.m_virtualVoxelSize,c_hashParams.m_virtualVoxelSize,c_hashParams.m_virtualVoxelSize);
+
+			if (posInVoxel.x >= 1.0f || posInVoxel.y >= 1.0f || posInVoxel.z >= 1.0f) {
+				pixsizeX = x;
+				continue;
+			}
+
+			/*float depth;
+			uchar3 col;
+			trilinearInterp(hashData, voxels, ix, pixelWorldPos, depth, col);*/
+
+			// TODO (Nick) MAKE THIS ATOMIC!!!!
+			if (screenPos.x+x < params.m_width && screenPos.y+y < params.m_height && 
+					rayCastData.d_depth[(screenPos.y+y)*params.m_width+screenPos.x+x] > camPos.z) {
+				rayCastData.d_depth[(screenPos.y+y)*params.m_width+screenPos.x+x] = camPos.z;
+				rayCastData.d_colors[(screenPos.y+y)*params.m_width+screenPos.x+x] = voxels[j].color;
+			}
+		}
+		if (pixsizeX == 0) break;
+	}
+}
+
+extern "C" void nickRenderCUDA(const ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const RayCastData &rayCastData, const DepthCameraData &cameraData, const RayCastParams &params)
+{
+	const dim3 clear_gridSize((params.m_width + T_PER_BLOCK - 1)/T_PER_BLOCK, (params.m_height + T_PER_BLOCK - 1)/T_PER_BLOCK);
+	const dim3 clear_blockSize(T_PER_BLOCK, T_PER_BLOCK);
+
+	clearDepthKernel<<<clear_gridSize, clear_blockSize>>>(hashData, rayCastData, cameraData);
+
+	const unsigned int threadsPerBlock = SDF_BLOCK_SIZE*SDF_BLOCK_SIZE*SDF_BLOCK_SIZE;
+	const dim3 gridSize(hashParams.m_numOccupiedBlocks, 1);
+	const dim3 blockSize(threadsPerBlock, 1);
+
+	if (hashParams.m_numOccupiedBlocks > 0) {	//this guard is important if there is no depth in the current frame (i.e., no blocks were allocated)
+		nickRenderKernel << <gridSize, blockSize >> >(hashData, rayCastData, cameraData, params);
+	}
+
+	cudaSafeCall( cudaGetLastError() );
+	#ifdef _DEBUG
+	cudaSafeCall(cudaDeviceSynchronize());
+	cutilCheckMsg(__FUNCTION__);
+	#endif
+}
+

 /////////////////////////////////////////////////////////////////////////
 // ray interval splatting

--- a/applications/reconstruct/src/scene_rep_hash_sdf.cu
+++ b/applications/reconstruct/src/scene_rep_hash_sdf.cu
@@ -540,7 +540,7 @@ __global__ void integrateDepthMapKernel(HashData hashData, DepthCameraData camer
 				uint idx = entry.ptr + i;
 				hashData.d_SDFBlocks[idx].weight = 0;
 				//hashData.d_SDFBlocks[idx].sdf = PINF;
-				hashData.d_SDFBlocks[idx].color = make_uchar3(255,0,0);
+				hashData.d_SDFBlocks[idx].color = make_uchar3(0,0,0);
 			}
 			else if (sdf > -truncation) // && depthZeroOne >= 0.0f && depthZeroOne <= 1.0f) //check if in truncation range should already be made in depth map computation
 			{