From a414414f2cd0ffd6a24c994f1d26a61bdba15760 Mon Sep 17 00:00:00 2001
From: Nicolas Pope <nwpope@utu.fi>
Date: Sun, 21 Jul 2019 18:49:49 +0300
Subject: [PATCH] Working MLS with splat bug fix

---
 .../reconstruct/include/ftl/voxel_hash.hpp    |  2 +-
 applications/reconstruct/src/dibr.cu          | 12 +++++-
 applications/reconstruct/src/integrators.cu   | 10 ++---
 applications/reconstruct/src/splat_render.cu  | 39 ++++++++++++-------
 4 files changed, 40 insertions(+), 23 deletions(-)

diff --git a/applications/reconstruct/include/ftl/voxel_hash.hpp b/applications/reconstruct/include/ftl/voxel_hash.hpp
index 40b981740..acad30d41 100644
--- a/applications/reconstruct/include/ftl/voxel_hash.hpp
+++ b/applications/reconstruct/include/ftl/voxel_hash.hpp
@@ -73,7 +73,7 @@ struct __align__(16) HashEntry
 {
 	HashEntryHead head;
 	uint voxels[16];  // 512 bits, 1 bit per voxel
-	uint validity[16];  // Is the voxel valid, 512 bit
+	//uint validity[16];  // Is the voxel valid, 512 bit
 	
 	/*__device__ void operator=(const struct HashEntry& e) {
 		((long long*)this)[0] = ((const long long*)&e)[0];
diff --git a/applications/reconstruct/src/dibr.cu b/applications/reconstruct/src/dibr.cu
index 575e3c45f..d647e8f31 100644
--- a/applications/reconstruct/src/dibr.cu
+++ b/applications/reconstruct/src/dibr.cu
@@ -111,8 +111,16 @@ void ftl::cuda::dibr(const TextureObject<float> &depth_in,
     const dim3 gridSize((depth_in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
     const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
 
-    clearColourKernel<<<gridSize, blockSize, 0, stream>>>(colour_out);
+	clearColourKernel<<<gridSize, blockSize, 0, stream>>>(colour_out);
+	
+#ifdef _DEBUG
+	cudaSafeCall(cudaDeviceSynchronize());
+#endif
 
     dibr_kernel_rev<<<gridSize, blockSize, 0, stream>>>(depth_in, colour_out, numcams, params);
-    cudaSafeCall( cudaGetLastError() );
+	cudaSafeCall( cudaGetLastError() );
+	
+#ifdef _DEBUG
+	cudaSafeCall(cudaDeviceSynchronize());
+#endif
 }
diff --git a/applications/reconstruct/src/integrators.cu b/applications/reconstruct/src/integrators.cu
index 9abb28c84..f907b6024 100644
--- a/applications/reconstruct/src/integrators.cu
+++ b/applications/reconstruct/src/integrators.cu
@@ -189,7 +189,7 @@ __global__ void integrateDepthMapsKernel(HashData hashData, HashParams hashParam
 
 __global__ void integrateMLSKernel(HashData hashData, HashParams hashParams, int numcams) {
 	__shared__ uint voxels[16];
-	__shared__ uint valid[16];
+	//__shared__ uint valid[16];
 
 	const uint i = threadIdx.x;	//inside of an SDF block
 	const int3 po = make_int3(hashData.delinearizeVoxelIndex(i));
@@ -252,13 +252,13 @@ __global__ void integrateMLSKernel(HashData hashData, HashParams hashParams, int
 
 	// Calculate voxel sign values across a warp
 	int warpNum = i / WARP_SIZE;
-	uint solid_ballot = __ballot_sync(0xFFFFFFFF, (sdf >= 0.0f) ? 0 : 1);
-	uint valid_ballot = __ballot_sync(0xFFFFFFFF, (weights >= 2.0f) ? 1 : 0);
+	uint solid_ballot = __ballot_sync(0xFFFFFFFF, (fabs(sdf) < 0.04f && weights > 10.0f) ? 1 : 0);
+	//uint valid_ballot = __ballot_sync(0xFFFFFFFF, (weights >= 1.0f) ? 1 : 0);
 
 	// Aggregate each warp result into voxel mask
 	if (i % WARP_SIZE == 0) {
 		voxels[warpNum] = solid_ballot;
-		valid[warpNum] = valid_ballot;
+		//valid[warpNum] = valid_ballot;
 	}
 
 	__syncthreads();
@@ -268,7 +268,7 @@ __global__ void integrateMLSKernel(HashData hashData, HashParams hashParams, int
 	if (i < 16) {
 		const uint v = voxels[i];
 		hashData.d_hashCompactified[bi]->voxels[i] = v;
-		hashData.d_hashCompactified[bi]->validity[i] = valid[i];
+		//hashData.d_hashCompactified[bi]->validity[i] = valid[i];
 		const uint mask = 0x0000FFFF;
 		uint b1 = __ballot_sync(mask, v == 0xFFFFFFFF);
 		uint b2 = __ballot_sync(mask, v == 0);
diff --git a/applications/reconstruct/src/splat_render.cu b/applications/reconstruct/src/splat_render.cu
index 58e4bbf2a..9fcb55de0 100644
--- a/applications/reconstruct/src/splat_render.cu
+++ b/applications/reconstruct/src/splat_render.cu
@@ -79,7 +79,7 @@ __device__ inline bool getVoxel(uint *voxels, int ix) {
 __global__ void isosurface_image_kernel(ftl::voxhash::HashData hashData, TextureObject<uint> depth, SplatParams params) {
 	// TODO:(Nick) Reduce bank conflicts by aligning these
 	__shared__ uint voxels[16];
-	__shared__ uint valid[16];
+	//__shared__ uint valid[16];
 	__shared__ ftl::voxhash::HashEntryHead block;
 
 	// Stride over all allocated blocks
@@ -91,7 +91,7 @@ __global__ void isosurface_image_kernel(ftl::voxhash::HashData hashData, Texture
 	if (i == 0) block = hashData.d_hashCompactified[bi]->head;
 	if (i < 16) {
 		voxels[i] = hashData.d_hashCompactified[bi]->voxels[i];
-		valid[i] = hashData.d_hashCompactified[bi]->validity[i];
+		//valid[i] = hashData.d_hashCompactified[bi]->validity[i];
 	}
 
 	// Make sure all hash entries are cached
@@ -119,10 +119,10 @@ __global__ void isosurface_image_kernel(ftl::voxhash::HashData hashData, Texture
 	int edgeZ = (vp.z == 0 ) ? 1 : 0;
 
 	uchar4 color = make_uchar4(255,0,0,255);
-	bool is_surface = false; //((params.m_flags & ftl::render::kShowBlockBorders) && edgeX + edgeY + edgeZ >= 2);
+	bool is_surface = v; //((params.m_flags & ftl::render::kShowBlockBorders) && edgeX + edgeY + edgeZ >= 2);
 	//if (is_surface) color = make_uchar4(255,(vp.x == 0 && vp.y == 0 && vp.z == 0) ? 255 : 0,0,255);
 
-	if (!getVoxel(valid, i)) continue;
+	//if (!v) continue;  // !getVoxel(valid, i)
 
 	//if (vp.z == 7) voxels[j].color = make_uchar3(0,255,(voxels[j].sdf < 0.0f) ? 255 : 0);
 
@@ -130,7 +130,7 @@ __global__ void isosurface_image_kernel(ftl::voxhash::HashData hashData, Texture
 	// it is fine to check for any sign change?
 
 
-#pragma unroll
+/*#pragma unroll
 	for (int u=0; u<=1; u++) {
 		for (int v=0; v<=1; v++) {
 			for (int w=0; w<=1; w++) {
@@ -140,13 +140,13 @@ __global__ void isosurface_image_kernel(ftl::voxhash::HashData hashData, Texture
 				//if (uvi.x == 8 || uvi.z == 8 || uvi.y == 8) continue;
 
 				const bool vox = getVoxel(voxels, hashData.linearizeVoxelPos(uvi));
-				if (getVoxel(valid, hashData.linearizeVoxelPos(uvi))) {
+				if (vox) { //getVoxel(valid, hashData.linearizeVoxelPos(uvi))) {
 					is_surface = true;
 					// Should break but is slower?
 				}
 			}
 		}
-	}
+	}*/
 
 	// Only for surface voxels, work out screen coordinates
 	if (!is_surface) continue;
@@ -186,7 +186,9 @@ void ftl::cuda::isosurface_point_image(const ftl::voxhash::HashData& hashData,
 
 	clearDepthKernel<<<clear_gridSize, clear_blockSize, 0, stream>>>(hashData, depth);
 
-	//cudaSafeCall( cudaDeviceSynchronize() );
+#ifdef _DEBUG
+	cudaSafeCall(cudaDeviceSynchronize());
+#endif
 
 	const unsigned int threadsPerBlock = SDF_BLOCK_SIZE*SDF_BLOCK_SIZE*SDF_BLOCK_SIZE;
 	const dim3 gridSize(NUM_CUDA_BLOCKS, 1);
@@ -195,7 +197,10 @@ void ftl::cuda::isosurface_point_image(const ftl::voxhash::HashData& hashData,
 	isosurface_image_kernel<<<gridSize, blockSize, 0, stream>>>(hashData, depth, params);
 
 	cudaSafeCall( cudaGetLastError() );
-	//cudaSafeCall( cudaDeviceSynchronize() );
+
+#ifdef _DEBUG
+	cudaSafeCall(cudaDeviceSynchronize());
+#endif
 }
 
 // ---- Pass 2: Expand the point splats ----------------------------------------
@@ -203,7 +208,7 @@ void ftl::cuda::isosurface_point_image(const ftl::voxhash::HashData& hashData,
 #define SPLAT_RADIUS 7
 #define SPLAT_BOUNDS (2*SPLAT_RADIUS+T_PER_BLOCK+1)
 #define SPLAT_BUFFER_SIZE (SPLAT_BOUNDS*SPLAT_BOUNDS)
-#define MAX_VALID 8
+#define MAX_VALID 100
 
 __device__ float distance2(float3 a, float3 b) {
 	const float x = a.x-b.x;
@@ -244,7 +249,7 @@ __global__ void splatting_kernel(
 
 	__syncthreads();
 
-	if (x >= depth_in.width() && y >= depth_in.height()) return;
+	if (x >= depth_in.width() || y >= depth_in.height()) return;
 
 	const float voxelSquared = params.voxelSize*params.voxelSize;
 	float mindepth = 1000.0f;
@@ -270,7 +275,7 @@ __global__ void splatting_kernel(
 			if (dist < voxelSquared) {
 				// Valid so check for minimum
 				//validPos[validix] = pos;
-				validIndices[validix++] = idx;
+				//validIndices[validix++] = idx;
 				if (d < mindepth) {
 					mindepth = d;
 					minidx = idx;
@@ -291,8 +296,8 @@ __global__ void splatting_kernel(
 	float contrib = 0.0f;
 	float3 pos = params.camera.kinectDepthToSkeleton(x, y, mindepth);  // TODO:(Nick) Mindepth assumption is poor choice.
 
-	for (int j=0; j<validix; ++j) {
-		const int idx = validIndices[j];
+	//for (int j=0; j<validix; ++j) {
+		const int idx = minidx; //validIndices[j];
 		float3 posp = positions[idx];
 		//float3 pos = params.camera.kinectDepthToSkeleton(x, y, posp.z);
 		float3 delta = (posp - pos) / 2*params.voxelSize;
@@ -312,7 +317,7 @@ __global__ void splatting_kernel(
 			contrib += c;
 			depth += posp.z * c;
 		}
-	}
+	//}
 
 	// Normalise
 	//colour.x /= contrib;
@@ -333,4 +338,8 @@ void ftl::cuda::splat_points(const TextureObject<uint> &depth_in,
 
 	splatting_kernel<<<gridSize, blockSize, 0, stream>>>(depth_in, depth_out, params);
 	cudaSafeCall( cudaGetLastError() );
+
+#ifdef _DEBUG
+	cudaSafeCall(cudaDeviceSynchronize());
+#endif
 }
-- 
GitLab