From a414414f2cd0ffd6a24c994f1d26a61bdba15760 Mon Sep 17 00:00:00 2001 From: Nicolas Pope <nwpope@utu.fi> Date: Sun, 21 Jul 2019 18:49:49 +0300 Subject: [PATCH] Working MLS with splat bug fix --- .../reconstruct/include/ftl/voxel_hash.hpp | 2 +- applications/reconstruct/src/dibr.cu | 12 +++++- applications/reconstruct/src/integrators.cu | 10 ++--- applications/reconstruct/src/splat_render.cu | 39 ++++++++++++------- 4 files changed, 40 insertions(+), 23 deletions(-) diff --git a/applications/reconstruct/include/ftl/voxel_hash.hpp b/applications/reconstruct/include/ftl/voxel_hash.hpp index 40b981740..acad30d41 100644 --- a/applications/reconstruct/include/ftl/voxel_hash.hpp +++ b/applications/reconstruct/include/ftl/voxel_hash.hpp @@ -73,7 +73,7 @@ struct __align__(16) HashEntry { HashEntryHead head; uint voxels[16]; // 512 bits, 1 bit per voxel - uint validity[16]; // Is the voxel valid, 512 bit + //uint validity[16]; // Is the voxel valid, 512 bit /*__device__ void operator=(const struct HashEntry& e) { ((long long*)this)[0] = ((const long long*)&e)[0]; diff --git a/applications/reconstruct/src/dibr.cu b/applications/reconstruct/src/dibr.cu index 575e3c45f..d647e8f31 100644 --- a/applications/reconstruct/src/dibr.cu +++ b/applications/reconstruct/src/dibr.cu @@ -111,8 +111,16 @@ void ftl::cuda::dibr(const TextureObject<float> &depth_in, const dim3 gridSize((depth_in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); - clearColourKernel<<<gridSize, blockSize, 0, stream>>>(colour_out); + clearColourKernel<<<gridSize, blockSize, 0, stream>>>(colour_out); + +#ifdef _DEBUG + cudaSafeCall(cudaDeviceSynchronize()); +#endif dibr_kernel_rev<<<gridSize, blockSize, 0, stream>>>(depth_in, colour_out, numcams, params); - cudaSafeCall( cudaGetLastError() ); + cudaSafeCall( cudaGetLastError() ); + +#ifdef _DEBUG + cudaSafeCall(cudaDeviceSynchronize()); +#endif } diff --git a/applications/reconstruct/src/integrators.cu b/applications/reconstruct/src/integrators.cu index 9abb28c84..f907b6024 100644 --- a/applications/reconstruct/src/integrators.cu +++ b/applications/reconstruct/src/integrators.cu @@ -189,7 +189,7 @@ __global__ void integrateDepthMapsKernel(HashData hashData, HashParams hashParam __global__ void integrateMLSKernel(HashData hashData, HashParams hashParams, int numcams) { __shared__ uint voxels[16]; - __shared__ uint valid[16]; + //__shared__ uint valid[16]; const uint i = threadIdx.x; //inside of an SDF block const int3 po = make_int3(hashData.delinearizeVoxelIndex(i)); @@ -252,13 +252,13 @@ __global__ void integrateMLSKernel(HashData hashData, HashParams hashParams, int // Calculate voxel sign values across a warp int warpNum = i / WARP_SIZE; - uint solid_ballot = __ballot_sync(0xFFFFFFFF, (sdf >= 0.0f) ? 0 : 1); - uint valid_ballot = __ballot_sync(0xFFFFFFFF, (weights >= 2.0f) ? 1 : 0); + uint solid_ballot = __ballot_sync(0xFFFFFFFF, (fabs(sdf) < 0.04f && weights > 10.0f) ? 1 : 0); + //uint valid_ballot = __ballot_sync(0xFFFFFFFF, (weights >= 1.0f) ? 1 : 0); // Aggregate each warp result into voxel mask if (i % WARP_SIZE == 0) { voxels[warpNum] = solid_ballot; - valid[warpNum] = valid_ballot; + //valid[warpNum] = valid_ballot; } __syncthreads(); @@ -268,7 +268,7 @@ __global__ void integrateMLSKernel(HashData hashData, HashParams hashParams, int if (i < 16) { const uint v = voxels[i]; hashData.d_hashCompactified[bi]->voxels[i] = v; - hashData.d_hashCompactified[bi]->validity[i] = valid[i]; + //hashData.d_hashCompactified[bi]->validity[i] = valid[i]; const uint mask = 0x0000FFFF; uint b1 = __ballot_sync(mask, v == 0xFFFFFFFF); uint b2 = __ballot_sync(mask, v == 0); diff --git a/applications/reconstruct/src/splat_render.cu b/applications/reconstruct/src/splat_render.cu index 58e4bbf2a..9fcb55de0 100644 --- a/applications/reconstruct/src/splat_render.cu +++ b/applications/reconstruct/src/splat_render.cu @@ -79,7 +79,7 @@ __device__ inline bool getVoxel(uint *voxels, int ix) { __global__ void isosurface_image_kernel(ftl::voxhash::HashData hashData, TextureObject<uint> depth, SplatParams params) { // TODO:(Nick) Reduce bank conflicts by aligning these __shared__ uint voxels[16]; - __shared__ uint valid[16]; + //__shared__ uint valid[16]; __shared__ ftl::voxhash::HashEntryHead block; // Stride over all allocated blocks @@ -91,7 +91,7 @@ __global__ void isosurface_image_kernel(ftl::voxhash::HashData hashData, Texture if (i == 0) block = hashData.d_hashCompactified[bi]->head; if (i < 16) { voxels[i] = hashData.d_hashCompactified[bi]->voxels[i]; - valid[i] = hashData.d_hashCompactified[bi]->validity[i]; + //valid[i] = hashData.d_hashCompactified[bi]->validity[i]; } // Make sure all hash entries are cached @@ -119,10 +119,10 @@ __global__ void isosurface_image_kernel(ftl::voxhash::HashData hashData, Texture int edgeZ = (vp.z == 0 ) ? 1 : 0; uchar4 color = make_uchar4(255,0,0,255); - bool is_surface = false; //((params.m_flags & ftl::render::kShowBlockBorders) && edgeX + edgeY + edgeZ >= 2); + bool is_surface = v; //((params.m_flags & ftl::render::kShowBlockBorders) && edgeX + edgeY + edgeZ >= 2); //if (is_surface) color = make_uchar4(255,(vp.x == 0 && vp.y == 0 && vp.z == 0) ? 255 : 0,0,255); - if (!getVoxel(valid, i)) continue; + //if (!v) continue; // !getVoxel(valid, i) //if (vp.z == 7) voxels[j].color = make_uchar3(0,255,(voxels[j].sdf < 0.0f) ? 255 : 0); @@ -130,7 +130,7 @@ __global__ void isosurface_image_kernel(ftl::voxhash::HashData hashData, Texture // it is fine to check for any sign change? -#pragma unroll +/*#pragma unroll for (int u=0; u<=1; u++) { for (int v=0; v<=1; v++) { for (int w=0; w<=1; w++) { @@ -140,13 +140,13 @@ __global__ void isosurface_image_kernel(ftl::voxhash::HashData hashData, Texture //if (uvi.x == 8 || uvi.z == 8 || uvi.y == 8) continue; const bool vox = getVoxel(voxels, hashData.linearizeVoxelPos(uvi)); - if (getVoxel(valid, hashData.linearizeVoxelPos(uvi))) { + if (vox) { //getVoxel(valid, hashData.linearizeVoxelPos(uvi))) { is_surface = true; // Should break but is slower? } } } - } + }*/ // Only for surface voxels, work out screen coordinates if (!is_surface) continue; @@ -186,7 +186,9 @@ void ftl::cuda::isosurface_point_image(const ftl::voxhash::HashData& hashData, clearDepthKernel<<<clear_gridSize, clear_blockSize, 0, stream>>>(hashData, depth); - //cudaSafeCall( cudaDeviceSynchronize() ); +#ifdef _DEBUG + cudaSafeCall(cudaDeviceSynchronize()); +#endif const unsigned int threadsPerBlock = SDF_BLOCK_SIZE*SDF_BLOCK_SIZE*SDF_BLOCK_SIZE; const dim3 gridSize(NUM_CUDA_BLOCKS, 1); @@ -195,7 +197,10 @@ void ftl::cuda::isosurface_point_image(const ftl::voxhash::HashData& hashData, isosurface_image_kernel<<<gridSize, blockSize, 0, stream>>>(hashData, depth, params); cudaSafeCall( cudaGetLastError() ); - //cudaSafeCall( cudaDeviceSynchronize() ); + +#ifdef _DEBUG + cudaSafeCall(cudaDeviceSynchronize()); +#endif } // ---- Pass 2: Expand the point splats ---------------------------------------- @@ -203,7 +208,7 @@ void ftl::cuda::isosurface_point_image(const ftl::voxhash::HashData& hashData, #define SPLAT_RADIUS 7 #define SPLAT_BOUNDS (2*SPLAT_RADIUS+T_PER_BLOCK+1) #define SPLAT_BUFFER_SIZE (SPLAT_BOUNDS*SPLAT_BOUNDS) -#define MAX_VALID 8 +#define MAX_VALID 100 __device__ float distance2(float3 a, float3 b) { const float x = a.x-b.x; @@ -244,7 +249,7 @@ __global__ void splatting_kernel( __syncthreads(); - if (x >= depth_in.width() && y >= depth_in.height()) return; + if (x >= depth_in.width() || y >= depth_in.height()) return; const float voxelSquared = params.voxelSize*params.voxelSize; float mindepth = 1000.0f; @@ -270,7 +275,7 @@ __global__ void splatting_kernel( if (dist < voxelSquared) { // Valid so check for minimum //validPos[validix] = pos; - validIndices[validix++] = idx; + //validIndices[validix++] = idx; if (d < mindepth) { mindepth = d; minidx = idx; @@ -291,8 +296,8 @@ __global__ void splatting_kernel( float contrib = 0.0f; float3 pos = params.camera.kinectDepthToSkeleton(x, y, mindepth); // TODO:(Nick) Mindepth assumption is poor choice. - for (int j=0; j<validix; ++j) { - const int idx = validIndices[j]; + //for (int j=0; j<validix; ++j) { + const int idx = minidx; //validIndices[j]; float3 posp = positions[idx]; //float3 pos = params.camera.kinectDepthToSkeleton(x, y, posp.z); float3 delta = (posp - pos) / 2*params.voxelSize; @@ -312,7 +317,7 @@ __global__ void splatting_kernel( contrib += c; depth += posp.z * c; } - } + //} // Normalise //colour.x /= contrib; @@ -333,4 +338,8 @@ void ftl::cuda::splat_points(const TextureObject<uint> &depth_in, splatting_kernel<<<gridSize, blockSize, 0, stream>>>(depth_in, depth_out, params); cudaSafeCall( cudaGetLastError() ); + +#ifdef _DEBUG + cudaSafeCall(cudaDeviceSynchronize()); +#endif } -- GitLab