From 579ef0d21d230b3b0468a1ccc7a9165a0fc3feb4 Mon Sep 17 00:00:00 2001 From: Nicolas Pope <nwpope@utu.fi> Date: Sat, 10 Aug 2019 09:49:07 +0300 Subject: [PATCH] WIP Switch to warp interval energy search --- applications/reconstruct/src/dibr.cu | 101 +++++++++++++++------------ 1 file changed, 57 insertions(+), 44 deletions(-) diff --git a/applications/reconstruct/src/dibr.cu b/applications/reconstruct/src/dibr.cu index 1e3b7e888..691ce7c57 100644 --- a/applications/reconstruct/src/dibr.cu +++ b/applications/reconstruct/src/dibr.cu @@ -13,7 +13,7 @@ #define WARP_SIZE 32 #define DEPTH_THRESHOLD 0.05f #define UPSAMPLE_MAX 60 -#define MAX_ITERATIONS 10 +#define MAX_ITERATIONS 32 #define SPATIAL_SMOOTHING 0.005f using ftl::cuda::TextureObject; @@ -377,9 +377,19 @@ __global__ void OLD_dibr_visibility_kernel(TextureObject<int> depth, int cam, Sp } } -#define NEIGHBOR_RADIUS_2 2 +#define NEIGHBOR_RADIUS_2 3 #define NEIGHBOR_WINDOW ((NEIGHBOR_RADIUS_2*2+1)*(NEIGHBOR_RADIUS_2*2+1)) -#define MAX_NEIGHBORS_2 10 +#define MAX_NEIGHBORS_2 32 + +#define FULL_MASK 0xffffffff + +__device__ inline float warpMaxEnergy(float energy) { + for (int i = WARP_SIZE/2; i > 0; i /= 2) { + const float other = __shfl_xor_sync(FULL_MASK, energy, i, WARP_SIZE); + energy = (energy > other) ? energy : other; + } + return energy; +} /* @@ -394,14 +404,15 @@ __global__ void OLD_dibr_visibility_kernel(TextureObject<int> depth, int cam, Sp __shared__ int minimum[2*T_PER_BLOCK]; __shared__ int maximum[2*T_PER_BLOCK]; __shared__ unsigned int nidx[2*T_PER_BLOCK]; + __shared__ float sampleenergy[2*T_PER_BLOCK][WARP_SIZE]; const int warp = threadIdx.x / WARP_SIZE + threadIdx.y*2; const int x = (blockIdx.x*blockDim.x + threadIdx.x) / WARP_SIZE; const int y = blockIdx.y*blockDim.y + threadIdx.y; - const float3 camPos = params.camera.kinectDepthToSkeleton(x,y, float(point_in.tex2D(x,y)) / 1000.0f); + //const float3 camPos = params.camera.kinectDepthToSkeleton(x,y, float(point_in.tex2D(x,y)) / 1000.0f); - const float r = 1.0f; //(camera.poseInverse * worldPos).z / camera.params.fx; + //const float r = 1.0f; //(camera.poseInverse * worldPos).z / camera.params.fx; // Get virtual camera ray for splat centre and backface cull if possible //const float3 rayOrigin = params.m_viewMatrixInverse * make_float3(0.0f,0.0f,0.0f); @@ -410,11 +421,11 @@ __global__ void OLD_dibr_visibility_kernel(TextureObject<int> depth, int cam, Sp // Find the virtual screen position of current point //const float3 camPos = params.m_viewMatrix * worldPos; - if (camPos.z < params.camera.m_sensorDepthWorldMin) return; - if (camPos.z > params.camera.m_sensorDepthWorldMax) return; - const uint2 screenPos = params.camera.cameraToKinectScreen(camPos); + //if (camPos.z < params.camera.m_sensorDepthWorldMin) return; + //if (camPos.z > params.camera.m_sensorDepthWorldMax) return; + //const uint2 screenPos = params.camera.cameraToKinectScreen(camPos); - const int upsample = 16; //min(UPSAMPLE_MAX, int((4.0f*r) * params.camera.fx / camPos.z)); + //const int upsample = 16; //min(UPSAMPLE_MAX, int((4.0f*r) * params.camera.fx / camPos.z)); // Not on screen so stop now... //if (screenPos.x + upsample < 0 || screenPos.y + upsample < 0 || @@ -436,10 +447,10 @@ __global__ void OLD_dibr_visibility_kernel(TextureObject<int> depth, int cam, Sp const int u = (i % (2*NEIGHBOR_RADIUS_2+1)) - NEIGHBOR_RADIUS_2; const int v = (i / (2*NEIGHBOR_RADIUS_2+1)) - NEIGHBOR_RADIUS_2; const float3 point = params.camera.kinectDepthToSkeleton(x+u, y+v, float(point_in.tex2D(x+u, y+v)) / 1000.0f); + const float3 camPos = params.camera.kinectDepthToSkeleton(x, y, point.z); // If it is close enough... - // TODO: We don't have camPos so distance if this pixel takes on same depth - if (length(point - camPos) <= SPATIAL_SMOOTHING) { + if (point.z > params.camera.m_sensorDepthWorldMin && point.z < params.camera.m_sensorDepthWorldMax && length(point - camPos) <= 0.02f) { // Append to neighbour list unsigned int idx = atomicInc(&nidx[warp], MAX_NEIGHBORS_2-1); neighborhood_cache[warp][idx] = point; @@ -450,48 +461,47 @@ __global__ void OLD_dibr_visibility_kernel(TextureObject<int> depth, int cam, Sp __syncwarp(); - const float interval = (float(maximum[warp])/1000.0f - float(minimum[warp]) / 1000.0f) / float(MAX_ITERATIONS); - //if (y == 200) printf("interval: %f\n", interval); + const float minDepth = float(minimum[warp])/1000.0f; + const float maxDepth = float(maximum[warp])/1000.0f; + const float interval = (maxDepth - minDepth) / float(MAX_ITERATIONS); + + if (minDepth >= params.camera.m_sensorDepthWorldMax) return; + if (maxDepth <= params.camera.m_sensorDepthWorldMin) return; + if (y == 200) printf("interval: %f\n", interval); + + //const uint2 screenPos = params.camera.cameraToKinectScreen(camPos); // Each thread in warp takes an upsample point and updates corresponding depth buffer. // TODO: Don't do this step, simply update the current pixel to either fill or replace existing value // use warp threads to do the iteration samples ... 32 samples per pixel. // could iterate each thread to perform more checks within likely range. - for (int i=lane; i<upsample*upsample; i+=WARP_SIZE) { - const float u = (i % upsample) - (upsample / 2); - const float v = (i / upsample) - (upsample / 2); + //for (int i=lane; i<upsample*upsample; i+=WARP_SIZE) { + // const float u = (i % upsample) - (upsample / 2); + // const float v = (i / upsample) - (upsample / 2); // Make an initial estimate of the points location // Use minimum z as first estimate - float3 nearest = params.camera.kinectDepthToSkeleton(screenPos.x+u,screenPos.y+v,float(minimum[warp])/1000.0f); - float lastenergy = 0.0f; - float lastdepth = nearest.z; + float maxenergy = 0.0f; + float bestdepth = -1.0f; // Search for best or threshold energy - for (int k=0; k<MAX_ITERATIONS; ++k) { + for (int k=lane; k<MAX_ITERATIONS; k+=WARP_SIZE) { + const float3 nearest = params.camera.kinectDepthToSkeleton(x,y,minDepth+float(k)*interval); + const float myenergy = ftl::cuda::mls_point_energy<MAX_NEIGHBORS_2>(neighborhood_cache[warp], nearest, nidx[warp], SPATIAL_SMOOTHING); + maxenergy = warpMaxEnergy(max(myenergy, maxenergy)); + bestdepth = (myenergy == maxenergy) ? nearest.z : -1.0f; + } - const float energy = ftl::cuda::mls_point_energy<MAX_NEIGHBORS_2>(neighborhood_cache[warp], nearest, nidx[warp], SPATIAL_SMOOTHING); - - //if (energy <= 0.0f) break; - - const float d = nearest.z; - nearest = params.camera.kinectDepthToSkeleton(screenPos.x+u,screenPos.y+v,d+interval); - - // Search for first energy maximum above a threshold - if (lastenergy >= 0.01f && energy < lastenergy) { - const unsigned int cx = screenPos.x+u; - const unsigned int cy = screenPos.y+v; - if (lastdepth > params.camera.m_sensorDepthWorldMin && lastdepth < params.camera.m_sensorDepthWorldMax && cx < depth.width() && cy < depth.height()) { - // Transform estimated point to virtual cam space and output z - atomicMin(&depth(cx,cy), lastdepth * 1000.0f); - } - break; + // Search for first energy maximum above a threshold + if (bestdepth > 0.0f && maxenergy >= 0.01f) { + const unsigned int cx = x; + const unsigned int cy = y; + if (bestdepth > params.camera.m_sensorDepthWorldMin && bestdepth < params.camera.m_sensorDepthWorldMax && cx < depth.width() && cy < depth.height()) { + // Transform estimated point to virtual cam space and output z + atomicMin(&depth(cx,cy), bestdepth * 1000.0f); } - - lastenergy = energy; - lastdepth = d; } - } + //} } // ===== Pass 2 and 3 : Attribute contributions ================================ @@ -657,19 +667,22 @@ void ftl::cuda::dibr(const TextureObject<int> &depth_out, #endif int i=3; + + bool noSplatting = params.m_flags & ftl::render::kNoSplatting; + // Pass 1, gather and upsample depth maps if (params.m_flags & ftl::render::kNoUpsampling) { for (int i=0; i<numcams; ++i) - dibr_merge_kernel<<<gridSize, blockSize, 0, stream>>>(tmp_depth, i, params); + dibr_merge_kernel<<<gridSize, blockSize, 0, stream>>>((noSplatting) ? depth_out : tmp_depth, i, params); } else { for (int i=0; i<numcams; ++i) - dibr_merge_upsample_kernel<<<sgridSize, sblockSize, 0, stream>>>(tmp_depth, i, params); + dibr_merge_upsample_kernel<<<sgridSize, sblockSize, 0, stream>>>((noSplatting) ? depth_out : tmp_depth, i, params); } - if (params.m_flags & ftl::render::kNoSplatting) { + if (noSplatting) { // Pass 3, accumulate all point contributions to pixels for (int i=0; i<numcams; ++i) - dibr_attribute_contrib_kernel<<<sgridSize, sblockSize, 0, stream>>>(tmp_depth, tmp_colour, normal_out, confidence_out, i, params); + dibr_attribute_contrib_kernel<<<sgridSize, sblockSize, 0, stream>>>(depth_out, tmp_colour, normal_out, confidence_out, i, params); } else { // Pass 2 dibr_visibility_principal_kernel2<<<sgridSize, sblockSize, 0, stream>>>(tmp_depth, depth_out, params); -- GitLab