diff --git a/applications/reconstruct/src/dibr.cu b/applications/reconstruct/src/dibr.cu index 691ce7c5706ded55def668751300ebde5c44ab6b..08c73219df472ba526792f41ced599e1dd1e181c 100644 --- a/applications/reconstruct/src/dibr.cu +++ b/applications/reconstruct/src/dibr.cu @@ -383,12 +383,24 @@ __global__ void OLD_dibr_visibility_kernel(TextureObject<int> depth, int cam, Sp #define FULL_MASK 0xffffffff -__device__ inline float warpMaxEnergy(float energy) { - for (int i = WARP_SIZE/2; i > 0; i /= 2) { - const float other = __shfl_xor_sync(FULL_MASK, energy, i, WARP_SIZE); - energy = (energy > other) ? energy : other; +__device__ inline float warpMax(float energy) { + //for (int i = WARP_SIZE/2; i > 0; i /= 2) { + float e = energy; + for (int i = 1; i < 32; i *= 2) { + const float other = __shfl_xor_sync(FULL_MASK, e, i, WARP_SIZE); + e = max(e, other); } - return energy; + return e; +} + +__device__ inline float warpMin(float energy) { + //for (int i = WARP_SIZE/2; i > 0; i /= 2) { + float e = energy; + for (int i = 1; i < 32; i *= 2) { + const float other = __shfl_xor_sync(FULL_MASK, e, i, WARP_SIZE); + e = min(e, other); + } + return e; } @@ -404,34 +416,13 @@ __device__ inline float warpMaxEnergy(float energy) { __shared__ int minimum[2*T_PER_BLOCK]; __shared__ int maximum[2*T_PER_BLOCK]; __shared__ unsigned int nidx[2*T_PER_BLOCK]; - __shared__ float sampleenergy[2*T_PER_BLOCK][WARP_SIZE]; - const int warp = threadIdx.x / WARP_SIZE + threadIdx.y*2; + const int tid = (threadIdx.x + threadIdx.y * blockDim.x); + const int warp = tid / WARP_SIZE; //threadIdx.x / WARP_SIZE + threadIdx.y*2; const int x = (blockIdx.x*blockDim.x + threadIdx.x) / WARP_SIZE; const int y = blockIdx.y*blockDim.y + threadIdx.y; - //const float3 camPos = params.camera.kinectDepthToSkeleton(x,y, float(point_in.tex2D(x,y)) / 1000.0f); - - //const float r = 1.0f; //(camera.poseInverse * worldPos).z / camera.params.fx; - - // Get virtual camera ray for splat centre and backface cull if possible - //const float3 rayOrigin = params.m_viewMatrixInverse * make_float3(0.0f,0.0f,0.0f); - //const float3 rayDir = normalize(params.m_viewMatrixInverse * params.camera.kinectDepthToSkeleton(x,y,1.0f) - rayOrigin); - //if (dot(rayDir, normal) > 0.0f) return; - - // Find the virtual screen position of current point - //const float3 camPos = params.m_viewMatrix * worldPos; - //if (camPos.z < params.camera.m_sensorDepthWorldMin) return; - //if (camPos.z > params.camera.m_sensorDepthWorldMax) return; - //const uint2 screenPos = params.camera.cameraToKinectScreen(camPos); - - //const int upsample = 16; //min(UPSAMPLE_MAX, int((4.0f*r) * params.camera.fx / camPos.z)); - - // Not on screen so stop now... - //if (screenPos.x + upsample < 0 || screenPos.y + upsample < 0 || - // screenPos.x - upsample >= depth.width() || screenPos.y - upsample >= depth.height()) return; - - const int lane = threadIdx.x % WARP_SIZE; + const int lane = tid % WARP_SIZE; if (lane == 0) { minimum[warp] = 100000000; maximum[warp] = -100000000; @@ -467,41 +458,31 @@ __device__ inline float warpMaxEnergy(float energy) { if (minDepth >= params.camera.m_sensorDepthWorldMax) return; if (maxDepth <= params.camera.m_sensorDepthWorldMin) return; - if (y == 200) printf("interval: %f\n", interval); + //if (y == 200) printf("interval: %f\n", maxDepth); - //const uint2 screenPos = params.camera.cameraToKinectScreen(camPos); - // Each thread in warp takes an upsample point and updates corresponding depth buffer. - // TODO: Don't do this step, simply update the current pixel to either fill or replace existing value - // use warp threads to do the iteration samples ... 32 samples per pixel. - // could iterate each thread to perform more checks within likely range. - //for (int i=lane; i<upsample*upsample; i+=WARP_SIZE) { - // const float u = (i % upsample) - (upsample / 2); - // const float v = (i / upsample) - (upsample / 2); + float maxenergy = 0.0f; + float bestdepth = 0.0f; - // Make an initial estimate of the points location - // Use minimum z as first estimate - float maxenergy = 0.0f; - float bestdepth = -1.0f; - - // Search for best or threshold energy - for (int k=lane; k<MAX_ITERATIONS; k+=WARP_SIZE) { - const float3 nearest = params.camera.kinectDepthToSkeleton(x,y,minDepth+float(k)*interval); - const float myenergy = ftl::cuda::mls_point_energy<MAX_NEIGHBORS_2>(neighborhood_cache[warp], nearest, nidx[warp], SPATIAL_SMOOTHING); - maxenergy = warpMaxEnergy(max(myenergy, maxenergy)); - bestdepth = (myenergy == maxenergy) ? nearest.z : -1.0f; - } + // Search for best or threshold energy + for (int k=lane; k<MAX_ITERATIONS; k+=WARP_SIZE) { + const float3 nearest = params.camera.kinectDepthToSkeleton(x,y,minDepth+float(k)*interval); + const float myenergy = ftl::cuda::mls_point_energy<MAX_NEIGHBORS_2>(neighborhood_cache[warp], nearest, nidx[warp], SPATIAL_SMOOTHING); + maxenergy = warpMax(max(myenergy, maxenergy)); + bestdepth = (myenergy == maxenergy) ? nearest.z : 0.0f; + } - // Search for first energy maximum above a threshold - if (bestdepth > 0.0f && maxenergy >= 0.01f) { - const unsigned int cx = x; - const unsigned int cy = y; - if (bestdepth > params.camera.m_sensorDepthWorldMin && bestdepth < params.camera.m_sensorDepthWorldMax && cx < depth.width() && cy < depth.height()) { - // Transform estimated point to virtual cam space and output z - atomicMin(&depth(cx,cy), bestdepth * 1000.0f); - } + // Search for first energy maximum above a threshold + if (bestdepth > 0.0f && maxenergy >= 0.1f) { + //printf("E D %f %f\n", maxenergy, bestdepth); + const unsigned int cx = x; + const unsigned int cy = y; + if (bestdepth > params.camera.m_sensorDepthWorldMin && bestdepth < params.camera.m_sensorDepthWorldMax && cx < depth.width() && cy < depth.height()) { + // Transform estimated point to virtual cam space and output z + //atomicMin(&depth(cx,cy), bestdepth * 1000.0f); + depth(cx,cy) = bestdepth * 1000.0f; } - //} + } } // ===== Pass 2 and 3 : Attribute contributions ================================ diff --git a/applications/reconstruct/src/splat_render.cpp b/applications/reconstruct/src/splat_render.cpp index 3cf4e18ebe863908ffab4fb6c03888a1a5ff4d2d..039d4ab99a4de7add09797b1cf0b35d7775994dc 100644 --- a/applications/reconstruct/src/splat_render.cpp +++ b/applications/reconstruct/src/splat_render.cpp @@ -89,6 +89,7 @@ void Splatter::render(ftl::rgbd::Source *src, cudaStream_t stream) { //ftl::cuda::int_to_float(depth1_, depth2_, 1.0f / 1000.0f, stream); if (src->value("splatting", false)) { //ftl::cuda::splat_points(depth1_, colour1_, normal1_, depth2_, colour2_, params, stream); + ftl::cuda::int_to_float(depth1_, depth2_, 1.0f / 1000.0f, stream); src->writeFrames(colour2_, depth2_, stream); } else { ftl::cuda::int_to_float(depth1_, depth2_, 1.0f / 1000.0f, stream);