diff --git a/applications/reconstruct/src/dibr.cu b/applications/reconstruct/src/dibr.cu index 97157c56faad91258e918fda855298c497c86401..88d480872a551845ba298809f5750341fd29923c 100644 --- a/applications/reconstruct/src/dibr.cu +++ b/applications/reconstruct/src/dibr.cu @@ -346,7 +346,8 @@ __global__ void OLD_dibr_visibility_kernel(TextureObject<int> depth, int cam, Sp } #define NEIGHBOR_RADIUS_2 5 -#define MAX_NEIGHBORS_2 ((NEIGHBOR_RADIUS_2*2+1)*(NEIGHBOR_RADIUS_2*2+1)) +#define NEIGHBOR_WINDOW ((NEIGHBOR_RADIUS_2*2+1)*(NEIGHBOR_RADIUS_2*2+1)) +#define MAX_NEIGHBORS_2 20 /* @@ -360,6 +361,7 @@ __global__ void OLD_dibr_visibility_kernel(TextureObject<int> depth, int cam, Sp __shared__ float3 neighborhood_cache[2*T_PER_BLOCK][MAX_NEIGHBORS_2]; __shared__ int minimum[2*T_PER_BLOCK]; __shared__ int maximum[2*T_PER_BLOCK]; + __shared__ int nidx[2*T_PER_BLOCK]; const int warp = threadIdx.x / WARP_SIZE + threadIdx.y*2; const int x = (blockIdx.x*blockDim.x + threadIdx.x) / WARP_SIZE; @@ -393,17 +395,19 @@ __global__ void OLD_dibr_visibility_kernel(TextureObject<int> depth, int cam, Sp if (lane == 0) { minimum[warp] = 100000000; maximum[warp] = -100000000; + nidx[warp] = 0; } __syncwarp(); - for (int i=lane; i<MAX_NEIGHBORS_2; i+=WARP_SIZE) { + for (int i=lane; i<NEIGHBOR_WINDOW; i+=WARP_SIZE) { const int u = (i % (2*NEIGHBOR_RADIUS_2+1)) - NEIGHBOR_RADIUS_2; const int v = (i / (2*NEIGHBOR_RADIUS_2+1)) - NEIGHBOR_RADIUS_2; const float3 point = params.camera.kinectDepthToSkeleton(x+u, y+v, float(point_in.tex2D(x+u, y+v)) / 1000.0f); - neighborhood_cache[warp][i] = point; if (length(point - camPos) <= 0.04f) { + int idx = atomicInc(&nidx[warp], MAX_NEIGHBORS_2-1); + neighborhood_cache[warp][idx] = point; atomicMin(&minimum[warp], point.z*1000.0f); atomicMax(&maximum[warp], point.z*1000.0f); }