Skip to content
Snippets Groups Projects
Commit 579ef0d2 authored by Nicolas Pope's avatar Nicolas Pope
Browse files

WIP Switch to warp interval energy search

parent 8d407ee5
No related branches found
No related tags found
1 merge request!88Implements #146 upsampling option
Pipeline #12699 passed
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
#define WARP_SIZE 32 #define WARP_SIZE 32
#define DEPTH_THRESHOLD 0.05f #define DEPTH_THRESHOLD 0.05f
#define UPSAMPLE_MAX 60 #define UPSAMPLE_MAX 60
#define MAX_ITERATIONS 10 #define MAX_ITERATIONS 32
#define SPATIAL_SMOOTHING 0.005f #define SPATIAL_SMOOTHING 0.005f
using ftl::cuda::TextureObject; using ftl::cuda::TextureObject;
...@@ -377,9 +377,19 @@ __global__ void OLD_dibr_visibility_kernel(TextureObject<int> depth, int cam, Sp ...@@ -377,9 +377,19 @@ __global__ void OLD_dibr_visibility_kernel(TextureObject<int> depth, int cam, Sp
} }
} }
#define NEIGHBOR_RADIUS_2 2 #define NEIGHBOR_RADIUS_2 3
#define NEIGHBOR_WINDOW ((NEIGHBOR_RADIUS_2*2+1)*(NEIGHBOR_RADIUS_2*2+1)) #define NEIGHBOR_WINDOW ((NEIGHBOR_RADIUS_2*2+1)*(NEIGHBOR_RADIUS_2*2+1))
#define MAX_NEIGHBORS_2 10 #define MAX_NEIGHBORS_2 32
#define FULL_MASK 0xffffffff
__device__ inline float warpMaxEnergy(float energy) {
for (int i = WARP_SIZE/2; i > 0; i /= 2) {
const float other = __shfl_xor_sync(FULL_MASK, energy, i, WARP_SIZE);
energy = (energy > other) ? energy : other;
}
return energy;
}
/* /*
...@@ -394,14 +404,15 @@ __global__ void OLD_dibr_visibility_kernel(TextureObject<int> depth, int cam, Sp ...@@ -394,14 +404,15 @@ __global__ void OLD_dibr_visibility_kernel(TextureObject<int> depth, int cam, Sp
__shared__ int minimum[2*T_PER_BLOCK]; __shared__ int minimum[2*T_PER_BLOCK];
__shared__ int maximum[2*T_PER_BLOCK]; __shared__ int maximum[2*T_PER_BLOCK];
__shared__ unsigned int nidx[2*T_PER_BLOCK]; __shared__ unsigned int nidx[2*T_PER_BLOCK];
__shared__ float sampleenergy[2*T_PER_BLOCK][WARP_SIZE];
const int warp = threadIdx.x / WARP_SIZE + threadIdx.y*2; const int warp = threadIdx.x / WARP_SIZE + threadIdx.y*2;
const int x = (blockIdx.x*blockDim.x + threadIdx.x) / WARP_SIZE; const int x = (blockIdx.x*blockDim.x + threadIdx.x) / WARP_SIZE;
const int y = blockIdx.y*blockDim.y + threadIdx.y; const int y = blockIdx.y*blockDim.y + threadIdx.y;
const float3 camPos = params.camera.kinectDepthToSkeleton(x,y, float(point_in.tex2D(x,y)) / 1000.0f); //const float3 camPos = params.camera.kinectDepthToSkeleton(x,y, float(point_in.tex2D(x,y)) / 1000.0f);
const float r = 1.0f; //(camera.poseInverse * worldPos).z / camera.params.fx; //const float r = 1.0f; //(camera.poseInverse * worldPos).z / camera.params.fx;
// Get virtual camera ray for splat centre and backface cull if possible // Get virtual camera ray for splat centre and backface cull if possible
//const float3 rayOrigin = params.m_viewMatrixInverse * make_float3(0.0f,0.0f,0.0f); //const float3 rayOrigin = params.m_viewMatrixInverse * make_float3(0.0f,0.0f,0.0f);
...@@ -410,11 +421,11 @@ __global__ void OLD_dibr_visibility_kernel(TextureObject<int> depth, int cam, Sp ...@@ -410,11 +421,11 @@ __global__ void OLD_dibr_visibility_kernel(TextureObject<int> depth, int cam, Sp
// Find the virtual screen position of current point // Find the virtual screen position of current point
//const float3 camPos = params.m_viewMatrix * worldPos; //const float3 camPos = params.m_viewMatrix * worldPos;
if (camPos.z < params.camera.m_sensorDepthWorldMin) return; //if (camPos.z < params.camera.m_sensorDepthWorldMin) return;
if (camPos.z > params.camera.m_sensorDepthWorldMax) return; //if (camPos.z > params.camera.m_sensorDepthWorldMax) return;
const uint2 screenPos = params.camera.cameraToKinectScreen(camPos); //const uint2 screenPos = params.camera.cameraToKinectScreen(camPos);
const int upsample = 16; //min(UPSAMPLE_MAX, int((4.0f*r) * params.camera.fx / camPos.z)); //const int upsample = 16; //min(UPSAMPLE_MAX, int((4.0f*r) * params.camera.fx / camPos.z));
// Not on screen so stop now... // Not on screen so stop now...
//if (screenPos.x + upsample < 0 || screenPos.y + upsample < 0 || //if (screenPos.x + upsample < 0 || screenPos.y + upsample < 0 ||
...@@ -436,10 +447,10 @@ __global__ void OLD_dibr_visibility_kernel(TextureObject<int> depth, int cam, Sp ...@@ -436,10 +447,10 @@ __global__ void OLD_dibr_visibility_kernel(TextureObject<int> depth, int cam, Sp
const int u = (i % (2*NEIGHBOR_RADIUS_2+1)) - NEIGHBOR_RADIUS_2; const int u = (i % (2*NEIGHBOR_RADIUS_2+1)) - NEIGHBOR_RADIUS_2;
const int v = (i / (2*NEIGHBOR_RADIUS_2+1)) - NEIGHBOR_RADIUS_2; const int v = (i / (2*NEIGHBOR_RADIUS_2+1)) - NEIGHBOR_RADIUS_2;
const float3 point = params.camera.kinectDepthToSkeleton(x+u, y+v, float(point_in.tex2D(x+u, y+v)) / 1000.0f); const float3 point = params.camera.kinectDepthToSkeleton(x+u, y+v, float(point_in.tex2D(x+u, y+v)) / 1000.0f);
const float3 camPos = params.camera.kinectDepthToSkeleton(x, y, point.z);
// If it is close enough... // If it is close enough...
// TODO: We don't have camPos so distance if this pixel takes on same depth if (point.z > params.camera.m_sensorDepthWorldMin && point.z < params.camera.m_sensorDepthWorldMax && length(point - camPos) <= 0.02f) {
if (length(point - camPos) <= SPATIAL_SMOOTHING) {
// Append to neighbour list // Append to neighbour list
unsigned int idx = atomicInc(&nidx[warp], MAX_NEIGHBORS_2-1); unsigned int idx = atomicInc(&nidx[warp], MAX_NEIGHBORS_2-1);
neighborhood_cache[warp][idx] = point; neighborhood_cache[warp][idx] = point;
...@@ -450,48 +461,47 @@ __global__ void OLD_dibr_visibility_kernel(TextureObject<int> depth, int cam, Sp ...@@ -450,48 +461,47 @@ __global__ void OLD_dibr_visibility_kernel(TextureObject<int> depth, int cam, Sp
__syncwarp(); __syncwarp();
const float interval = (float(maximum[warp])/1000.0f - float(minimum[warp]) / 1000.0f) / float(MAX_ITERATIONS); const float minDepth = float(minimum[warp])/1000.0f;
//if (y == 200) printf("interval: %f\n", interval); const float maxDepth = float(maximum[warp])/1000.0f;
const float interval = (maxDepth - minDepth) / float(MAX_ITERATIONS);
if (minDepth >= params.camera.m_sensorDepthWorldMax) return;
if (maxDepth <= params.camera.m_sensorDepthWorldMin) return;
if (y == 200) printf("interval: %f\n", interval);
//const uint2 screenPos = params.camera.cameraToKinectScreen(camPos);
// Each thread in warp takes an upsample point and updates corresponding depth buffer. // Each thread in warp takes an upsample point and updates corresponding depth buffer.
// TODO: Don't do this step, simply update the current pixel to either fill or replace existing value // TODO: Don't do this step, simply update the current pixel to either fill or replace existing value
// use warp threads to do the iteration samples ... 32 samples per pixel. // use warp threads to do the iteration samples ... 32 samples per pixel.
// could iterate each thread to perform more checks within likely range. // could iterate each thread to perform more checks within likely range.
for (int i=lane; i<upsample*upsample; i+=WARP_SIZE) { //for (int i=lane; i<upsample*upsample; i+=WARP_SIZE) {
const float u = (i % upsample) - (upsample / 2); // const float u = (i % upsample) - (upsample / 2);
const float v = (i / upsample) - (upsample / 2); // const float v = (i / upsample) - (upsample / 2);
// Make an initial estimate of the points location // Make an initial estimate of the points location
// Use minimum z as first estimate // Use minimum z as first estimate
float3 nearest = params.camera.kinectDepthToSkeleton(screenPos.x+u,screenPos.y+v,float(minimum[warp])/1000.0f); float maxenergy = 0.0f;
float lastenergy = 0.0f; float bestdepth = -1.0f;
float lastdepth = nearest.z;
// Search for best or threshold energy // Search for best or threshold energy
for (int k=0; k<MAX_ITERATIONS; ++k) { for (int k=lane; k<MAX_ITERATIONS; k+=WARP_SIZE) {
const float3 nearest = params.camera.kinectDepthToSkeleton(x,y,minDepth+float(k)*interval);
const float myenergy = ftl::cuda::mls_point_energy<MAX_NEIGHBORS_2>(neighborhood_cache[warp], nearest, nidx[warp], SPATIAL_SMOOTHING);
maxenergy = warpMaxEnergy(max(myenergy, maxenergy));
bestdepth = (myenergy == maxenergy) ? nearest.z : -1.0f;
}
const float energy = ftl::cuda::mls_point_energy<MAX_NEIGHBORS_2>(neighborhood_cache[warp], nearest, nidx[warp], SPATIAL_SMOOTHING); // Search for first energy maximum above a threshold
if (bestdepth > 0.0f && maxenergy >= 0.01f) {
//if (energy <= 0.0f) break; const unsigned int cx = x;
const unsigned int cy = y;
const float d = nearest.z; if (bestdepth > params.camera.m_sensorDepthWorldMin && bestdepth < params.camera.m_sensorDepthWorldMax && cx < depth.width() && cy < depth.height()) {
nearest = params.camera.kinectDepthToSkeleton(screenPos.x+u,screenPos.y+v,d+interval); // Transform estimated point to virtual cam space and output z
atomicMin(&depth(cx,cy), bestdepth * 1000.0f);
// Search for first energy maximum above a threshold
if (lastenergy >= 0.01f && energy < lastenergy) {
const unsigned int cx = screenPos.x+u;
const unsigned int cy = screenPos.y+v;
if (lastdepth > params.camera.m_sensorDepthWorldMin && lastdepth < params.camera.m_sensorDepthWorldMax && cx < depth.width() && cy < depth.height()) {
// Transform estimated point to virtual cam space and output z
atomicMin(&depth(cx,cy), lastdepth * 1000.0f);
}
break;
} }
lastenergy = energy;
lastdepth = d;
} }
} //}
} }
// ===== Pass 2 and 3 : Attribute contributions ================================ // ===== Pass 2 and 3 : Attribute contributions ================================
...@@ -657,19 +667,22 @@ void ftl::cuda::dibr(const TextureObject<int> &depth_out, ...@@ -657,19 +667,22 @@ void ftl::cuda::dibr(const TextureObject<int> &depth_out,
#endif #endif
int i=3; int i=3;
bool noSplatting = params.m_flags & ftl::render::kNoSplatting;
// Pass 1, gather and upsample depth maps // Pass 1, gather and upsample depth maps
if (params.m_flags & ftl::render::kNoUpsampling) { if (params.m_flags & ftl::render::kNoUpsampling) {
for (int i=0; i<numcams; ++i) for (int i=0; i<numcams; ++i)
dibr_merge_kernel<<<gridSize, blockSize, 0, stream>>>(tmp_depth, i, params); dibr_merge_kernel<<<gridSize, blockSize, 0, stream>>>((noSplatting) ? depth_out : tmp_depth, i, params);
} else { } else {
for (int i=0; i<numcams; ++i) for (int i=0; i<numcams; ++i)
dibr_merge_upsample_kernel<<<sgridSize, sblockSize, 0, stream>>>(tmp_depth, i, params); dibr_merge_upsample_kernel<<<sgridSize, sblockSize, 0, stream>>>((noSplatting) ? depth_out : tmp_depth, i, params);
} }
if (params.m_flags & ftl::render::kNoSplatting) { if (noSplatting) {
// Pass 3, accumulate all point contributions to pixels // Pass 3, accumulate all point contributions to pixels
for (int i=0; i<numcams; ++i) for (int i=0; i<numcams; ++i)
dibr_attribute_contrib_kernel<<<sgridSize, sblockSize, 0, stream>>>(tmp_depth, tmp_colour, normal_out, confidence_out, i, params); dibr_attribute_contrib_kernel<<<sgridSize, sblockSize, 0, stream>>>(depth_out, tmp_colour, normal_out, confidence_out, i, params);
} else { } else {
// Pass 2 // Pass 2
dibr_visibility_principal_kernel2<<<sgridSize, sblockSize, 0, stream>>>(tmp_depth, depth_out, params); dibr_visibility_principal_kernel2<<<sgridSize, sblockSize, 0, stream>>>(tmp_depth, depth_out, params);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment