Skip to content
Snippets Groups Projects
Commit 579ef0d2 authored by Nicolas Pope's avatar Nicolas Pope
Browse files

WIP Switch to warp interval energy search

parent 8d407ee5
No related branches found
No related tags found
1 merge request!88Implements #146 upsampling option
Pipeline #12699 passed
......@@ -13,7 +13,7 @@
#define WARP_SIZE 32
#define DEPTH_THRESHOLD 0.05f
#define UPSAMPLE_MAX 60
#define MAX_ITERATIONS 10
#define MAX_ITERATIONS 32
#define SPATIAL_SMOOTHING 0.005f
using ftl::cuda::TextureObject;
......@@ -377,9 +377,19 @@ __global__ void OLD_dibr_visibility_kernel(TextureObject<int> depth, int cam, Sp
}
}
#define NEIGHBOR_RADIUS_2 2
#define NEIGHBOR_RADIUS_2 3
#define NEIGHBOR_WINDOW ((NEIGHBOR_RADIUS_2*2+1)*(NEIGHBOR_RADIUS_2*2+1))
#define MAX_NEIGHBORS_2 10
#define MAX_NEIGHBORS_2 32
#define FULL_MASK 0xffffffff
__device__ inline float warpMaxEnergy(float energy) {
for (int i = WARP_SIZE/2; i > 0; i /= 2) {
const float other = __shfl_xor_sync(FULL_MASK, energy, i, WARP_SIZE);
energy = (energy > other) ? energy : other;
}
return energy;
}
/*
......@@ -394,14 +404,15 @@ __global__ void OLD_dibr_visibility_kernel(TextureObject<int> depth, int cam, Sp
__shared__ int minimum[2*T_PER_BLOCK];
__shared__ int maximum[2*T_PER_BLOCK];
__shared__ unsigned int nidx[2*T_PER_BLOCK];
__shared__ float sampleenergy[2*T_PER_BLOCK][WARP_SIZE];
const int warp = threadIdx.x / WARP_SIZE + threadIdx.y*2;
const int x = (blockIdx.x*blockDim.x + threadIdx.x) / WARP_SIZE;
const int y = blockIdx.y*blockDim.y + threadIdx.y;
const float3 camPos = params.camera.kinectDepthToSkeleton(x,y, float(point_in.tex2D(x,y)) / 1000.0f);
//const float3 camPos = params.camera.kinectDepthToSkeleton(x,y, float(point_in.tex2D(x,y)) / 1000.0f);
const float r = 1.0f; //(camera.poseInverse * worldPos).z / camera.params.fx;
//const float r = 1.0f; //(camera.poseInverse * worldPos).z / camera.params.fx;
// Get virtual camera ray for splat centre and backface cull if possible
//const float3 rayOrigin = params.m_viewMatrixInverse * make_float3(0.0f,0.0f,0.0f);
......@@ -410,11 +421,11 @@ __global__ void OLD_dibr_visibility_kernel(TextureObject<int> depth, int cam, Sp
// Find the virtual screen position of current point
//const float3 camPos = params.m_viewMatrix * worldPos;
if (camPos.z < params.camera.m_sensorDepthWorldMin) return;
if (camPos.z > params.camera.m_sensorDepthWorldMax) return;
const uint2 screenPos = params.camera.cameraToKinectScreen(camPos);
//if (camPos.z < params.camera.m_sensorDepthWorldMin) return;
//if (camPos.z > params.camera.m_sensorDepthWorldMax) return;
//const uint2 screenPos = params.camera.cameraToKinectScreen(camPos);
const int upsample = 16; //min(UPSAMPLE_MAX, int((4.0f*r) * params.camera.fx / camPos.z));
//const int upsample = 16; //min(UPSAMPLE_MAX, int((4.0f*r) * params.camera.fx / camPos.z));
// Not on screen so stop now...
//if (screenPos.x + upsample < 0 || screenPos.y + upsample < 0 ||
......@@ -436,10 +447,10 @@ __global__ void OLD_dibr_visibility_kernel(TextureObject<int> depth, int cam, Sp
const int u = (i % (2*NEIGHBOR_RADIUS_2+1)) - NEIGHBOR_RADIUS_2;
const int v = (i / (2*NEIGHBOR_RADIUS_2+1)) - NEIGHBOR_RADIUS_2;
const float3 point = params.camera.kinectDepthToSkeleton(x+u, y+v, float(point_in.tex2D(x+u, y+v)) / 1000.0f);
const float3 camPos = params.camera.kinectDepthToSkeleton(x, y, point.z);
// If it is close enough...
// TODO: We don't have camPos so distance if this pixel takes on same depth
if (length(point - camPos) <= SPATIAL_SMOOTHING) {
if (point.z > params.camera.m_sensorDepthWorldMin && point.z < params.camera.m_sensorDepthWorldMax && length(point - camPos) <= 0.02f) {
// Append to neighbour list
unsigned int idx = atomicInc(&nidx[warp], MAX_NEIGHBORS_2-1);
neighborhood_cache[warp][idx] = point;
......@@ -450,48 +461,47 @@ __global__ void OLD_dibr_visibility_kernel(TextureObject<int> depth, int cam, Sp
__syncwarp();
const float interval = (float(maximum[warp])/1000.0f - float(minimum[warp]) / 1000.0f) / float(MAX_ITERATIONS);
//if (y == 200) printf("interval: %f\n", interval);
const float minDepth = float(minimum[warp])/1000.0f;
const float maxDepth = float(maximum[warp])/1000.0f;
const float interval = (maxDepth - minDepth) / float(MAX_ITERATIONS);
if (minDepth >= params.camera.m_sensorDepthWorldMax) return;
if (maxDepth <= params.camera.m_sensorDepthWorldMin) return;
if (y == 200) printf("interval: %f\n", interval);
//const uint2 screenPos = params.camera.cameraToKinectScreen(camPos);
// Each thread in warp takes an upsample point and updates corresponding depth buffer.
// TODO: Don't do this step, simply update the current pixel to either fill or replace existing value
// use warp threads to do the iteration samples ... 32 samples per pixel.
// could iterate each thread to perform more checks within likely range.
for (int i=lane; i<upsample*upsample; i+=WARP_SIZE) {
const float u = (i % upsample) - (upsample / 2);
const float v = (i / upsample) - (upsample / 2);
//for (int i=lane; i<upsample*upsample; i+=WARP_SIZE) {
// const float u = (i % upsample) - (upsample / 2);
// const float v = (i / upsample) - (upsample / 2);
// Make an initial estimate of the points location
// Use minimum z as first estimate
float3 nearest = params.camera.kinectDepthToSkeleton(screenPos.x+u,screenPos.y+v,float(minimum[warp])/1000.0f);
float lastenergy = 0.0f;
float lastdepth = nearest.z;
float maxenergy = 0.0f;
float bestdepth = -1.0f;
// Search for best or threshold energy
for (int k=0; k<MAX_ITERATIONS; ++k) {
for (int k=lane; k<MAX_ITERATIONS; k+=WARP_SIZE) {
const float3 nearest = params.camera.kinectDepthToSkeleton(x,y,minDepth+float(k)*interval);
const float myenergy = ftl::cuda::mls_point_energy<MAX_NEIGHBORS_2>(neighborhood_cache[warp], nearest, nidx[warp], SPATIAL_SMOOTHING);
maxenergy = warpMaxEnergy(max(myenergy, maxenergy));
bestdepth = (myenergy == maxenergy) ? nearest.z : -1.0f;
}
const float energy = ftl::cuda::mls_point_energy<MAX_NEIGHBORS_2>(neighborhood_cache[warp], nearest, nidx[warp], SPATIAL_SMOOTHING);
//if (energy <= 0.0f) break;
const float d = nearest.z;
nearest = params.camera.kinectDepthToSkeleton(screenPos.x+u,screenPos.y+v,d+interval);
// Search for first energy maximum above a threshold
if (lastenergy >= 0.01f && energy < lastenergy) {
const unsigned int cx = screenPos.x+u;
const unsigned int cy = screenPos.y+v;
if (lastdepth > params.camera.m_sensorDepthWorldMin && lastdepth < params.camera.m_sensorDepthWorldMax && cx < depth.width() && cy < depth.height()) {
// Transform estimated point to virtual cam space and output z
atomicMin(&depth(cx,cy), lastdepth * 1000.0f);
}
break;
// Search for first energy maximum above a threshold
if (bestdepth > 0.0f && maxenergy >= 0.01f) {
const unsigned int cx = x;
const unsigned int cy = y;
if (bestdepth > params.camera.m_sensorDepthWorldMin && bestdepth < params.camera.m_sensorDepthWorldMax && cx < depth.width() && cy < depth.height()) {
// Transform estimated point to virtual cam space and output z
atomicMin(&depth(cx,cy), bestdepth * 1000.0f);
}
lastenergy = energy;
lastdepth = d;
}
}
//}
}
// ===== Pass 2 and 3 : Attribute contributions ================================
......@@ -657,19 +667,22 @@ void ftl::cuda::dibr(const TextureObject<int> &depth_out,
#endif
int i=3;
bool noSplatting = params.m_flags & ftl::render::kNoSplatting;
// Pass 1, gather and upsample depth maps
if (params.m_flags & ftl::render::kNoUpsampling) {
for (int i=0; i<numcams; ++i)
dibr_merge_kernel<<<gridSize, blockSize, 0, stream>>>(tmp_depth, i, params);
dibr_merge_kernel<<<gridSize, blockSize, 0, stream>>>((noSplatting) ? depth_out : tmp_depth, i, params);
} else {
for (int i=0; i<numcams; ++i)
dibr_merge_upsample_kernel<<<sgridSize, sblockSize, 0, stream>>>(tmp_depth, i, params);
dibr_merge_upsample_kernel<<<sgridSize, sblockSize, 0, stream>>>((noSplatting) ? depth_out : tmp_depth, i, params);
}
if (params.m_flags & ftl::render::kNoSplatting) {
if (noSplatting) {
// Pass 3, accumulate all point contributions to pixels
for (int i=0; i<numcams; ++i)
dibr_attribute_contrib_kernel<<<sgridSize, sblockSize, 0, stream>>>(tmp_depth, tmp_colour, normal_out, confidence_out, i, params);
dibr_attribute_contrib_kernel<<<sgridSize, sblockSize, 0, stream>>>(depth_out, tmp_colour, normal_out, confidence_out, i, params);
} else {
// Pass 2
dibr_visibility_principal_kernel2<<<sgridSize, sblockSize, 0, stream>>>(tmp_depth, depth_out, params);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment