Skip to content
Snippets Groups Projects

Implements #146 upsampling option

Merged Nicolas Pope requested to merge feature/146/upsample into master
1 file
+ 57
44
Compare changes
  • Side-by-side
  • Inline
@@ -13,7 +13,7 @@
#define WARP_SIZE 32
#define DEPTH_THRESHOLD 0.05f
#define UPSAMPLE_MAX 60
#define MAX_ITERATIONS 10
#define MAX_ITERATIONS 32
#define SPATIAL_SMOOTHING 0.005f
using ftl::cuda::TextureObject;
@@ -377,9 +377,19 @@ __global__ void OLD_dibr_visibility_kernel(TextureObject<int> depth, int cam, Sp
}
}
#define NEIGHBOR_RADIUS_2 2
#define NEIGHBOR_RADIUS_2 3
#define NEIGHBOR_WINDOW ((NEIGHBOR_RADIUS_2*2+1)*(NEIGHBOR_RADIUS_2*2+1))
#define MAX_NEIGHBORS_2 10
#define MAX_NEIGHBORS_2 32
#define FULL_MASK 0xffffffff
__device__ inline float warpMaxEnergy(float energy) {
for (int i = WARP_SIZE/2; i > 0; i /= 2) {
const float other = __shfl_xor_sync(FULL_MASK, energy, i, WARP_SIZE);
energy = (energy > other) ? energy : other;
}
return energy;
}
/*
@@ -394,14 +404,15 @@ __global__ void OLD_dibr_visibility_kernel(TextureObject<int> depth, int cam, Sp
__shared__ int minimum[2*T_PER_BLOCK];
__shared__ int maximum[2*T_PER_BLOCK];
__shared__ unsigned int nidx[2*T_PER_BLOCK];
__shared__ float sampleenergy[2*T_PER_BLOCK][WARP_SIZE];
const int warp = threadIdx.x / WARP_SIZE + threadIdx.y*2;
const int x = (blockIdx.x*blockDim.x + threadIdx.x) / WARP_SIZE;
const int y = blockIdx.y*blockDim.y + threadIdx.y;
const float3 camPos = params.camera.kinectDepthToSkeleton(x,y, float(point_in.tex2D(x,y)) / 1000.0f);
//const float3 camPos = params.camera.kinectDepthToSkeleton(x,y, float(point_in.tex2D(x,y)) / 1000.0f);
const float r = 1.0f; //(camera.poseInverse * worldPos).z / camera.params.fx;
//const float r = 1.0f; //(camera.poseInverse * worldPos).z / camera.params.fx;
// Get virtual camera ray for splat centre and backface cull if possible
//const float3 rayOrigin = params.m_viewMatrixInverse * make_float3(0.0f,0.0f,0.0f);
@@ -410,11 +421,11 @@ __global__ void OLD_dibr_visibility_kernel(TextureObject<int> depth, int cam, Sp
// Find the virtual screen position of current point
//const float3 camPos = params.m_viewMatrix * worldPos;
if (camPos.z < params.camera.m_sensorDepthWorldMin) return;
if (camPos.z > params.camera.m_sensorDepthWorldMax) return;
const uint2 screenPos = params.camera.cameraToKinectScreen(camPos);
//if (camPos.z < params.camera.m_sensorDepthWorldMin) return;
//if (camPos.z > params.camera.m_sensorDepthWorldMax) return;
//const uint2 screenPos = params.camera.cameraToKinectScreen(camPos);
const int upsample = 16; //min(UPSAMPLE_MAX, int((4.0f*r) * params.camera.fx / camPos.z));
//const int upsample = 16; //min(UPSAMPLE_MAX, int((4.0f*r) * params.camera.fx / camPos.z));
// Not on screen so stop now...
//if (screenPos.x + upsample < 0 || screenPos.y + upsample < 0 ||
@@ -436,10 +447,10 @@ __global__ void OLD_dibr_visibility_kernel(TextureObject<int> depth, int cam, Sp
const int u = (i % (2*NEIGHBOR_RADIUS_2+1)) - NEIGHBOR_RADIUS_2;
const int v = (i / (2*NEIGHBOR_RADIUS_2+1)) - NEIGHBOR_RADIUS_2;
const float3 point = params.camera.kinectDepthToSkeleton(x+u, y+v, float(point_in.tex2D(x+u, y+v)) / 1000.0f);
const float3 camPos = params.camera.kinectDepthToSkeleton(x, y, point.z);
// If it is close enough...
// TODO: We don't have camPos so distance if this pixel takes on same depth
if (length(point - camPos) <= SPATIAL_SMOOTHING) {
if (point.z > params.camera.m_sensorDepthWorldMin && point.z < params.camera.m_sensorDepthWorldMax && length(point - camPos) <= 0.02f) {
// Append to neighbour list
unsigned int idx = atomicInc(&nidx[warp], MAX_NEIGHBORS_2-1);
neighborhood_cache[warp][idx] = point;
@@ -450,48 +461,47 @@ __global__ void OLD_dibr_visibility_kernel(TextureObject<int> depth, int cam, Sp
__syncwarp();
const float interval = (float(maximum[warp])/1000.0f - float(minimum[warp]) / 1000.0f) / float(MAX_ITERATIONS);
//if (y == 200) printf("interval: %f\n", interval);
const float minDepth = float(minimum[warp])/1000.0f;
const float maxDepth = float(maximum[warp])/1000.0f;
const float interval = (maxDepth - minDepth) / float(MAX_ITERATIONS);
if (minDepth >= params.camera.m_sensorDepthWorldMax) return;
if (maxDepth <= params.camera.m_sensorDepthWorldMin) return;
if (y == 200) printf("interval: %f\n", interval);
//const uint2 screenPos = params.camera.cameraToKinectScreen(camPos);
// Each thread in warp takes an upsample point and updates corresponding depth buffer.
// TODO: Don't do this step, simply update the current pixel to either fill or replace existing value
// use warp threads to do the iteration samples ... 32 samples per pixel.
// could iterate each thread to perform more checks within likely range.
for (int i=lane; i<upsample*upsample; i+=WARP_SIZE) {
const float u = (i % upsample) - (upsample / 2);
const float v = (i / upsample) - (upsample / 2);
//for (int i=lane; i<upsample*upsample; i+=WARP_SIZE) {
// const float u = (i % upsample) - (upsample / 2);
// const float v = (i / upsample) - (upsample / 2);
// Make an initial estimate of the points location
// Use minimum z as first estimate
float3 nearest = params.camera.kinectDepthToSkeleton(screenPos.x+u,screenPos.y+v,float(minimum[warp])/1000.0f);
float lastenergy = 0.0f;
float lastdepth = nearest.z;
float maxenergy = 0.0f;
float bestdepth = -1.0f;
// Search for best or threshold energy
for (int k=0; k<MAX_ITERATIONS; ++k) {
for (int k=lane; k<MAX_ITERATIONS; k+=WARP_SIZE) {
const float3 nearest = params.camera.kinectDepthToSkeleton(x,y,minDepth+float(k)*interval);
const float myenergy = ftl::cuda::mls_point_energy<MAX_NEIGHBORS_2>(neighborhood_cache[warp], nearest, nidx[warp], SPATIAL_SMOOTHING);
maxenergy = warpMaxEnergy(max(myenergy, maxenergy));
bestdepth = (myenergy == maxenergy) ? nearest.z : -1.0f;
}
const float energy = ftl::cuda::mls_point_energy<MAX_NEIGHBORS_2>(neighborhood_cache[warp], nearest, nidx[warp], SPATIAL_SMOOTHING);
//if (energy <= 0.0f) break;
const float d = nearest.z;
nearest = params.camera.kinectDepthToSkeleton(screenPos.x+u,screenPos.y+v,d+interval);
// Search for first energy maximum above a threshold
if (lastenergy >= 0.01f && energy < lastenergy) {
const unsigned int cx = screenPos.x+u;
const unsigned int cy = screenPos.y+v;
if (lastdepth > params.camera.m_sensorDepthWorldMin && lastdepth < params.camera.m_sensorDepthWorldMax && cx < depth.width() && cy < depth.height()) {
// Transform estimated point to virtual cam space and output z
atomicMin(&depth(cx,cy), lastdepth * 1000.0f);
}
break;
// Search for first energy maximum above a threshold
if (bestdepth > 0.0f && maxenergy >= 0.01f) {
const unsigned int cx = x;
const unsigned int cy = y;
if (bestdepth > params.camera.m_sensorDepthWorldMin && bestdepth < params.camera.m_sensorDepthWorldMax && cx < depth.width() && cy < depth.height()) {
// Transform estimated point to virtual cam space and output z
atomicMin(&depth(cx,cy), bestdepth * 1000.0f);
}
lastenergy = energy;
lastdepth = d;
}
}
//}
}
// ===== Pass 2 and 3 : Attribute contributions ================================
@@ -657,19 +667,22 @@ void ftl::cuda::dibr(const TextureObject<int> &depth_out,
#endif
int i=3;
bool noSplatting = params.m_flags & ftl::render::kNoSplatting;
// Pass 1, gather and upsample depth maps
if (params.m_flags & ftl::render::kNoUpsampling) {
for (int i=0; i<numcams; ++i)
dibr_merge_kernel<<<gridSize, blockSize, 0, stream>>>(tmp_depth, i, params);
dibr_merge_kernel<<<gridSize, blockSize, 0, stream>>>((noSplatting) ? depth_out : tmp_depth, i, params);
} else {
for (int i=0; i<numcams; ++i)
dibr_merge_upsample_kernel<<<sgridSize, sblockSize, 0, stream>>>(tmp_depth, i, params);
dibr_merge_upsample_kernel<<<sgridSize, sblockSize, 0, stream>>>((noSplatting) ? depth_out : tmp_depth, i, params);
}
if (params.m_flags & ftl::render::kNoSplatting) {
if (noSplatting) {
// Pass 3, accumulate all point contributions to pixels
for (int i=0; i<numcams; ++i)
dibr_attribute_contrib_kernel<<<sgridSize, sblockSize, 0, stream>>>(tmp_depth, tmp_colour, normal_out, confidence_out, i, params);
dibr_attribute_contrib_kernel<<<sgridSize, sblockSize, 0, stream>>>(depth_out, tmp_colour, normal_out, confidence_out, i, params);
} else {
// Pass 2
dibr_visibility_principal_kernel2<<<sgridSize, sblockSize, 0, stream>>>(tmp_depth, depth_out, params);
Loading