Skip to content
Snippets Groups Projects
Commit 65cdf26d authored by Nicolas Pope
Browse files

Correct depth map conversion

parent 579ef0d2
No related branches found
No related tags found
1 merge request: !88 "Implements #146 upsampling option"
......@@ -383,12 +383,24 @@ __global__ void OLD_dibr_visibility_kernel(TextureObject<int> depth, int cam, Sp
#define FULL_MASK 0xffffffff
__device__ inline float warpMaxEnergy(float energy) {
for (int i = WARP_SIZE/2; i > 0; i /= 2) {
const float other = __shfl_xor_sync(FULL_MASK, energy, i, WARP_SIZE);
energy = (energy > other) ? energy : other;
__device__ inline float warpMax(float energy) {
//for (int i = WARP_SIZE/2; i > 0; i /= 2) {
float e = energy;
for (int i = 1; i < 32; i *= 2) {
const float other = __shfl_xor_sync(FULL_MASK, e, i, WARP_SIZE);
e = max(e, other);
}
return energy;
return e;
}
/**
 * Warp-wide minimum reduction.
 *
 * Each lane repeatedly swaps its running value with a partner lane through
 * __shfl_xor_sync, doubling the XOR distance every round and keeping the
 * smaller of the two values. After the final round every participating lane
 * holds the same result.
 *
 * The shuffle is issued with FULL_MASK, so all lanes named in that mask must
 * reach this call together; do not call it from a branch that only part of
 * the warp takes.
 *
 * NOTE(review): assumes WARP_SIZE is a power of two no larger than the
 * hardware warp width -- confirm against its definition.
 *
 * @param energy This lane's input value.
 * @return The smallest input value among the lanes of the WARP_SIZE-wide
 *         shuffle segment this lane belongs to.
 */
__device__ inline float warpMin(float energy) {
	float e = energy;
	// The XOR distance must stay below the shuffle width (WARP_SIZE) so every
	// exchange remains inside the segment being reduced; the bound therefore
	// follows WARP_SIZE rather than a hard-coded 32.
	for (int i = 1; i < WARP_SIZE; i *= 2) {
		const float other = __shfl_xor_sync(FULL_MASK, e, i, WARP_SIZE);
		e = min(e, other);
	}
	return e;
}
......@@ -404,34 +416,13 @@ __device__ inline float warpMaxEnergy(float energy) {
__shared__ int minimum[2*T_PER_BLOCK];
__shared__ int maximum[2*T_PER_BLOCK];
__shared__ unsigned int nidx[2*T_PER_BLOCK];
__shared__ float sampleenergy[2*T_PER_BLOCK][WARP_SIZE];
const int warp = threadIdx.x / WARP_SIZE + threadIdx.y*2;
const int tid = (threadIdx.x + threadIdx.y * blockDim.x);
const int warp = tid / WARP_SIZE; //threadIdx.x / WARP_SIZE + threadIdx.y*2;
const int x = (blockIdx.x*blockDim.x + threadIdx.x) / WARP_SIZE;
const int y = blockIdx.y*blockDim.y + threadIdx.y;
//const float3 camPos = params.camera.kinectDepthToSkeleton(x,y, float(point_in.tex2D(x,y)) / 1000.0f);
//const float r = 1.0f; //(camera.poseInverse * worldPos).z / camera.params.fx;
// Get virtual camera ray for splat centre and backface cull if possible
//const float3 rayOrigin = params.m_viewMatrixInverse * make_float3(0.0f,0.0f,0.0f);
//const float3 rayDir = normalize(params.m_viewMatrixInverse * params.camera.kinectDepthToSkeleton(x,y,1.0f) - rayOrigin);
//if (dot(rayDir, normal) > 0.0f) return;
// Find the virtual screen position of current point
//const float3 camPos = params.m_viewMatrix * worldPos;
//if (camPos.z < params.camera.m_sensorDepthWorldMin) return;
//if (camPos.z > params.camera.m_sensorDepthWorldMax) return;
//const uint2 screenPos = params.camera.cameraToKinectScreen(camPos);
//const int upsample = 16; //min(UPSAMPLE_MAX, int((4.0f*r) * params.camera.fx / camPos.z));
// Not on screen so stop now...
//if (screenPos.x + upsample < 0 || screenPos.y + upsample < 0 ||
// screenPos.x - upsample >= depth.width() || screenPos.y - upsample >= depth.height()) return;
const int lane = threadIdx.x % WARP_SIZE;
const int lane = tid % WARP_SIZE;
if (lane == 0) {
minimum[warp] = 100000000;
maximum[warp] = -100000000;
......@@ -467,41 +458,31 @@ __device__ inline float warpMaxEnergy(float energy) {
if (minDepth >= params.camera.m_sensorDepthWorldMax) return;
if (maxDepth <= params.camera.m_sensorDepthWorldMin) return;
if (y == 200) printf("interval: %f\n", interval);
//if (y == 200) printf("interval: %f\n", maxDepth);
//const uint2 screenPos = params.camera.cameraToKinectScreen(camPos);
// Each thread in warp takes an upsample point and updates corresponding depth buffer.
// TODO: Don't do this step, simply update the current pixel to either fill or replace existing value
// use warp threads to do the iteration samples ... 32 samples per pixel.
// could iterate each thread to perform more checks within likely range.
//for (int i=lane; i<upsample*upsample; i+=WARP_SIZE) {
// const float u = (i % upsample) - (upsample / 2);
// const float v = (i / upsample) - (upsample / 2);
float maxenergy = 0.0f;
float bestdepth = 0.0f;
// Make an initial estimate of the points location
// Use minimum z as first estimate
float maxenergy = 0.0f;
float bestdepth = -1.0f;
// Search for best or threshold energy
for (int k=lane; k<MAX_ITERATIONS; k+=WARP_SIZE) {
const float3 nearest = params.camera.kinectDepthToSkeleton(x,y,minDepth+float(k)*interval);
const float myenergy = ftl::cuda::mls_point_energy<MAX_NEIGHBORS_2>(neighborhood_cache[warp], nearest, nidx[warp], SPATIAL_SMOOTHING);
maxenergy = warpMaxEnergy(max(myenergy, maxenergy));
bestdepth = (myenergy == maxenergy) ? nearest.z : -1.0f;
}
// Search for best or threshold energy
for (int k=lane; k<MAX_ITERATIONS; k+=WARP_SIZE) {
const float3 nearest = params.camera.kinectDepthToSkeleton(x,y,minDepth+float(k)*interval);
const float myenergy = ftl::cuda::mls_point_energy<MAX_NEIGHBORS_2>(neighborhood_cache[warp], nearest, nidx[warp], SPATIAL_SMOOTHING);
maxenergy = warpMax(max(myenergy, maxenergy));
bestdepth = (myenergy == maxenergy) ? nearest.z : 0.0f;
}
// Search for first energy maximum above a threshold
if (bestdepth > 0.0f && maxenergy >= 0.01f) {
const unsigned int cx = x;
const unsigned int cy = y;
if (bestdepth > params.camera.m_sensorDepthWorldMin && bestdepth < params.camera.m_sensorDepthWorldMax && cx < depth.width() && cy < depth.height()) {
// Transform estimated point to virtual cam space and output z
atomicMin(&depth(cx,cy), bestdepth * 1000.0f);
}
// Search for first energy maximum above a threshold
if (bestdepth > 0.0f && maxenergy >= 0.1f) {
//printf("E D %f %f\n", maxenergy, bestdepth);
const unsigned int cx = x;
const unsigned int cy = y;
if (bestdepth > params.camera.m_sensorDepthWorldMin && bestdepth < params.camera.m_sensorDepthWorldMax && cx < depth.width() && cy < depth.height()) {
// Transform estimated point to virtual cam space and output z
//atomicMin(&depth(cx,cy), bestdepth * 1000.0f);
depth(cx,cy) = bestdepth * 1000.0f;
}
//}
}
}
// ===== Pass 2 and 3 : Attribute contributions ================================
......
......@@ -89,6 +89,7 @@ void Splatter::render(ftl::rgbd::Source *src, cudaStream_t stream) {
//ftl::cuda::int_to_float(depth1_, depth2_, 1.0f / 1000.0f, stream);
if (src->value("splatting", false)) {
//ftl::cuda::splat_points(depth1_, colour1_, normal1_, depth2_, colour2_, params, stream);
ftl::cuda::int_to_float(depth1_, depth2_, 1.0f / 1000.0f, stream);
src->writeFrames(colour2_, depth2_, stream);
} else {
ftl::cuda::int_to_float(depth1_, depth2_, 1.0f / 1000.0f, stream);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment