
Implements #146 upsampling option

Merged Nicolas Pope requested to merge feature/146/upsample into master
3 files  +141  −6
@@ -67,7 +67,7 @@ __device__ inline bool isStable(const float3 &previous, const float3 &estimate,
if (camPos.z < params.camera.m_sensorDepthWorldMin) return;
if (camPos.z > params.camera.m_sensorDepthWorldMax) return;
-const int upsample = 5; //min(UPSAMPLE_MAX, int((2.0f*r) * params.camera.fx / camPos.z));
+const int upsample = min(UPSAMPLE_MAX-2, int((r) * params.camera.fx / camPos.z))+2;
const float interval = 1.0f / float(upsample / 2);
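For context, the replacement line sizes the sampling grid from the splat's projected footprint: a world-space radius r at depth z covers roughly r * fx / z pixels. A minimal host-side sketch of that logic (footprintPixels is an illustrative name, not from the codebase):

#include <algorithm>

// A splat of world-space radius r (metres) at depth z (metres) projects to
// about r * fx / z pixels. The min() clamp bounds the per-warp sampling
// loop and the "+2" keeps a minimum footprint for distant points.
inline int footprintPixels(float r, float fx, float z, int maxUpsample) {
    return std::min(maxUpsample - 2, int(r * fx / z)) + 2;
}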
@@ -345,6 +345,136 @@ __global__ void OLD_dibr_visibility_kernel(TextureObject<int> depth, int cam, Sp
}
}
#define NEIGHBOR_RADIUS_2 5
#define MAX_NEIGHBORS_2 ((NEIGHBOR_RADIUS_2*2+1)*(NEIGHBOR_RADIUS_2*2+1))
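// Worked example (not in the original source): NEIGHBOR_RADIUS_2 = 5 gives an
// 11x11 window, so MAX_NEIGHBORS_2 = (2*5+1)^2 = 121 cached points per
// pixel-warp; this also sizes the per-warp shared-memory cache below.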
/*
 * Pass 2: Determine a depth buffer with enough accuracy for the visibility test in the
 * later passes. These values are also used as the actual surface estimate during
 * rendering, so they should at least be plane or sphere fitted, if not MLS smoothed
 * onto the actual surface.
 *
 * This version uses a previous point render as the neighbour source.
 */
__global__ void dibr_visibility_principal_kernel2(TextureObject<int> point_in, TextureObject<int> depth, SplatParams params) {
    __shared__ float3 neighborhood_cache[2*T_PER_BLOCK][MAX_NEIGHBORS_2];
    __shared__ int minimum[2*T_PER_BLOCK];
    __shared__ int maximum[2*T_PER_BLOCK];

    // One warp per output pixel, two pixel-warps per block row.
    const int warp = threadIdx.x / WARP_SIZE + threadIdx.y*2;
    const int x = (blockIdx.x*blockDim.x + threadIdx.x) / WARP_SIZE;
    const int y = blockIdx.y*blockDim.y + threadIdx.y;
    const float3 camPos = params.camera.kinectDepthToSkeleton(x, y, float(point_in.tex2D(x,y)) / 1000.0f);
    const float r = 1.0f; //(camera.poseInverse * worldPos).z / camera.params.fx;

    // Get virtual camera ray for splat centre and backface cull if possible
    //const float3 rayOrigin = params.m_viewMatrixInverse * make_float3(0.0f,0.0f,0.0f);
    //const float3 rayDir = normalize(params.m_viewMatrixInverse * params.camera.kinectDepthToSkeleton(x,y,1.0f) - rayOrigin);
    //if (dot(rayDir, normal) > 0.0f) return;

    // Find the virtual screen position of the current point
    //const float3 camPos = params.m_viewMatrix * worldPos;
    if (camPos.z < params.camera.m_sensorDepthWorldMin) return;
    if (camPos.z > params.camera.m_sensorDepthWorldMax) return;

    const uint2 screenPos = params.camera.cameraToKinectScreen(camPos);
    const int upsample = 10; //min(UPSAMPLE_MAX, int((4.0f*r) * params.camera.fx / camPos.z));

    // Not on screen so stop now...
    //if (screenPos.x + upsample < 0 || screenPos.y + upsample < 0 ||
    //    screenPos.x - upsample >= depth.width() || screenPos.y - upsample >= depth.height()) return;

    // TODO:(Nick) Check depth buffer and don't do anything if already hidden?
    // TODO:(Nick) Preload point neighbors and transform to eye

    const int lane = threadIdx.x % WARP_SIZE;
    if (lane == 0) {
        minimum[warp] = 100000000;
        maximum[warp] = -100000000;
    }
    __syncwarp();

    for (int i=lane; i<MAX_NEIGHBORS_2; i+=WARP_SIZE) {
        const int u = (i % (2*NEIGHBOR_RADIUS_2+1)) - NEIGHBOR_RADIUS_2;
        const int v = (i / (2*NEIGHBOR_RADIUS_2+1)) - NEIGHBOR_RADIUS_2;
        const float3 point = params.camera.kinectDepthToSkeleton(x+u, y+v, float(point_in.tex2D(x+u, y+v)) / 1000.0f);
        neighborhood_cache[warp][i] = point;

        // Track the depth range of nearby points in integer millimetres,
        // since atomicMin/atomicMax have no float overloads.
        if (length(point - camPos) <= 0.04f) {
            atomicMin(&minimum[warp], int(point.z*1000.0f));
            atomicMax(&maximum[warp], int(point.z*1000.0f));
        }
    }

    __syncwarp();

    const float interval = (float(maximum[warp])/1000.0f - float(minimum[warp])/1000.0f) / float(MAX_ITERATIONS);
    //if (y == 200) printf("interval: %f\n", interval);

    // TODO:(Nick) Find min and max depths of neighbors to estimate z bounds

    // Each thread in the warp takes an upsample point and updates the corresponding depth buffer.
    for (int i=lane; i<upsample*upsample; i+=WARP_SIZE) {
        const float u = (i % upsample) - (upsample / 2);
        const float v = (i / upsample) - (upsample / 2);

        // Make an initial estimate of the point's location
        // Use centroid depth as estimate...?
        //float3 nearest = ftl::cuda::screen_centroid<1>(camera.points, make_float2(screenPos.x+u, screenPos.y+v), make_int2(x,y), params, upsample);

        // Use the minimum neighbourhood depth as the initial estimate
        float3 nearest = params.camera.kinectDepthToSkeleton(screenPos.x+u, screenPos.y+v, float(minimum[warp])/1000.0f);

        // Or calculate upper and lower bounds for depth and do gradient
        // descent until the gradient change is too small or max iter is reached
        // and depth remains within the bounds.
        // How to find min and max depths?

        // TODO:(Nick) Estimate depth using the point's plane, but needs better normals.
        //float t;
        //if (ftl::cuda::intersectPlane(normal, worldPos, rayOrigin, rayDir, t)) {
        // Plane based estimate of surface at this pixel
        //const float3 nearest = rayOrigin + rayDir * camPos.z;

        // Use MLS of camera neighbor points to get a more exact estimate.
        // Iterate until the pixel is stable on the surface.
        for (int k=0; k<MAX_ITERATIONS; ++k) {
            // TODO:(Nick) Should perhaps use points from all cameras
            // instead of doing each camera separately?
            // If the depth is already close then it has already been done and this point can be skipped.
            const float energy = ftl::cuda::mls_point_energy<MAX_NEIGHBORS_2>(neighborhood_cache[warp], nearest, SPATIAL_SMOOTHING);

            if (energy <= 0.0f) break;

            //ftl::cuda::render_depth(depth, params, output);

            // This is essentially the SDF function f(x), only the normal should also be estimated from the weights
            //const float d = nearest.z + (normal.x*output.x + normal.y*output.y + normal.z*output.z);
            const float d = nearest.z;
            nearest = params.camera.kinectDepthToSkeleton(screenPos.x+u, screenPos.y+v, d+interval);

            if (energy >= 0.1f) {
                const unsigned int cx = screenPos.x+u;
                const unsigned int cy = screenPos.y+v;
                if (d > params.camera.m_sensorDepthWorldMin && d < params.camera.m_sensorDepthWorldMax && cx < depth.width() && cy < depth.height()) {
                    // Output z of the estimated point in integer millimetres.
                    atomicMin(&depth(cx,cy), int(d * 1000.0f));
                }
                break;
            }
        }
        //}
    }
}
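// Sketch only: ftl::cuda::mls_point_energy is defined elsewhere in the
// codebase and not shown in this diff. The following is one plausible form,
// assuming truncated Gaussian weights over the cached neighbourhood; it is
// an illustration of the idea, not the actual implementation.
template <int M>
__device__ float mls_point_energy_sketch(const float3 *neighbours,
                                         const float3 &estimate,
                                         float smoothing) {
    float energy = 0.0f;
    for (int i=0; i<M; ++i) {
        const float dx = neighbours[i].x - estimate.x;
        const float dy = neighbours[i].y - estimate.y;
        const float dz = neighbours[i].z - estimate.z;
        // Gaussian falloff: neighbours more than a few smoothing radii away
        // contribute almost nothing, so energy tends to zero off the surface.
        energy += expf(-(dx*dx + dy*dy + dz*dz) / (2.0f*smoothing*smoothing));
    }
    return energy;
}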
// ===== Pass 2 and 3 : Attribute contributions ================================
__device__ inline float4 make_float4(const uchar4 &c) {
    return make_float4(c.x, c.y, c.z, c.w);
}
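Both kernels write depth with atomicMin on a TextureObject<int> rather than a float buffer: CUDA has no native atomicMin for float, so depths are stored as integer millimetres and racing warps resolve to the nearest surface. A minimal standalone sketch of that pattern (the plain buffer and names are illustrative, not from the codebase):

// Illustrative kernel: depth held as integer millimetres so atomicMin can
// resolve write races; the buffer must be pre-initialised to a large
// sentinel (cf. the 100000000 initialiser in the kernel above).
__global__ void depth_min_sketch(int *depth_mm, int width, int height,
                                 const float *candidate_m) {
    const int x = blockIdx.x*blockDim.x + threadIdx.x;
    const int y = blockIdx.y*blockDim.y + threadIdx.y;
    if (x >= width || y >= height) return;
    atomicMin(&depth_mm[y*width + x], int(candidate_m[y*width + x] * 1000.0f));
}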
@@ -485,7 +615,8 @@ void ftl::cuda::dibr(const TextureObject<int> &depth_out,
        const TextureObject<uchar4> &colour_out,
        const TextureObject<float4> &normal_out,
        const TextureObject<float> &confidence_out,
        const TextureObject<float4> &tmp_colour,
+       const TextureObject<int> &tmp_depth,
        int numcams,
        const SplatParams &params,
        cudaStream_t stream) {
@@ -507,7 +638,10 @@ void ftl::cuda::dibr(const TextureObject<int> &depth_out,
    int i=3;

    // Pass 1, gather and upsample depth maps
    for (int i=0; i<numcams; ++i)
-       dibr_merge_upsample_kernel<<<sgridSize, sblockSize, 0, stream>>>(depth_out, i, params);
+       dibr_merge_upsample_kernel<<<sgridSize, sblockSize, 0, stream>>>(tmp_depth, i, params);

+   // Pass 2
+   dibr_visibility_principal_kernel2<<<sgridSize, sblockSize, 0, stream>>>(tmp_depth, depth_out, params);

    // Pass 2, merge a depth map from each camera.
    //for (int i=0; i<numcams; ++i)
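sgridSize and sblockSize are declared outside this hunk, but the kernel's indexing (x divided by WARP_SIZE, warp = threadIdx.x/WARP_SIZE + threadIdx.y*2, shared caches sized 2*T_PER_BLOCK) implies one warp per output pixel and two pixels per block row. A reconstruction of that launch geometry, under those assumptions only:

// Assumed launch geometry, inferred from the kernel's indexing; the actual
// sgridSize/sblockSize are defined elsewhere in the file and may differ.
const dim3 sblockSize(2*WARP_SIZE, T_PER_BLOCK);   // two pixel-warps per row
const dim3 sgridSize((width + 1) / 2,              // ceil(width / 2) blocks in x
                     (height + T_PER_BLOCK - 1) / T_PER_BLOCK);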