From f841d6f5eea263950cfb0a064d9e750e5f9bfe6e Mon Sep 17 00:00:00 2001
From: Nicolas Pope <nwpope@utu.fi>
Date: Fri, 9 Aug 2019 13:45:36 +0300
Subject: [PATCH] WIP Splat the merged points

---
 applications/reconstruct/src/dibr.cu          | 140 +++++++++++++++++-
 applications/reconstruct/src/splat_render.cpp |   4 +-
 .../reconstruct/src/splat_render_cuda.hpp     |   3 +-
 3 files changed, 141 insertions(+), 6 deletions(-)

diff --git a/applications/reconstruct/src/dibr.cu b/applications/reconstruct/src/dibr.cu
index 7cc0f6de5..97157c56f 100644
--- a/applications/reconstruct/src/dibr.cu
+++ b/applications/reconstruct/src/dibr.cu
@@ -67,7 +67,7 @@ __device__ inline bool isStable(const float3 &previous, const float3 &estimate,
 	if (camPos.z < params.camera.m_sensorDepthWorldMin) return;
 	if (camPos.z > params.camera.m_sensorDepthWorldMax) return;
 
-	const int upsample = 5; //min(UPSAMPLE_MAX, int((2.0f*r) * params.camera.fx / camPos.z));
+	const int upsample = min(UPSAMPLE_MAX-2, int((r) * params.camera.fx / camPos.z))+2;
 
 	const float interval = 1.0f / float(upsample / 2);
 
@@ -345,6 +345,136 @@ __global__ void OLD_dibr_visibility_kernel(TextureObject<int> depth, int cam, Sp
 	}
 }
 
+#define NEIGHBOR_RADIUS_2 5
+#define MAX_NEIGHBORS_2 ((NEIGHBOR_RADIUS_2*2+1)*(NEIGHBOR_RADIUS_2*2+1))
+
+
+/*
+ * Pass 2: Determine a depth buffer with enough accuracy for a visibility test
+ * in a later pass. These values are also used as the actual surface estimate
+ * during rendering, so they should at least be plane- or sphere-fitted, if not
+ * MLS smoothed onto the actual surface.
+ *
+ * This version uses a previous point render as the neighbour source.
+ */
+ __global__ void dibr_visibility_principal_kernel2(TextureObject<int> point_in, TextureObject<int> depth, SplatParams params) {
+	__shared__ float3 neighborhood_cache[2*T_PER_BLOCK][MAX_NEIGHBORS_2];
+	__shared__ int minimum[2*T_PER_BLOCK];
+	__shared__ int maximum[2*T_PER_BLOCK];
+
+	const int warp = threadIdx.x / WARP_SIZE + threadIdx.y*2;
+	const int x = (blockIdx.x*blockDim.x + threadIdx.x) / WARP_SIZE;
+	const int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	const float3 camPos = params.camera.kinectDepthToSkeleton(x,y, float(point_in.tex2D(x,y)) / 1000.0f);
+
+	const float r = 1.0f; //(camera.poseInverse * worldPos).z / camera.params.fx;
+
+	// Get virtual camera ray for splat centre and backface cull if possible
+	//const float3 rayOrigin = params.m_viewMatrixInverse * make_float3(0.0f,0.0f,0.0f);
+	//const float3 rayDir = normalize(params.m_viewMatrixInverse * params.camera.kinectDepthToSkeleton(x,y,1.0f) - rayOrigin);
+	//if (dot(rayDir, normal) > 0.0f) return;
+
+	// Find the virtual screen position of the current point
+	//const float3 camPos = params.m_viewMatrix * worldPos;
+	if (camPos.z < params.camera.m_sensorDepthWorldMin) return;
+	if (camPos.z > params.camera.m_sensorDepthWorldMax) return;
+	const uint2 screenPos = params.camera.cameraToKinectScreen(camPos);
+
+	const int upsample = 10; //min(UPSAMPLE_MAX, int((4.0f*r) * params.camera.fx / camPos.z));
+
+	// Not on screen so stop now...
+	//if (screenPos.x + upsample < 0 || screenPos.y + upsample < 0 ||
+	//		screenPos.x - upsample >= depth.width() || screenPos.y - upsample >= depth.height()) return;
+
+	// TODO:(Nick) Check depth buffer and don't do anything if already hidden?
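+	// A possible early-exit sketch for the above TODO (kept disabled while
+	// WIP): if the buffer already holds a much nearer surface at the splat
+	// centre, this splat is likely fully hidden and the warp could stop here.
+	// Assumes depth stores millimetre ints, as written by this pass below.
+	//if (screenPos.x < depth.width() && screenPos.y < depth.height() &&
+	//		depth(screenPos.x, screenPos.y) < int(camPos.z * 1000.0f)) return;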
+
+	// TODO:(Nick) Preload point neighbors and transform to eye
+	const int lane = threadIdx.x % WARP_SIZE;
+	if (lane == 0) {
+		minimum[warp] = 100000000;
+		maximum[warp] = -100000000;
+	}
+
+	__syncwarp();
+
+	for (int i=lane; i<MAX_NEIGHBORS_2; i+=WARP_SIZE) {
+		const int u = (i % (2*NEIGHBOR_RADIUS_2+1)) - NEIGHBOR_RADIUS_2;
+		const int v = (i / (2*NEIGHBOR_RADIUS_2+1)) - NEIGHBOR_RADIUS_2;
+		const float3 point = params.camera.kinectDepthToSkeleton(x+u, y+v, float(point_in.tex2D(x+u, y+v)) / 1000.0f);
+		neighborhood_cache[warp][i] = point;
+
+		if (length(point - camPos) <= 0.04f) {
+			atomicMin(&minimum[warp], int(point.z*1000.0f));
+			atomicMax(&maximum[warp], int(point.z*1000.0f));
+		}
+	}
+
+	__syncwarp();
+
+	const float interval = (float(maximum[warp])/1000.0f - float(minimum[warp])/1000.0f) / float(MAX_ITERATIONS);
+	//if (y == 200) printf("interval: %f\n", interval);
+
+	// TODO:(Nick) Find min and max depths of neighbors to estimate z bounds
+
+	// Each thread in the warp takes an upsample point and updates the corresponding depth buffer.
+	for (int i=lane; i<upsample*upsample; i+=WARP_SIZE) {
+		const float u = (i % upsample) - (upsample / 2);
+		const float v = (i / upsample) - (upsample / 2);
+
+		// Make an initial estimate of the point's location
+		// Use centroid depth as estimate...?
+		//float3 nearest = ftl::cuda::screen_centroid<1>(camera.points, make_float2(screenPos.x+u, screenPos.y+v), make_int2(x,y), params, upsample);
+
+		// Use the current point's z as estimate
+		// TODO: Use min point as estimate
+		float3 nearest = params.camera.kinectDepthToSkeleton(screenPos.x+u,screenPos.y+v,float(minimum[warp])/1000.0f);
+
+		// Or calculate upper and lower bounds for depth and do gradient
+		// descent until the gradient change is too small or max iter is reached
+		// and depth remains within the bounds.
+		// How to find min and max depths?
+
+		// TODO:(Nick) Estimate depth using the point's plane, but needs better normals.
+		//float t;
+		//if (ftl::cuda::intersectPlane(normal, worldPos, rayOrigin, rayDir, t)) {
+		// Plane based estimate of surface at this pixel
+		//const float3 nearest = rayOrigin + rayDir * camPos.z;
+
+		// Use MLS of camera neighbor points to get a more exact estimate,
+		// iterating until the pixel is stable on the surface.
+		for (int k=0; k<MAX_ITERATIONS; ++k) {
+
+			// TODO:(Nick) Should perhaps use points from all cameras?
+			// Instead of doing each camera separately...
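+			// The energy from mls_point_energy below is assumed to act as a
+			// smoothing-kernel-weighted support sum over the cached
+			// neighbourhood, roughly sum_i w(||p_i - nearest||) with w a
+			// Gaussian of width SPATIAL_SMOOTHING: zero means no neighbour
+			// supports this estimate (stop searching), and a large value
+			// means the estimate sits on the sampled surface (accept below).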
+			// If the depth is already close then this pixel has already been
+			// done, so the point can be skipped.
+			const float energy = ftl::cuda::mls_point_energy<MAX_NEIGHBORS_2>(neighborhood_cache[warp], nearest, SPATIAL_SMOOTHING);
+
+			if (energy <= 0.0f) break;
+
+			//ftl::cuda::render_depth(depth, params, output);
+
+			// This is essentially the SDF function f(x), except that the
+			// normal should also be estimated from the weights.
+			//const float d = nearest.z + (normal.x*output.x + normal.y*output.y + normal.z*output.z);
+
+			const float d = nearest.z;
+			nearest = params.camera.kinectDepthToSkeleton(screenPos.x+u,screenPos.y+v,d+interval);
+
+			if (energy >= 0.1f) {
+				const unsigned int cx = screenPos.x+u;
+				const unsigned int cy = screenPos.y+v;
+				if (d > params.camera.m_sensorDepthWorldMin && d < params.camera.m_sensorDepthWorldMax && cx < depth.width() && cy < depth.height()) {
+					// Transform estimated point to virtual cam space and output z
+					atomicMin(&depth(cx,cy), int(d * 1000.0f));
+				}
+				break;
+			}
+		}
+		//}
+	}
+}
+
+// ===== Pass 2 and 3: Attribute contributions =================================
+
 __device__ inline float4 make_float4(const uchar4 &c) {
 	return make_float4(c.x,c.y,c.z,c.w);
 }
@@ -485,7 +615,8 @@ void ftl::cuda::dibr(const TextureObject<int> &depth_out,
 		const TextureObject<uchar4> &colour_out,
 		const TextureObject<float4> &normal_out,
 		const TextureObject<float> &confidence_out,
-		const TextureObject<float4> &tmp_colour,
+		const TextureObject<float4> &tmp_colour,
+		const TextureObject<int> &tmp_depth,
 		int numcams, const SplatParams &params, cudaStream_t stream) {
@@ -507,7 +638,10 @@ void ftl::cuda::dibr(const TextureObject<int> &depth_out,
 	int i=3;
 
 	// Pass 1, gather and upsample depth maps
 	for (int i=0; i<numcams; ++i)
-		dibr_merge_upsample_kernel<<<sgridSize, sblockSize, 0, stream>>>(depth_out, i, params);
+		dibr_merge_upsample_kernel<<<sgridSize, sblockSize, 0, stream>>>(tmp_depth, i, params);
+
+	// Pass 2, produce a visibility-tested depth buffer from the merged points
+	dibr_visibility_principal_kernel2<<<sgridSize, sblockSize, 0, stream>>>(tmp_depth, depth_out, params);
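+
+	// While this pass is WIP it may be worth surfacing launch errors here,
+	// assuming the cudaSafeCall helper used elsewhere in this codebase:
+	//cudaSafeCall( cudaGetLastError() );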
 
 	// Pass 2, merge a depth map from each camera.
 	//for (int i=0; i<numcams; ++i)
diff --git a/applications/reconstruct/src/splat_render.cpp b/applications/reconstruct/src/splat_render.cpp
index 1f7a18f66..e2eb948e7 100644
--- a/applications/reconstruct/src/splat_render.cpp
+++ b/applications/reconstruct/src/splat_render.cpp
@@ -73,7 +73,7 @@ void Splatter::render(ftl::rgbd::Source *src, cudaStream_t stream) {
 	ftl::cuda::clear_depth(depth3_, stream);
 	ftl::cuda::clear_depth(depth2_, stream);
 	ftl::cuda::clear_colour(colour2_, stream);
-	ftl::cuda::dibr(depth1_, colour1_, normal1_, depth2_, colour_tmp_, scene_->cameraCount(), params, stream);
+	ftl::cuda::dibr(depth1_, colour1_, normal1_, depth2_, colour_tmp_, depth3_, scene_->cameraCount(), params, stream);
 
 	// Step 1: Put all points into virtual view to gather them
 	//ftl::cuda::dibr_raw(depth1_, scene_->cameraCount(), params, stream);
@@ -98,7 +98,7 @@ void Splatter::render(ftl::rgbd::Source *src, cudaStream_t stream) {
 		params.m_viewMatrixInverse = MatrixConversion::toCUDA(matrix);
 
 		ftl::cuda::clear_depth(depth1_, stream);
-		ftl::cuda::dibr(depth1_, colour1_, normal1_, depth2_, colour_tmp_, scene_->cameraCount(), params, stream);
+		ftl::cuda::dibr(depth1_, colour1_, normal1_, depth2_, colour_tmp_, depth3_, scene_->cameraCount(), params, stream);
 		src->writeFrames(colour1_, colour2_, stream);
 	} else {
 		if (src->value("splatting", false)) {
diff --git a/applications/reconstruct/src/splat_render_cuda.hpp b/applications/reconstruct/src/splat_render_cuda.hpp
index f8cbdbb6b..e60fc8c27 100644
--- a/applications/reconstruct/src/splat_render_cuda.hpp
+++ b/applications/reconstruct/src/splat_render_cuda.hpp
@@ -109,7 +109,8 @@ void dibr(const ftl::cuda::TextureObject<int> &depth_out,
 		const ftl::cuda::TextureObject<uchar4> &colour_out,
 		const ftl::cuda::TextureObject<float4> &normal_out,
 		const ftl::cuda::TextureObject<float> &confidence_out,
-		const ftl::cuda::TextureObject<float4> &tmp_colour, int numcams,
+		const ftl::cuda::TextureObject<float4> &tmp_colour,
+		const ftl::cuda::TextureObject<int> &tmp_depth, int numcams,
 		const ftl::render::SplatParams &params, cudaStream_t stream);
 
 /**
-- 
GitLab