From f841d6f5eea263950cfb0a064d9e750e5f9bfe6e Mon Sep 17 00:00:00 2001
From: Nicolas Pope <nwpope@utu.fi>
Date: Fri, 9 Aug 2019 13:45:36 +0300
Subject: [PATCH] WIP Splat the merged points

---
 applications/reconstruct/src/dibr.cu          | 140 +++++++++++++++++-
 applications/reconstruct/src/splat_render.cpp |   4 +-
 .../reconstruct/src/splat_render_cuda.hpp     |   3 +-
 3 files changed, 141 insertions(+), 6 deletions(-)

diff --git a/applications/reconstruct/src/dibr.cu b/applications/reconstruct/src/dibr.cu
index 7cc0f6de5..97157c56f 100644
--- a/applications/reconstruct/src/dibr.cu
+++ b/applications/reconstruct/src/dibr.cu
@@ -67,7 +67,7 @@ __device__ inline bool isStable(const float3 &previous, const float3 &estimate,
 	if (camPos.z < params.camera.m_sensorDepthWorldMin) return;
 	if (camPos.z > params.camera.m_sensorDepthWorldMax) return;
 
-	const int upsample = 5; //min(UPSAMPLE_MAX, int((2.0f*r) * params.camera.fx / camPos.z));
+	const int upsample = min(UPSAMPLE_MAX-2, int((r) * params.camera.fx / camPos.z))+2;
 	const float interval = 1.0f / float(upsample / 2);
 
             
@@ -345,6 +345,136 @@ __global__ void OLD_dibr_visibility_kernel(TextureObject<int> depth, int cam, Sp
 	}
 }
 
+#define NEIGHBOR_RADIUS_2 5
+#define MAX_NEIGHBORS_2 ((NEIGHBOR_RADIUS_2*2+1)*(NEIGHBOR_RADIUS_2*2+1))
+
+
+/*
+ * Pass 2: Determine depth buffer with enough accuracy for a visibility test in pass 2.
+ * These values are also used as the actual surface estimate during rendering so should
+ * at least be plane or sphere fitted if not MLS smoothed onto the actual surface.
+ *
+ * This version uses a previous point render as neighbour source.
+ */
+ __global__ void dibr_visibility_principal_kernel2(TextureObject<int> point_in, TextureObject<int> depth, SplatParams params) {
+	__shared__ float3 neighborhood_cache[2*T_PER_BLOCK][MAX_NEIGHBORS_2];
+	__shared__ int minimum[2*T_PER_BLOCK];
+	__shared__ int maximum[2*T_PER_BLOCK];
+
+	const int warp = threadIdx.x / WARP_SIZE + threadIdx.y*2;
+	const int x = (blockIdx.x*blockDim.x + threadIdx.x) / WARP_SIZE;
+	const int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	const float3 camPos = params.camera.kinectDepthToSkeleton(x,y, float(point_in.tex2D(x,y)) / 1000.0f);
+
+    const float r = 1.0f; //(camera.poseInverse * worldPos).z / camera.params.fx;
+
+	// Get virtual camera ray for splat centre and backface cull if possible
+	//const float3 rayOrigin = params.m_viewMatrixInverse * make_float3(0.0f,0.0f,0.0f);
+	//const float3 rayDir = normalize(params.m_viewMatrixInverse * params.camera.kinectDepthToSkeleton(x,y,1.0f) - rayOrigin);
+	//if (dot(rayDir, normal) > 0.0f) return;
+
+    // Find the virtual screen position of current point
+	//const float3 camPos = params.m_viewMatrix * worldPos;
+	if (camPos.z < params.camera.m_sensorDepthWorldMin) return;
+	if (camPos.z > params.camera.m_sensorDepthWorldMax) return;
+	const uint2 screenPos = params.camera.cameraToKinectScreen(camPos);
+
+	const int upsample = 10; //min(UPSAMPLE_MAX, int((4.0f*r) * params.camera.fx / camPos.z));
+
+	// Not on screen so stop now...
+	//if (screenPos.x + upsample < 0 || screenPos.y + upsample < 0 ||
+    //        screenPos.x - upsample >= depth.width() || screenPos.y - upsample >= depth.height()) return;
+            
+	// TODO:(Nick) Check depth buffer and don't do anything if already hidden?
+	
+	// TODO:(Nick) Preload point neighbors and transform to eye
+	const int lane = threadIdx.x % WARP_SIZE;
+	if (lane == 0) {
+		minimum[warp] = 100000000;
+		maximum[warp] = -100000000;
+	}
+
+	__syncwarp();
+
+	for (int i=lane; i<MAX_NEIGHBORS_2; i+=WARP_SIZE) {
+		const int u = (i % (2*NEIGHBOR_RADIUS_2+1)) - NEIGHBOR_RADIUS_2;
+		const int v = (i / (2*NEIGHBOR_RADIUS_2+1)) - NEIGHBOR_RADIUS_2;
+		const float3 point = params.camera.kinectDepthToSkeleton(x+u, y+v, float(point_in.tex2D(x+u, y+v)) / 1000.0f);
+		neighborhood_cache[warp][i] = point;
+
+		if (length(point - camPos) <= 0.04f) {
+			atomicMin(&minimum[warp], point.z*1000.0f);
+			atomicMax(&maximum[warp], point.z*1000.0f);
+		}
+	}
+
+	__syncwarp();
+	
+	const float interval = (float(maximum[warp])/1000.0f - float(minimum[warp]) / 1000.0f) / float(MAX_ITERATIONS);
+	//if (y == 200) printf("interval: %f\n", interval);
+
+	// TODO:(Nick) Find min and max depths of neighbors to estimate z bounds
+
+	// Each thread in warp takes an upsample point and updates corresponding depth buffer.
+	for (int i=lane; i<upsample*upsample; i+=WARP_SIZE) {
+		const float u = (i % upsample) - (upsample / 2);
+		const float v = (i / upsample) - (upsample / 2);
+
+        // Make an initial estimate of the points location
+		// Use centroid depth as estimate...?
+        //float3 nearest = ftl::cuda::screen_centroid<1>(camera.points, make_float2(screenPos.x+u, screenPos.y+v), make_int2(x,y), params, upsample);
+
+		// Use current points z as estimate
+		// TODO: Use min point as estimate
+		float3 nearest = params.camera.kinectDepthToSkeleton(screenPos.x+u,screenPos.y+v,float(minimum[warp])/1000.0f);
+		
+		// Or calculate upper and lower bounds for depth and do gradient
+		// descent until the gradient change is too small or max iter is reached
+		// and depth remains within the bounds.
+		// How to find min and max depths?
+
+		// TODO: (Nick) Estimate depth using points plane, but needs better normals.
+		//float t;
+		//if (ftl::cuda::intersectPlane(normal, worldPos, rayOrigin, rayDir, t)) {
+			// Plane based estimate of surface at this pixel
+			//const float3 nearest = rayOrigin + rayDir * camPos.z;
+
+            // Use MLS of camera neighbor points to get more exact estimate
+            // Iterate until pixel is stable on the surface.
+            for (int k=0; k<MAX_ITERATIONS; ++k) {
+
+                // TODO:(Nick) Should perhaps use points from all cameras?
+                // Instead of doing each camera separately...
+                // If the depth already is close then it has already been done and can skip this point
+				const float energy = ftl::cuda::mls_point_energy<MAX_NEIGHBORS_2>(neighborhood_cache[warp], nearest, SPATIAL_SMOOTHING);
+				
+				if (energy <= 0.0f) break;
+            
+				//ftl::cuda::render_depth(depth, params, output);
+
+                // This is essentially the SDF function f(x), only the normal should be estimated also from the weights
+                //const float d = nearest.z + (normal.x*output.x + normal.y*output.y + normal.z*output.z);
+
+				const float d = nearest.z;
+				nearest = params.camera.kinectDepthToSkeleton(screenPos.x+u,screenPos.y+v,d+interval);
+				
+				if (energy >= 0.1f) {
+					const unsigned int cx = screenPos.x+u;
+                    const unsigned int cy = screenPos.y+v;
+					if (d > params.camera.m_sensorDepthWorldMin && d < params.camera.m_sensorDepthWorldMax && cx < depth.width() && cy < depth.height()) {
+                        // Transform estimated point to virtual cam space and output z
+                        atomicMin(&depth(cx,cy), d * 1000.0f);
+					}
+					break;
+				}
+			}
+		//}
+	}
+}
+
+// ===== Pass 2 and 3 : Attribute contributions ================================
+
 __device__ inline float4 make_float4(const uchar4 &c) {
     return make_float4(c.x,c.y,c.z,c.w);
 }
@@ -485,7 +615,8 @@ void ftl::cuda::dibr(const TextureObject<int> &depth_out,
         const TextureObject<uchar4> &colour_out,
         const TextureObject<float4> &normal_out,
         const TextureObject<float> &confidence_out,
-        const TextureObject<float4> &tmp_colour,
+		const TextureObject<float4> &tmp_colour,
+		const TextureObject<int> &tmp_depth,
         int numcams,
         const SplatParams &params,
         cudaStream_t stream) {
@@ -507,7 +638,10 @@ void ftl::cuda::dibr(const TextureObject<int> &depth_out,
 	int i=3;
 	// Pass 1, gather and upsample depth maps
 	for (int i=0; i<numcams; ++i)
-		dibr_merge_upsample_kernel<<<sgridSize, sblockSize, 0, stream>>>(depth_out, i, params);
+		dibr_merge_upsample_kernel<<<sgridSize, sblockSize, 0, stream>>>(tmp_depth, i, params);
+
+	// Pass 2
+	dibr_visibility_principal_kernel2<<<sgridSize, sblockSize, 0, stream>>>(tmp_depth, depth_out, params);
 
     // Pass 2, merge a depth map from each camera.
 	//for (int i=0; i<numcams; ++i)
diff --git a/applications/reconstruct/src/splat_render.cpp b/applications/reconstruct/src/splat_render.cpp
index 1f7a18f66..e2eb948e7 100644
--- a/applications/reconstruct/src/splat_render.cpp
+++ b/applications/reconstruct/src/splat_render.cpp
@@ -73,7 +73,7 @@ void Splatter::render(ftl::rgbd::Source *src, cudaStream_t stream) {
 		ftl::cuda::clear_depth(depth3_, stream);
 		ftl::cuda::clear_depth(depth2_, stream);
 		ftl::cuda::clear_colour(colour2_, stream);
-		ftl::cuda::dibr(depth1_, colour1_, normal1_, depth2_, colour_tmp_, scene_->cameraCount(), params, stream);
+		ftl::cuda::dibr(depth1_, colour1_, normal1_, depth2_, colour_tmp_, depth3_, scene_->cameraCount(), params, stream);
 
 		// Step 1: Put all points into virtual view to gather them
 		//ftl::cuda::dibr_raw(depth1_, scene_->cameraCount(), params, stream);
@@ -98,7 +98,7 @@ void Splatter::render(ftl::rgbd::Source *src, cudaStream_t stream) {
 			params.m_viewMatrixInverse = MatrixConversion::toCUDA(matrix);
 
 			ftl::cuda::clear_depth(depth1_, stream);
-			ftl::cuda::dibr(depth1_, colour1_, normal1_, depth2_, colour_tmp_, scene_->cameraCount(), params, stream);
+			ftl::cuda::dibr(depth1_, colour1_, normal1_, depth2_, colour_tmp_, depth3_, scene_->cameraCount(), params, stream);
 			src->writeFrames(colour1_, colour2_, stream);
 		} else {
 			if (src->value("splatting",  false)) {
diff --git a/applications/reconstruct/src/splat_render_cuda.hpp b/applications/reconstruct/src/splat_render_cuda.hpp
index f8cbdbb6b..e60fc8c27 100644
--- a/applications/reconstruct/src/splat_render_cuda.hpp
+++ b/applications/reconstruct/src/splat_render_cuda.hpp
@@ -109,7 +109,8 @@ void dibr(const ftl::cuda::TextureObject<int> &depth_out,
 		const ftl::cuda::TextureObject<uchar4> &colour_out,
 		const ftl::cuda::TextureObject<float4> &normal_out,
         const ftl::cuda::TextureObject<float> &confidence_out,
-        const ftl::cuda::TextureObject<float4> &tmp_colour, int numcams,
+        const ftl::cuda::TextureObject<float4> &tmp_colour,
+        const ftl::cuda::TextureObject<int> &tmp_depth, int numcams,
 		const ftl::render::SplatParams &params, cudaStream_t stream);
 
 /**
-- 
GitLab