From 1c623d96656f98631a22a196bd281abdaf0406be Mon Sep 17 00:00:00 2001
From: Nicolas Pope <nwpope@utu.fi>
Date: Mon, 7 Oct 2019 11:32:28 +0300
Subject: [PATCH] WIP: Alternate way of making normals

---
 .../cpp/include/ftl/cuda/normals.hpp          |  10 ++
 components/renderers/cpp/src/normals.cu       | 111 +++++++++++++++++-
 components/renderers/cpp/src/splat_render.cpp |  20 +++-
 3 files changed, 136 insertions(+), 5 deletions(-)

diff --git a/components/renderers/cpp/include/ftl/cuda/normals.hpp b/components/renderers/cpp/include/ftl/cuda/normals.hpp
index b9801743a..0e1b8a46f 100644
--- a/components/renderers/cpp/include/ftl/cuda/normals.hpp
+++ b/components/renderers/cpp/include/ftl/cuda/normals.hpp
@@ -11,9 +11,19 @@ namespace cuda {
 void normals(ftl::cuda::TextureObject<float4> &output,
         ftl::cuda::TextureObject<float4> &temp,
         ftl::cuda::TextureObject<float4> &input,
+		int radius,  // Selects the smooth_normals_kernel<RADIUS> instantiation (3, 5 or 7)
+		float smoothing,  // Last argument of the smoothing kernel; previously hard-coded to 0.04f
         const ftl::rgbd::Camera &camera,
         const float3x3 &pose, cudaStream_t stream);
 
+void normals(ftl::cuda::TextureObject<float4> &output,  // Receives the smoothed normals
+        ftl::cuda::TextureObject<float4> &temp,  // Scratch texture for the unsmoothed normals
+        ftl::cuda::TextureObject<int> &input,  // Integer depth values (the kernels divide them by 1000.0f)
+		int radius,  // Selects the smooth_normals_kernel<RADIUS> instantiation (3, 5 or 7)
+		float smoothing,  // Passed to the smoothing kernel, which hands it to spatialWeighting()
+        const ftl::rgbd::Camera &camera,
+        const float3x3 &pose_inv, const float3x3 &pose, cudaStream_t stream);  // pose rotates the raw normals; pose_inv goes to the smoothing kernel
+
 void normal_visualise(ftl::cuda::TextureObject<float4> &norm,
         ftl::cuda::TextureObject<uchar4> &output,
         const float3 &light, const uchar4 &diffuse, const uchar4 &ambient,
diff --git a/components/renderers/cpp/src/normals.cu b/components/renderers/cpp/src/normals.cu
index 4d6686931..7f9b1bb18 100644
--- a/components/renderers/cpp/src/normals.cu
+++ b/components/renderers/cpp/src/normals.cu
@@ -31,6 +31,38 @@ __global__ void computeNormals_kernel(ftl::cuda::TextureObject<float4> output,
 	}
 }
 
+__device__ inline bool isValid(const ftl::rgbd::Camera &camera, const float3 &d) {  // Depth range test on d.z
+	return camera.minDepth <= d.z && camera.maxDepth >= d.z;  // Both bounds are inclusive
+}
+
+__global__ void computeNormals_kernel(ftl::cuda::TextureObject<float4> output,
+		ftl::cuda::TextureObject<int> input, ftl::rgbd::Camera camera, float3x3 pose) {
+	// One thread per pixel of a 2D launch; threads outside the depth image exit.
+	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
+	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
+	if(x >= input.width() || y >= input.height()) return;
+
+	// Default result is the zero vector with w = 0; every early exit below keeps it.
+	output(x,y) = make_float4(0, 0, 0, 0);
+	const bool interior = x > 0 && x < input.width()-1 && y > 0 && y < input.height()-1;
+	if (!interior) return;
+
+	// Back-project the pixel and its four axis neighbours (stored depth / 1000).
+	const float3 c     = camera.screenToCam(x, y, (float)input.tex2D((int)x, (int)y) / 1000.0f);
+	const float3 yNext = camera.screenToCam(x, y+1, (float)input.tex2D((int)x, (int)y+1) / 1000.0f);
+	const float3 xNext = camera.screenToCam(x+1, y, (float)input.tex2D((int)x+1, (int)y) / 1000.0f);
+	const float3 yPrev = camera.screenToCam(x, y-1, (float)input.tex2D((int)x, (int)y-1) / 1000.0f);
+	const float3 xPrev = camera.screenToCam(x-1, y, (float)input.tex2D((int)x-1, (int)y) / 1000.0f);
+
+	// All five samples must pass isValid() (depth inside the camera's range).
+	const bool usable = isValid(camera,c) && isValid(camera,yNext) && isValid(camera,xNext) && isValid(camera,yPrev) && isValid(camera,xPrev);
+	if (!usable) return;
+
+	const float3 n = cross(yNext-yPrev, xNext-xPrev);
+	const float  l = length(n);
+	if (l > 0.0f) output(x,y) = make_float4(pose * (n/-l), 1.0f);  // Negated unit normal rotated by pose, w = 1
+}
+
 template <int RADIUS>
 __global__ void smooth_normals_kernel(ftl::cuda::TextureObject<float4> norms,
         ftl::cuda::TextureObject<float4> output,
@@ -74,9 +106,54 @@ __global__ void smooth_normals_kernel(ftl::cuda::TextureObject<float4> norms,
     output(x,y) = (contrib > 0.0f) ? make_float4(nsum, dot(nsum, ray)) : make_float4(0.0f);
 }
 
+template <int RADIUS>
+__global__ void smooth_normals_kernel(ftl::cuda::TextureObject<float4> norms,
+        ftl::cuda::TextureObject<float4> output,
+        ftl::cuda::TextureObject<int> depth,
+        ftl::rgbd::Camera camera, float3x3 pose, float smoothing) {
+    const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
+    const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
+    // Smooths norms over a (2*RADIUS+1)^2 window, weighting each neighbour by
+    // spatialWeighting() of its back-projected point against the centre point.
+    if(x >= depth.width() || y >= depth.height()) return;
+    const float3 p0 = camera.screenToCam(x,y, (float)depth.tex2D((int)x,(int)y) / 1000.0f);
+    float3 nsum = make_float3(0.0f);
+    float contrib = 0.0f;
+    // A centre pixel with out-of-range depth is skipped; output(x,y) is left untouched.
+    if (p0.z < camera.minDepth || p0.z > camera.maxDepth) return;
+
+    for (int v=-RADIUS; v<=RADIUS; ++v) {
+        for (int u=-RADIUS; u<=RADIUS; ++u) {
+            const float3 p = camera.screenToCam(x+u,y+v, (float)depth.tex2D((int)x+u,(int)y+v) / 1000.0f);
+            if (p.z < camera.minDepth || p.z > camera.maxDepth) continue;
+            const float s = ftl::cuda::spatialWeighting(p0, p, smoothing);
+            //const float s = 1.0f;
+
+            //if (s > 0.0f) {
+                const float4 n = norms.tex2D((int)x+u,(int)y+v);
+                //if (n.w > 0.0f) {
+                    nsum += make_float3(n) * s;
+                    contrib += s;
+                //}
+            //}
+        }
+    }
+
+    // Only the direction of the weighted sum matters, so it is normalised by
+    // its own length; dividing by contrib first would not change the result.
+    // A zero-length sum (e.g. every contributing neighbour had a zero normal)
+    // must give the empty result rather than a NaN normal from 0/0.
+    // The confidence term dot(nsum, ray) stays disabled (w = 1), so pose is unused.
+    const float len = length(nsum);
+
+    output(x,y) = (contrib > 0.0f && len > 0.0f) ? make_float4(nsum / len, 1.0f) : make_float4(0.0f);
+}
+
 void ftl::cuda::normals(ftl::cuda::TextureObject<float4> &output,
         ftl::cuda::TextureObject<float4> &temp,
-        ftl::cuda::TextureObject<float4> &input,
+		ftl::cuda::TextureObject<float4> &input,
+		int radius,
+		float smoothing,
         const ftl::rgbd::Camera &camera,
         const float3x3 &pose,cudaStream_t stream) {
 	const dim3 gridSize((input.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (input.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
@@ -85,7 +162,11 @@ void ftl::cuda::normals(ftl::cuda::TextureObject<float4> &output,
 	computeNormals_kernel<<<gridSize, blockSize, 0, stream>>>(temp, input);
     cudaSafeCall( cudaGetLastError() );
 
-    smooth_normals_kernel<3><<<gridSize, blockSize, 0, stream>>>(temp, output, input, camera, pose, 0.04f);
+	switch (radius) {  // Each case breaks so exactly one kernel runs; radii other than 3, 5, 7 launch none
+	case 7: smooth_normals_kernel<7><<<gridSize, blockSize, 0, stream>>>(temp, output, input, camera, pose, smoothing); break;
+	case 5: smooth_normals_kernel<5><<<gridSize, blockSize, 0, stream>>>(temp, output, input, camera, pose, smoothing); break;
+	case 3: smooth_normals_kernel<3><<<gridSize, blockSize, 0, stream>>>(temp, output, input, camera, pose, smoothing); break;
+	}
     cudaSafeCall( cudaGetLastError() );
 
 #ifdef _DEBUG
@@ -94,6 +175,32 @@ void ftl::cuda::normals(ftl::cuda::TextureObject<float4> &output,
 #endif
 }
 
+void ftl::cuda::normals(ftl::cuda::TextureObject<float4> &output,  // Receives the smoothed normals
+		ftl::cuda::TextureObject<float4> &temp,  // Scratch texture for the unsmoothed normals
+		ftl::cuda::TextureObject<int> &input,  // Integer depth map (kernels divide values by 1000.0f)
+		int radius,  // Smoothing window radius: 3, 5 or 7
+		float smoothing,  // Forwarded to the smoothing kernel's spatial weighting
+		const ftl::rgbd::Camera &camera,
+		const float3x3 &pose_inv, const float3x3 &pose,cudaStream_t stream) {
+	const dim3 gridSize((input.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (input.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
+	// Pass 1: per-pixel normals from the depth map, rotated by pose, written to temp.
+	computeNormals_kernel<<<gridSize, blockSize, 0, stream>>>(temp, input, camera, pose);
+	cudaSafeCall( cudaGetLastError() );
+	// Pass 2: smooth temp into output. Without the breaks, radius 7 or 5 would also launch the smaller kernels, which overwrite output.
+	switch (radius) {  // Radii other than 3, 5, 7 launch no smoothing kernel
+	case 7: smooth_normals_kernel<7><<<gridSize, blockSize, 0, stream>>>(temp, output, input, camera, pose_inv, smoothing); break;
+	case 5: smooth_normals_kernel<5><<<gridSize, blockSize, 0, stream>>>(temp, output, input, camera, pose_inv, smoothing); break;
+	case 3: smooth_normals_kernel<3><<<gridSize, blockSize, 0, stream>>>(temp, output, input, camera, pose_inv, smoothing); break;
+	}
+	cudaSafeCall( cudaGetLastError() );
+
+	#ifdef _DEBUG
+	cudaSafeCall(cudaDeviceSynchronize());
+	//cutilCheckMsg(__FUNCTION__);
+	#endif
+}
+
 //==============================================================================
 
 __global__ void vis_normals_kernel(ftl::cuda::TextureObject<float4> norm,
diff --git a/components/renderers/cpp/src/splat_render.cpp b/components/renderers/cpp/src/splat_render.cpp
index 895ea79ff..f96054587 100644
--- a/components/renderers/cpp/src/splat_render.cpp
+++ b/components/renderers/cpp/src/splat_render.cpp
@@ -168,7 +168,7 @@ void Splatter::renderChannel(
 	out.get<GpuMat>(Channel::Normals).setTo(cv::Scalar(0.0f,0.0f,0.0f,0.0f), cvstream);
 
 	// Create normals first
-	for (auto &f : scene_->frames) {
+	/*for (auto &f : scene_->frames) {
 
 		ftl::cuda::dibr_attribute(
 			f.createTexture<float4>(Channel::Normals),
@@ -185,7 +185,19 @@ void Splatter::renderChannel(
 		out.getTexture<float4>(Channel::Normals),
 		temp_.getTexture<float>(Channel::Contribution),
 		stream
-	);
+	);*/
+
+	//auto &t = out.createTexture<float4>(Channel::Points, Format<float4>(params.camera.width, params.camera.height));
+	//ftl::cuda::point_cloud(t, temp_.getTexture<int>(Channel::Depth2), params.camera, params.m_viewMatrixInverse, 0, stream);
+	ftl::cuda::normals(out.createTexture<float4>(Channel::Normals),
+		temp_.getTexture<float4>(Channel::Normals),
+		temp_.getTexture<int>(Channel::Depth2), 
+		5, 0.04f,
+		params.camera, params.m_viewMatrixInverse.getFloat3x3(), params.m_viewMatrix.getFloat3x3(), stream);
+
+	//if (norm_filter_ > -0.1f) {
+	//	ftl::cuda::normal_filter(f.getTexture<float4>(Channel::Normals), f.getTexture<float4>(Channel::Points), s->parameters(), pose, norm_filter_, stream);
+	//}
 
 	temp_.get<GpuMat>(Channel::Colour).setTo(cv::Scalar(0.0f,0.0f,0.0f,0.0f), cvstream);
 	temp_.get<GpuMat>(Channel::Contribution).setTo(cv::Scalar(0.0f), cvstream);
@@ -369,7 +381,9 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cuda
 			auto &g = f.get<GpuMat>(Channel::Colour);
 			ftl::cuda::normals(f.createTexture<float4>(Channel::Normals, Format<float4>(g.cols, g.rows)),
 				temp_.getTexture<float4>(Channel::Normals),  // FIXME: Uses assumption of vcam res same as input res
-				f.getTexture<float4>(Channel::Points), s->parameters(), pose.getFloat3x3(), stream);
+				f.getTexture<float4>(Channel::Points),
+				3, 0.04f,
+				s->parameters(), pose.getFloat3x3(), stream);
 
 			if (norm_filter_ > -0.1f) {
 				ftl::cuda::normal_filter(f.getTexture<float4>(Channel::Normals), f.getTexture<float4>(Channel::Points), s->parameters(), pose, norm_filter_, stream);
-- 
GitLab