From e4c15a9620213880048c013f2da9150e4580f0aa Mon Sep 17 00:00:00 2001
From: Nicolas Pope <nwpope@utu.fi>
Date: Wed, 2 Oct 2019 12:14:06 +0300
Subject: [PATCH] Implement normal smoothing

---
 .../cpp/include/ftl/cuda/normals.hpp          |  1 +
 components/renderers/cpp/src/normals.cu       | 45 ++++++++++++++++++-
 components/renderers/cpp/src/splat_render.cpp |  5 ++-
 3 files changed, 48 insertions(+), 3 deletions(-)

diff --git a/components/renderers/cpp/include/ftl/cuda/normals.hpp b/components/renderers/cpp/include/ftl/cuda/normals.hpp
index 50f2e881a..f692e02a4 100644
--- a/components/renderers/cpp/include/ftl/cuda/normals.hpp
+++ b/components/renderers/cpp/include/ftl/cuda/normals.hpp
@@ -9,6 +9,7 @@ namespace ftl {
 namespace cuda {
 
 void normals(ftl::cuda::TextureObject<float4> &output,
+        ftl::cuda::TextureObject<float4> &temp,
         ftl::cuda::TextureObject<float4> &input, cudaStream_t stream);
 
 void normal_visualise(ftl::cuda::TextureObject<float4> &norm,
diff --git a/components/renderers/cpp/src/normals.cu b/components/renderers/cpp/src/normals.cu
index 22654a3a4..040f761b0 100644
--- a/components/renderers/cpp/src/normals.cu
+++ b/components/renderers/cpp/src/normals.cu
@@ -1,4 +1,5 @@
 #include <ftl/cuda/normals.hpp>
+#include <ftl/cuda/weighting.hpp>
 
 #define T_PER_BLOCK 16
 #define MINF __int_as_float(0xff800000)
@@ -30,14 +31,54 @@ __global__ void computeNormals_kernel(ftl::cuda::TextureObject<float4> output,
 	}
 }
 
+template <int RADIUS>
+__global__ void smooth_normals_kernel(ftl::cuda::TextureObject<float4> norms,
+        ftl::cuda::TextureObject<float4> output,
+        ftl::cuda::TextureObject<float4> points, float smoothing) {
+    const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
+    const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+    if(x >= points.width() || y >= points.height()) return;
+
+    const float3 p0 = make_float3(points.tex2D((int)x,(int)y));
+    float3 nsum = make_float3(0.0f);
+    float contrib = 0.0f;
+
+    if (p0.x == MINF) return;
+
+    for (int v=-RADIUS; v<=RADIUS; ++v) {
+        for (int u=-RADIUS; u<=RADIUS; ++u) {
+            const float3 p = make_float3(points.tex2D((int)x+u,(int)y+v));
+            if (p.x == MINF) continue;
+            const float s = ftl::cuda::spatialWeighting(p0, p, smoothing);
+
+            if (s > 0.0f) {
+                const float4 n = norms.tex2D((int)x+u,(int)y+v);
+                if (n.w > 0.0f) {
+                    nsum += make_float3(n) * s;
+                    contrib += s;
+                }
+            }
+        }
+    }
+
+    // FIXME: USE A DIFFERENT OUTPUT BUFFER
+    //__syncthreads();
+    output(x,y) = (contrib > 0.0f) ? make_float4(nsum / contrib, 1.0f) : make_float4(0.0f);
+}
+
 void ftl::cuda::normals(ftl::cuda::TextureObject<float4> &output,
+        ftl::cuda::TextureObject<float4> &temp,
         ftl::cuda::TextureObject<float4> &input, cudaStream_t stream) {
 	const dim3 gridSize((input.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (input.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
 	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
 
-	computeNormals_kernel<<<gridSize, blockSize, 0, stream>>>(output, input);
+	computeNormals_kernel<<<gridSize, blockSize, 0, stream>>>(temp, input);
+    cudaSafeCall( cudaGetLastError() );
 
+    smooth_normals_kernel<1><<<gridSize, blockSize, 0, stream>>>(temp, output, input, 0.04f);
     cudaSafeCall( cudaGetLastError() );
+
 #ifdef _DEBUG
 	cudaSafeCall(cudaDeviceSynchronize());
 	//cutilCheckMsg(__FUNCTION__);
@@ -62,7 +103,7 @@ __global__ void vis_normals_kernel(ftl::cuda::TextureObject<float4> norm,
     if (l == 0) return;
     n /= l;
 
-    output(x,y) = (1.0f + dot(ray, n))*3.5f;
+    output(x,y) = (1.0f + dot(ray, n))*3.5f;  // FIXME: Do not hard code these value scalings
 }
 
 void ftl::cuda::normal_visualise(ftl::cuda::TextureObject<float4> &norm,
diff --git a/components/renderers/cpp/src/splat_render.cpp b/components/renderers/cpp/src/splat_render.cpp
index 50264124b..ceaff5325 100644
--- a/components/renderers/cpp/src/splat_render.cpp
+++ b/components/renderers/cpp/src/splat_render.cpp
@@ -265,11 +265,14 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cuda
 		//ftl::cuda::normal_visualise(temp_.getTexture<float4>(Channel::Normals), temp_.getTexture<float>(Channel::Contribution), camera);
 
 		// First make sure each input has normals
+		temp_.createTexture<float4>(Channel::Normals);
 		for (auto &f : scene_->frames) {
 			if (!f.hasChannel(Channel::Normals)) {
 				auto &g = f.get<GpuMat>(Channel::Colour);
 				LOG(INFO) << "Make normals channel";
-				ftl::cuda::normals(f.createTexture<float4>(Channel::Normals, Format<float4>(g.cols, g.rows)), f.getTexture<float4>(Channel::Points), stream);
+				ftl::cuda::normals(f.createTexture<float4>(Channel::Normals, Format<float4>(g.cols, g.rows)),
+					temp_.getTexture<float4>(Channel::Normals),  // FIXME: Uses assumption of vcam res same as input res
+					f.getTexture<float4>(Channel::Points), stream);
 			}
 		}
 
-- 
GitLab