diff --git a/components/renderers/cpp/include/ftl/cuda/normals.hpp b/components/renderers/cpp/include/ftl/cuda/normals.hpp index 50f2e881a7c0550652ff49377382476c53f5bd11..f692e02a44b773ad07b7f5f982788f5ff83c7070 100644 --- a/components/renderers/cpp/include/ftl/cuda/normals.hpp +++ b/components/renderers/cpp/include/ftl/cuda/normals.hpp @@ -9,6 +9,7 @@ namespace ftl { namespace cuda { void normals(ftl::cuda::TextureObject<float4> &output, + ftl::cuda::TextureObject<float4> &temp, ftl::cuda::TextureObject<float4> &input, cudaStream_t stream); void normal_visualise(ftl::cuda::TextureObject<float4> &norm, diff --git a/components/renderers/cpp/src/normals.cu b/components/renderers/cpp/src/normals.cu index 22654a3a4bacbd30f3830b3ba25e628c53fdf5c6..040f761b0f42b92ceacfeeca63be843e55e39ac3 100644 --- a/components/renderers/cpp/src/normals.cu +++ b/components/renderers/cpp/src/normals.cu @@ -1,4 +1,5 @@ #include <ftl/cuda/normals.hpp> +#include <ftl/cuda/weighting.hpp> #define T_PER_BLOCK 16 #define MINF __int_as_float(0xff800000) @@ -30,14 +31,54 @@ __global__ void computeNormals_kernel(ftl::cuda::TextureObject<float4> output, } } +template <int RADIUS> +__global__ void smooth_normals_kernel(ftl::cuda::TextureObject<float4> norms, + ftl::cuda::TextureObject<float4> output, + ftl::cuda::TextureObject<float4> points, float smoothing) { + const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; + const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; + + if(x >= points.width() || y >= points.height()) return; + + const float3 p0 = make_float3(points.tex2D((int)x,(int)y)); + float3 nsum = make_float3(0.0f); + float contrib = 0.0f; + + if (p0.x == MINF) return; + + for (int v=-RADIUS; v<=RADIUS; ++v) { + for (int u=-RADIUS; u<=RADIUS; ++u) { + const float3 p = make_float3(points.tex2D((int)x+u,(int)y+v)); + if (p.x == MINF) continue; + const float s = ftl::cuda::spatialWeighting(p0, p, smoothing); + + if (s > 0.0f) { + const float4 n = norms.tex2D((int)x+u,(int)y+v); + if (n.w > 0.0f) { + nsum += make_float3(n) * s; + contrib += s; + } + } + } + } + + // FIXME: USE A DIFFERENT OUTPUT BUFFER + //__syncthreads(); + output(x,y) = (contrib > 0.0f) ? make_float4(nsum / contrib, 1.0f) : make_float4(0.0f); +} + void ftl::cuda::normals(ftl::cuda::TextureObject<float4> &output, + ftl::cuda::TextureObject<float4> &temp, ftl::cuda::TextureObject<float4> &input, cudaStream_t stream) { const dim3 gridSize((input.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (input.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); - computeNormals_kernel<<<gridSize, blockSize, 0, stream>>>(output, input); + computeNormals_kernel<<<gridSize, blockSize, 0, stream>>>(temp, input); + cudaSafeCall( cudaGetLastError() ); + smooth_normals_kernel<1><<<gridSize, blockSize, 0, stream>>>(temp, output, input, 0.04f); cudaSafeCall( cudaGetLastError() ); + #ifdef _DEBUG cudaSafeCall(cudaDeviceSynchronize()); //cutilCheckMsg(__FUNCTION__); @@ -62,7 +103,7 @@ __global__ void vis_normals_kernel(ftl::cuda::TextureObject<float4> norm, if (l == 0) return; n /= l; - output(x,y) = (1.0f + dot(ray, n))*3.5f; + output(x,y) = (1.0f + dot(ray, n))*3.5f; // FIXME: Do not hard code these value scalings } void ftl::cuda::normal_visualise(ftl::cuda::TextureObject<float4> &norm, diff --git a/components/renderers/cpp/src/splat_render.cpp b/components/renderers/cpp/src/splat_render.cpp index 50264124bc7a04ffd70831aa726f69c4429857a7..ceaff532557ee274ebf5ec6390f7110d07a1b961 100644 --- a/components/renderers/cpp/src/splat_render.cpp +++ b/components/renderers/cpp/src/splat_render.cpp @@ -265,11 +265,14 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cuda //ftl::cuda::normal_visualise(temp_.getTexture<float4>(Channel::Normals), temp_.getTexture<float>(Channel::Contribution), camera); // First make sure each input has normals + temp_.createTexture<float4>(Channel::Normals); for (auto &f : scene_->frames) { if (!f.hasChannel(Channel::Normals)) { auto &g = f.get<GpuMat>(Channel::Colour); LOG(INFO) << "Make normals channel"; - ftl::cuda::normals(f.createTexture<float4>(Channel::Normals, Format<float4>(g.cols, g.rows)), f.getTexture<float4>(Channel::Points), stream); + ftl::cuda::normals(f.createTexture<float4>(Channel::Normals, Format<float4>(g.cols, g.rows)), + temp_.getTexture<float4>(Channel::Normals), // FIXME: Uses assumption of vcam res same as input res + f.getTexture<float4>(Channel::Points), stream); } }