diff --git a/components/operators/CMakeLists.txt b/components/operators/CMakeLists.txt index 51223e8f6631da88ee7d677952eb6f440fade377..3ffb6a0c81f1d7cb78d8357c5b0e499cc3f7ee4d 100644 --- a/components/operators/CMakeLists.txt +++ b/components/operators/CMakeLists.txt @@ -1,6 +1,7 @@ set(OPERSRC src/surface/smoothing.cpp src/surface/smoothing.cu + src/surface/mls/image_basic.cu src/surface/mls.cu src/analysis/local/smoothchan.cu src/operator.cpp diff --git a/components/operators/include/ftl/operators/cuda/mls_cuda.hpp b/components/operators/include/ftl/operators/cuda/mls_cuda.hpp new file mode 100644 index 0000000000000000000000000000000000000000..bb34b9a4201b4dfbc8202c8fc4aa43789e36bb11 --- /dev/null +++ b/components/operators/include/ftl/operators/cuda/mls_cuda.hpp @@ -0,0 +1,32 @@ +#ifndef _FTL_CUDA_MLS_HPP_ +#define _FTL_CUDA_MLS_HPP_ + +#include <ftl/rgbd/camera.hpp> +#include <ftl/cuda_common.hpp> + +namespace ftl { +namespace cuda { + +void mls_smooth( + const cv::cuda::GpuMat &normals_in, + cv::cuda::GpuMat &normals_out, + const cv::cuda::GpuMat &depth_in, + cv::cuda::GpuMat &depth_out, + float smoothing, + int radius, + const ftl::rgbd::Camera &camera, + cudaStream_t stream); + +void mls_smooth( + const cv::cuda::GpuMat &normals_in, + cv::cuda::GpuMat &normals_out, + const cv::cuda::GpuMat &depth_in, + float smoothing, + int radius, + const ftl::rgbd::Camera &camera, + cudaStream_t stream); + +} +} + +#endif // _FTL_CUDA_MLS_HPP_ diff --git a/components/operators/include/ftl/operators/cuda/smoothing_cuda.hpp b/components/operators/include/ftl/operators/cuda/smoothing_cuda.hpp index 8c99b5e9f2dc130bcc236b8f4cdea9c2932ab7f4..f1934cdae28f4a3a90cf007c3572f3da017b9357 100644 --- a/components/operators/include/ftl/operators/cuda/smoothing_cuda.hpp +++ b/components/operators/include/ftl/operators/cuda/smoothing_cuda.hpp @@ -7,25 +7,6 @@ namespace ftl { namespace cuda { -void mls_smooth( - ftl::cuda::TextureObject<half4> &normals_in, - ftl::cuda::TextureObject<half4> &normals_out, - ftl::cuda::TextureObject<float> &depth_in, - ftl::cuda::TextureObject<float> &depth_out, - float smoothing, - int radius, - const ftl::rgbd::Camera &camera, - cudaStream_t stream); - -void mls_smooth( - ftl::cuda::TextureObject<half4> &normals_in, - ftl::cuda::TextureObject<half4> &normals_out, - ftl::cuda::TextureObject<float> &depth_in, - float smoothing, - int radius, - const ftl::rgbd::Camera &camera, - cudaStream_t stream); - void colour_mls_smooth( ftl::cuda::TextureObject<half4> &normals_in, ftl::cuda::TextureObject<half4> &normals_out, diff --git a/components/operators/src/surface/mls.cu b/components/operators/src/surface/mls.cu index 1293e6d9a48d15b7345619d44df3264c8c9e144d..0a6373719c5d4655db9d9a2f7bacbadcea63e72c 100644 --- a/components/operators/src/surface/mls.cu +++ b/components/operators/src/surface/mls.cu @@ -7,192 +7,6 @@ using ftl::cuda::TextureObject; #define T_PER_BLOCK 8 #define WARP_SIZE 32 -// ===== MLS Smooth ============================================================ - -/* - * Smooth depth map using Moving Least Squares - */ - template <int SEARCH_RADIUS> - __global__ void mls_smooth_kernel( - TextureObject<half4> normals_in, - TextureObject<half4> normals_out, - TextureObject<float> depth_in, // Virtual depth map - TextureObject<float> depth_out, // Accumulated output - float smoothing, - ftl::rgbd::Camera camera) { - - const int x = blockIdx.x*blockDim.x + threadIdx.x; - const int y = blockIdx.y*blockDim.y + threadIdx.y; - - if (x < 0 || y < 0 || x >= depth_in.width() || y >= depth_in.height()) return; - - float3 aX = make_float3(0.0f,0.0f,0.0f); - float3 nX = make_float3(0.0f,0.0f,0.0f); - float contrib = 0.0f; - - float d0 = depth_in.tex2D(x, y); - depth_out(x,y) = d0; - normals_out(x,y) = normals_in(x,y); - if (d0 < camera.minDepth || d0 > camera.maxDepth) return; - float3 X = camera.screenToCam((int)(x),(int)(y),d0); - - // Neighbourhood - for (int v=-SEARCH_RADIUS; v<=SEARCH_RADIUS; ++v) { - for (int u=-SEARCH_RADIUS; u<=SEARCH_RADIUS; ++u) { - const float d = depth_in.tex2D(x+u, y+v); - if (d < camera.minDepth || d > camera.maxDepth) continue; - - // Point and normal of neighbour - const float3 Xi = camera.screenToCam((int)(x)+u,(int)(y)+v,d); - const float3 Ni = make_float3(normals_in.tex2D((int)(x)+u, (int)(y)+v)); - - // Gauss approx weighting function using point distance - const float w = (Ni.x+Ni.y+Ni.z == 0.0f) ? 0.0f : ftl::cuda::spatialWeighting(X,Xi,smoothing); - - aX += Xi*w; - nX += Ni*w; - contrib += w; - } - } - - if (contrib > 0.0f) { - nX /= contrib; // Weighted average normal - aX /= contrib; // Weighted average point (centroid) - - // Signed-Distance Field function - float fX = nX.x * (X.x - aX.x) + nX.y * (X.y - aX.y) + nX.z * (X.z - aX.z); - - // Calculate new point using SDF function to adjust depth (and position) - X = X - nX * fX; - - //uint2 screen = camera.camToScreen<uint2>(X); - - //if (screen.x < depth_out.width() && screen.y < depth_out.height()) { - // depth_out(screen.x,screen.y) = X.z; - //} - depth_out(x,y) = X.z; - normals_out(x,y) = make_half4(nX / length(nX), 0.0f); - } -} - -void ftl::cuda::mls_smooth( - ftl::cuda::TextureObject<half4> &normals_in, - ftl::cuda::TextureObject<half4> &normals_out, - ftl::cuda::TextureObject<float> &depth_in, - ftl::cuda::TextureObject<float> &depth_out, - float smoothing, - int radius, - const ftl::rgbd::Camera &camera, - cudaStream_t stream) { - - const dim3 gridSize((depth_out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); - const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); - - switch (radius) { - case 5: mls_smooth_kernel<5><<<gridSize, blockSize, 0, stream>>>(normals_in, normals_out, depth_in, depth_out, smoothing, camera); break; - case 4: mls_smooth_kernel<4><<<gridSize, blockSize, 0, stream>>>(normals_in, normals_out, depth_in, depth_out, smoothing, camera); break; - case 3: mls_smooth_kernel<3><<<gridSize, blockSize, 0, stream>>>(normals_in, normals_out, depth_in, depth_out, smoothing, camera); break; - case 2: mls_smooth_kernel<2><<<gridSize, blockSize, 0, stream>>>(normals_in, normals_out, depth_in, depth_out, smoothing, camera); break; - case 1: mls_smooth_kernel<1><<<gridSize, blockSize, 0, stream>>>(normals_in, normals_out, depth_in, depth_out, smoothing, camera); break; - } - cudaSafeCall( cudaGetLastError() ); - - - #ifdef _DEBUG - cudaSafeCall(cudaDeviceSynchronize()); - #endif -} - -/* - * Smooth depth map using Moving Least Squares. Normals only output - */ - template <int SEARCH_RADIUS> - __global__ void mls_smooth_kernel( - TextureObject<half4> normals_in, - TextureObject<half4> normals_out, - TextureObject<float> depth_in, // Virtual depth map - float smoothing, - ftl::rgbd::Camera camera) { - - const int x = blockIdx.x*blockDim.x + threadIdx.x; - const int y = blockIdx.y*blockDim.y + threadIdx.y; - - if (x < 0 || y < 0 || x >= depth_in.width() || y >= depth_in.height()) return; - - float3 aX = make_float3(0.0f,0.0f,0.0f); - float3 nX = make_float3(0.0f,0.0f,0.0f); - float contrib = 0.0f; - - float d0 = depth_in.tex2D(x, y); - normals_out(x,y) = normals_in(x,y); - if (d0 < camera.minDepth || d0 > camera.maxDepth) return; - float3 X = camera.screenToCam((int)(x),(int)(y),d0); - - // Neighbourhood - for (int v=-SEARCH_RADIUS; v<=SEARCH_RADIUS; ++v) { - for (int u=-SEARCH_RADIUS; u<=SEARCH_RADIUS; ++u) { - const float d = depth_in.tex2D(x+u, y+v); - if (d < camera.minDepth || d > camera.maxDepth) continue; - - // Point and normal of neighbour - const float3 Xi = camera.screenToCam((int)(x)+u,(int)(y)+v,d); - const float3 Ni = make_float3(normals_in.tex2D((int)(x)+u, (int)(y)+v)); - - // Gauss approx weighting function using point distance - const float w = (Ni.x+Ni.y+Ni.z == 0.0f) ? 0.0f : ftl::cuda::spatialWeighting(X,Xi,smoothing); - - aX += Xi*w; - nX += Ni*w; - contrib += w; - } - } - - if (contrib > 0.0f) { - nX /= contrib; // Weighted average normal - aX /= contrib; // Weighted average point (centroid) - - // Signed-Distance Field function - float fX = nX.x * (X.x - aX.x) + nX.y * (X.y - aX.y) + nX.z * (X.z - aX.z); - - // Calculate new point using SDF function to adjust depth (and position) - X = X - nX * fX; - - //uint2 screen = camera.camToScreen<uint2>(X); - - //if (screen.x < depth_out.width() && screen.y < depth_out.height()) { - // depth_out(screen.x,screen.y) = X.z; - //} - //depth_out(x,y) = X.z; - normals_out(x,y) = make_half4(nX / length(nX), 0.0f); - } -} - -void ftl::cuda::mls_smooth( - ftl::cuda::TextureObject<half4> &normals_in, - ftl::cuda::TextureObject<half4> &normals_out, - ftl::cuda::TextureObject<float> &depth_in, - float smoothing, - int radius, - const ftl::rgbd::Camera &camera, - cudaStream_t stream) { - - const dim3 gridSize((depth_in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); - const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); - - switch (radius) { - case 5: mls_smooth_kernel<5><<<gridSize, blockSize, 0, stream>>>(normals_in, normals_out, depth_in, smoothing, camera); break; - case 4: mls_smooth_kernel<4><<<gridSize, blockSize, 0, stream>>>(normals_in, normals_out, depth_in, smoothing, camera); break; - case 3: mls_smooth_kernel<3><<<gridSize, blockSize, 0, stream>>>(normals_in, normals_out, depth_in, smoothing, camera); break; - case 2: mls_smooth_kernel<2><<<gridSize, blockSize, 0, stream>>>(normals_in, normals_out, depth_in, smoothing, camera); break; - case 1: mls_smooth_kernel<1><<<gridSize, blockSize, 0, stream>>>(normals_in, normals_out, depth_in, smoothing, camera); break; - } - cudaSafeCall( cudaGetLastError() ); - - - #ifdef _DEBUG - cudaSafeCall(cudaDeviceSynchronize()); - #endif -} // ===== Colour MLS Smooth ===================================================== diff --git a/components/operators/src/surface/mls/image_basic.cu b/components/operators/src/surface/mls/image_basic.cu new file mode 100644 index 0000000000000000000000000000000000000000..a34d0b13a0164a7d40c48361b6edf03c09b9d1db --- /dev/null +++ b/components/operators/src/surface/mls/image_basic.cu @@ -0,0 +1,126 @@ +#include <ftl/operators/cuda/mls_cuda.hpp> +#include <ftl/cuda/weighting.hpp> + +// ===== MLS Smooth ============================================================ + +/* + * Smooth depth map using Moving Least Squares. This version is for a single + * depth image and does not use colour. + */ + template <int RADIUS> + __global__ void mls_smooth_kernel( + const half4* __restrict__ normals_in, + half4* __restrict__ normals_out, // Can be nullptr + const float* __restrict__ depth_in, + float* __restrict__ depth_out, // Can be nullptr + int npitch, + int dpitch, + float smoothing, // Radius of Gaussian in cm + ftl::rgbd::Camera camera +) { + + const int x = blockIdx.x*blockDim.x + threadIdx.x; + const int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x < RADIUS || y < RADIUS || x >= camera.width-RADIUS || y >= camera.height-RADIUS) return; + + float3 aX = make_float3(0.0f,0.0f,0.0f); + float3 nX = make_float3(0.0f,0.0f,0.0f); + float contrib = 0.0f; + + const float d0 = depth_in[x+y*dpitch]; + if (depth_out) depth_out[x+y*dpitch] = d0; + if (normals_out) normals_out[x+y*npitch] = normals_in[x+y*npitch]; + if (d0 < camera.minDepth || d0 > camera.maxDepth) return; + + float3 X = camera.screenToCam((int)(x),(int)(y),d0); + + // Neighbourhood + for (int v=-RADIUS; v<=RADIUS; ++v) { + for (int u=-RADIUS; u<=RADIUS; ++u) { + const float d = depth_in[x+u+(y+v)*dpitch]; + if (d < camera.minDepth || d > camera.maxDepth) continue; + + // Point and normal of neighbour + const float3 Xi = camera.screenToCam(x+u, y+v, d); + const float3 Ni = make_float3(normals_in[x+u+(y+v)*npitch]); + + // Gauss approx weighting function using point distance + const float w = (Ni.x+Ni.y+Ni.z == 0.0f) ? 0.0f : ftl::cuda::spatialWeighting(X,Xi,smoothing); + + aX += Xi*w; + nX += Ni*w; + contrib += w; + } + } + + if (contrib > 0.0f) { + nX /= contrib; // Weighted average normal + aX /= contrib; // Weighted average point (centroid) + + // Signed-Distance Field function + float fX = nX.x * (X.x - aX.x) + nX.y * (X.y - aX.y) + nX.z * (X.z - aX.z); + + // Calculate new point using SDF function to adjust depth (and position) + X = X - nX * fX; + + if (depth_out) depth_out[x+y*dpitch] = X.z; + if (normals_out) normals_out[x+y*npitch] = make_half4(nX / length(nX), 0.0f); + } +} + +/* One iteration of MLS Smoothing, simple, single image and output depth also. */ +void ftl::cuda::mls_smooth( + const cv::cuda::GpuMat &normals_in, + cv::cuda::GpuMat &normals_out, + const cv::cuda::GpuMat &depth_in, + cv::cuda::GpuMat &depth_out, + float smoothing, + int radius, + const ftl::rgbd::Camera &camera, + cudaStream_t stream +) { + static constexpr int THREADS_X = 8; + static constexpr int THREADS_Y = 8; + + const dim3 gridSize((depth_in.cols + THREADS_X - 1)/THREADS_X, (depth_in.rows + THREADS_Y - 1)/THREADS_Y); + const dim3 blockSize(THREADS_X, THREADS_Y); + + normals_out.create(normals_in.size(), CV_16FC4); + depth_out.create(depth_in.size(), CV_32F); + + switch (radius) { + case 5: mls_smooth_kernel<5><<<gridSize, blockSize, 0, stream>>>(normals_in.ptr<half4>(), normals_out.ptr<half4>(), depth_in.ptr<float>(), depth_out.ptr<float>(), normals_in.step1()/4, depth_in.step1(), smoothing, camera); break; + case 4: mls_smooth_kernel<4><<<gridSize, blockSize, 0, stream>>>(normals_in.ptr<half4>(), normals_out.ptr<half4>(), depth_in.ptr<float>(), depth_out.ptr<float>(), normals_in.step1()/4, depth_in.step1(), smoothing, camera); break; + case 3: mls_smooth_kernel<3><<<gridSize, blockSize, 0, stream>>>(normals_in.ptr<half4>(), normals_out.ptr<half4>(), depth_in.ptr<float>(), depth_out.ptr<float>(), normals_in.step1()/4, depth_in.step1(), smoothing, camera); break; + case 2: mls_smooth_kernel<2><<<gridSize, blockSize, 0, stream>>>(normals_in.ptr<half4>(), normals_out.ptr<half4>(), depth_in.ptr<float>(), depth_out.ptr<float>(), normals_in.step1()/4, depth_in.step1(), smoothing, camera); break; + case 1: mls_smooth_kernel<1><<<gridSize, blockSize, 0, stream>>>(normals_in.ptr<half4>(), normals_out.ptr<half4>(), depth_in.ptr<float>(), depth_out.ptr<float>(), normals_in.step1()/4, depth_in.step1(), smoothing, camera); break; + } + cudaSafeCall( cudaGetLastError() ); +} + +/* One iteration of MLS Smoothing, simple, single image, normals only */ +void ftl::cuda::mls_smooth( + const cv::cuda::GpuMat &normals_in, + cv::cuda::GpuMat &normals_out, + const cv::cuda::GpuMat &depth_in, + float smoothing, + int radius, + const ftl::rgbd::Camera &camera, + cudaStream_t stream +) { + static constexpr int THREADS_X = 8; + static constexpr int THREADS_Y = 8; + + const dim3 gridSize((depth_in.cols + THREADS_X - 1)/THREADS_X, (depth_in.rows + THREADS_Y - 1)/THREADS_Y); + const dim3 blockSize(THREADS_X, THREADS_Y); + + switch (radius) { + case 5: mls_smooth_kernel<5><<<gridSize, blockSize, 0, stream>>>(normals_in.ptr<half4>(), normals_out.ptr<half4>(), depth_in.ptr<float>(), nullptr, normals_in.step1()/4, depth_in.step1(), smoothing, camera); break; + case 4: mls_smooth_kernel<4><<<gridSize, blockSize, 0, stream>>>(normals_in.ptr<half4>(), normals_out.ptr<half4>(), depth_in.ptr<float>(), nullptr, normals_in.step1()/4, depth_in.step1(), smoothing, camera); break; + case 3: mls_smooth_kernel<3><<<gridSize, blockSize, 0, stream>>>(normals_in.ptr<half4>(), normals_out.ptr<half4>(), depth_in.ptr<float>(), nullptr, normals_in.step1()/4, depth_in.step1(), smoothing, camera); break; + case 2: mls_smooth_kernel<2><<<gridSize, blockSize, 0, stream>>>(normals_in.ptr<half4>(), normals_out.ptr<half4>(), depth_in.ptr<float>(), nullptr, normals_in.step1()/4, depth_in.step1(), smoothing, camera); break; + case 1: mls_smooth_kernel<1><<<gridSize, blockSize, 0, stream>>>(normals_in.ptr<half4>(), normals_out.ptr<half4>(), depth_in.ptr<float>(), nullptr, normals_in.step1()/4, depth_in.step1(), smoothing, camera); break; + } + cudaSafeCall( cudaGetLastError() ); +} diff --git a/components/operators/src/surface/smoothing.cpp b/components/operators/src/surface/smoothing.cpp index 44fcea6b0f58ad956f4c532c3022eb888aa1e2a7..dd96fe6fcfaafac8c48b3d17af649c07257ca87a 100644 --- a/components/operators/src/surface/smoothing.cpp +++ b/components/operators/src/surface/smoothing.cpp @@ -1,5 +1,6 @@ #include <ftl/operators/smoothing.hpp> #include <ftl/operators/cuda/smoothing_cuda.hpp> +#include <ftl/operators/cuda/mls_cuda.hpp> #define LOGURU_REPLACE_GLOG 1 #include <loguru.hpp> @@ -163,10 +164,10 @@ bool SimpleMLS::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t // FIXME: Assume in and out are the same frame. for (int i=0; i<iters; ++i) { ftl::cuda::mls_smooth( - in.createTexture<half4>(Channel::Normals), - temp.createTexture<half4>(Channel::Normals, ftl::rgbd::Format<half4>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), - in.createTexture<float>(Channel::Depth), - temp.createTexture<float>(Channel::Depth, ftl::rgbd::Format<float>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), + in.get<GpuMat>(Channel::Normals), + temp.create<GpuMat>(Channel::Normals), + in.get<GpuMat>(Channel::Depth), + temp.create<GpuMat>(Channel::Depth), thresh, radius, in.getLeftCamera(), diff --git a/components/renderers/cpp/src/CUDARender.cpp b/components/renderers/cpp/src/CUDARender.cpp index e3817f45a22b6e925b42b01a564e438109768ce2..da271fa404b7e85d72edacd6d408325cf776ddb7 100644 --- a/components/renderers/cpp/src/CUDARender.cpp +++ b/components/renderers/cpp/src/CUDARender.cpp @@ -6,7 +6,7 @@ #include <ftl/operators/cuda/mask.hpp> #include <ftl/render/colouriser.hpp> #include <ftl/cuda/transform.hpp> -#include <ftl/operators/cuda/smoothing_cuda.hpp> +#include <ftl/operators/cuda/mls_cuda.hpp> #include "carver.hpp" #include <ftl/utility/image_debug.hpp> @@ -499,9 +499,9 @@ void CUDARender::_mesh(ftl::rgbd::Frame &out, const Eigen::Matrix4d &t, cudaStre params_.camera, stream_); ftl::cuda::mls_smooth( - temp_.createTexture<half4>(Channel::Normals), - out.createTexture<half4>(_getNormalsChannel()), - out.getTexture<float>(_getDepthChannel()), + temp_.get<cv::cuda::GpuMat>(Channel::Normals), + out.create<cv::cuda::GpuMat>(_getNormalsChannel()), + out.get<cv::cuda::GpuMat>(_getDepthChannel()), //out.getTexture<float>(_getDepthChannel()), value("mls_smooth", 0.01f), value("mls_radius", 2), @@ -509,7 +509,7 @@ void CUDARender::_mesh(ftl::rgbd::Frame &out, const Eigen::Matrix4d &t, cudaStre stream_ ); - ftl::cuda::mls_smooth( + /*ftl::cuda::mls_smooth( out.createTexture<half4>(_getNormalsChannel()), temp_.createTexture<half4>(Channel::Normals), out.getTexture<float>(_getDepthChannel()), @@ -529,7 +529,7 @@ void CUDARender::_mesh(ftl::rgbd::Frame &out, const Eigen::Matrix4d &t, cudaStre value("mls_radius", 2), params_.camera, stream_ - ); + );*/ } }