diff --git a/components/common/cpp/include/ftl/cuda_matrix_util.hpp b/components/common/cpp/include/ftl/cuda_matrix_util.hpp index 4dd1db7fa8a58c84a40f9d93abd8cc0a492b2792..cbb82052da87e2e5ddca271afe79233639a379bb 100644 --- a/components/common/cpp/include/ftl/cuda_matrix_util.hpp +++ b/components/common/cpp/include/ftl/cuda_matrix_util.hpp @@ -750,7 +750,7 @@ public: } //! returns the 3x3 part of the matrix - inline __device__ __host__ float3x3 getFloat3x3() { + inline __device__ __host__ float3x3 getFloat3x3() const { float3x3 ret; ret.m11 = m11; ret.m12 = m12; ret.m13 = m13; ret.m21 = m21; ret.m22 = m22; ret.m23 = m23; @@ -1086,7 +1086,7 @@ public: //! returns the 3x3 part of the matrix - inline __device__ __host__ float3x3 getFloat3x3() { + inline __device__ __host__ float3x3 getFloat3x3() const { float3x3 ret; ret.m11 = m11; ret.m12 = m12; ret.m13 = m13; ret.m21 = m21; ret.m22 = m22; ret.m23 = m23; diff --git a/components/operators/CMakeLists.txt b/components/operators/CMakeLists.txt index 3ffb6a0c81f1d7cb78d8357c5b0e499cc3f7ee4d..e1b7569960118e923c18b0d759cbb3f1ebcad640 100644 --- a/components/operators/CMakeLists.txt +++ b/components/operators/CMakeLists.txt @@ -26,6 +26,7 @@ set(OPERSRC src/fusion/correspondence_depth.cu src/fusion/correspondence_util.cu src/fusion/mls_aggr.cu + src/fusion/smoothing/mls_multi_weighted.cu src/misc/clipping.cpp src/disparity/depth.cpp src/analysis/tracking/detectandtrack.cpp diff --git a/components/operators/include/ftl/operators/cuda/mls/multi_intensity.hpp b/components/operators/include/ftl/operators/cuda/mls/multi_intensity.hpp new file mode 100644 index 0000000000000000000000000000000000000000..884fd0da46db2d61e68aba175603d7a04717f407 --- /dev/null +++ b/components/operators/include/ftl/operators/cuda/mls/multi_intensity.hpp @@ -0,0 +1,76 @@ +#ifndef _FTL_CUDA_MLS_MULTIINTENSITY_HPP_ +#define _FTL_CUDA_MLS_MULTIINTENSITY_HPP_ + +#include <ftl/cuda_common.hpp> +#include <ftl/rgbd/camera.hpp> +#include <ftl/cuda_matrix_util.hpp> + +namespace ftl { +namespace cuda { + +// TODO: 4th channel of normals could be used as curvature estimate? Then can +// adaptively adjust the smoothing amount by the degree of curvature. This +// curvature value must be low for noise data, is only high if a consistent +// high curvature is detected over a radius. Other option is to also or instead +// use the curvature value as a feature, to only smooth against points with +// similar curvature? + +/** + * For a particular viewpoint, use a set of other viewpoints to estimate a + * smooth surface for the focus viewpoint. This version of MLS uses absolute + * difference of some 8-bit value as a weight, this can be raw intensity or + * a local contrast measure. This `prime`, `gather`, `adjust` cycle should + * be iterated 2-3 times, but perhaps better to do one iteration of each view + * first before repeating a view. + * + * Note: Have one instance per frameset because the internal buffers are + * allocated based upon frame image size. + */ +class MLSMultiIntensity +{ +public: + MLSMultiIntensity(int radius); + ~MLSMultiIntensity(); + + void prime( + const cv::cuda::GpuMat &depth_prime, + const cv::cuda::GpuMat &intensity_prime, + const ftl::rgbd::Camera &cam_prime, + const float4x4 &pose_prime, + cudaStream_t stream + ); + + void gatherPrime(float smoothing, cudaStream_t stream); + + void gather( + const cv::cuda::GpuMat &depth_src, + const cv::cuda::GpuMat &normals_src, + const cv::cuda::GpuMat &intensity_src, + const ftl::rgbd::Camera &cam_src, + const float4x4 &pose_src, + float smoothing, + cudaStream_t stream + ); + + void adjust( + cv::cuda::GpuMat &depth_out, + cv::cuda::GpuMat &normals_out, + cudaStream_t stream + ); + +private: + cv::cuda::GpuMat depth_prime_; + cv::cuda::GpuMat intensity_prime_; + ftl::rgbd::Camera cam_prime_; + float4x4 pose_prime_; + int radius_; + + cv::cuda::GpuMat normal_accum_; + cv::cuda::GpuMat centroid_accum_; + cv::cuda::GpuMat weight_accum_; +}; + +} +} + +#endif \ No newline at end of file diff --git a/components/operators/src/fusion/smoothing/mls_multi_weighted.cu b/components/operators/src/fusion/smoothing/mls_multi_weighted.cu new file mode 100644 index 0000000000000000000000000000000000000000..a5e5ded31f193bda1c5b3d24b58994d5717ead32 --- /dev/null +++ b/components/operators/src/fusion/smoothing/mls_multi_weighted.cu @@ -0,0 +1,256 @@ +#include <ftl/operators/cuda/mls/multi_intensity.hpp> +#include <opencv2/core/cuda_stream_accessor.hpp> +#include <ftl/cuda/weighting.hpp> + +using ftl::cuda::MLSMultiIntensity; +using cv::cuda::GpuMat; + +// ==== Multi image MLS ======================================================== + +__device__ inline float featureWeight(int f1, int f2) { + const float w = (1.0f-(float(abs(f1 - f2)) / 255.0f)); + return w*w; +} + +/* + * Gather points for Moving Least Squares, from each source image + */ + template <int SEARCH_RADIUS, typename T> + __global__ void mls_gather_intensity_kernel( + const half4* __restrict__ normals_in, + half4* __restrict__ normals_out, + const float* __restrict__ depth_origin, + const float* __restrict__ depth_in, + const T* __restrict__ feature_origin, + const T* __restrict__ feature_in, + float4* __restrict__ centroid_out, + float* __restrict__ contrib_out, + float smoothing, + float4x4 o_2_in, + float4x4 in_2_o, + float3x3 in_2_o33, + ftl::rgbd::Camera camera_origin, + ftl::rgbd::Camera camera_in, + int npitch_out, + int cpitch_out, + int wpitch_out, + int dpitch_o, + int dpitch_i, + int npitch_in, + int fpitch_o, + int fpitch_i +) { + const int x = blockIdx.x*blockDim.x + threadIdx.x; + const int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x < 0 || y < 0 || x >= camera_origin.width || y >= camera_origin.height) return; + + float3 nX = make_float3(normals_out[y*npitch_out+x]); + float3 aX = make_float3(centroid_out[y*cpitch_out+x]); + float contrib = contrib_out[y*wpitch_out+x]; + + float d0 = depth_origin[x+y*dpitch_o]; + if (d0 <= camera_origin.minDepth || d0 >= camera_origin.maxDepth) return; + + const int feature1 = feature_origin[x+y*fpitch_o]; + + float3 X = camera_origin.screenToCam((int)(x),(int)(y),d0); + + int2 s = camera_in.camToScreen<int2>(o_2_in * X); + + // Neighbourhood + for (int v=-SEARCH_RADIUS; v<=SEARCH_RADIUS; ++v) { + for (int u=-SEARCH_RADIUS; u<=SEARCH_RADIUS; ++u) { + const float d = (s.x+u >= 0 && s.x+u < camera_in.width && s.y+v >= 0 && s.y+v < camera_in.height) ? depth_in[s.x+u+(s.y+v)*dpitch_i] : 0.0f; + if (d <= camera_in.minDepth || d >= camera_in.maxDepth) continue; + + // Point and normal of neighbour + const float3 Xi = in_2_o * camera_in.screenToCam(s.x+u, s.y+v, d); + const float3 Ni = make_float3(normals_in[s.x+u+(s.y+v)*npitch_in]); + + const int feature2 = feature_in[s.x+y+(s.y+v)*fpitch_i]; + + // Gauss approx weighting function using point distance + const float w = (length(Ni) > 0.0f) + ? ftl::cuda::spatialWeighting(X,Xi,smoothing) * featureWeight(feature1, feature2) + : 0.0f; + + aX += Xi*w; + nX += (in_2_o33 * Ni)*w; + contrib += w; + } + } + + normals_out[y*npitch_out+x] = make_half4(nX, 0.0f); + centroid_out[y*cpitch_out+x] = make_float4(aX, 0.0f); + contrib_out[y*wpitch_out+x] = contrib; +} + +/** + * Convert accumulated values into estimate of depth and normals at pixel. + */ + __global__ void mls_reduce_kernel_2( + const float4* __restrict__ centroid, + const half4* __restrict__ normals, + const float* __restrict__ contrib_out, + half4* __restrict__ normals_out, + float* __restrict__ depth, + ftl::rgbd::Camera camera, + int npitch_in, + int cpitch_in, + int wpitch, + int npitch, + int dpitch +) { + const int x = blockIdx.x*blockDim.x + threadIdx.x; + const int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x >= 0 && y >= 0 && x < camera.width && y < camera.height) { + float3 nX = make_float3(normals[y*npitch_in+x]); + float3 aX = make_float3(centroid[y*cpitch_in+x]); + float contrib = contrib_out[y*wpitch+x]; + + //depth[x+y*dpitch] = X.z; + normals_out[x+y*npitch] = make_half4(0.0f, 0.0f, 0.0f, 0.0f); + + float d0 = depth[x+y*dpitch]; + //depth[x+y*dpitch] = 0.0f; + if (d0 <= camera.minDepth || d0 >= camera.maxDepth || contrib == 0.0f) return; + float3 X = camera.screenToCam((int)(x),(int)(y),d0); + + nX /= contrib; // Weighted average normal + aX /= contrib; // Weighted average point (centroid) + + // Signed-Distance Field function + float fX = nX.x * (X.x - aX.x) + nX.y * (X.y - aX.y) + nX.z * (X.z - aX.z); + + // Calculate new point using SDF function to adjust depth (and position) + X = X - nX * fX; + + depth[x+y*dpitch] = X.z; + normals_out[x+y*npitch] = make_half4(nX / length(nX), 0.0f); + } +} + + +MLSMultiIntensity::MLSMultiIntensity(int radius) + : radius_(radius) +{ + +} + +MLSMultiIntensity::~MLSMultiIntensity() +{ + +} + +void MLSMultiIntensity::prime( + const GpuMat &depth_prime, + const GpuMat &intensity_prime, + const ftl::rgbd::Camera &cam_prime, + const float4x4 &pose_prime, + cudaStream_t stream) +{ + depth_prime_ = depth_prime; + intensity_prime_ = intensity_prime; + cam_prime_ = cam_prime; + pose_prime_ = pose_prime; + + centroid_accum_.create(depth_prime.size(), CV_32FC4); + normal_accum_.create(depth_prime.size(), CV_16FC4); + weight_accum_.create(depth_prime.size(), CV_32F); + + cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream); + + // Reset buffers + centroid_accum_.setTo(cv::Scalar(0,0,0,0), cvstream); + weight_accum_.setTo(cv::Scalar(0), cvstream); + normal_accum_.setTo(cv::Scalar(0,0,0,0), cvstream); +} + +void MLSMultiIntensity::gatherPrime(float smoothing, cudaStream_t stream) +{ + // Can use a simpler kernel without pose transformations +} + +void MLSMultiIntensity::gather( + const GpuMat &depth_src, + const GpuMat &normals_src, + const GpuMat &intensity_src, + const ftl::rgbd::Camera &cam_src, + const float4x4 &pose_src, + float smoothing, + cudaStream_t stream) +{ + static constexpr int THREADS_X = 8; + static constexpr int THREADS_Y = 8; + + const dim3 gridSize((depth_prime_.cols + THREADS_X - 1)/THREADS_X, (depth_prime_.rows + THREADS_Y - 1)/THREADS_Y); + const dim3 blockSize(THREADS_X, THREADS_Y); + + float4x4 inv_pose_src = pose_src; + inv_pose_src.invert(); + float4x4 o_2_in = inv_pose_src * pose_prime_; + float4x4 inv_pose_prime = pose_prime_; + inv_pose_prime.invert(); + float4x4 in_2_o = inv_pose_prime * pose_src; + float3x3 in_2_o33 = inv_pose_prime.getFloat3x3() * pose_src.getFloat3x3(); + + mls_gather_intensity_kernel<3><<<gridSize, blockSize, 0, stream>>>( + normals_src.ptr<half4>(), + normal_accum_.ptr<half4>(), + depth_prime_.ptr<float>(), + depth_src.ptr<float>(), + intensity_prime_.ptr<uchar>(), + intensity_src.ptr<uchar>(), + centroid_accum_.ptr<float4>(), + weight_accum_.ptr<float>(), + smoothing, + o_2_in, + in_2_o, + in_2_o33, + cam_prime_, + cam_src, + normal_accum_.step1()/4, + centroid_accum_.step1()/4, + weight_accum_.step1(), + depth_prime_.step1(), + depth_src.step1(), + normals_src.step1()/4, + intensity_prime_.step1(), + intensity_src.step1() + ); + cudaSafeCall( cudaGetLastError() ); +} + +void MLSMultiIntensity::adjust( + GpuMat &depth_out, + GpuMat &normals_out, + cudaStream_t stream) +{ + static constexpr int THREADS_X = 8; + static constexpr int THREADS_Y = 8; + + const dim3 gridSize((depth_prime_.cols + THREADS_X - 1)/THREADS_X, (depth_prime_.rows + THREADS_Y - 1)/THREADS_Y); + const dim3 blockSize(THREADS_X, THREADS_Y); + + normals_out.create(depth_prime_.size(), CV_16FC4); + depth_out.create(depth_prime_.size(), CV_32F); + + // FIXME: Depth prime assumed to be same as depth out + + mls_reduce_kernel_2<<<gridSize, blockSize, 0, stream>>>( + centroid_accum_.ptr<float4>(), + normal_accum_.ptr<half4>(), + weight_accum_.ptr<float>(), + normals_out.ptr<half4>(), + depth_prime_.ptr<float>(), + cam_prime_, + normal_accum_.step1()/4, + centroid_accum_.step1()/4, + weight_accum_.step1(), + normals_out.step1()/4, + depth_prime_.step1() + ); + cudaSafeCall( cudaGetLastError() ); +} diff --git a/components/renderers/cpp/src/colouriser.cpp b/components/renderers/cpp/src/colouriser.cpp index a8399d016b3229fe04d009d49ca9b902f4e6e68e..2412aeb86addf8bd1c558f421a7e9e7230f20a27 100644 --- a/components/renderers/cpp/src/colouriser.cpp +++ b/components/renderers/cpp/src/colouriser.cpp @@ -143,7 +143,7 @@ TextureObject<uchar4> &Colouriser::_processNormals(ftl::rgbd::Frame &f, Channel light_diffuse.w = value("alpha", 0.5f)*255.0f; light_ambient.w = light_diffuse.w; - auto light_pos = make_float3(value("light_x", 0.3f), value("light_y", 0.2f), value("light_z", 1.0f)); + auto light_pos = make_float3(value("light_x", 0.3f), value("light_y", 0.2f), value("light_z", -1.0f)); ftl::cuda::normal_visualise(f.createTexture<half4>(c), buf, light_pos,