diff --git a/components/operators/include/ftl/operators/cuda/mls/multi_intensity.hpp b/components/operators/include/ftl/operators/cuda/mls/multi_intensity.hpp index 884fd0da46db2d61e68aba175603d7a04717f407..e1a7e21321650f2c2e4b5592b401b1ca9ff7d49a 100644 --- a/components/operators/include/ftl/operators/cuda/mls/multi_intensity.hpp +++ b/components/operators/include/ftl/operators/cuda/mls/multi_intensity.hpp @@ -49,6 +49,7 @@ public: const ftl::rgbd::Camera &cam_src, const float4x4 &pose_src, float smoothing, + float fsmoothing, cudaStream_t stream ); @@ -70,6 +71,13 @@ private: cv::cuda::GpuMat weight_accum_; }; +void mean_subtract( + const cv::cuda::GpuMat &intensity, + cv::cuda::GpuMat &contrast, + int radius, + cudaStream_t stream +); + } } diff --git a/components/operators/include/ftl/operators/fusion.hpp b/components/operators/include/ftl/operators/fusion.hpp index ed0e5f9cd324f1e57979d9f20fc9d9a98f796ce0..d9f2c76c1d707b764bf32e4ab668b688c314e175 100644 --- a/components/operators/include/ftl/operators/fusion.hpp +++ b/components/operators/include/ftl/operators/fusion.hpp @@ -21,6 +21,7 @@ class Fusion : public ftl::operators::Operator { ftl::cuda::MLSMultiIntensity mls_; std::vector<cv::cuda::GpuMat> weights_; cv::cuda::GpuMat temp_; + cv::cuda::GpuMat temp2_; }; } diff --git a/components/operators/src/fusion/fusion.cpp b/components/operators/src/fusion/fusion.cpp index 7f5d039fef314407bd59b1186e29a4556bf0b68d..d270c54bc0003840035e505c6a3edb665bc8386a 100644 --- a/components/operators/src/fusion/fusion.cpp +++ b/components/operators/src/fusion/fusion.cpp @@ -3,6 +3,8 @@ #include <ftl/utility/matrix_conversion.hpp> #include <opencv2/core/cuda_stream_accessor.hpp> +#include <ftl/utility/image_debug.hpp> + #include <opencv2/cudaimgproc.hpp> #include <opencv2/cudawarping.hpp> @@ -19,7 +21,8 @@ Fusion::~Fusion() { } bool Fusion::apply(ftl::rgbd::FrameSet &in, ftl::rgbd::FrameSet &out, cudaStream_t stream) { - float mls_smoothing = config()->value("mls_smoothing", 
0.01f); + float mls_spatial = config()->value("mls_spatial", 0.01f); + float mls_feature = config()->value("mls_feature", 20.0f); int mls_iters = config()->value("mls_iterations", 2); if (weights_.size() != in.frames.size()) weights_.resize(in.frames.size()); @@ -32,9 +35,12 @@ bool Fusion::apply(ftl::rgbd::FrameSet &in, ftl::rgbd::FrameSet &out, cudaStream const GpuMat &d = in.frames[i].get<GpuMat>(Channel::Depth); cv::cuda::cvtColor(col, temp_, cv::COLOR_BGRA2GRAY, 0, cvstream); - cv::cuda::resize(temp_, weights_[i], d.size(), 0, 0, cv::INTER_LINEAR, cvstream); + cv::cuda::resize(temp_, temp2_, d.size(), 0, 0, cv::INTER_LINEAR, cvstream); + ftl::cuda::mean_subtract(temp2_, weights_[i], 3, stream); } + //if (weights_.size() > 0) ftl::utility::show_image(weights_[0], "MeanSub", 1.0f, ftl::utility::ImageVisualisation::RAW_GRAY); + // 1) Optical flow of colour // 2) Flow depth from model, // a) check local depth change consistency, generate a weighting @@ -119,7 +125,8 @@ bool Fusion::apply(ftl::rgbd::FrameSet &in, ftl::rgbd::FrameSet &out, cudaStream weights_[j], f2.getLeft(), pose2, - mls_smoothing, + mls_spatial, + mls_feature, stream ); } diff --git a/components/operators/src/fusion/smoothing/mls_multi_weighted.cu b/components/operators/src/fusion/smoothing/mls_multi_weighted.cu index a5e5ded31f193bda1c5b3d24b58994d5717ead32..e92d23c99dbe2f821d89d7ee8c46436b9d2be190 100644 --- a/components/operators/src/fusion/smoothing/mls_multi_weighted.cu +++ b/components/operators/src/fusion/smoothing/mls_multi_weighted.cu @@ -9,7 +9,7 @@ using cv::cuda::GpuMat; __device__ inline float featureWeight(int f1, int f2) { const float w = (1.0f-(float(abs(f1 - f2)) / 255.0f)); - return w*w; + return w*w*w; } /* @@ -26,6 +26,7 @@ __device__ inline float featureWeight(int f1, int f2) { float4* __restrict__ centroid_out, float* __restrict__ contrib_out, float smoothing, + float fsmoothing, float4x4 o_2_in, float4x4 in_2_o, float3x3 in_2_o33, @@ -70,9 +71,14 @@ __device__ inline float 
featureWeight(int f1, int f2) { const int feature2 = feature_in[s.x+y+(s.y+v)*fpitch_i]; - // Gauss approx weighting function using point distance + // Gauss approx weighting functions + // Rule: spatially close and feature close is strong + // Spatially far or feature far, then poor. + // So take the minimum, must be close and feature close to get good value + const float w_feat = ftl::cuda::weighting(float(abs(feature1-feature2)), fsmoothing); + const float w_space = ftl::cuda::spatialWeighting(X,Xi,smoothing); const float w = (length(Ni) > 0.0f) - ? ftl::cuda::spatialWeighting(X,Xi,smoothing) * featureWeight(feature1, feature2) + ? min(w_space, w_feat) : 0.0f; aX += Xi*w; @@ -180,6 +186,7 @@ void MLSMultiIntensity::gather( const ftl::rgbd::Camera &cam_src, const float4x4 &pose_src, float smoothing, + float fsmoothing, cudaStream_t stream) { static constexpr int THREADS_X = 8; @@ -206,6 +213,7 @@ void MLSMultiIntensity::gather( centroid_accum_.ptr<float4>(), weight_accum_.ptr<float>(), smoothing, + fsmoothing, o_2_in, in_2_o, in_2_o33,
@@ -254,3 +262,88 @@ void MLSMultiIntensity::adjust(
 	);
 	cudaSafeCall( cudaGetLastError() );
 }
+
+// =============================================================================
+
+/*
+ * Local contrast of an 8-bit image: each output pixel is the input pixel minus
+ * the mean of the (2*radius+1)^2 window centred on it. The difference is
+ * truncated to an integer, offset by 127 and clamped to [0,254].
+ *
+ * The window is clipped to the image near the borders and the mean is taken
+ * over the pixels that remain, so every pixel of `contrast` receives a value.
+ *
+ * Launch with a 2D grid covering width x height, one thread per pixel; threads
+ * that fall outside the image return without touching memory. `ipitch` and
+ * `cpitch` are the row strides (in elements) of `intensity` and `contrast`,
+ * which must be distinct buffers because both are declared __restrict__.
+ */
+__global__ void mean_subtract_kernel(
+	const uchar* __restrict__ intensity,
+	uchar* __restrict__ contrast,
+	int ipitch,
+	int cpitch,
+	int width,
+	int height,
+	int radius
+) {
+	const int x = blockIdx.x*blockDim.x + threadIdx.x;
+	const int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	if (x >= width || y >= height) return;
+
+	// Clip the window instead of skipping border pixels: the output buffer is
+	// not cleared by the caller, so skipped pixels would hold stale data.
+	const int u0 = max(x-radius, 0);
+	const int u1 = min(x+radius, width-1);
+	const int v0 = max(y-radius, 0);
+	const int v1 = min(y+radius, height-1);
+
+	float mean = 0.0f;
+
+	for (int v=v0; v<=v1; ++v) {
+		for (int u=u0; u<=u1; ++u) {
+			mean += float(intensity[u+v*ipitch]);
+		}
+	}
+
+	mean /= float((u1-u0+1)*(v1-v0+1));
+
+	const float diff = float(intensity[x+y*ipitch]) - mean;
+	contrast[x+y*cpitch] = max(0, min(254, int(diff)+127));
+}
+
+/*
+ * Write the local contrast of `intensity` (see mean_subtract_kernel) into
+ * `contrast`, which is (re)allocated as CV_8U with the same size.
+ *
+ * `intensity` is read as one uchar per pixel - assumed to be CV_8UC1.
+ * `radius` is the half-width of the averaging window; values below zero are
+ * treated as zero. The kernel is queued on `stream` and this call returns
+ * without waiting for it to finish.
+ */
+void ftl::cuda::mean_subtract(
+	const cv::cuda::GpuMat &intensity,
+	cv::cuda::GpuMat &contrast,
+	int radius,
+	cudaStream_t stream
+) {
+	static constexpr int THREADS_X = 8;
+	static constexpr int THREADS_Y = 8;
+
+	const dim3 gridSize((intensity.cols + THREADS_X - 1)/THREADS_X, (intensity.rows + THREADS_Y - 1)/THREADS_Y);
+	const dim3 blockSize(THREADS_X, THREADS_Y);
+
+	contrast.create(intensity.size(), CV_8U);
+
+	mean_subtract_kernel<<<gridSize, blockSize, 0, stream>>>(
+		intensity.ptr<uchar>(),
+		contrast.ptr<uchar>(),
+		intensity.step1(),
+		contrast.step1(),
+		intensity.cols,
+		intensity.rows,
+		(radius > 0) ? radius : 0
+	);
+	cudaSafeCall( cudaGetLastError() );
+}
diff --git a/components/streams/src/feed.cpp b/components/streams/src/feed.cpp index f9b468605bcc336e3bf6d89f137eae7fe252a087..2cd767af94ae51af325226340214815aecd5b41f 100644 --- a/components/streams/src/feed.cpp +++ b/components/streams/src/feed.cpp @@ -204,9 +204,9 @@ Feed::Feed(nlohmann::json &config, ftl::net::Universe*net) : if (!did_pipe) { LOG(WARNING) << "Feed Pipeline dropped (" << fs->frameset() << ")"; - ftl::pool.push([this,fs](int id) { - _dispatch(fs); - }); + //ftl::pool.push([this,fs](int id) { + // _dispatch(fs); + //}); } _processAudio(fs);