diff --git a/components/rgbd-sources/include/ftl/offilter.hpp b/components/rgbd-sources/include/ftl/offilter.hpp index c63b4ca5486d7b6698d6a9933d762093999d344a..194ee20fb39bf759afc1ba2804d16c074c4fd32d 100644 --- a/components/rgbd-sources/include/ftl/offilter.hpp +++ b/components/rgbd-sources/include/ftl/offilter.hpp @@ -1,6 +1,7 @@ #pragma once #include <ftl/config.h> +#include <ftl/rgbd/frame.hpp> #ifdef HAVE_OPTFLOW #include <opencv2/core.hpp> @@ -14,23 +15,14 @@ class OFDisparityFilter { public: OFDisparityFilter() : n_max_(0), threshold_(0.0), size_(0, 0) {} // TODO: invalid state OFDisparityFilter(cv::Size size, int n_frames, float threshold); - void filter(cv::Mat &disp, const cv::Mat &rgb); - void filter(cv::cuda::GpuMat &disp, const cv::cuda::GpuMat &optflow); + void filter(ftl::rgbd::Frame &frame, cv::cuda::Stream &stream); private: int n_; int n_max_; float threshold_; - cv::Size size_; - cv::Mat disp_; cv::cuda::GpuMat disp_old_; - cv::Mat gray_; - - cv::Mat flowxy_; - cv::Mat flowxy_up_; - - cv::Ptr<cv::cuda::NvidiaOpticalFlow_1_0> nvof_; }; } diff --git a/components/rgbd-sources/src/algorithms/fixstars_sgm.cpp b/components/rgbd-sources/src/algorithms/fixstars_sgm.cpp index 782338fc2be41d56a4d6fcd4f4920f6d920e663f..de0ae29326cb8bd1d0e738e049abf40389b52040 100644 --- a/components/rgbd-sources/src/algorithms/fixstars_sgm.cpp +++ b/components/rgbd-sources/src/algorithms/fixstars_sgm.cpp @@ -131,11 +131,7 @@ void FixstarsSGM::compute(ftl::rgbd::Frame &frame, cv::cuda::Stream &stream) dispt_scaled.convertTo(disp, CV_32F, 1.0f / 16.0f, stream); #ifdef HAVE_OPTFLOW - if (use_off_) - { - frame.getChannel<Mat>(ftl::rgbd::kChanDisparity); - off_.filter(frame.setChannel<Mat>(ftl::rgbd::kChanDisparity), Mat(lbw_)); - } + if (use_off_) { off_.filter(frame, stream); } #endif } diff --git a/components/rgbd-sources/src/algorithms/offilter.cu b/components/rgbd-sources/src/algorithms/offilter.cu index 80e48663beb626aeaea8af4d0cabe2f8c9926bf0..546f94c1102c4bd6adb6a7d9e0ab96fb46787e26 100644 --- a/components/rgbd-sources/src/algorithms/offilter.cu +++ b/components/rgbd-sources/src/algorithms/offilter.cu @@ -11,39 +11,48 @@ __device__ void quicksort(float A[], size_t n) QSORT(n, LESS, SWAP); } -static const int max_history = 32; - template<typename T> __device__ static bool inline isValidDisparity(T d) { return (0.0 < d) && (d < 256.0); } // TODO +static const int max_history = 32; // TODO dynamic shared memory + __global__ void temporal_median_filter_kernel( cv::cuda::PtrStepSz<float> disp, - cv::cuda::PtrStepSz<int> optflow, + cv::cuda::PtrStepSz<int16_t> optflow, cv::cuda::PtrStepSz<float> history, - int n_max, float threshold) + int n_max, + int16_t threshold // fixed point 10.5 + uint granularity // 4 for Turing +) { - float sorted[max_history]; + float sorted[max_history]; // TODO: dynamic shared memory for (STRIDE_Y(y, disp.rows)) { for (STRIDE_X(x, disp.cols)) { int flowx = optflow(y, 2 * x); int flowy = optflow(y, 2 * x + 1); - if ((abs(flowx) + abs(flowy)) > 32) // TODO float <-> fixed 10.5 + int flowx = optflow(round(y / granularity), 2 * round(x / granularity)); + int flowy = optflow(round(y/ granularity), 2 * round(x / granularity) + 1); + + if ((abs(flowx) + abs(flowy)) > threshold) { + // last element in history[x][y][t] history(y, (x + 1) * n_max - 1) = 0.0; return; } - int n = history(y, (x + 1) * n_max - 1); + int count = history(y, (x + 1) * n_max - 1); + int n = count % (n_max - 1); if (isValidDisparity(disp(y, x))) { history(y, (x + 1) * n_max - 1) += 1.0; - history(y, x * n_max + n % (n_max - 1)) = disp(y, x); + count++; + history(y, x * n_max + n) = disp(y, x); } - int n_end = n; + int n_end = count; if (n_end >= n_max) { n_end = n_max - 1; } if (n_end != 0) @@ -70,8 +79,13 @@ void optflow_filter(cv::cuda::GpuMat &disp, const cv::cuda::GpuMat &optflow, dim3 threads(128, 1, 1); grid.x = cv::cuda::device::divUp(disp.cols, 128); grid.y = cv::cuda::device::divUp(disp.rows, 1); + + // TODO: dynamic shared memory temporal_median_filter_kernel<<<grid, threads, 0, cv::cuda::StreamAccessor::getStream(stream)>>> - (disp, optflow, history, n, threshold); + ( disp, optflow, history, n, + round(threshold * (1 << 5)) // TODO: documentation; 10.5 format + 4, // TODO: (4 pixels granularity for Turing) + ); cudaSafeCall(cudaGetLastError()); } diff --git a/components/rgbd-sources/src/offilter.cpp b/components/rgbd-sources/src/offilter.cpp index bf1a5414dd5973452332e558ae375011cacbc56d..2fb6d162317d0d5563a8307061556e8e685562dc 100644 --- a/components/rgbd-sources/src/offilter.cpp +++ b/components/rgbd-sources/src/offilter.cpp @@ -1,7 +1,6 @@ #include "ftl/offilter.hpp" #include "cuda_algorithms.hpp" - #ifdef HAVE_OPTFLOW #include <loguru.hpp> @@ -16,42 +15,25 @@ using std::vector; template<typename T> static bool inline isValidDisparity(T d) { return (0.0 < d) && (d < 256.0); } // TODO OFDisparityFilter::OFDisparityFilter(Size size, int n_frames, float threshold) : - n_(0), n_max_(n_frames), threshold_(threshold), size_(size) + n_(0), n_max_(n_frames + 1), threshold_(threshold) { - disp_ = Mat::zeros(cv::Size(size.width * n_frames, size.height), CV_64FC1); - disp_old_ = cv::cuda::GpuMat(cv::Size(size.width * n_frames, size.height), CV_32FC1); - gray_ = Mat::zeros(size, CV_8UC1); + CHECK((n_max_ > 1) && (n_max_ >= 32)) << "History length must be between 0 and 31!"; + disp_old_ = cv::cuda::GpuMat(cv::Size(size.width * n_max_, size.height), CV_32FC1); - nvof_ = cv::cuda::NvidiaOpticalFlow_1_0::create(size.width, size.height, + /*nvof_ = cv::cuda::NvidiaOpticalFlow_1_0::create(size.width, size.height, cv::cuda::NvidiaOpticalFlow_1_0::NV_OF_PERF_LEVEL_SLOW, - true, false, false, 0); + true, false, false, 0);*/ } -void OFDisparityFilter::filter(cv::cuda::GpuMat &disp, const cv::cuda::GpuMat &optflow) +void OFDisparityFilter::filter(ftl::rgbd::Frame &frame, cv::cuda::Stream &stream) { - ftl::cuda::optflow_filter(disp, optflow, disp_old_, n_max_, threshold_, cv::cuda::Stream::Null()); - cv::cuda::Stream::Null().waitForCompletion(); -} + const cv::cuda::GpuMat &optflow = frame.getChannel<cv::cuda::GpuMat>(kChanFlow, stream); + frame.getChannel<cv::cuda::GpuMat>(kChanDisparity, stream); + stream.waitForCompletion(); + cv::cuda::GpuMat &disp = frame.setChannel<cv::cuda::GpuMat>(kChanDisparity); -void OFDisparityFilter::filter(Mat &disp, const Mat &gray) -{ - const int n = n_; - n_ = (n_ + 1) % n_max_; - nvof_->calc(gray, gray_, flowxy_); - nvof_->upSampler( flowxy_, size_.width, size_.height, - nvof_->getGridSize(), flowxy_up_); - - CHECK(disp.type() == CV_32FC1); - CHECK(gray.type() == CV_8UC1); - CHECK(flowxy_up_.type() == CV_32FC2); - - gray.copyTo(gray_); - - using cv::cuda::GpuMat; - GpuMat disp_gpu(disp); - filter(disp_gpu, GpuMat(flowxy_up_)); - disp_gpu.download(disp); + ftl::cuda::optflow_filter(disp, optflow, disp_old_, n_max_, threshold_, stream); } #endif // HAVE_OPTFLOW