Skip to content
Snippets Groups Projects
Commit 650ff94d authored by Sebastian Hahta's avatar Sebastian Hahta
Browse files

optical flow median filter works on GPU

parent 49a341b6
No related branches found
No related tags found
1 merge request!105CUDA optical flow smoothing
Pipeline #13573 failed
#pragma once #pragma once
#include <ftl/config.h> #include <ftl/config.h>
#include <ftl/rgbd/frame.hpp>
#ifdef HAVE_OPTFLOW #ifdef HAVE_OPTFLOW
#include <opencv2/core.hpp> #include <opencv2/core.hpp>
...@@ -14,23 +15,14 @@ class OFDisparityFilter { ...@@ -14,23 +15,14 @@ class OFDisparityFilter {
public: public:
OFDisparityFilter() : n_max_(0), threshold_(0.0), size_(0, 0) {} // TODO: invalid state OFDisparityFilter() : n_max_(0), threshold_(0.0), size_(0, 0) {} // TODO: invalid state
OFDisparityFilter(cv::Size size, int n_frames, float threshold); OFDisparityFilter(cv::Size size, int n_frames, float threshold);
void filter(cv::Mat &disp, const cv::Mat &rgb); void filter(ftl::rgbd::Frame &frame, cv::cuda::Stream &stream);
void filter(cv::cuda::GpuMat &disp, const cv::cuda::GpuMat &optflow);
private: private:
int n_; int n_;
int n_max_; int n_max_;
float threshold_; float threshold_;
cv::Size size_;
cv::Mat disp_;
cv::cuda::GpuMat disp_old_; cv::cuda::GpuMat disp_old_;
cv::Mat gray_;
cv::Mat flowxy_;
cv::Mat flowxy_up_;
cv::Ptr<cv::cuda::NvidiaOpticalFlow_1_0> nvof_;
}; };
} }
......
...@@ -131,11 +131,7 @@ void FixstarsSGM::compute(ftl::rgbd::Frame &frame, cv::cuda::Stream &stream) ...@@ -131,11 +131,7 @@ void FixstarsSGM::compute(ftl::rgbd::Frame &frame, cv::cuda::Stream &stream)
dispt_scaled.convertTo(disp, CV_32F, 1.0f / 16.0f, stream); dispt_scaled.convertTo(disp, CV_32F, 1.0f / 16.0f, stream);
#ifdef HAVE_OPTFLOW #ifdef HAVE_OPTFLOW
if (use_off_) if (use_off_) { off_.filter(frame, stream); }
{
frame.getChannel<Mat>(ftl::rgbd::kChanDisparity);
off_.filter(frame.setChannel<Mat>(ftl::rgbd::kChanDisparity), Mat(lbw_));
}
#endif #endif
} }
......
...@@ -11,39 +11,48 @@ __device__ void quicksort(float A[], size_t n) ...@@ -11,39 +11,48 @@ __device__ void quicksort(float A[], size_t n)
QSORT(n, LESS, SWAP); QSORT(n, LESS, SWAP);
} }
static const int max_history = 32;
template<typename T> template<typename T>
__device__ static bool inline isValidDisparity(T d) { return (0.0 < d) && (d < 256.0); } // TODO __device__ static bool inline isValidDisparity(T d) { return (0.0 < d) && (d < 256.0); } // TODO
static const int max_history = 32; // TODO dynamic shared memory
__global__ void temporal_median_filter_kernel( __global__ void temporal_median_filter_kernel(
cv::cuda::PtrStepSz<float> disp, cv::cuda::PtrStepSz<float> disp,
cv::cuda::PtrStepSz<int> optflow, cv::cuda::PtrStepSz<int16_t> optflow,
cv::cuda::PtrStepSz<float> history, cv::cuda::PtrStepSz<float> history,
int n_max, float threshold) int n_max,
int16_t threshold // fixed point 10.5
uint granularity // 4 for Turing
)
{ {
float sorted[max_history]; float sorted[max_history]; // TODO: dynamic shared memory
for (STRIDE_Y(y, disp.rows)) { for (STRIDE_Y(y, disp.rows)) {
for (STRIDE_X(x, disp.cols)) { for (STRIDE_X(x, disp.cols)) {
int flowx = optflow(y, 2 * x); int flowx = optflow(y, 2 * x);
int flowy = optflow(y, 2 * x + 1); int flowy = optflow(y, 2 * x + 1);
if ((abs(flowx) + abs(flowy)) > 32) // TODO float <-> fixed 10.5 int flowx = optflow(round(y / granularity), 2 * round(x / granularity));
int flowy = optflow(round(y/ granularity), 2 * round(x / granularity) + 1);
if ((abs(flowx) + abs(flowy)) > threshold)
{ {
// last element in history[x][y][t]
history(y, (x + 1) * n_max - 1) = 0.0; history(y, (x + 1) * n_max - 1) = 0.0;
return; return;
} }
int n = history(y, (x + 1) * n_max - 1); int count = history(y, (x + 1) * n_max - 1);
int n = count % (n_max - 1);
if (isValidDisparity(disp(y, x))) if (isValidDisparity(disp(y, x)))
{ {
history(y, (x + 1) * n_max - 1) += 1.0; history(y, (x + 1) * n_max - 1) += 1.0;
history(y, x * n_max + n % (n_max - 1)) = disp(y, x); count++;
history(y, x * n_max + n) = disp(y, x);
} }
int n_end = n; int n_end = count;
if (n_end >= n_max) { n_end = n_max - 1; } if (n_end >= n_max) { n_end = n_max - 1; }
if (n_end != 0) if (n_end != 0)
...@@ -70,8 +79,13 @@ void optflow_filter(cv::cuda::GpuMat &disp, const cv::cuda::GpuMat &optflow, ...@@ -70,8 +79,13 @@ void optflow_filter(cv::cuda::GpuMat &disp, const cv::cuda::GpuMat &optflow,
dim3 threads(128, 1, 1); dim3 threads(128, 1, 1);
grid.x = cv::cuda::device::divUp(disp.cols, 128); grid.x = cv::cuda::device::divUp(disp.cols, 128);
grid.y = cv::cuda::device::divUp(disp.rows, 1); grid.y = cv::cuda::device::divUp(disp.rows, 1);
// TODO: dynamic shared memory
temporal_median_filter_kernel<<<grid, threads, 0, cv::cuda::StreamAccessor::getStream(stream)>>> temporal_median_filter_kernel<<<grid, threads, 0, cv::cuda::StreamAccessor::getStream(stream)>>>
(disp, optflow, history, n, threshold); ( disp, optflow, history, n,
round(threshold * (1 << 5)) // TODO: documentation; 10.5 format
4, // TODO: (4 pixels granularity for Turing)
);
cudaSafeCall(cudaGetLastError()); cudaSafeCall(cudaGetLastError());
} }
......
#include "ftl/offilter.hpp" #include "ftl/offilter.hpp"
#include "cuda_algorithms.hpp" #include "cuda_algorithms.hpp"
#ifdef HAVE_OPTFLOW #ifdef HAVE_OPTFLOW
#include <loguru.hpp> #include <loguru.hpp>
...@@ -16,42 +15,25 @@ using std::vector; ...@@ -16,42 +15,25 @@ using std::vector;
template<typename T> static bool inline isValidDisparity(T d) { return (0.0 < d) && (d < 256.0); } // TODO template<typename T> static bool inline isValidDisparity(T d) { return (0.0 < d) && (d < 256.0); } // TODO
OFDisparityFilter::OFDisparityFilter(Size size, int n_frames, float threshold) : OFDisparityFilter::OFDisparityFilter(Size size, int n_frames, float threshold) :
n_(0), n_max_(n_frames), threshold_(threshold), size_(size) n_(0), n_max_(n_frames + 1), threshold_(threshold)
{ {
disp_ = Mat::zeros(cv::Size(size.width * n_frames, size.height), CV_64FC1); CHECK((n_max_ > 1) && (n_max_ >= 32)) << "History length must be between 0 and 31!";
disp_old_ = cv::cuda::GpuMat(cv::Size(size.width * n_frames, size.height), CV_32FC1); disp_old_ = cv::cuda::GpuMat(cv::Size(size.width * n_max_, size.height), CV_32FC1);
gray_ = Mat::zeros(size, CV_8UC1);
nvof_ = cv::cuda::NvidiaOpticalFlow_1_0::create(size.width, size.height, /*nvof_ = cv::cuda::NvidiaOpticalFlow_1_0::create(size.width, size.height,
cv::cuda::NvidiaOpticalFlow_1_0::NV_OF_PERF_LEVEL_SLOW, cv::cuda::NvidiaOpticalFlow_1_0::NV_OF_PERF_LEVEL_SLOW,
true, false, false, 0); true, false, false, 0);*/
} }
void OFDisparityFilter::filter(cv::cuda::GpuMat &disp, const cv::cuda::GpuMat &optflow) void OFDisparityFilter::filter(ftl::rgbd::Frame &frame, cv::cuda::Stream &stream)
{ {
ftl::cuda::optflow_filter(disp, optflow, disp_old_, n_max_, threshold_, cv::cuda::Stream::Null()); const cv::cuda::GpuMat &optflow = frame.getChannel<cv::cuda::GpuMat>(kChanFlow, stream);
cv::cuda::Stream::Null().waitForCompletion(); frame.getChannel<cv::cuda::GpuMat>(kChanDisparity, stream);
} stream.waitForCompletion();
cv::cuda::GpuMat &disp = frame.setChannel<cv::cuda::GpuMat>(kChanDisparity);
void OFDisparityFilter::filter(Mat &disp, const Mat &gray) ftl::cuda::optflow_filter(disp, optflow, disp_old_, n_max_, threshold_, stream);
{
const int n = n_;
n_ = (n_ + 1) % n_max_;
nvof_->calc(gray, gray_, flowxy_);
nvof_->upSampler( flowxy_, size_.width, size_.height,
nvof_->getGridSize(), flowxy_up_);
CHECK(disp.type() == CV_32FC1);
CHECK(gray.type() == CV_8UC1);
CHECK(flowxy_up_.type() == CV_32FC2);
gray.copyTo(gray_);
using cv::cuda::GpuMat;
GpuMat disp_gpu(disp);
filter(disp_gpu, GpuMat(flowxy_up_));
disp_gpu.download(disp);
} }
#endif // HAVE_OPTFLOW #endif // HAVE_OPTFLOW
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment