diff --git a/applications/gui/src/src_window.cpp b/applications/gui/src/src_window.cpp index 556d1f794419547b41ee18464ff0213a967bdb3a..1fb77074c6519500005f89f92985e99add2120c0 100644 --- a/applications/gui/src/src_window.cpp +++ b/applications/gui/src/src_window.cpp @@ -167,7 +167,16 @@ SourceWindow::SourceWindow(ftl::gui::Screen *screen) bool SourceWindow::_processFrameset(ftl::rgbd::FrameSet &fs, bool fromstream) { // Request the channels required by current camera configuration - if (fromstream) interceptor_->select(fs.id, _aggregateChannels(fs.id)); + if (fromstream) { + auto cs = _aggregateChannels(fs.id); + + auto avail = static_cast<const ftl::stream::Stream*>(interceptor_)->available(fs.id); + if (cs.has(Channel::Depth) && !avail.has(Channel::Depth) && avail.has(Channel::Right)) { + cs -= Channel::Depth; + cs += Channel::Right; + } + interceptor_->select(fs.id, cs); + } /*if (fs.id > 0) { LOG(INFO) << "Got frameset: " << fs.id; @@ -218,6 +227,7 @@ bool SourceWindow::_processFrameset(ftl::rgbd::FrameSet &fs, bool fromstream) { void SourceWindow::_checkFrameSets(int id) { while (framesets_.size() <= id) { auto *p = ftl::config::create<ftl::operators::Graph>(screen_->root(), "pre_filters"); + p->append<ftl::operators::DepthChannel>("depth"); //p->append<ftl::operators::ColourChannels>("colour"); // Convert BGR to BGRA p->append<ftl::operators::DetectAndTrack>("facedetection")->value("enabled", false); p->append<ftl::operators::ArUco>("aruco")->value("enabled", false); diff --git a/components/common/cpp/src/cuda_common.cpp b/components/common/cpp/src/cuda_common.cpp index 779b9feb4b5fdecc81f25e30f81a423b1833bb38..3431b6d9902ade670413f4a74178e0796994867a 100644 --- a/components/common/cpp/src/cuda_common.cpp +++ b/components/common/cpp/src/cuda_common.cpp @@ -19,7 +19,7 @@ bool ftl::cuda::initialise() { properties.resize(dev_count); for (int i=0; i<dev_count; i++) { cudaSafeCall(cudaGetDeviceProperties(&properties[i], i)); - LOG(INFO) << " - " << properties[i].name; + LOG(INFO) << " - " << properties[i].name << " - compute " << properties[i].major << "." << properties[i].minor; } return true; @@ -29,7 +29,7 @@ bool ftl::cuda::hasCompute(int major, int minor) { int dev = -1; cudaSafeCall(cudaGetDevice(&dev)); - if (dev > 0) { + if (dev >= 0) { return properties[dev].major > major || (properties[dev].major == major && properties[dev].minor >= minor); } diff --git a/components/operators/include/ftl/operators/disparity.hpp b/components/operators/include/ftl/operators/disparity.hpp index aaa52e60e23c161f10fff90b685810924e53e024..54cfa1c8846f4c38327a06406905f9fbfdfe597b 100644 --- a/components/operators/include/ftl/operators/disparity.hpp +++ b/components/operators/include/ftl/operators/disparity.hpp @@ -109,6 +109,7 @@ class DepthChannel : public ftl::operators::Operator { class OpticalFlowTemporalSmoothing : public ftl::operators::Operator { public: explicit OpticalFlowTemporalSmoothing(ftl::Configurable*); + OpticalFlowTemporalSmoothing(ftl::Configurable*, const std::tuple<ftl::codecs::Channel> ¶ms); ~OpticalFlowTemporalSmoothing(); inline Operator::Type type() const override { return Operator::Type::OneToOne; } @@ -116,9 +117,10 @@ class OpticalFlowTemporalSmoothing : public ftl::operators::Operator { bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t stream) override; private: + void _init(ftl::Configurable* cfg); bool init(); - const ftl::codecs::Channel channel_ = ftl::codecs::Channel::Disparity; + ftl::codecs::Channel channel_ = ftl::codecs::Channel::Depth; cv::cuda::GpuMat history_; cv::Size size_; int n_max_; diff --git a/components/operators/src/depth.cpp b/components/operators/src/depth.cpp index b9462b8242348d9f632bd22d38fe2f5b853c1c0a..ef11ba02bed6c21f915f6e7706e68952bb7335a5 100644 --- a/components/operators/src/depth.cpp +++ b/components/operators/src/depth.cpp @@ -8,6 +8,7 @@ #include "ftl/operators/disparity.hpp" #include "ftl/operators/depth.hpp" #include "ftl/operators/mask.hpp" +#include "ftl/operators/opticalflow.hpp" #include "./disparity/opencv/disparity_bilateral_filter.hpp" @@ -134,16 +135,21 @@ void DepthChannel::_createPipeline() { config()->value("height", 720)); pipe_->append<ftl::operators::ColourChannels>("colour"); // Convert BGR to BGRA + pipe_->append<ftl::operators::CrossSupport>("cross"); #ifdef HAVE_LIBSGM pipe_->append<ftl::operators::FixstarsSGM>("algorithm"); #endif #ifdef HAVE_OPTFLOW - pipe_->append<ftl::operators::OpticalFlowTemporalSmoothing>("optflow_filter"); + pipe_->append<ftl::operators::NVOpticalFlow>("optflow", Channel::Colour, Channel::Flow); + pipe_->append<ftl::operators::OpticalFlowTemporalSmoothing>("optflow_filter", Channel::Disparity); #endif pipe_->append<ftl::operators::DisparityBilateralFilter>("bilateral_filter"); + //pipe_->append<ftl::operators::OpticalFlowTemporalSmoothing>("optflow_filter", Channel::Disparity); pipe_->append<ftl::operators::DisparityToDepth>("calculate_depth"); + #ifdef HAVE_OPTFLOW + //pipe_->append<ftl::operators::OpticalFlowTemporalSmoothing>("optflow_filter", Channel::Depth); // FIXME: Has a history so not with multiple sources! + #endif pipe_->append<ftl::operators::Normals>("normals"); // Estimate surface normals - pipe_->append<ftl::operators::CrossSupport>("cross"); pipe_->append<ftl::operators::DiscontinuityMask>("discontinuity_mask"); pipe_->append<ftl::operators::AggreMLS>("mls"); // Perform MLS (using smoothing channel) } @@ -161,7 +167,7 @@ bool DepthChannel::apply(ftl::rgbd::FrameSet &in, ftl::rgbd::FrameSet &out, cuda cv::cuda::GpuMat& left = f.get<cv::cuda::GpuMat>(Channel::Left); cv::cuda::GpuMat& right = f.get<cv::cuda::GpuMat>(Channel::Right); cv::cuda::GpuMat& depth = f.create<cv::cuda::GpuMat>(Channel::Depth); - depth.create(depth_size_, CV_32FC1); + depth.create(left.size(), CV_32FC1); if (left.empty() || right.empty()) continue; diff --git a/components/operators/src/disparity/cuda.hpp b/components/operators/src/disparity/cuda.hpp index 48c8ffd1b794432676a200ad338572a816c2351c..74102111e31b6f44a22ae083fa8cf698723ec904 100644 --- a/components/operators/src/disparity/cuda.hpp +++ b/components/operators/src/disparity/cuda.hpp @@ -11,7 +11,7 @@ void depth_to_disparity(cv::cuda::GpuMat &disparity, const cv::cuda::GpuMat &dep void optflow_filter(cv::cuda::GpuMat &disp, const cv::cuda::GpuMat &optflow, - cv::cuda::GpuMat &history, int n_max, float threshold, + cv::cuda::GpuMat &history, cv::cuda::GpuMat &support, int n_max, float threshold, bool fill, cv::cuda::Stream &stream); } diff --git a/components/operators/src/disparity/optflow_smoothing.cpp b/components/operators/src/disparity/optflow_smoothing.cpp index 71f379c9b26868975a74e371b4e5cc39a4d730fa..82476395a7ed7a5ec90b79f3aebbd1b7e9b3fb16 100644 --- a/components/operators/src/disparity/optflow_smoothing.cpp +++ b/components/operators/src/disparity/optflow_smoothing.cpp @@ -18,45 +18,55 @@ using std::vector; template<typename T> static bool inline isValidDisparity(T d) { return (0.0 < d) && (d < 256.0); } // TODO +OpticalFlowTemporalSmoothing::OpticalFlowTemporalSmoothing(ftl::Configurable* cfg, const std::tuple<ftl::codecs::Channel> ¶ms) : + ftl::operators::Operator(cfg) { + channel_ = std::get<0>(params); + _init(cfg); +} + OpticalFlowTemporalSmoothing::OpticalFlowTemporalSmoothing(ftl::Configurable* cfg) : ftl::operators::Operator(cfg) { + _init(cfg); +} + +void OpticalFlowTemporalSmoothing::_init(ftl::Configurable* cfg) { size_ = Size(0, 0); - n_max_ = cfg->value("history_size", 7); + n_max_ = cfg->value("history_size", 16); if (n_max_ < 1) { LOG(WARNING) << "History length must be larger than 0, using default (0)"; n_max_ = 7; } - if (n_max_ > 32) { + if (n_max_ > 16) { // TODO: cuda kernel uses fixed size buffer - LOG(WARNING) << "History length can't be larger than 32 (TODO)"; - n_max_ = 32; + LOG(WARNING) << "History length can't be larger than 16 (TODO)"; + n_max_ = 16; } - threshold_ = cfg->value("threshold", 1.0); + threshold_ = cfg->value("threshold", 5.0f); - cfg->on("threshold", [this, &cfg](const ftl::config::Event&) { - float threshold = cfg->value("threshold", 1.0); + cfg->on("threshold", [this](const ftl::config::Event&) { + float threshold = config()->value("threshold", 5.0f); if (threshold < 0.0) { LOG(WARNING) << "invalid threshold " << threshold << ", value must be positive"; } else { threshold_ = threshold; - init(); + //init(); } }); cfg->on("history_size", [this, &cfg](const ftl::config::Event&) { - int n_max = cfg->value("history_size", 1.0); + int n_max = cfg->value("history_size", 7); if (n_max < 1) { LOG(WARNING) << "History length must be larger than 0"; } - else if (n_max_ > 32) { + else if (n_max_ > 16) { // TODO: cuda kernel uses fixed size buffer - LOG(WARNING) << "History length can't be larger than 32 (TODO)"; + LOG(WARNING) << "History length can't be larger than 16 (TODO)"; } else { n_max_ = n_max; @@ -86,7 +96,7 @@ bool OpticalFlowTemporalSmoothing::apply(Frame &in, Frame &out, cudaStream_t str if (!init()) { return false; } } - ftl::cuda::optflow_filter(data, optflow, history_, n_max_, threshold_, cvstream); + ftl::cuda::optflow_filter(data, optflow, history_, in.get<cv::cuda::GpuMat>(Channel::Support1), n_max_, threshold_, config()->value("filling", false), cvstream); return true; } diff --git a/components/operators/src/disparity/optflow_smoothing.cu b/components/operators/src/disparity/optflow_smoothing.cu index a7633036fae1e9d20c6a52ae5617466f1b234202..8e3293cd99c8136925acbd999d1c9be90b09e2f4 100644 --- a/components/operators/src/disparity/optflow_smoothing.cu +++ b/components/operators/src/disparity/optflow_smoothing.cu @@ -14,28 +14,55 @@ __device__ void quicksort(float A[], size_t n) } template<typename T> -__device__ static bool inline isValidDisparity(T d) { return (0.0 < d) && (d < 256.0); } // TODO +__device__ static bool inline isValidDisparity(T d) { return d > 0.0f; } -static const int max_history = 32; // TODO dynamic shared memory +__device__ inline float supportArea(uchar4 s) { + const float dx = min(s.x,s.y); + const float dy = min(s.z,s.w); + return sqrt(dx*dx + dy*dy); +} + +template <int FRAC> +__device__ inline short makeFixed(float v) { + return static_cast<short>(v * (1<<FRAC)); +} +static const int MAX_HISTORY = 16; // TODO dynamic shared memory + +template <bool FILLING, int HISTORY> __global__ void temporal_median_filter_kernel( cv::cuda::PtrStepSz<float> disp, - cv::cuda::PtrStepSz<int16_t> optflow, + cv::cuda::PtrStepSz<short2> optflow, cv::cuda::PtrStepSz<float> history, + cv::cuda::PtrStepSz<uchar4> support, int n_max, - int16_t threshold, // fixed point 10.5 + float threshold, float granularity // 4 for Turing ) { - float sorted[max_history]; // TODO: dynamic shared memory + float sorted[HISTORY]; // TODO: dynamic shared memory for (STRIDE_Y(y, disp.rows)) { for (STRIDE_X(x, disp.cols)) { - int flowx = optflow(round(y / granularity), 2 * round(x / granularity)); - int flowy = optflow(round(y / granularity), 2 * round(x / granularity) + 1); + float area = supportArea(support(y,x)) / 25.0f; + short2 flow = optflow(round(y / granularity), round(x / granularity)); + //int flowy = optflow(round(y / granularity), 2 * round(x / granularity) + 1); + + float t = area * threshold + 0.25f; // 0.25 is the 1/4 pixel accuracy NVIDIA claim - if ((abs(flowx) + abs(flowy)) > threshold) + if (max(abs(flow.x),abs(flow.y)) > makeFixed<5>(t)) { + // TODO: Perhaps rather than discard it could follow the optical flow + // This would require the generation of a depth flow also. + // Perhaps do optical flow on the right image and compare motions, + // small differences should indicate a change in depth. Or perhaps + // consider any scale change? But this works less well in more cases + + // Most likely the above would have to be a totally separate process + // since the whole history would have to be moved and the idea of + // median breaks a little. Perhaps this operator is for static + // areas and another operator is for motion areas. + // last element in history[x][y][t] history(y, (x + 1) * n_max - 1) = 0.0; return; @@ -44,25 +71,30 @@ __global__ void temporal_median_filter_kernel( int count = history(y, (x + 1) * n_max - 1); int n = count % (n_max - 1); - if (isValidDisparity(disp(y, x))) + const float disparity = disp(y, x); + + if (isValidDisparity(disparity)) { history(y, (x + 1) * n_max - 1) += 1.0; count++; - history(y, x * n_max + n) = disp(y, x); + history(y, x * n_max + n) = disparity; } - int n_end = count; - if (n_end >= n_max) { n_end = n_max - 1; } + if (FILLING || isValidDisparity(disparity)) { + int n_end = count; + if (n_end >= n_max) { n_end = n_max - 1; } - if (n_end != 0) - { - for (size_t i = 0; i < n_end; i++) + if (n_end != 0) { - sorted[i] = history(y, x * n_max + i); + for (size_t i = 0; i < n_end; i++) + { + sorted[i] = history(y, x * n_max + i); + } + + quicksort(sorted, n_end); + const float sd = sorted[n_end / 2]; + if (isValidDisparity(sd)) disp(y, x) = sd; } - - quicksort(sorted, n_end); - disp(y, x) = sorted[n_end / 2]; } }} } @@ -71,7 +103,8 @@ namespace ftl { namespace cuda { void optflow_filter(cv::cuda::GpuMat &disp, const cv::cuda::GpuMat &optflow, - cv::cuda::GpuMat &history, int n, float threshold, + cv::cuda::GpuMat &history, cv::cuda::GpuMat &support, + int n, float threshold, bool fill, cv::cuda::Stream &stream) { dim3 grid(1, 1, 1); @@ -79,12 +112,19 @@ void optflow_filter(cv::cuda::GpuMat &disp, const cv::cuda::GpuMat &optflow, grid.x = cv::cuda::device::divUp(disp.cols, 128); grid.y = cv::cuda::device::divUp(disp.rows, 1); - // TODO: dynamic shared memory - temporal_median_filter_kernel<<<grid, threads, 0, cv::cuda::StreamAccessor::getStream(stream)>>> - ( disp, optflow, history, n, - round(threshold * (1 << 5)), // TODO: documentation; 10.5 format - 4 // TODO: (4 pixels granularity for Turing) - ); + if (fill) { + temporal_median_filter_kernel<true, MAX_HISTORY><<<grid, threads, 0, cv::cuda::StreamAccessor::getStream(stream)>>> + ( disp, optflow, history, support, n, + threshold, + 4 // TODO: (4 pixels granularity for Turing) + ); + } else { + temporal_median_filter_kernel<false, MAX_HISTORY><<<grid, threads, 0, cv::cuda::StreamAccessor::getStream(stream)>>> + ( disp, optflow, history, support, n, + threshold, + 4 // TODO: (4 pixels granularity for Turing) + ); + } cudaSafeCall(cudaGetLastError()); } diff --git a/components/operators/src/nvopticalflow.cpp b/components/operators/src/nvopticalflow.cpp index 17a5a58559d60a70b9bcb9758d37f6beed9d4753..deee3ecdbc27d9d99228cf607d02703beca8c866 100644 --- a/components/operators/src/nvopticalflow.cpp +++ b/components/operators/src/nvopticalflow.cpp @@ -1,4 +1,5 @@ #include <ftl/operators/opticalflow.hpp> +#include <ftl/exception.hpp> #include <opencv2/cudaimgproc.hpp> @@ -14,6 +15,7 @@ using cv::cuda::GpuMat; NVOpticalFlow::NVOpticalFlow(ftl::Configurable* cfg) : ftl::operators::Operator(cfg), channel_in_(ftl::codecs::Channel::Colour), channel_out_(ftl::codecs::Channel::Flow) { size_ = Size(0, 0); + } NVOpticalFlow::NVOpticalFlow(ftl::Configurable*cfg, const std::tuple<ftl::codecs::Channel,ftl::codecs::Channel> &channels) : ftl::operators::Operator(cfg) { @@ -25,6 +27,10 @@ NVOpticalFlow::~NVOpticalFlow() { } bool NVOpticalFlow::init() { + if (!ftl::cuda::hasCompute(7,5)) { + config()->set("enabled", false); + throw FTL_Error("GPU does not support optical flow"); + } nvof_ = cv::cuda::NvidiaOpticalFlow_1_0::create( size_.width, size_.height, cv::cuda::NvidiaOpticalFlow_1_0::NV_OF_PERF_LEVEL_SLOW, @@ -36,7 +42,8 @@ bool NVOpticalFlow::init() { } bool NVOpticalFlow::apply(Frame &in, Frame &out, cudaStream_t stream) { - if (!in.hasChannel(channel_in_)) { return false; } + if (!in.hasChannel(channel_in_)) return false; + if (in.hasChannel(channel_out_)) return true; if (in.get<GpuMat>(channel_in_).size() != size_) { size_ = in.get<GpuMat>(channel_in_).size(); @@ -48,6 +55,8 @@ bool NVOpticalFlow::apply(Frame &in, Frame &out, cudaStream_t stream) { cv::cuda::cvtColor(in.get<GpuMat>(channel_in_), left_gray_, cv::COLOR_BGRA2GRAY, 0, cvstream); + // TODO: Use optical flow confidence output, perhaps combined with a + // sensitivity adjustment nvof_->calc(left_gray_, left_gray_prev_, flow, cvstream); std::swap(left_gray_, left_gray_prev_); diff --git a/components/operators/src/weighting.cpp b/components/operators/src/weighting.cpp index 928043d6f5d1f58506447f02ccd7cb2be10c54a2..c1c57aa51739b6285248176c5d4ca7db36101917 100644 --- a/components/operators/src/weighting.cpp +++ b/components/operators/src/weighting.cpp @@ -18,7 +18,7 @@ PixelWeights::~PixelWeights() { } bool PixelWeights::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t stream) { - if (in.hasChannel(Channel::Mask)) return true; + //if (in.hasChannel(Channel::Mask)) return true; ftl::cuda::PixelWeightingParameters params; //int radius = config()->value("radius", 2);