Skip to content
Snippets Groups Projects
Commit 8c315324 authored by Nicolas Pope
Browse files

Merge branch 'exp/bilatperf' into 'master'

Improved bilateral filter performance

See merge request nicolas.pope/ftl!287
parents e76cc98b c931b499
No related branches found
No related tags found
1 merge request: !287 Improved bilateral filter performance
Pipeline #22842 passed
......@@ -433,10 +433,10 @@ void ftl::gui::Camera::_draw(std::vector<ftl::rgbd::FrameSet*> &fss) {
void ftl::gui::Camera::update(int fsid, const ftl::codecs::Channels<0> &c) {
if (!isVirtual() && ((1 << fsid) & fsmask_)) {
channels_ = c;
if (c.has(Channel::Depth)) {
channels_ += c;
//if (c.has(Channel::Depth)) {
//channels_ += Channel::ColourNormals;
}
//}
}
}
......@@ -469,6 +469,7 @@ void ftl::gui::Camera::update(std::vector<ftl::rgbd::FrameSet*> &fss) {
if ((size_t)fid_ >= fs->frames.size()) return;
frame = &fs->frames[fid_];
channels_ = frame->getChannels();
if (frame->hasChannel(Channel::Messages)) {
msgs_.clear();
......
......@@ -248,7 +248,7 @@ bool SourceWindow::_processFrameset(ftl::rgbd::FrameSet &fs, bool fromstream) {
ftl::codecs::Channels<0> channels;
if (fromstream) channels = cstream->available(fs.id);
if ((*framesets_[fs.id]).frames.size() > 0) channels += (*framesets_[fs.id]).frames[0].getChannels();
//if ((*framesets_[fs.id]).frames.size() > 0) channels += (*framesets_[fs.id]).frames[0].getChannels();
cam.second.camera->update(fs.id, channels);
}
++cycle_;
......
......@@ -107,9 +107,13 @@ bool DepthBilateralFilter::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out,
const GpuMat &rgb = in.get<GpuMat>(Channel::Colour);
GpuMat &depth = in.get<GpuMat>(channel_);
ftl::cuda::device::disp_bilateral_filter::disp_bilateral_filter<float>(depth, rgb, rgb.channels(), iter_,
table_color_.ptr<float>(), (float *)table_space_.data, table_space_.step / sizeof(float),
radius_, edge_disc_, max_disc_, stream);
UNUSED(rgb);
UNUSED(depth);
// FIXME: Not working right now
//ftl::cuda::device::disp_bilateral_filter::disp_bilateral_filter<float>(depth, rgb, rgb.channels(), iter_,
// table_color_.ptr<float>(), (float *)table_space_.data, table_space_.step / sizeof(float),
// radius_, edge_disc_, max_disc_, stream);
//disp_in.convertTo(disp_int_, CV_16SC1, scale_, cvstream);
//filter_->apply(disp_in, rgb, disp_out, cvstream);
......@@ -160,6 +164,7 @@ bool DepthChannel::apply(ftl::rgbd::FrameSet &in, ftl::rgbd::FrameSet &out, cuda
rbuf_.resize(in.frames.size());
for (size_t i=0; i<in.frames.size(); ++i) {
if (!in.hasFrame(i)) continue;
auto &f = in.frames[i];
if (!f.hasChannel(Channel::Depth) && f.hasChannel(Channel::Right)) {
_createPipeline();
......
......@@ -3,6 +3,8 @@
#include "opencv/joint_bilateral.hpp"
#include "cuda.hpp"
#include <opencv2/cudaimgproc.hpp>
using cv::cuda::GpuMat;
using cv::Size;
......@@ -14,7 +16,7 @@ DisparityBilateralFilter::DisparityBilateralFilter(ftl::Configurable* cfg) :
scale_ = 16.0;
n_disp_ = cfg->value("n_disp", 256);
radius_ = cfg->value("radius", 7);
radius_ = cfg->value("radius", 4);
iter_ = cfg->value("iter", 13);
filter_ = nullptr;
}
......@@ -46,14 +48,18 @@ bool DisparityBilateralFilter::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out
if (!filter_) filter_ = ftl::cuda::createDisparityBilateralFilter(n_disp_ * scale_, radius_, iter_);
filter_->setNumIters(config()->value("iter", 13));
auto cvstream = cv::cuda::StreamAccessor::wrapStream(stream);
const GpuMat &rgb = in.get<GpuMat>(Channel::Colour);
GpuMat &disp_in = in.get<GpuMat>(Channel::Disparity);
GpuMat &disp_out = out.create<GpuMat>(Channel::Disparity);
disp_out.create(disp_in.size(), disp_in.type());
disp_int_.create(disp_in.size(), disp_in.type());
disp_in.convertTo(disp_int_, CV_16SC1, scale_, cvstream);
filter_->apply(disp_int_, rgb, disp_int_result_, cvstream);
disp_int_result_.convertTo(disp_out, disp_in.type(), 1.0/scale_, cvstream);
//disp_in.convertTo(disp_int_, CV_16SC1, scale_, cvstream);
//cv::cuda::cvtColor(rgb, bw_, cv::COLOR_BGRA2GRAY, 0, cvstream);
filter_->apply(disp_in, rgb, disp_int_, cvstream);
cv::cuda::swap(disp_out, disp_int_);
//disp_int_result_.convertTo(disp_out, disp_in.type(), 1.0/scale_, cvstream);
return true;
}
\ No newline at end of file
......@@ -6,13 +6,13 @@
#define PINF __int_as_float(0x7f800000)
#endif
__global__ void d2d_kernel(cv::cuda::PtrStepSz<float> disp, cv::cuda::PtrStepSz<float> depth,
__global__ void d2d_kernel(cv::cuda::PtrStepSz<short> disp, cv::cuda::PtrStepSz<float> depth,
ftl::rgbd::Camera cam) {
for (STRIDE_Y(v,disp.rows)) {
for (STRIDE_X(u,disp.cols)) {
float d = disp(v,u);
depth(v,u) = (d == 0) ? 0.0f : ((cam.baseline*cam.fx) / (d - cam.doffs));
short d = disp(v,u);
depth(v,u) = (d == 0) ? 0.0f : ((cam.baseline*cam.fx) / ((float(d)/16.0f) - cam.doffs));
}
}
}
......@@ -34,14 +34,14 @@ namespace cuda {
//==============================================================================
__global__ void d2drev_kernel(cv::cuda::PtrStepSz<float> disp, cv::cuda::PtrStepSz<float> depth,
__global__ void d2drev_kernel(cv::cuda::PtrStepSz<short> disp, cv::cuda::PtrStepSz<float> depth,
ftl::rgbd::Camera cam) {
for (STRIDE_Y(v,disp.rows)) {
for (STRIDE_X(u,disp.cols)) {
float d = depth(v,u);
float disparity = (d > cam.maxDepth || d < cam.minDepth) ? 0.0f : ((cam.baseline*cam.fx) / d) + cam.doffs;
disp(v,u) = disparity;
disp(v,u) = short(disparity*16.0f);
}
}
}
......
......@@ -123,7 +123,7 @@ bool FixstarsSGM::apply(Frame &in, Frame &out, cudaStream_t stream) {
if (!init()) { return false; }
}
auto &disp = out.create<GpuMat>(Channel::Disparity, Format<float>(l.size()));
auto &disp = out.create<GpuMat>(Channel::Disparity, Format<short>(l.size()));
auto cvstream = cv::cuda::StreamAccessor::wrapStream(stream);
cv::cuda::cvtColor(l, lbw_, cv::COLOR_BGRA2GRAY, 0, cvstream);
......@@ -135,8 +135,8 @@ bool FixstarsSGM::apply(Frame &in, Frame &out, cudaStream_t stream) {
// GpuMat left_pixels(dispt_, cv::Rect(0, 0, max_disp_, dispt_.rows));
// left_pixels.setTo(0);
cv::cuda::threshold(disp_int_, disp_int_, 4096.0f, 0.0f, cv::THRESH_TOZERO_INV, cvstream);
cv::cuda::threshold(disp_int_, disp, 4096.0f, 0.0f, cv::THRESH_TOZERO_INV, cvstream);
disp_int_.convertTo(disp, CV_32F, 1.0f / 16.0f, cvstream);
//disp_int_.convertTo(disp, CV_32F, 1.0f / 16.0f, cvstream);
return true;
}
......@@ -167,7 +167,11 @@ namespace
if (dst.data != disp.data)
disp.copyTo(dst, stream);
disp_bilateral_filter<T>(dst, img, img.channels(), iters, table_color.ptr<float>(), (float *)table_space.data, table_space_step, radius, edge_disc, max_disc, StreamAccessor::getStream(stream));
if (img.channels() == 4) {
disp_bilateral_filter<T,uchar4>(disp, dst, img, iters, table_color.ptr<float>(), table_space_step, radius, edge_disc, max_disc, StreamAccessor::getStream(stream));
} else {
// TODO: If we need other channels...
}
}
void DispBilateralFilterImpl::apply(InputArray _disp, InputArray _image, OutputArray dst, Stream& stream)
......@@ -184,7 +188,8 @@ namespace
GpuMat img = _image.getGpuMat();
CV_Assert( disp.type() == CV_8U || disp.type() == CV_16S );
CV_Assert( img.type() == CV_8UC1 || img.type() == CV_8UC3 || img.type() == CV_8UC4 );
//CV_Assert( img.type() == CV_8UC1 || img.type() == CV_8UC3 || img.type() == CV_8UC4 );
CV_Assert( img.type() == CV_8UC4 ); // Nick: We only need/allow 4 channel
CV_Assert( disp.size() == img.size() );
operators[disp.type()](ndisp_, radius_, iters_, edge_threshold_, max_disc_threshold_,
......
......@@ -2,7 +2,7 @@ namespace ftl { namespace cuda { namespace device
{
namespace disp_bilateral_filter
{
template<typename T>
void disp_bilateral_filter(cv::cuda::PtrStepSz<T> disp, cv::cuda::PtrStepSzb img, int channels, int iters, const float *, const float *, size_t, int radius, T edge_disc, T max_disc, cudaStream_t stream);
template<typename T, typename C>
void disp_bilateral_filter(cv::cuda::PtrStepSz<T> disp, cv::cuda::PtrStepSz<T> dispout, cv::cuda::PtrStepSz<C> img, int iters, const float *, size_t, int radius, T edge_disc, T max_disc, cudaStream_t stream);
}
}}}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment