Commit f56011a8 authored by Nicolas Pope

Implements #169: use of CUDA streams

parent 3d2393d0
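
For orientation, the pattern this commit adopts in both ILW and Splatter is a per-object CUDA stream: created once in the constructor, used for every kernel launch and asynchronous copy the object issues, and synchronized only at the end of the public entry point. Below is a minimal sketch of that lifecycle; the Processor class and its method are hypothetical, not part of the codebase.

```cpp
// Sketch only: per-object CUDA stream lifecycle, as adopted by ILW and
// Splatter in this commit. The class and method names are hypothetical.
#include <cuda_runtime.h>

class Processor {
public:
    Processor() {
        // One private stream per object; all GPU work is enqueued on it.
        cudaStreamCreate(&stream_);
    }
    ~Processor() {
        // Release the stream with the object. (The ILW destructor shown
        // below is empty; destroying the stream here is the usual cleanup.)
        cudaStreamDestroy(stream_);
    }

    bool process() {
        // ... enqueue uploads and kernels on stream_, no host blocking ...
        cudaStreamSynchronize(stream_);  // block once, when results are needed
        return true;
    }

private:
    cudaStream_t stream_;
};
```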
......@@ -133,27 +133,32 @@ ILW::ILW(nlohmann::json &config) : ftl::Configurable(config) {
on("clipping_enabled", [this](const ftl::config::Event &e) {
clipping_ = value("clipping_enabled", true);
});
cudaSafeCall(cudaStreamCreate(&stream_));
}
ILW::~ILW() {
}
bool ILW::process(ftl::rgbd::FrameSet &fs, cudaStream_t stream) {
bool ILW::process(ftl::rgbd::FrameSet &fs) {
if (!enabled_) return false;
_phase0(fs, stream);
fs.upload(Channel::Colour + Channel::Depth, stream_);
_phase0(fs, stream_);
params_.range = value("search_range", 0.05f);
for (int i=0; i<iterations_; ++i) {
_phase1(fs, value("cost_function",3), stream);
_phase1(fs, value("cost_function",3), stream_);
//for (int j=0; j<3; ++j) {
_phase2(fs, motion_rate_, stream);
_phase2(fs, motion_rate_, stream_);
//}
params_.range *= value("search_reduce", 0.9f);
// TODO: Break if no time left
//cudaSafeCall(cudaStreamSynchronize(stream_));
}
for (size_t i=0; i<fs.frames.size(); ++i) {
......@@ -162,9 +167,10 @@ bool ILW::process(ftl::rgbd::FrameSet &fs, cudaStream_t stream) {
auto &t = f.createTexture<float4>(Channel::Points, Format<float4>(f.get<GpuMat>(Channel::Colour).size()));
auto pose = MatrixConversion::toCUDA(s->getPose().cast<float>()); //.inverse());
ftl::cuda::point_cloud(t, f.createTexture<float>(Channel::Depth), s->parameters(), pose, discon_mask_, stream);
ftl::cuda::point_cloud(t, f.createTexture<float>(Channel::Depth), s->parameters(), pose, discon_mask_, stream_);
}
cudaSafeCall(cudaStreamSynchronize(stream_));
return true;
}
......@@ -211,6 +217,8 @@ bool ILW::_phase0(ftl::rgbd::FrameSet &fs, cudaStream_t stream) {
f.createTexture<float>(Channel::Depth);
}
//cudaSafeCall(cudaStreamSynchronize(stream_));
return true;
}
......@@ -239,6 +247,8 @@ bool ILW::_phase1(ftl::rgbd::FrameSet &fs, int win, cudaStream_t stream) {
}
}
//cudaSafeCall(cudaStreamSynchronize(stream_));
// For each camera combination
for (size_t i=0; i<fs.frames.size(); ++i) {
auto &f1 = fs.frames[i];
......@@ -294,6 +304,8 @@ bool ILW::_phase1(ftl::rgbd::FrameSet &fs, int win, cudaStream_t stream) {
}
}
//cudaSafeCall(cudaStreamSynchronize(stream_));
return true;
}
......
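
The intermediate cudaStreamSynchronize calls inside the iteration loop above are left commented out because work issued to a single stream executes in issue order: each _phase2 always sees the output of the preceding _phase1, and the host only needs to block once, before the frameset is consumed. A standalone illustration of that ordering follows; the kernels are stand-ins, not the ILW kernels.

```cuda
// Illustration only: two stand-in kernels launched repeatedly on one stream.
// Launches on the same stream serialize on the device, so no host-side
// synchronisation is needed between iterations.
#include <cuda_runtime.h>

__global__ void phase1(float *data, int n) {
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n) data[i] += 1.0f;
}

__global__ void phase2(float *data, int n) {
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n) data[i] *= 0.5f;   // always sees phase1's result
}

void iterate(float *d_data, int n, int iterations, cudaStream_t stream) {
    for (int i = 0; i < iterations; ++i) {
        phase1<<<(n + 255) / 256, 256, 0, stream>>>(d_data, n);
        phase2<<<(n + 255) / 256, 256, 0, stream>>>(d_data, n);
    }
    cudaStreamSynchronize(stream);   // single sync after the whole loop
}
```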
......@@ -44,7 +44,7 @@ class ILW : public ftl::Configurable {
/**
* Take a frameset and perform the iterative lattice warping.
*/
bool process(ftl::rgbd::FrameSet &fs, cudaStream_t stream=0);
bool process(ftl::rgbd::FrameSet &fs);
inline bool isLabColour() const { return use_lab_; }
......@@ -75,6 +75,8 @@ class ILW : public ftl::Configurable {
bool fill_depth_;
ftl::cuda::ClipSpace clip_;
bool clipping_;
cudaStream_t stream_;
};
}
......
......@@ -174,7 +174,7 @@ static void run(ftl::Configurable *root) {
if (align->isLabColour()) {
for (auto &f : scene_B.frames) {
auto &col = f.get<cv::cuda::GpuMat>(Channel::Colour);
cv::cuda::cvtColor(col,col, cv::COLOR_Lab2BGR);
cv::cuda::cvtColor(col,col, cv::COLOR_Lab2BGR); // TODO: Add stream
}
}
splat->render(virt, out);
......@@ -257,7 +257,7 @@ static void run(ftl::Configurable *root) {
UNIQUE_LOCK(scene_A.mtx, lk);
// Send all frames to GPU, block until done?
scene_A.upload(Channel::Colour + Channel::Depth); // TODO: (Nick) Add scene stream.
//scene_A.upload(Channel::Colour + Channel::Depth); // TODO: (Nick) Add scene stream.
align->process(scene_A);
// TODO: To use second GPU, could do a download, swap, device change,
......
......@@ -5,6 +5,8 @@
#include <ftl/cuda_util.hpp>
//#include <cuda_runtime.h>
#include <opencv2/core/cuda/common.hpp>
using ftl::codecs::NvPipeDecoder;
NvPipeDecoder::NvPipeDecoder() {
......
......@@ -4,6 +4,8 @@
#include <ftl/codecs/bitrates.hpp>
#include <ftl/cuda_util.hpp>
#include <opencv2/core/cuda/common.hpp>
using ftl::codecs::NvPipeEncoder;
using ftl::codecs::bitrate_t;
using ftl::codecs::codec_t;
......
......@@ -26,7 +26,7 @@ class Renderer : public ftl::Configurable {
* the virtual camera object passed, and writes the result into the
* virtual camera.
*/
virtual bool render(ftl::rgbd::VirtualSource *, ftl::rgbd::Frame &, cudaStream_t)=0;
virtual bool render(ftl::rgbd::VirtualSource *, ftl::rgbd::Frame &)=0;
};
}
......
......@@ -22,7 +22,7 @@ class Splatter : public ftl::render::Renderer {
explicit Splatter(nlohmann::json &config, ftl::rgbd::FrameSet *fs);
~Splatter();
bool render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cudaStream_t stream=0) override;
bool render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out) override;
//void setOutputDevice(int);
protected:
......@@ -52,6 +52,7 @@ class Splatter : public ftl::render::Renderer {
uchar4 light_diffuse_;
uchar4 light_ambient_;
ftl::render::SplatParams params_;
cudaStream_t stream_;
template <typename T>
void __blendChannel(ftl::rgbd::Frame &, ftl::rgbd::Channel in, ftl::rgbd::Channel out, cudaStream_t);
......
......@@ -120,6 +120,8 @@ Splatter::Splatter(nlohmann::json &config, ftl::rgbd::FrameSet *fs) : ftl::rende
on("ambient", [this](const ftl::config::Event &e) {
light_ambient_ = parseCUDAColour(value("ambient", std::string("#0e0e0e")));
});
cudaSafeCall(cudaStreamCreate(&stream_));
}
Splatter::~Splatter() {
......@@ -295,7 +297,7 @@ void Splatter::_renderChannel(
}
}
bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cudaStream_t stream) {
bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out) {
SHARED_LOCK(scene_->mtx, lk);
if (!src->isReady()) return false;
......@@ -316,7 +318,7 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cuda
temp_.create<GpuMat>(Channel::Depth2, Format<int>(camera.width, camera.height));
temp_.create<GpuMat>(Channel::Normals, Format<float4>(g.cols, g.rows));
cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream);
cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream_);
// Parameters object to pass to CUDA describing the camera
SplatParams &params = params_;
......@@ -349,14 +351,14 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cuda
auto &t = f.createTexture<float4>(Channel::Points, Format<float4>(f.get<GpuMat>(Channel::Colour).size()));
auto pose = MatrixConversion::toCUDA(s->getPose().cast<float>()); //.inverse());
ftl::cuda::point_cloud(t, f.createTexture<float>(Channel::Depth), s->parameters(), pose, 0, stream);
ftl::cuda::point_cloud(t, f.createTexture<float>(Channel::Depth), s->parameters(), pose, 0, stream_);
//LOG(INFO) << "POINTS Added";
}
// Clip first?
if (clipping_) {
ftl::cuda::clipping(f.createTexture<float4>(Channel::Points), clip_, stream);
ftl::cuda::clipping(f.createTexture<float4>(Channel::Points), clip_, stream_);
}
if (!f.hasChannel(Channel::Normals)) {
......@@ -368,16 +370,16 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cuda
temp_.getTexture<float4>(Channel::Normals),
f.getTexture<float4>(Channel::Points),
3, 0.04f,
s->parameters(), pose.getFloat3x3(), stream);
s->parameters(), pose.getFloat3x3(), stream_);
if (norm_filter_ > -0.1f) {
ftl::cuda::normal_filter(f.getTexture<float4>(Channel::Normals), f.getTexture<float4>(Channel::Points), s->parameters(), pose, norm_filter_, stream);
ftl::cuda::normal_filter(f.getTexture<float4>(Channel::Normals), f.getTexture<float4>(Channel::Points), s->parameters(), pose, norm_filter_, stream_);
}
}
}
_dibr(stream);
_renderChannel(out, Channel::Colour, Channel::Colour, stream);
_dibr(stream_);
_renderChannel(out, Channel::Colour, Channel::Colour, stream_);
Channel chan = src->getChannel();
if (chan == Channel::Depth)
......@@ -387,14 +389,14 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cuda
out.create<GpuMat>(Channel::Normals, Format<float4>(camera.width, camera.height));
// Render normal attribute
_renderChannel(out, Channel::Normals, Channel::Normals, stream);
_renderChannel(out, Channel::Normals, Channel::Normals, stream_);
// Convert normal to single float value
temp_.create<GpuMat>(Channel::Colour, Format<uchar4>(camera.width, camera.height));
ftl::cuda::normal_visualise(out.getTexture<float4>(Channel::Normals), temp_.createTexture<uchar4>(Channel::Colour),
make_float3(0.3f, 0.2f, 1.0f),
light_diffuse_,
light_ambient_, stream);
light_ambient_, stream_);
// Put in output as single float
cv::cuda::swap(temp_.get<GpuMat>(Channel::Colour), out.create<GpuMat>(Channel::Normals));
......@@ -407,7 +409,7 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cuda
else if (chan == Channel::Density) {
out.create<GpuMat>(chan, Format<float>(camera.width, camera.height));
out.get<GpuMat>(chan).setTo(cv::Scalar(0.0f), cvstream);
_renderChannel(out, Channel::Depth, Channel::Density, stream);
_renderChannel(out, Channel::Depth, Channel::Density, stream_);
}
else if (chan == Channel::Right)
{
......@@ -419,8 +421,8 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cuda
out.create<GpuMat>(Channel::Right, Format<uchar4>(camera.width, camera.height));
out.get<GpuMat>(Channel::Right).setTo(background_, cvstream);
_dibr(stream); // Need to re-dibr due to pose change
_renderChannel(out, Channel::Right, Channel::Right, stream);
_dibr(stream_); // Need to re-dibr due to pose change
_renderChannel(out, Channel::Right, Channel::Right, stream_);
} else if (chan != Channel::None) {
if (ftl::rgbd::isFloatChannel(chan)) {
out.create<GpuMat>(chan, Format<float>(camera.width, camera.height));
......@@ -429,9 +431,10 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cuda
out.create<GpuMat>(chan, Format<uchar4>(camera.width, camera.height));
out.get<GpuMat>(chan).setTo(background_, cvstream);
}
_renderChannel(out, chan, chan, stream);
_renderChannel(out, chan, chan, stream_);
}
cudaSafeCall(cudaStreamSynchronize(stream_));
return true;
}
......
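
Splatter::render mixes OpenCV CUDA operations (setTo, swap) with the project's own kernels, so the raw stream_ is wrapped once with cv::cuda::StreamAccessor::wrapStream and both APIs enqueue work onto the same stream. A minimal standalone sketch of that interop is below; it assumes OpenCV built with CUDA support, and the image size and colour value are arbitrary.

```cpp
// Sketch only: sharing one cudaStream_t between OpenCV's CUDA API and the
// raw runtime, mirroring how Splatter::render uses its stream_ member.
#include <cuda_runtime.h>
#include <opencv2/core/cuda.hpp>
#include <opencv2/core/cuda_stream_accessor.hpp>

int main() {
    cudaStream_t stream;
    cudaStreamCreate(&stream);

    // Wrap the raw stream so cv::cuda calls are enqueued on it as well.
    cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream);

    cv::cuda::GpuMat img(480, 640, CV_8UC4);
    img.setTo(cv::Scalar(76, 76, 220, 255), cvstream);   // async on `stream`

    // Custom kernels launched with <<<grid, block, 0, stream>>> would run
    // after the setTo above, because they share the same stream.

    cudaStreamSynchronize(stream);   // one sync when the result is needed
    cudaStreamDestroy(stream);
    return 0;
}
```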