diff --git a/applications/reconstruct/src/ilw/ilw.cpp b/applications/reconstruct/src/ilw/ilw.cpp index 7e0c292595e446960b51c3473d6fb3c55a362c75..1537b556ff4350a18ac6f9ddb14135f249995b9a 100644 --- a/applications/reconstruct/src/ilw/ilw.cpp +++ b/applications/reconstruct/src/ilw/ilw.cpp @@ -133,27 +133,32 @@ ILW::ILW(nlohmann::json &config) : ftl::Configurable(config) { on("clipping_enabled", [this](const ftl::config::Event &e) { clipping_ = value("clipping_enabled", true); }); + + cudaSafeCall(cudaStreamCreate(&stream_)); } ILW::~ILW() { } -bool ILW::process(ftl::rgbd::FrameSet &fs, cudaStream_t stream) { +bool ILW::process(ftl::rgbd::FrameSet &fs) { if (!enabled_) return false; - _phase0(fs, stream); + fs.upload(Channel::Colour + Channel::Depth, stream_); + _phase0(fs, stream_); params_.range = value("search_range", 0.05f); for (int i=0; i<iterations_; ++i) { - _phase1(fs, value("cost_function",3), stream); + _phase1(fs, value("cost_function",3), stream_); //for (int j=0; j<3; ++j) { - _phase2(fs, motion_rate_, stream); + _phase2(fs, motion_rate_, stream_); //} params_.range *= value("search_reduce", 0.9f); // TODO: Break if no time left + + //cudaSafeCall(cudaStreamSynchronize(stream_)); } for (size_t i=0; i<fs.frames.size(); ++i) { @@ -162,9 +167,10 @@ bool ILW::process(ftl::rgbd::FrameSet &fs, cudaStream_t stream) { auto &t = f.createTexture<float4>(Channel::Points, Format<float4>(f.get<GpuMat>(Channel::Colour).size())); auto pose = MatrixConversion::toCUDA(s->getPose().cast<float>()); //.inverse()); - ftl::cuda::point_cloud(t, f.createTexture<float>(Channel::Depth), s->parameters(), pose, discon_mask_, stream); + ftl::cuda::point_cloud(t, f.createTexture<float>(Channel::Depth), s->parameters(), pose, discon_mask_, stream_); } + cudaSafeCall(cudaStreamSynchronize(stream_)); return true; } @@ -211,6 +217,8 @@ bool ILW::_phase0(ftl::rgbd::FrameSet &fs, cudaStream_t stream) { f.createTexture<float>(Channel::Depth); } + //cudaSafeCall(cudaStreamSynchronize(stream_)); + return true; } @@ -239,6 +247,8 @@ bool ILW::_phase1(ftl::rgbd::FrameSet &fs, int win, cudaStream_t stream) { } } + //cudaSafeCall(cudaStreamSynchronize(stream_)); + // For each camera combination for (size_t i=0; i<fs.frames.size(); ++i) { auto &f1 = fs.frames[i]; @@ -294,6 +304,8 @@ bool ILW::_phase1(ftl::rgbd::FrameSet &fs, int win, cudaStream_t stream) { } } + //cudaSafeCall(cudaStreamSynchronize(stream_)); + return true; } diff --git a/applications/reconstruct/src/ilw/ilw.hpp b/applications/reconstruct/src/ilw/ilw.hpp index 5abf31207e7fc53c18a66a35c0d278f7a30a2b39..78251832d5647233082c8c59072084c6b1d60559 100644 --- a/applications/reconstruct/src/ilw/ilw.hpp +++ b/applications/reconstruct/src/ilw/ilw.hpp @@ -44,7 +44,7 @@ class ILW : public ftl::Configurable { /** * Take a frameset and perform the iterative lattice warping. */ - bool process(ftl::rgbd::FrameSet &fs, cudaStream_t stream=0); + bool process(ftl::rgbd::FrameSet &fs); inline bool isLabColour() const { return use_lab_; } @@ -75,6 +75,8 @@ class ILW : public ftl::Configurable { bool fill_depth_; ftl::cuda::ClipSpace clip_; bool clipping_; + + cudaStream_t stream_; }; } diff --git a/applications/reconstruct/src/main.cpp b/applications/reconstruct/src/main.cpp index a4010c776b962b97ea02702a6a9ee9b8594e0c76..41359ec65634509df1e00370b4622408fa0e53ec 100644 --- a/applications/reconstruct/src/main.cpp +++ b/applications/reconstruct/src/main.cpp @@ -174,7 +174,7 @@ static void run(ftl::Configurable *root) { if (align->isLabColour()) { for (auto &f : scene_B.frames) { auto &col = f.get<cv::cuda::GpuMat>(Channel::Colour); - cv::cuda::cvtColor(col,col, cv::COLOR_Lab2BGR); + cv::cuda::cvtColor(col,col, cv::COLOR_Lab2BGR); // TODO: Add stream } } splat->render(virt, out); @@ -257,7 +257,7 @@ static void run(ftl::Configurable *root) { UNIQUE_LOCK(scene_A.mtx, lk); // Send all frames to GPU, block until done? - scene_A.upload(Channel::Colour + Channel::Depth); // TODO: (Nick) Add scene stream. + //scene_A.upload(Channel::Colour + Channel::Depth); // TODO: (Nick) Add scene stream. align->process(scene_A); // TODO: To use second GPU, could do a download, swap, device change, diff --git a/components/codecs/src/nvpipe_decoder.cpp b/components/codecs/src/nvpipe_decoder.cpp index 0dda5884cc7bf156e7cb134795e6f0ff2c180130..fefd5ead5968b766f1f4188dd1bdf1f91ac13d36 100644 --- a/components/codecs/src/nvpipe_decoder.cpp +++ b/components/codecs/src/nvpipe_decoder.cpp @@ -5,6 +5,8 @@ #include <ftl/cuda_util.hpp> //#include <cuda_runtime.h> +#include <opencv2/core/cuda/common.hpp> + using ftl::codecs::NvPipeDecoder; NvPipeDecoder::NvPipeDecoder() { diff --git a/components/codecs/src/nvpipe_encoder.cpp b/components/codecs/src/nvpipe_encoder.cpp index f1b068d740d6c9ff5210cc6598765061752535c3..1947170b92014da359f32c9e776e0fe890652aa2 100644 --- a/components/codecs/src/nvpipe_encoder.cpp +++ b/components/codecs/src/nvpipe_encoder.cpp @@ -4,6 +4,8 @@ #include <ftl/codecs/bitrates.hpp> #include <ftl/cuda_util.hpp> +#include <opencv2/core/cuda/common.hpp> + using ftl::codecs::NvPipeEncoder; using ftl::codecs::bitrate_t; using ftl::codecs::codec_t; diff --git a/components/renderers/cpp/include/ftl/render/renderer.hpp b/components/renderers/cpp/include/ftl/render/renderer.hpp index 1871b9f9f2a8e1fda0766e1c2e74d2169f47f3fa..432be6839de24e94448afbaf407260ea44c5a508 100644 --- a/components/renderers/cpp/include/ftl/render/renderer.hpp +++ b/components/renderers/cpp/include/ftl/render/renderer.hpp @@ -26,7 +26,7 @@ class Renderer : public ftl::Configurable { * the virtual camera object passed, and writes the result into the * virtual camera. */ - virtual bool render(ftl::rgbd::VirtualSource *, ftl::rgbd::Frame &, cudaStream_t)=0; + virtual bool render(ftl::rgbd::VirtualSource *, ftl::rgbd::Frame &)=0; }; } diff --git a/components/renderers/cpp/include/ftl/render/splat_render.hpp b/components/renderers/cpp/include/ftl/render/splat_render.hpp index 8fc0e10311668c3938b77c835ef31f8100905f64..70d326935fe9524cdef2aac040cd82a8fd5ab1d9 100644 --- a/components/renderers/cpp/include/ftl/render/splat_render.hpp +++ b/components/renderers/cpp/include/ftl/render/splat_render.hpp @@ -22,7 +22,7 @@ class Splatter : public ftl::render::Renderer { explicit Splatter(nlohmann::json &config, ftl::rgbd::FrameSet *fs); ~Splatter(); - bool render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cudaStream_t stream=0) override; + bool render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out) override; //void setOutputDevice(int); protected: @@ -52,6 +52,7 @@ class Splatter : public ftl::render::Renderer { uchar4 light_diffuse_; uchar4 light_ambient_; ftl::render::SplatParams params_; + cudaStream_t stream_; template <typename T> void __blendChannel(ftl::rgbd::Frame &, ftl::rgbd::Channel in, ftl::rgbd::Channel out, cudaStream_t); diff --git a/components/renderers/cpp/src/splat_render.cpp b/components/renderers/cpp/src/splat_render.cpp index 74e5a448d011ed97e00374702200f150a7bcf68a..152f5b3665c9af71a15b7be3915c54bbc3c02b0c 100644 --- a/components/renderers/cpp/src/splat_render.cpp +++ b/components/renderers/cpp/src/splat_render.cpp @@ -120,6 +120,8 @@ Splatter::Splatter(nlohmann::json &config, ftl::rgbd::FrameSet *fs) : ftl::rende on("ambient", [this](const ftl::config::Event &e) { light_ambient_ = parseCUDAColour(value("ambient", std::string("#0e0e0e"))); }); + + cudaSafeCall(cudaStreamCreate(&stream_)); } Splatter::~Splatter() { @@ -295,7 +297,7 @@ void Splatter::_renderChannel( } } -bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cudaStream_t stream) { +bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out) { SHARED_LOCK(scene_->mtx, lk); if (!src->isReady()) return false; @@ -316,7 +318,7 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cuda temp_.create<GpuMat>(Channel::Depth2, Format<int>(camera.width, camera.height)); temp_.create<GpuMat>(Channel::Normals, Format<float4>(g.cols, g.rows)); - cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream); + cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream_); // Parameters object to pass to CUDA describing the camera SplatParams ¶ms = params_; @@ -349,14 +351,14 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cuda auto &t = f.createTexture<float4>(Channel::Points, Format<float4>(f.get<GpuMat>(Channel::Colour).size())); auto pose = MatrixConversion::toCUDA(s->getPose().cast<float>()); //.inverse()); - ftl::cuda::point_cloud(t, f.createTexture<float>(Channel::Depth), s->parameters(), pose, 0, stream); + ftl::cuda::point_cloud(t, f.createTexture<float>(Channel::Depth), s->parameters(), pose, 0, stream_); //LOG(INFO) << "POINTS Added"; } // Clip first? if (clipping_) { - ftl::cuda::clipping(f.createTexture<float4>(Channel::Points), clip_, stream); + ftl::cuda::clipping(f.createTexture<float4>(Channel::Points), clip_, stream_); } if (!f.hasChannel(Channel::Normals)) { @@ -368,16 +370,16 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cuda temp_.getTexture<float4>(Channel::Normals), f.getTexture<float4>(Channel::Points), 3, 0.04f, - s->parameters(), pose.getFloat3x3(), stream); + s->parameters(), pose.getFloat3x3(), stream_); if (norm_filter_ > -0.1f) { - ftl::cuda::normal_filter(f.getTexture<float4>(Channel::Normals), f.getTexture<float4>(Channel::Points), s->parameters(), pose, norm_filter_, stream); + ftl::cuda::normal_filter(f.getTexture<float4>(Channel::Normals), f.getTexture<float4>(Channel::Points), s->parameters(), pose, norm_filter_, stream_); } } } - _dibr(stream); - _renderChannel(out, Channel::Colour, Channel::Colour, stream); + _dibr(stream_); + _renderChannel(out, Channel::Colour, Channel::Colour, stream_); Channel chan = src->getChannel(); if (chan == Channel::Depth) @@ -387,14 +389,14 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cuda out.create<GpuMat>(Channel::Normals, Format<float4>(camera.width, camera.height)); // Render normal attribute - _renderChannel(out, Channel::Normals, Channel::Normals, stream); + _renderChannel(out, Channel::Normals, Channel::Normals, stream_); // Convert normal to single float value temp_.create<GpuMat>(Channel::Colour, Format<uchar4>(camera.width, camera.height)); ftl::cuda::normal_visualise(out.getTexture<float4>(Channel::Normals), temp_.createTexture<uchar4>(Channel::Colour), make_float3(0.3f, 0.2f, 1.0f), light_diffuse_, - light_ambient_, stream); + light_ambient_, stream_); // Put in output as single float cv::cuda::swap(temp_.get<GpuMat>(Channel::Colour), out.create<GpuMat>(Channel::Normals)); @@ -407,7 +409,7 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cuda else if (chan == Channel::Density) { out.create<GpuMat>(chan, Format<float>(camera.width, camera.height)); out.get<GpuMat>(chan).setTo(cv::Scalar(0.0f), cvstream); - _renderChannel(out, Channel::Depth, Channel::Density, stream); + _renderChannel(out, Channel::Depth, Channel::Density, stream_); } else if (chan == Channel::Right) { @@ -419,8 +421,8 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cuda out.create<GpuMat>(Channel::Right, Format<uchar4>(camera.width, camera.height)); out.get<GpuMat>(Channel::Right).setTo(background_, cvstream); - _dibr(stream); // Need to re-dibr due to pose change - _renderChannel(out, Channel::Right, Channel::Right, stream); + _dibr(stream_); // Need to re-dibr due to pose change + _renderChannel(out, Channel::Right, Channel::Right, stream_); } else if (chan != Channel::None) { if (ftl::rgbd::isFloatChannel(chan)) { out.create<GpuMat>(chan, Format<float>(camera.width, camera.height)); @@ -429,9 +431,10 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cuda out.create<GpuMat>(chan, Format<uchar4>(camera.width, camera.height)); out.get<GpuMat>(chan).setTo(background_, cvstream); } - _renderChannel(out, chan, chan, stream); + _renderChannel(out, chan, chan, stream_); } + cudaSafeCall(cudaStreamSynchronize(stream_)); return true; }