diff --git a/components/codecs/include/ftl/codecs/nvpipe_decoder.hpp b/components/codecs/include/ftl/codecs/nvpipe_decoder.hpp index 7734998e2d80ecb0aa01930adadf01e8e87c31e3..987915ea2595c6b1e88cf8959b239cb7eb7a3f09 100644 --- a/components/codecs/include/ftl/codecs/nvpipe_decoder.hpp +++ b/components/codecs/include/ftl/codecs/nvpipe_decoder.hpp @@ -25,6 +25,7 @@ class NvPipeDecoder : public ftl::codecs::Decoder { MUTEX mutex_; bool seen_iframe_; cv::cuda::GpuMat tmp_; + cv::cuda::Stream stream_; }; } diff --git a/components/codecs/include/ftl/codecs/nvpipe_encoder.hpp b/components/codecs/include/ftl/codecs/nvpipe_encoder.hpp index 3b5515f296c06978428b8cfe8f0854129791d83a..5d04068c53cf3b46dee73c63cf8e2fcf674f148d 100644 --- a/components/codecs/include/ftl/codecs/nvpipe_encoder.hpp +++ b/components/codecs/include/ftl/codecs/nvpipe_encoder.hpp @@ -37,6 +37,7 @@ class NvPipeEncoder : public ftl::codecs::Encoder { ftl::codecs::codec_t preference_; cv::cuda::GpuMat tmp_; cv::cuda::GpuMat tmp2_; + cv::cuda::Stream stream_; bool _encoderMatch(const cv::cuda::GpuMat &in, definition_t def); bool _createEncoder(const cv::cuda::GpuMat &in, definition_t def, bitrate_t rate); diff --git a/components/codecs/src/nvpipe_decoder.cpp b/components/codecs/src/nvpipe_decoder.cpp index eeb89f93c9e5194260360fefcbb57e45719ab2cb..d91a11ac06c4bb298f83332351b2b82eb489813e 100644 --- a/components/codecs/src/nvpipe_decoder.cpp +++ b/components/codecs/src/nvpipe_decoder.cpp @@ -75,35 +75,37 @@ bool NvPipeDecoder::decode(const ftl::codecs::Packet &pkt, cv::cuda::GpuMat &out if (is_float_frame) { // Is the received frame the same size as requested output? if (out.rows == ftl::codecs::getHeight(pkt.definition)) { - tmp_.convertTo(out, CV_32FC1, 1.0f/1000.0f); + tmp_.convertTo(out, CV_32FC1, 1.0f/1000.0f, stream_); } else { LOG(WARNING) << "Resizing decoded frame from " << tmp_.size() << " to " << out.size(); // FIXME: This won't work on GPU - tmp_.convertTo(tmp_, CV_32FC1, 1.0f/1000.0f); - cv::cuda::resize(tmp_, out, out.size(), 0, 0, cv::INTER_NEAREST); + tmp_.convertTo(tmp_, CV_32FC1, 1.0f/1000.0f, stream_); + cv::cuda::resize(tmp_, out, out.size(), 0, 0, cv::INTER_NEAREST, stream_); } } else { // Is the received frame the same size as requested output? if (out.rows == ftl::codecs::getHeight(pkt.definition)) { // Flag 0x1 means frame is in RGB so needs conversion to BGR if (pkt.flags & 0x1) { - cv::cuda::cvtColor(tmp_, out, cv::COLOR_RGBA2BGR); + cv::cuda::cvtColor(tmp_, out, cv::COLOR_RGBA2BGR, 0, stream_); } else { - cv::cuda::cvtColor(tmp_, out, cv::COLOR_BGRA2BGR); + cv::cuda::cvtColor(tmp_, out, cv::COLOR_BGRA2BGR, 0, stream_); } } else { LOG(WARNING) << "Resizing decoded frame from " << tmp_.size() << " to " << out.size(); // FIXME: This won't work on GPU, plus it allocates extra memory... // Flag 0x1 means frame is in RGB so needs conversion to BGR if (pkt.flags & 0x1) { - cv::cuda::cvtColor(tmp_, tmp_, cv::COLOR_RGBA2BGR); + cv::cuda::cvtColor(tmp_, tmp_, cv::COLOR_RGBA2BGR, 0, stream_); } else { - cv::cuda::cvtColor(tmp_, tmp_, cv::COLOR_BGRA2BGR); + cv::cuda::cvtColor(tmp_, tmp_, cv::COLOR_BGRA2BGR, 0, stream_); } - cv::cuda::resize(tmp_, out, out.size()); + cv::cuda::resize(tmp_, out, out.size(), 0.0, 0.0, cv::INTER_LINEAR, stream_); } } + stream_.waitForCompletion(); + return rc > 0; } diff --git a/components/codecs/src/nvpipe_encoder.cpp b/components/codecs/src/nvpipe_encoder.cpp index 3ed25448b9138e103563ece1221da860fb9a0a3f..10901ef56993f25960cadd0e6f17778093f256a7 100644 --- a/components/codecs/src/nvpipe_encoder.cpp +++ b/components/codecs/src/nvpipe_encoder.cpp @@ -63,9 +63,9 @@ bool NvPipeEncoder::encode(const cv::cuda::GpuMat &in, definition_t odefinition, if (width != in.cols || height != in.rows) { LOG(WARNING) << "Mismatch resolution with encoding resolution"; if (in.type() == CV_32F) { - cv::cuda::resize(in, tmp_, cv::Size(width,height), 0.0, 0.0, cv::INTER_NEAREST); + cv::cuda::resize(in, tmp_, cv::Size(width,height), 0.0, 0.0, cv::INTER_NEAREST, stream_); } else { - cv::cuda::resize(in, tmp_, cv::Size(width,height)); + cv::cuda::resize(in, tmp_, cv::Size(width,height), 0.0, 0.0, cv::INTER_LINEAR, stream_); } tmp = tmp_; } else { @@ -84,16 +84,19 @@ bool NvPipeEncoder::encode(const cv::cuda::GpuMat &in, definition_t odefinition, //cv::Mat tmp; if (tmp.type() == CV_32F) { - tmp.convertTo(tmp2_, CV_16UC1, 1000); + tmp.convertTo(tmp2_, CV_16UC1, 1000, stream_); } else if (tmp.type() == CV_8UC3) { - cv::cuda::cvtColor(tmp, tmp2_, cv::COLOR_BGR2RGBA); + cv::cuda::cvtColor(tmp, tmp2_, cv::COLOR_BGR2RGBA, 0, stream_); } else if (tmp.type() == CV_8UC4) { - cv::cuda::cvtColor(tmp, tmp2_, cv::COLOR_BGRA2RGBA); + cv::cuda::cvtColor(tmp, tmp2_, cv::COLOR_BGRA2RGBA, 0, stream_); } else { LOG(ERROR) << "Unsupported cv::Mat type in Nvidia encoder"; return false; } + // Make sure conversions complete... + stream_.waitForCompletion(); + Packet pkt; pkt.codec = (preference_ == codec_t::Any) ? codec_t::HEVC : preference_; pkt.definition = definition;