From 33fda9e470e6a58ace806bb94aa2ae26c01748d5 Mon Sep 17 00:00:00 2001
From: Nicolas Pope <nwpope@utu.fi>
Date: Wed, 30 Oct 2019 09:51:53 +0200
Subject: [PATCH] Use streams in encode decode

---
 .../include/ftl/codecs/nvpipe_decoder.hpp      |  1 +
 .../include/ftl/codecs/nvpipe_encoder.hpp      |  1 +
 components/codecs/src/nvpipe_decoder.cpp       | 18 ++++++++++--------
 components/codecs/src/nvpipe_encoder.cpp       | 13 ++++++++-----
 4 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/components/codecs/include/ftl/codecs/nvpipe_decoder.hpp b/components/codecs/include/ftl/codecs/nvpipe_decoder.hpp
index 7734998e2..987915ea2 100644
--- a/components/codecs/include/ftl/codecs/nvpipe_decoder.hpp
+++ b/components/codecs/include/ftl/codecs/nvpipe_decoder.hpp
@@ -25,6 +25,7 @@ class NvPipeDecoder : public ftl::codecs::Decoder {
 	MUTEX mutex_;
 	bool seen_iframe_;
 	cv::cuda::GpuMat tmp_;
+	cv::cuda::Stream stream_;
 };
 
 }
diff --git a/components/codecs/include/ftl/codecs/nvpipe_encoder.hpp b/components/codecs/include/ftl/codecs/nvpipe_encoder.hpp
index 3b5515f29..5d04068c5 100644
--- a/components/codecs/include/ftl/codecs/nvpipe_encoder.hpp
+++ b/components/codecs/include/ftl/codecs/nvpipe_encoder.hpp
@@ -37,6 +37,7 @@ class NvPipeEncoder : public ftl::codecs::Encoder {
 	ftl::codecs::codec_t preference_;
 	cv::cuda::GpuMat tmp_;
 	cv::cuda::GpuMat tmp2_;
+	cv::cuda::Stream stream_;
 
     bool _encoderMatch(const cv::cuda::GpuMat &in, definition_t def);
     bool _createEncoder(const cv::cuda::GpuMat &in, definition_t def, bitrate_t rate);
diff --git a/components/codecs/src/nvpipe_decoder.cpp b/components/codecs/src/nvpipe_decoder.cpp
index eeb89f93c..d91a11ac0 100644
--- a/components/codecs/src/nvpipe_decoder.cpp
+++ b/components/codecs/src/nvpipe_decoder.cpp
@@ -75,35 +75,37 @@ bool NvPipeDecoder::decode(const ftl::codecs::Packet &pkt, cv::cuda::GpuMat &out
 	if (is_float_frame) {
 		// Is the received frame the same size as requested output?
 		if (out.rows == ftl::codecs::getHeight(pkt.definition)) {
-			tmp_.convertTo(out, CV_32FC1, 1.0f/1000.0f);
+			tmp_.convertTo(out, CV_32FC1, 1.0f/1000.0f, stream_);
 		} else {
 			LOG(WARNING) << "Resizing decoded frame from " << tmp_.size() << " to " << out.size();
 			// FIXME: This won't work on GPU
-			tmp_.convertTo(tmp_, CV_32FC1, 1.0f/1000.0f);
-			cv::cuda::resize(tmp_, out, out.size(), 0, 0, cv::INTER_NEAREST);
+			tmp_.convertTo(tmp_, CV_32FC1, 1.0f/1000.0f, stream_);
+			cv::cuda::resize(tmp_, out, out.size(), 0, 0, cv::INTER_NEAREST, stream_);
 		}
 	} else {
 		// Is the received frame the same size as requested output?
 		if (out.rows == ftl::codecs::getHeight(pkt.definition)) {
 			// Flag 0x1 means frame is in RGB so needs conversion to BGR
 			if (pkt.flags & 0x1) {
-				cv::cuda::cvtColor(tmp_, out, cv::COLOR_RGBA2BGR);
+				cv::cuda::cvtColor(tmp_, out, cv::COLOR_RGBA2BGR, 0, stream_);
 			} else {
-				cv::cuda::cvtColor(tmp_, out, cv::COLOR_BGRA2BGR);
+				cv::cuda::cvtColor(tmp_, out, cv::COLOR_BGRA2BGR, 0, stream_);
 			}
 		} else {
 			LOG(WARNING) << "Resizing decoded frame from " << tmp_.size() << " to " << out.size();
 			// FIXME: This won't work on GPU, plus it allocates extra memory...
 			// Flag 0x1 means frame is in RGB so needs conversion to BGR
 			if (pkt.flags & 0x1) {
-				cv::cuda::cvtColor(tmp_, tmp_, cv::COLOR_RGBA2BGR);
+				cv::cuda::cvtColor(tmp_, tmp_, cv::COLOR_RGBA2BGR, 0, stream_);
 			} else {
-				cv::cuda::cvtColor(tmp_, tmp_, cv::COLOR_BGRA2BGR);
+				cv::cuda::cvtColor(tmp_, tmp_, cv::COLOR_BGRA2BGR, 0, stream_);
 			}
-			cv::cuda::resize(tmp_, out, out.size());
+			cv::cuda::resize(tmp_, out, out.size(), 0.0, 0.0, cv::INTER_LINEAR, stream_);
 		}
 	}
 
+	stream_.waitForCompletion();
+
 	return rc > 0;
 }
 
diff --git a/components/codecs/src/nvpipe_encoder.cpp b/components/codecs/src/nvpipe_encoder.cpp
index 3ed25448b..10901ef56 100644
--- a/components/codecs/src/nvpipe_encoder.cpp
+++ b/components/codecs/src/nvpipe_encoder.cpp
@@ -63,9 +63,9 @@ bool NvPipeEncoder::encode(const cv::cuda::GpuMat &in, definition_t odefinition,
 	if (width != in.cols || height != in.rows) {
 		LOG(WARNING) << "Mismatch resolution with encoding resolution";
 		if (in.type() == CV_32F) {
-			cv::cuda::resize(in, tmp_, cv::Size(width,height), 0.0, 0.0, cv::INTER_NEAREST);
+			cv::cuda::resize(in, tmp_, cv::Size(width,height), 0.0, 0.0, cv::INTER_NEAREST, stream_);
 		} else {
-			cv::cuda::resize(in, tmp_, cv::Size(width,height));
+			cv::cuda::resize(in, tmp_, cv::Size(width,height), 0.0, 0.0, cv::INTER_LINEAR, stream_);
 		}
 		tmp = tmp_;
 	} else {
@@ -84,16 +84,19 @@ bool NvPipeEncoder::encode(const cv::cuda::GpuMat &in, definition_t odefinition,
 
 	//cv::Mat tmp;
 	if (tmp.type() == CV_32F) {
-		tmp.convertTo(tmp2_, CV_16UC1, 1000);
+		tmp.convertTo(tmp2_, CV_16UC1, 1000, stream_);
 	} else if (tmp.type() == CV_8UC3) {
-		cv::cuda::cvtColor(tmp, tmp2_, cv::COLOR_BGR2RGBA);
+		cv::cuda::cvtColor(tmp, tmp2_, cv::COLOR_BGR2RGBA, 0, stream_);
 	} else if (tmp.type() == CV_8UC4) {
-		cv::cuda::cvtColor(tmp, tmp2_, cv::COLOR_BGRA2RGBA);
+		cv::cuda::cvtColor(tmp, tmp2_, cv::COLOR_BGRA2RGBA, 0, stream_);
 	} else {
 		LOG(ERROR) << "Unsupported cv::Mat type in Nvidia encoder";
 		return false;
 	}
 
+	// Make sure conversions complete...
+	stream_.waitForCompletion();
+
 	Packet pkt;
 	pkt.codec = (preference_ == codec_t::Any) ? codec_t::HEVC : preference_;
 	pkt.definition = definition;
-- 
GitLab