From 02b56294c878d5d1a80986fa05672562ef9bab69 Mon Sep 17 00:00:00 2001
From: Nicolas Pope <nwpope@utu.fi>
Date: Mon, 11 Nov 2019 20:40:16 +0200
Subject: [PATCH] Actually use streams in operators

---
 .../operators/include/ftl/operators/operator.hpp   |  1 +
 components/operators/src/colours.cpp               |  4 +++-
 components/operators/src/filling.cpp               |  2 +-
 components/operators/src/mask.cu                   | 14 ++++----------
 components/operators/src/normals.cpp               |  2 +-
 components/operators/src/operator.cpp              | 13 ++++++++++---
 components/operators/src/segmentation.cpp          | 10 +++++-----
 components/operators/src/smoothing.cpp             | 10 +++++-----
 8 files changed, 30 insertions(+), 26 deletions(-)

diff --git a/components/operators/include/ftl/operators/operator.hpp b/components/operators/include/ftl/operators/operator.hpp
index 729092dd1..bc47afef9 100644
--- a/components/operators/include/ftl/operators/operator.hpp
+++ b/components/operators/include/ftl/operators/operator.hpp
@@ -99,6 +99,7 @@ class Graph : public ftl::Configurable {
 	private:
 	std::list<ftl::operators::detail::OperatorNode> operators_;
 	std::map<std::string, ftl::Configurable*> configs_;
+	cudaStream_t stream_;
 
 	ftl::Configurable *_append(ftl::operators::detail::ConstructionHelperBase*);
 };
diff --git a/components/operators/src/colours.cpp b/components/operators/src/colours.cpp
index 3174c42e4..474b783c4 100644
--- a/components/operators/src/colours.cpp
+++ b/components/operators/src/colours.cpp
@@ -12,6 +12,8 @@ ColourChannels::~ColourChannels() {
 }
 
 bool ColourChannels::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *s, cudaStream_t stream) {
+	auto cvstream = cv::cuda::StreamAccessor::wrapStream(stream);
+
 	// Convert colour from BGR to BGRA if needed
 	if (in.get<cv::cuda::GpuMat>(Channel::Colour).type() == CV_8UC3) {
 		//cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream);
@@ -19,7 +21,7 @@ bool ColourChannels::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgb
 		auto &col = in.get<cv::cuda::GpuMat>(Channel::Colour);
 		temp_.create(col.size(), CV_8UC4);
 		cv::cuda::swap(col, temp_);
-		cv::cuda::cvtColor(temp_,col, cv::COLOR_BGR2BGRA, 0);
+		cv::cuda::cvtColor(temp_,col, cv::COLOR_BGR2BGRA, 0, cvstream);
 	}
 
 	return true;
diff --git a/components/operators/src/filling.cpp b/components/operators/src/filling.cpp
index b1e68b4d7..da7efea6b 100644
--- a/components/operators/src/filling.cpp
+++ b/components/operators/src/filling.cpp
@@ -22,7 +22,7 @@ bool ScanFieldFill::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd
 		in.createTexture<float>(Channel::Depth),
 		in.createTexture<float>(Channel::Smoothing),
 		thresh,
-		s->parameters(), 0
+		s->parameters(), stream
 	);
 
 	return true;
diff --git a/components/operators/src/mask.cu b/components/operators/src/mask.cu
index b18b71e47..e385f41b1 100644
--- a/components/operators/src/mask.cu
+++ b/components/operators/src/mask.cu
@@ -13,19 +13,13 @@ __global__ void discontinuity_kernel(ftl::cuda::TextureObject<int> mask_out, ftl
 
 		const float d = depth.tex2D((int)x, (int)y);
 
-		// Calculate depth between 0.0 and 1.0
-		//float p = (d - params.minDepth) / (params.maxDepth - params.minDepth);
-
 		if (d >= params.minDepth && d <= params.maxDepth) {
 			/* Orts-Escolano S. et al. 2016. Holoportation: Virtual 3D teleportation in real-time. */
-			// Is there a discontinuity nearby?
-			//for (int u=-RADIUS; u<=RADIUS; ++u) {
-			//	for (int v=-RADIUS; v<=RADIUS; ++v) {
-					// If yes, the flag using w = -1
-			//		if (fabs(depth.tex2D((int)x+u, (int)y+v) - d) > 0.1f) mask.isDiscontinuity(true);
-			//	}
-			//}
 
+			// If colour cross support region terminates within the requested
+			// radius, and the absolute depth difference on the other side is
+			// greater than threshold, then is is a discontinuity.
+			// Repeat for left, right, up and down.
 			const uchar4 sup = support.tex2D((int)x, (int)y);
 			if (sup.x <= radius) {
 				float dS = depth.tex2D((int)x - sup.x - radius, (int)y);
diff --git a/components/operators/src/normals.cpp b/components/operators/src/normals.cpp
index 5f8554c29..57903aa1d 100644
--- a/components/operators/src/normals.cpp
+++ b/components/operators/src/normals.cpp
@@ -28,7 +28,7 @@ bool Normals::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Sour
 	ftl::cuda::normals(
 		out.createTexture<float4>(Channel::Normals, ftl::rgbd::Format<float4>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())),
 		in.createTexture<float>(Channel::Depth),
-		s->parameters(), 0
+		s->parameters(), stream
 	);
 
 	return true;
diff --git a/components/operators/src/operator.cpp b/components/operators/src/operator.cpp
index 91dada28b..e55c181c4 100644
--- a/components/operators/src/operator.cpp
+++ b/components/operators/src/operator.cpp
@@ -31,16 +31,18 @@ bool Operator::apply(FrameSet &in, Frame &out, Source *os, cudaStream_t stream)
 
 
 Graph::Graph(nlohmann::json &config) : ftl::Configurable(config) {
-
+	cudaSafeCall( cudaStreamCreate(&stream_) );
 }
 
 Graph::~Graph() {
-
+	cudaStreamDestroy(stream_);
 }
 
 bool Graph::apply(FrameSet &in, FrameSet &out, cudaStream_t stream) {
 	if (!value("enabled", true)) return false;
 
+	auto stream_actual = (stream == 0) ? stream_ : stream;
+
 	if (in.frames.size() != out.frames.size()) return false;
 
 	for (auto &i : operators_) {
@@ -53,11 +55,16 @@ bool Graph::apply(FrameSet &in, FrameSet &out, cudaStream_t stream) {
 			auto *instance = i.instances[j];
 
 			if (instance->enabled()) {
-				instance->apply(in.frames[j], out.frames[j], in.sources[j], stream);
+				instance->apply(in.frames[j], out.frames[j], in.sources[j], stream_actual);
 			}
 		}
 	}
 
+	if (stream == 0) {
+		cudaStreamSynchronize(stream_actual);
+		cudaSafeCall( cudaGetLastError() );
+	}
+
 	return true;
 }
 
diff --git a/components/operators/src/segmentation.cpp b/components/operators/src/segmentation.cpp
index a4bb9b0f7..08b4de467 100644
--- a/components/operators/src/segmentation.cpp
+++ b/components/operators/src/segmentation.cpp
@@ -22,7 +22,7 @@ bool CrossSupport::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd:
 			out.createTexture<uchar4>(Channel::Support2, ftl::rgbd::Format<uchar4>(in.get<cv::cuda::GpuMat>(Channel::Colour).size())),
 			config()->value("depth_tau", 0.04f),
 			config()->value("v_max", 5),
-			config()->value("h_max", 5), 0
+			config()->value("h_max", 5), stream
 		);
 	} //else {
 		ftl::cuda::support_region(
@@ -30,7 +30,7 @@ bool CrossSupport::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd:
 			out.createTexture<uchar4>(Channel::Support1, ftl::rgbd::Format<uchar4>(in.get<cv::cuda::GpuMat>(Channel::Colour).size())),
 			config()->value("tau", 5.0f),
 			config()->value("v_max", 5),
-			config()->value("h_max", 5), 0
+			config()->value("h_max", 5), stream
 		);
 	//}
 
@@ -62,7 +62,7 @@ bool VisCrossSupport::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rg
 			in.createTexture<float>(Channel::Depth),
 			in.createTexture<uchar4>(Channel::Support1),
 			in.createTexture<uchar4>(Channel::Support2),
-			0
+			stream
 		);
 	} else {
 		ftl::cuda::vis_support_region(
@@ -74,7 +74,7 @@ bool VisCrossSupport::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rg
 			config()->value("offset_y", 0),
 			config()->value("spacing_x", 50),
 			config()->value("spacing_y", 50),
-			0
+			stream
 		);
 
 		if (show_depth) {
@@ -87,7 +87,7 @@ bool VisCrossSupport::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rg
 				config()->value("offset_y", 0),
 				config()->value("spacing_x", 50),
 				config()->value("spacing_y", 50),
-				0
+				stream
 			);
 		}
 	}
diff --git a/components/operators/src/smoothing.cpp b/components/operators/src/smoothing.cpp
index a9041087a..359b17714 100644
--- a/components/operators/src/smoothing.cpp
+++ b/components/operators/src/smoothing.cpp
@@ -33,7 +33,7 @@ bool HFSmoother::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::S
             in.createTexture<float>(Channel::Energy, ftl::rgbd::Format<float>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())),
             in.createTexture<float>(Channel::Smoothing, ftl::rgbd::Format<float>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())),
             var_thresh,
-            s->parameters(), 0
+            s->parameters(), stream
         );
     }
 
@@ -158,7 +158,7 @@ bool SimpleMLS::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::So
 			thresh,
 			radius,
 			s->parameters(),
-			0
+			stream
 		);
 
 		in.swapChannels(Channel::Depth, Channel::Depth2);
@@ -204,7 +204,7 @@ bool ColourMLS::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::So
 				col_smooth,
 				radius,
 				s->parameters(),
-				0
+				stream
 			);
 		} else {
 			ftl::cuda::colour_mls_smooth_csr(
@@ -218,7 +218,7 @@ bool ColourMLS::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::So
 				col_smooth,
 				filling,
 				s->parameters(),
-				0
+				stream
 			);
 		}
 
@@ -259,7 +259,7 @@ bool AdaptiveMLS::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::
 			in.createTexture<float>(Channel::Smoothing),
 			radius,
 			s->parameters(),
-			0
+			stream
 		);
 
 		in.swapChannels(Channel::Depth, Channel::Depth2);
-- 
GitLab