From a24cd871b2a77493208b40ede67751a1d666ef6a Mon Sep 17 00:00:00 2001
From: Nicolas Pope <nwpope@utu.fi>
Date: Wed, 29 Jan 2020 19:59:48 +0200
Subject: [PATCH] Sender stream sync fix

---
 applications/reconstruct/src/reconstruction.cpp  | 16 ++++++++++++----
 .../include/ftl/codecs/depth_convert_cuda.hpp    |  6 +++---
 components/codecs/src/depth_convert.cu           |  6 +++---
 components/operators/src/operator.cpp            | 10 +++++++---
 components/streams/src/sender.cpp                |  5 ++++-
 5 files changed, 29 insertions(+), 14 deletions(-)

diff --git a/applications/reconstruct/src/reconstruction.cpp b/applications/reconstruct/src/reconstruction.cpp
index e4e448793..524128dd5 100644
--- a/applications/reconstruct/src/reconstruction.cpp
+++ b/applications/reconstruct/src/reconstruction.cpp
@@ -75,9 +75,13 @@ void Reconstruction::onFrameSet(const ftl::rgbd::VideoCallback &cb) {
 
 bool Reconstruction::post(ftl::rgbd::FrameSet &fs) {
 	pipeline_->apply(fs, fs, 0);
+
+	/*for (size_t i=0; i<fs.frames.size(); ++i) {
+		fs.frames[i].create<cv::cuda::GpuMat>(Channel::Depth);
+	}*/
 		
 	{
-		UNIQUE_LOCK(exchange_mtx_, lk);
+		//UNIQUE_LOCK(exchange_mtx_, lk);
 		//if (new_frame_ == true) LOG(WARNING) << "Frame lost";
 		fs.swapTo(fs_align_);
 		new_frame_ = true;
@@ -85,12 +89,16 @@ bool Reconstruction::post(ftl::rgbd::FrameSet &fs) {
 
 	if (cb_) {
 		ftl::pool.push([this](int id) {
+			UNIQUE_LOCK(fs_align_.mtx, lk);
 			if (new_frame_) {
-				{
-					UNIQUE_LOCK(exchange_mtx_, lk);
+				//{
+					//UNIQUE_LOCK(exchange_mtx_, lk);
 					new_frame_ = false;
 					fs_align_.swapTo(fs_render_);
-				}
+				//}
+
+				UNIQUE_LOCK(fs_render_.mtx, lk2);
+				lk.unlock();
 
 				if (cb_) cb_(fs_render_);
 			}
diff --git a/components/codecs/include/ftl/codecs/depth_convert_cuda.hpp b/components/codecs/include/ftl/codecs/depth_convert_cuda.hpp
index 7c4011dfb..7370f2357 100644
--- a/components/codecs/include/ftl/codecs/depth_convert_cuda.hpp
+++ b/components/codecs/include/ftl/codecs/depth_convert_cuda.hpp
@@ -6,11 +6,11 @@
 namespace ftl {
 namespace cuda {
 
-void depth_to_vuya(const cv::cuda::PtrStepSz<float> &depth, const cv::cuda::PtrStepSz<uchar4> &rgba, float maxdepth, cv::cuda::Stream stream);
+void depth_to_vuya(const cv::cuda::PtrStepSz<float> &depth, const cv::cuda::PtrStepSz<uchar4> &rgba, float maxdepth, cv::cuda::Stream &stream);
 
-void vuya_to_depth(const cv::cuda::PtrStepSz<float> &depth, const cv::cuda::PtrStepSz<ushort4> &rgba, float maxdepth, cv::cuda::Stream stream);
+void vuya_to_depth(const cv::cuda::PtrStepSz<float> &depth, const cv::cuda::PtrStepSz<ushort4> &rgba, float maxdepth, cv::cuda::Stream &stream);
 
-void smooth_y(const cv::cuda::PtrStepSz<ushort4> &rgba, cv::cuda::Stream stream);
+void smooth_y(const cv::cuda::PtrStepSz<ushort4> &rgba, cv::cuda::Stream &stream);
 
 }
 }
diff --git a/components/codecs/src/depth_convert.cu b/components/codecs/src/depth_convert.cu
index d11a78a3d..e18d16007 100644
--- a/components/codecs/src/depth_convert.cu
+++ b/components/codecs/src/depth_convert.cu
@@ -46,7 +46,7 @@ __global__ void depth_to_vuya_kernel(cv::cuda::PtrStepSz<float> depth, cv::cuda:
 	}
 }
 
-void ftl::cuda::depth_to_vuya(const cv::cuda::PtrStepSz<float> &depth, const cv::cuda::PtrStepSz<uchar4> &rgba, float maxdepth, cv::cuda::Stream stream) {
+void ftl::cuda::depth_to_vuya(const cv::cuda::PtrStepSz<float> &depth, const cv::cuda::PtrStepSz<uchar4> &rgba, float maxdepth, cv::cuda::Stream &stream) {
 	const dim3 gridSize((depth.cols + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.rows + T_PER_BLOCK - 1)/T_PER_BLOCK);
     const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
 
@@ -97,7 +97,7 @@ __global__ void vuya_to_depth_kernel(cv::cuda::PtrStepSz<float> depth, cv::cuda:
 	}
 }
 
-void ftl::cuda::vuya_to_depth(const cv::cuda::PtrStepSz<float> &depth, const cv::cuda::PtrStepSz<ushort4> &rgba, float maxdepth, cv::cuda::Stream stream) {
+void ftl::cuda::vuya_to_depth(const cv::cuda::PtrStepSz<float> &depth, const cv::cuda::PtrStepSz<ushort4> &rgba, float maxdepth, cv::cuda::Stream &stream) {
 	const dim3 gridSize((depth.cols + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.rows + T_PER_BLOCK - 1)/T_PER_BLOCK);
     const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
 
@@ -201,7 +201,7 @@ void ftl::cuda::vuya_to_depth(const cv::cuda::PtrStepSz<float> &depth, const cv:
 	}
 }
 
-void ftl::cuda::smooth_y(const cv::cuda::PtrStepSz<ushort4> &rgba, cv::cuda::Stream stream) {
+void ftl::cuda::smooth_y(const cv::cuda::PtrStepSz<ushort4> &rgba, cv::cuda::Stream &stream) {
 	const dim3 gridSize((rgba.cols + T_PER_BLOCK - 1)/T_PER_BLOCK, (rgba.rows + T_PER_BLOCK - 1)/T_PER_BLOCK);
     const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
 
diff --git a/components/operators/src/operator.cpp b/components/operators/src/operator.cpp
index f0e8868ab..080d5c170 100644
--- a/components/operators/src/operator.cpp
+++ b/components/operators/src/operator.cpp
@@ -67,7 +67,7 @@ bool Graph::apply(FrameSet &in, FrameSet &out, cudaStream_t stream) {
 
 				if (instance->enabled()) {
 					try {
-						if (!instance->apply(in.frames[j], out.frames[j], stream_actual)) return false;
+						instance->apply(in.frames[j], out.frames[j], stream_actual);
 					} catch (const std::exception &e) {
 						LOG(ERROR) << "Operator exception: " << e.what();
 					}
@@ -78,7 +78,7 @@ bool Graph::apply(FrameSet &in, FrameSet &out, cudaStream_t stream) {
 
 			if (instance->enabled()) {
 				try {
-					if (!instance->apply(in, out, stream_actual)) return false;
+					instance->apply(in, out, stream_actual);
 				} catch (const std::exception &e) {
 					LOG(ERROR) << "Operator exception: " << e.what();
 				}
@@ -108,7 +108,11 @@ bool Graph::apply(Frame &in, Frame &out, cudaStream_t stream) {
 		auto *instance = i.instances[0];
 
 		if (instance->enabled()) {
-			if (!instance->apply(in, out, stream_actual)) return false;
+			try {
+				instance->apply(in, out, stream_actual);
+			} catch (const std::exception &e) {
+				LOG(ERROR) << "Operator exception: " << e.what();
+			}
 		}
 	}
 
diff --git a/components/streams/src/sender.cpp b/components/streams/src/sender.cpp
index 11ababc85..6036b1a19 100644
--- a/components/streams/src/sender.cpp
+++ b/components/streams/src/sender.cpp
@@ -158,7 +158,7 @@ void Sender::post(const ftl::rgbd::FrameSet &fs) {
 				const auto &packets = frame.getPackets(cc);
 				if (packets.size() > 0) {
 					if (packets.size() > 1) {
-						LOG(WARNING) << "Multi-packet send";
+						LOG(WARNING) << "Multi-packet send: " << (int)cc;
 						ftl::codecs::Packet pkt;
 						mergeNALUnits(packets, pkt);
 						stream_->post(spkt, pkt);
@@ -256,6 +256,9 @@ void Sender::_encodeChannel(const ftl::rgbd::FrameSet &fs, Channel c, bool reset
 			break;
 		}
 
+		//cudaSafeCall(cudaStreamSynchronize(enc->stream()));
+		enc->stream().waitForCompletion();
+
 		if (enc) {
 			// FIXME: Timestamps may not always be aligned to interval.
 			//if (do_inject || fs.timestamp % (10*ftl::timer::getInterval()) == 0) enc->reset();
-- 
GitLab