Sender stream sync fix

a24cd871 · Nicolas Pope · 5fe60517 · a24cd871 · a24cd871 · a24cd871
Commit a24cd871 authored 5 years ago by Nicolas Pope
--- a/applications/reconstruct/src/reconstruction.cpp
+++ b/applications/reconstruct/src/reconstruction.cpp
@@ -76,8 +76,12 @@ void Reconstruction::onFrameSet(const ftl::rgbd::VideoCallback &cb) {
 bool Reconstruction::post(ftl::rgbd::FrameSet &fs) {
 	pipeline_->apply(fs, fs, 0);
+	/*for (size_t i=0; i<fs.frames.size(); ++i) {
+		fs.frames[i].create<cv::cuda::GpuMat>(Channel::Depth);
+	}*/
 	{
-		UNIQUE_LOCK(exchange_mtx_, lk);
+		//UNIQUE_LOCK(exchange_mtx_, lk);
 		//if (new_frame_ == true) LOG(WARNING) << "Frame lost";
 		fs.swapTo(fs_align_);
 		new_frame_ = true;
@@ -85,12 +89,16 @@ bool Reconstruction::post(ftl::rgbd::FrameSet &fs) {
 	if (cb_) {
 		ftl::pool.push([this](int id) {
+			UNIQUE_LOCK(fs_align_.mtx, lk);
 			if (new_frame_) {
-				{
+				//{
-					UNIQUE_LOCK(exchange_mtx_, lk);
+					//UNIQUE_LOCK(exchange_mtx_, lk);
 					new_frame_ = false;
 					fs_align_.swapTo(fs_render_);
-				}
+				//}
+				UNIQUE_LOCK(fs_render_.mtx, lk2);
+				lk.unlock();
 				if (cb_) cb_(fs_render_);
 			}

--- a/components/codecs/include/ftl/codecs/depth_convert_cuda.hpp
+++ b/components/codecs/include/ftl/codecs/depth_convert_cuda.hpp
@@ -6,11 +6,11 @@
 namespace ftl {
 namespace cuda {
-void depth_to_vuya(const cv::cuda::PtrStepSz<float> &depth, const cv::cuda::PtrStepSz<uchar4> &rgba, float maxdepth, cv::cuda::Stream stream);
+void depth_to_vuya(const cv::cuda::PtrStepSz<float> &depth, const cv::cuda::PtrStepSz<uchar4> &rgba, float maxdepth, cv::cuda::Stream &stream);
-void vuya_to_depth(const cv::cuda::PtrStepSz<float> &depth, const cv::cuda::PtrStepSz<ushort4> &rgba, float maxdepth, cv::cuda::Stream stream);
+void vuya_to_depth(const cv::cuda::PtrStepSz<float> &depth, const cv::cuda::PtrStepSz<ushort4> &rgba, float maxdepth, cv::cuda::Stream &stream);
-void smooth_y(const cv::cuda::PtrStepSz<ushort4> &rgba, cv::cuda::Stream stream);
+void smooth_y(const cv::cuda::PtrStepSz<ushort4> &rgba, cv::cuda::Stream &stream);
 }
 }

--- a/components/codecs/src/depth_convert.cu
+++ b/components/codecs/src/depth_convert.cu
@@ -46,7 +46,7 @@ __global__ void depth_to_vuya_kernel(cv::cuda::PtrStepSz<float> depth, cv::cuda:
 	}
 }
-void ftl::cuda::depth_to_vuya(const cv::cuda::PtrStepSz<float> &depth, const cv::cuda::PtrStepSz<uchar4> &rgba, float maxdepth, cv::cuda::Stream stream) {
+void ftl::cuda::depth_to_vuya(const cv::cuda::PtrStepSz<float> &depth, const cv::cuda::PtrStepSz<uchar4> &rgba, float maxdepth, cv::cuda::Stream &stream) {
 	const dim3 gridSize((depth.cols + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.rows + T_PER_BLOCK - 1)/T_PER_BLOCK);
    const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
@@ -97,7 +97,7 @@ __global__ void vuya_to_depth_kernel(cv::cuda::PtrStepSz<float> depth, cv::cuda:
 	}
 }
-void ftl::cuda::vuya_to_depth(const cv::cuda::PtrStepSz<float> &depth, const cv::cuda::PtrStepSz<ushort4> &rgba, float maxdepth, cv::cuda::Stream stream) {
+void ftl::cuda::vuya_to_depth(const cv::cuda::PtrStepSz<float> &depth, const cv::cuda::PtrStepSz<ushort4> &rgba, float maxdepth, cv::cuda::Stream &stream) {
 	const dim3 gridSize((depth.cols + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.rows + T_PER_BLOCK - 1)/T_PER_BLOCK);
    const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
@@ -201,7 +201,7 @@ void ftl::cuda::vuya_to_depth(const cv::cuda::PtrStepSz<float> &depth, const cv:
 	}
 }
-void ftl::cuda::smooth_y(const cv::cuda::PtrStepSz<ushort4> &rgba, cv::cuda::Stream stream) {
+void ftl::cuda::smooth_y(const cv::cuda::PtrStepSz<ushort4> &rgba, cv::cuda::Stream &stream) {
 	const dim3 gridSize((rgba.cols + T_PER_BLOCK - 1)/T_PER_BLOCK, (rgba.rows + T_PER_BLOCK - 1)/T_PER_BLOCK);
    const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);

--- a/components/operators/src/operator.cpp
+++ b/components/operators/src/operator.cpp
@@ -67,7 +67,7 @@ bool Graph::apply(FrameSet &in, FrameSet &out, cudaStream_t stream) {
 				if (instance->enabled()) {
 					try {
-						if (!instance->apply(in.frames[j], out.frames[j], stream_actual)) return false;
+						instance->apply(in.frames[j], out.frames[j], stream_actual);
 					} catch (const std::exception &e) {
 						LOG(ERROR) << "Operator exception: " << e.what();
 					}
@@ -78,7 +78,7 @@ bool Graph::apply(FrameSet &in, FrameSet &out, cudaStream_t stream) {
 			if (instance->enabled()) {
 				try {
-					if (!instance->apply(in, out, stream_actual)) return false;
+					instance->apply(in, out, stream_actual);
 				} catch (const std::exception &e) {
 					LOG(ERROR) << "Operator exception: " << e.what();
 				}
@@ -108,7 +108,11 @@ bool Graph::apply(Frame &in, Frame &out, cudaStream_t stream) {
 		auto *instance = i.instances[0];
 		if (instance->enabled()) {
-			if (!instance->apply(in, out, stream_actual)) return false;
+			try {
+				instance->apply(in, out, stream_actual);
+			} catch (const std::exception &e) {
+				LOG(ERROR) << "Operator exception: " << e.what();
+			}
 		}
 	}

--- a/components/streams/src/sender.cpp
+++ b/components/streams/src/sender.cpp
@@ -158,7 +158,7 @@ void Sender::post(const ftl::rgbd::FrameSet &fs) {
 				const auto &packets = frame.getPackets(cc);
 				if (packets.size() > 0) {
 					if (packets.size() > 1) {
-						LOG(WARNING) << "Multi-packet send";
+						LOG(WARNING) << "Multi-packet send: " << (int)cc;
 						ftl::codecs::Packet pkt;
 						mergeNALUnits(packets, pkt);
 						stream_->post(spkt, pkt);
@@ -256,6 +256,9 @@ void Sender::_encodeChannel(const ftl::rgbd::FrameSet &fs, Channel c, bool reset
 			break;
 		}
+		//cudaSafeCall(cudaStreamSynchronize(enc->stream()));
+		enc->stream().waitForCompletion();
 		if (enc) {
 			// FIXME: Timestamps may not always be aligned to interval.
 			//if (do_inject || fs.timestamp % (10*ftl::timer::getInterval()) == 0) enc->reset();