From 223f9e0e73f27a7d7353f07cef1d8604800c2ab0 Mon Sep 17 00:00:00 2001 From: Nicolas Pope <nicolas.pope@utu.fi> Date: Thu, 30 Jan 2020 09:38:45 +0200 Subject: [PATCH] Resolves #298 and resolves #291 framerate issue --- .../reconstruct/src/reconstruction.cpp | 16 ++- .../include/ftl/codecs/depth_convert_cuda.hpp | 6 +- components/codecs/src/depth_convert.cu | 6 +- components/codecs/src/nvpipe_encoder.cpp | 14 +- components/operators/src/operator.cpp | 10 +- .../include/ftl/rgbd/frameset.hpp | 4 +- components/rgbd-sources/src/frame.cpp | 2 + components/rgbd-sources/src/frameset.cpp | 123 ++++++++++-------- components/streams/src/netstream.cpp | 4 +- components/streams/src/sender.cpp | 7 +- 10 files changed, 114 insertions(+), 78 deletions(-) diff --git a/applications/reconstruct/src/reconstruction.cpp b/applications/reconstruct/src/reconstruction.cpp index e4e448793..524128dd5 100644 --- a/applications/reconstruct/src/reconstruction.cpp +++ b/applications/reconstruct/src/reconstruction.cpp @@ -75,9 +75,13 @@ void Reconstruction::onFrameSet(const ftl::rgbd::VideoCallback &cb) { bool Reconstruction::post(ftl::rgbd::FrameSet &fs) { pipeline_->apply(fs, fs, 0); + + /*for (size_t i=0; i<fs.frames.size(); ++i) { + fs.frames[i].create<cv::cuda::GpuMat>(Channel::Depth); + }*/ { - UNIQUE_LOCK(exchange_mtx_, lk); + //UNIQUE_LOCK(exchange_mtx_, lk); //if (new_frame_ == true) LOG(WARNING) << "Frame lost"; fs.swapTo(fs_align_); new_frame_ = true; @@ -85,12 +89,16 @@ bool Reconstruction::post(ftl::rgbd::FrameSet &fs) { if (cb_) { ftl::pool.push([this](int id) { + UNIQUE_LOCK(fs_align_.mtx, lk); if (new_frame_) { - { - UNIQUE_LOCK(exchange_mtx_, lk); + //{ + //UNIQUE_LOCK(exchange_mtx_, lk); new_frame_ = false; fs_align_.swapTo(fs_render_); - } + //} + + UNIQUE_LOCK(fs_render_.mtx, lk2); + lk.unlock(); if (cb_) cb_(fs_render_); } diff --git a/components/codecs/include/ftl/codecs/depth_convert_cuda.hpp b/components/codecs/include/ftl/codecs/depth_convert_cuda.hpp index 7c4011dfb..7370f2357 100644 --- a/components/codecs/include/ftl/codecs/depth_convert_cuda.hpp +++ b/components/codecs/include/ftl/codecs/depth_convert_cuda.hpp @@ -6,11 +6,11 @@ namespace ftl { namespace cuda { -void depth_to_vuya(const cv::cuda::PtrStepSz<float> &depth, const cv::cuda::PtrStepSz<uchar4> &rgba, float maxdepth, cv::cuda::Stream stream); +void depth_to_vuya(const cv::cuda::PtrStepSz<float> &depth, const cv::cuda::PtrStepSz<uchar4> &rgba, float maxdepth, cv::cuda::Stream &stream); -void vuya_to_depth(const cv::cuda::PtrStepSz<float> &depth, const cv::cuda::PtrStepSz<ushort4> &rgba, float maxdepth, cv::cuda::Stream stream); +void vuya_to_depth(const cv::cuda::PtrStepSz<float> &depth, const cv::cuda::PtrStepSz<ushort4> &rgba, float maxdepth, cv::cuda::Stream &stream); -void smooth_y(const cv::cuda::PtrStepSz<ushort4> &rgba, cv::cuda::Stream stream); +void smooth_y(const cv::cuda::PtrStepSz<ushort4> &rgba, cv::cuda::Stream &stream); } } diff --git a/components/codecs/src/depth_convert.cu b/components/codecs/src/depth_convert.cu index d11a78a3d..e18d16007 100644 --- a/components/codecs/src/depth_convert.cu +++ b/components/codecs/src/depth_convert.cu @@ -46,7 +46,7 @@ __global__ void depth_to_vuya_kernel(cv::cuda::PtrStepSz<float> depth, cv::cuda: } } -void ftl::cuda::depth_to_vuya(const cv::cuda::PtrStepSz<float> &depth, const cv::cuda::PtrStepSz<uchar4> &rgba, float maxdepth, cv::cuda::Stream stream) { +void ftl::cuda::depth_to_vuya(const cv::cuda::PtrStepSz<float> &depth, const cv::cuda::PtrStepSz<uchar4> &rgba, float maxdepth, cv::cuda::Stream &stream) { const dim3 gridSize((depth.cols + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.rows + T_PER_BLOCK - 1)/T_PER_BLOCK); const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); @@ -97,7 +97,7 @@ __global__ void vuya_to_depth_kernel(cv::cuda::PtrStepSz<float> depth, cv::cuda: } } -void ftl::cuda::vuya_to_depth(const cv::cuda::PtrStepSz<float> &depth, const cv::cuda::PtrStepSz<ushort4> &rgba, float maxdepth, cv::cuda::Stream stream) { +void ftl::cuda::vuya_to_depth(const cv::cuda::PtrStepSz<float> &depth, const cv::cuda::PtrStepSz<ushort4> &rgba, float maxdepth, cv::cuda::Stream &stream) { const dim3 gridSize((depth.cols + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.rows + T_PER_BLOCK - 1)/T_PER_BLOCK); const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); @@ -201,7 +201,7 @@ void ftl::cuda::vuya_to_depth(const cv::cuda::PtrStepSz<float> &depth, const cv: } } -void ftl::cuda::smooth_y(const cv::cuda::PtrStepSz<ushort4> &rgba, cv::cuda::Stream stream) { +void ftl::cuda::smooth_y(const cv::cuda::PtrStepSz<ushort4> &rgba, cv::cuda::Stream &stream) { const dim3 gridSize((rgba.cols + T_PER_BLOCK - 1)/T_PER_BLOCK, (rgba.rows + T_PER_BLOCK - 1)/T_PER_BLOCK); const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); diff --git a/components/codecs/src/nvpipe_encoder.cpp b/components/codecs/src/nvpipe_encoder.cpp index c8a44e47c..e4ad3589e 100644 --- a/components/codecs/src/nvpipe_encoder.cpp +++ b/components/codecs/src/nvpipe_encoder.cpp @@ -77,14 +77,14 @@ static ftl::codecs::format_t formatFromPacket(const ftl::codecs::Packet &pkt) { static uint64_t calculateBitrate(definition_t def, float ratescale) { float bitrate = 1.0f; // Megabits switch (def) { - case definition_t::UHD4k : bitrate = 32.0f; break; - case definition_t::HTC_VIVE : bitrate = 16.0f; break; - case definition_t::HD1080 : bitrate = 6.0f; break; - case definition_t::HD720 : bitrate = 4.0f; break; + case definition_t::UHD4k : bitrate = 40.0f; break; + case definition_t::HTC_VIVE : bitrate = 32.0f; break; + case definition_t::HD1080 : bitrate = 12.0f; break; + case definition_t::HD720 : bitrate = 8.0f; break; case definition_t::SD576 : - case definition_t::SD480 : bitrate = 2.0f; break; - case definition_t::LD360 : bitrate = 1.0f; break; - default : bitrate = 8.0f; + case definition_t::SD480 : bitrate = 4.0f; break; + case definition_t::LD360 : bitrate = 2.0f; break; + default : bitrate = 16.0f; } bitrate *= 1000.0f*1000.0f; diff --git a/components/operators/src/operator.cpp b/components/operators/src/operator.cpp index f0e8868ab..080d5c170 100644 --- a/components/operators/src/operator.cpp +++ b/components/operators/src/operator.cpp @@ -67,7 +67,7 @@ bool Graph::apply(FrameSet &in, FrameSet &out, cudaStream_t stream) { if (instance->enabled()) { try { - if (!instance->apply(in.frames[j], out.frames[j], stream_actual)) return false; + instance->apply(in.frames[j], out.frames[j], stream_actual); } catch (const std::exception &e) { LOG(ERROR) << "Operator exception: " << e.what(); } @@ -78,7 +78,7 @@ bool Graph::apply(FrameSet &in, FrameSet &out, cudaStream_t stream) { if (instance->enabled()) { try { - if (!instance->apply(in, out, stream_actual)) return false; + instance->apply(in, out, stream_actual); } catch (const std::exception &e) { LOG(ERROR) << "Operator exception: " << e.what(); } @@ -108,7 +108,11 @@ bool Graph::apply(Frame &in, Frame &out, cudaStream_t stream) { auto *instance = i.instances[0]; if (instance->enabled()) { - if (!instance->apply(in, out, stream_actual)) return false; + try { + instance->apply(in, out, stream_actual); + } catch (const std::exception &e) { + LOG(ERROR) << "Operator exception: " << e.what(); + } } } diff --git a/components/rgbd-sources/include/ftl/rgbd/frameset.hpp b/components/rgbd-sources/include/ftl/rgbd/frameset.hpp index 7fbb09b51..92ac321eb 100644 --- a/components/rgbd-sources/include/ftl/rgbd/frameset.hpp +++ b/components/rgbd-sources/include/ftl/rgbd/frameset.hpp @@ -129,7 +129,7 @@ class Builder : public Generator { std::list<FrameSet*> allocated_; // Keep memory allocations size_t head_; - ftl::rgbd::VideoCallback callback_; + ftl::rgbd::VideoCallback cb_; MUTEX mutex_; int mspf_; float latency_; @@ -156,6 +156,8 @@ class Builder : public Generator { ftl::rgbd::FrameSet *_findFrameset(int64_t ts); void _freeFrameset(ftl::rgbd::FrameSet *); + void _schedule(); + void _recordStats(float fps, float latency); }; diff --git a/components/rgbd-sources/src/frame.cpp b/components/rgbd-sources/src/frame.cpp index faaf78cf3..bf40b5a50 100644 --- a/components/rgbd-sources/src/frame.cpp +++ b/components/rgbd-sources/src/frame.cpp @@ -37,12 +37,14 @@ const cv::cuda::GpuMat &VideoData::as<cv::cuda::GpuMat>() const { template <> cv::Mat &VideoData::make<cv::Mat>() { isgpu = false; + encoded.clear(); return host; } template <> cv::cuda::GpuMat &VideoData::make<cv::cuda::GpuMat>() { isgpu = true; + encoded.clear(); return gpu; } diff --git a/components/rgbd-sources/src/frameset.cpp b/components/rgbd-sources/src/frameset.cpp index e98b5677c..48da2b5dc 100644 --- a/components/rgbd-sources/src/frameset.cpp +++ b/components/rgbd-sources/src/frameset.cpp @@ -83,45 +83,6 @@ Builder::~Builder() { } } - -/*void Builder::push(int64_t timestamp, size_t ix, ftl::rgbd::Frame &frame) { - if (timestamp <= 0 || ix >= kMaxFramesInSet) return; - - UNIQUE_LOCK(mutex_, lk); - - //LOG(INFO) << "BUILDER PUSH: " << timestamp << ", " << ix << ", " << size_; - - // Size is determined by largest frame index received... note that size - // cannot therefore reduce. - if (ix >= size_) { - size_ = ix+1; - states_.resize(size_); - } - states_[ix] = frame.origin(); - - auto *fs = _findFrameset(timestamp); - - if (!fs) { - // Add new frameset - fs = _addFrameset(timestamp); - if (!fs) return; - } - - if (fs->frames.size() < size_) fs->frames.resize(size_); - - lk.unlock(); - SHARED_LOCK(fs->mtx, lk2); - - frame.swapTo(ftl::codecs::kAllChannels, fs->frames[ix]); - - ++fs->count; - - if (fs->mask & (1 << ix)) { - LOG(ERROR) << "Too many frames received for given timestamp: " << timestamp << " (source " << ix << ")"; - } - fs->mask |= (1 << ix); -}*/ - ftl::rgbd::Frame &Builder::get(int64_t timestamp, size_t ix) { if (timestamp <= 0 || ix >= kMaxFramesInSet) throw ftl::exception("Invalid frame timestamp or index"); @@ -156,37 +117,92 @@ ftl::rgbd::Frame &Builder::get(int64_t timestamp, size_t ix) { } void Builder::completed(int64_t ts, size_t ix) { - auto *fs = _findFrameset(ts); + ftl::rgbd::FrameSet *fs = nullptr; + + { + UNIQUE_LOCK(mutex_, lk); + fs = _findFrameset(ts); + } + if (fs && ix < fs->frames.size()) { - if (fs->mask & (1 << ix)) { - LOG(WARNING) << "Frame completed multiple times: " << ts << " (source " << ix << ")"; - return; + { + UNIQUE_LOCK(fs->mtx, lk2); + + // If already completed for given frame, then skip + if (fs->mask & (1 << ix)) return; + + states_[ix] = fs->frames[ix].origin(); + fs->mask |= (1 << ix); + ++fs->count; + } + + if (!fs->stale && static_cast<unsigned int>(fs->count) >= size_) { + //LOG(INFO) << "Frameset ready... " << fs->timestamp; + UNIQUE_LOCK(mutex_, lk); + _schedule(); } - states_[ix] = fs->frames[ix].origin(); - fs->mask |= (1 << ix); - ++fs->count; } else { LOG(ERROR) << "Completing frame that does not exist: " << ts << ":" << ix; } } +void Builder::_schedule() { + if (size_ == 0) return; + ftl::rgbd::FrameSet *fs = nullptr; + + //UNIQUE_LOCK(mutex_, lk); + if (jobs_ > 0) return; + fs = _getFrameset(); + + //LOG(INFO) << "Latency for " << name_ << " = " << (latency_*ftl::timer::getInterval()) << "ms"; + + if (fs) { + //UNIQUE_LOCK(fs->mtx, lk2); + // The buffers are invalid after callback so mark stale + fs->stale = true; + jobs_++; + //lk.unlock(); + + ftl::pool.push([this,fs](int) { + UNIQUE_LOCK(fs->mtx, lk2); + try { + if (cb_) cb_(*fs); + //LOG(INFO) << "Frameset processed (" << name_ << "): " << fs->timestamp; + } catch(std::exception &e) { + LOG(ERROR) << "Exception in frameset builder: " << e.what(); + } + + //fs->resetFull(); + + UNIQUE_LOCK(mutex_, lk); + _freeFrameset(fs); + + jobs_--; + _schedule(); + }); + } + +} + size_t Builder::size() { return size_; } void Builder::onFrameSet(const std::function<bool(ftl::rgbd::FrameSet &)> &cb) { - if (!cb) { + /*if (!cb) { main_id_.cancel(); return; - } + }*/ + + cb_ = cb; - if (main_id_.id() != -1) { + /*if (main_id_.id() != -1) { main_id_.cancel(); - } + }*/ // 3. Issue IO retrieve ad compute jobs before finding a valid // frame at required latency to pass to callback. - main_id_ = ftl::timer::add(ftl::timer::kTimerMain, [this,cb](int64_t ts) { + /*main_id_ = ftl::timer::add(ftl::timer::kTimerMain, [this,cb](int64_t ts) { //if (jobs_ > 0) LOG(ERROR) << "SKIPPING TIMER JOB " << ts; if (jobs_ > 0) return true; if (size_ == 0) return true; @@ -234,7 +250,7 @@ void Builder::onFrameSet(const std::function<bool(ftl::rgbd::FrameSet &)> &cb) { //if (jobs_ == 0) LOG(INFO) << "LAST JOB = Main"; return true; - }); + });*/ } ftl::rgbd::FrameState &Builder::state(size_t ix) { @@ -297,10 +313,11 @@ ftl::rgbd::FrameSet *Builder::_getFrameset() { int count = 0; // Merge all previous frames - for (; j!=framesets_.end(); j++) { + while (j!=framesets_.end()) { ++count; auto *f2 = *j; + //LOG(INFO) << "MERGE: " << f2->count; j = framesets_.erase(j); mergeFrameset(*f,*f2); _freeFrameset(f2); diff --git a/components/streams/src/netstream.cpp b/components/streams/src/netstream.cpp index cb0a64876..f8857d5c8 100644 --- a/components/streams/src/netstream.cpp +++ b/components/streams/src/netstream.cpp @@ -80,9 +80,9 @@ bool Net::post(const ftl::codecs::StreamPacket &spkt, const ftl::codecs::Packet // Quality filter the packets if (pkt.bitrate > 0 && pkt.bitrate != client.quality) { - ++c; + //++c; LOG(INFO) << "Incorrect quality: " << (int)pkt.bitrate << " but requested " << (int)client.quality; - continue; + //continue; } try { diff --git a/components/streams/src/sender.cpp b/components/streams/src/sender.cpp index 11ababc85..0592d24f6 100644 --- a/components/streams/src/sender.cpp +++ b/components/streams/src/sender.cpp @@ -158,7 +158,7 @@ void Sender::post(const ftl::rgbd::FrameSet &fs) { const auto &packets = frame.getPackets(cc); if (packets.size() > 0) { if (packets.size() > 1) { - LOG(WARNING) << "Multi-packet send"; + LOG(WARNING) << "Multi-packet send: " << (int)cc; ftl::codecs::Packet pkt; mergeNALUnits(packets, pkt); stream_->post(spkt, pkt); @@ -256,6 +256,9 @@ void Sender::_encodeChannel(const ftl::rgbd::FrameSet &fs, Channel c, bool reset break; } + //cudaSafeCall(cudaStreamSynchronize(enc->stream())); + enc->stream().waitForCompletion(); + if (enc) { // FIXME: Timestamps may not always be aligned to interval. //if (do_inject || fs.timestamp % (10*ftl::timer::getInterval()) == 0) enc->reset(); @@ -266,7 +269,7 @@ void Sender::_encodeChannel(const ftl::rgbd::FrameSet &fs, Channel c, bool reset pkt.frame_count = count; pkt.codec = codec; pkt.definition = definition_t::Any; - pkt.bitrate = max_bitrate; + pkt.bitrate = (!lossless && ftl::codecs::isFloatChannel(c)) ? max_bitrate : max_bitrate/2; pkt.flags = 0; if (!lossless && ftl::codecs::isFloatChannel(c)) pkt.flags = ftl::codecs::kFlagFloat | ftl::codecs::kFlagMappedDepth; -- GitLab