diff --git a/applications/reconstruct/src/reconstruction.cpp b/applications/reconstruct/src/reconstruction.cpp index e4e448793677b44c116fb308ded8a510c551a3e6..524128dd5af15784609c89fd1f3c5681a566bb54 100644 --- a/applications/reconstruct/src/reconstruction.cpp +++ b/applications/reconstruct/src/reconstruction.cpp @@ -75,9 +75,13 @@ void Reconstruction::onFrameSet(const ftl::rgbd::VideoCallback &cb) { bool Reconstruction::post(ftl::rgbd::FrameSet &fs) { pipeline_->apply(fs, fs, 0); + + /*for (size_t i=0; i<fs.frames.size(); ++i) { + fs.frames[i].create<cv::cuda::GpuMat>(Channel::Depth); + }*/ { - UNIQUE_LOCK(exchange_mtx_, lk); + //UNIQUE_LOCK(exchange_mtx_, lk); //if (new_frame_ == true) LOG(WARNING) << "Frame lost"; fs.swapTo(fs_align_); new_frame_ = true; @@ -85,12 +89,16 @@ bool Reconstruction::post(ftl::rgbd::FrameSet &fs) { if (cb_) { ftl::pool.push([this](int id) { + UNIQUE_LOCK(fs_align_.mtx, lk); if (new_frame_) { - { - UNIQUE_LOCK(exchange_mtx_, lk); + //{ + //UNIQUE_LOCK(exchange_mtx_, lk); new_frame_ = false; fs_align_.swapTo(fs_render_); - } + //} + + UNIQUE_LOCK(fs_render_.mtx, lk2); + lk.unlock(); if (cb_) cb_(fs_render_); } diff --git a/components/codecs/include/ftl/codecs/depth_convert_cuda.hpp b/components/codecs/include/ftl/codecs/depth_convert_cuda.hpp index 7c4011dfbeb279680fe0cf25fb5cfbe7df93a574..7370f235790724355b2eb4dc242b5b8c4e73b38f 100644 --- a/components/codecs/include/ftl/codecs/depth_convert_cuda.hpp +++ b/components/codecs/include/ftl/codecs/depth_convert_cuda.hpp @@ -6,11 +6,11 @@ namespace ftl { namespace cuda { -void depth_to_vuya(const cv::cuda::PtrStepSz<float> &depth, const cv::cuda::PtrStepSz<uchar4> &rgba, float maxdepth, cv::cuda::Stream stream); +void depth_to_vuya(const cv::cuda::PtrStepSz<float> &depth, const cv::cuda::PtrStepSz<uchar4> &rgba, float maxdepth, cv::cuda::Stream &stream); -void vuya_to_depth(const cv::cuda::PtrStepSz<float> &depth, const cv::cuda::PtrStepSz<ushort4> &rgba, float maxdepth, cv::cuda::Stream stream); +void vuya_to_depth(const cv::cuda::PtrStepSz<float> &depth, const cv::cuda::PtrStepSz<ushort4> &rgba, float maxdepth, cv::cuda::Stream &stream); -void smooth_y(const cv::cuda::PtrStepSz<ushort4> &rgba, cv::cuda::Stream stream); +void smooth_y(const cv::cuda::PtrStepSz<ushort4> &rgba, cv::cuda::Stream &stream); } } diff --git a/components/codecs/src/depth_convert.cu b/components/codecs/src/depth_convert.cu index d11a78a3da1fa7dfd1b899cc629605d383b2c4b3..e18d16007bacce4a151c8d64f39c8eb5851b15a5 100644 --- a/components/codecs/src/depth_convert.cu +++ b/components/codecs/src/depth_convert.cu @@ -46,7 +46,7 @@ __global__ void depth_to_vuya_kernel(cv::cuda::PtrStepSz<float> depth, cv::cuda: } } -void ftl::cuda::depth_to_vuya(const cv::cuda::PtrStepSz<float> &depth, const cv::cuda::PtrStepSz<uchar4> &rgba, float maxdepth, cv::cuda::Stream stream) { +void ftl::cuda::depth_to_vuya(const cv::cuda::PtrStepSz<float> &depth, const cv::cuda::PtrStepSz<uchar4> &rgba, float maxdepth, cv::cuda::Stream &stream) { const dim3 gridSize((depth.cols + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.rows + T_PER_BLOCK - 1)/T_PER_BLOCK); const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); @@ -97,7 +97,7 @@ __global__ void vuya_to_depth_kernel(cv::cuda::PtrStepSz<float> depth, cv::cuda: } } -void ftl::cuda::vuya_to_depth(const cv::cuda::PtrStepSz<float> &depth, const cv::cuda::PtrStepSz<ushort4> &rgba, float maxdepth, cv::cuda::Stream stream) { +void ftl::cuda::vuya_to_depth(const cv::cuda::PtrStepSz<float> &depth, const cv::cuda::PtrStepSz<ushort4> &rgba, float maxdepth, cv::cuda::Stream &stream) { const dim3 gridSize((depth.cols + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.rows + T_PER_BLOCK - 1)/T_PER_BLOCK); const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); @@ -201,7 +201,7 @@ void ftl::cuda::vuya_to_depth(const cv::cuda::PtrStepSz<float> &depth, const cv: } } -void ftl::cuda::smooth_y(const cv::cuda::PtrStepSz<ushort4> &rgba, cv::cuda::Stream stream) { +void ftl::cuda::smooth_y(const cv::cuda::PtrStepSz<ushort4> &rgba, cv::cuda::Stream &stream) { const dim3 gridSize((rgba.cols + T_PER_BLOCK - 1)/T_PER_BLOCK, (rgba.rows + T_PER_BLOCK - 1)/T_PER_BLOCK); const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); diff --git a/components/codecs/src/nvpipe_encoder.cpp b/components/codecs/src/nvpipe_encoder.cpp index c8a44e47c223dac248f204010330757a289ebf75..e4ad3589e20a3c261a3c16a1e61cbb1e01888b41 100644 --- a/components/codecs/src/nvpipe_encoder.cpp +++ b/components/codecs/src/nvpipe_encoder.cpp @@ -77,14 +77,14 @@ static ftl::codecs::format_t formatFromPacket(const ftl::codecs::Packet &pkt) { static uint64_t calculateBitrate(definition_t def, float ratescale) { float bitrate = 1.0f; // Megabits switch (def) { - case definition_t::UHD4k : bitrate = 32.0f; break; - case definition_t::HTC_VIVE : bitrate = 16.0f; break; - case definition_t::HD1080 : bitrate = 6.0f; break; - case definition_t::HD720 : bitrate = 4.0f; break; + case definition_t::UHD4k : bitrate = 40.0f; break; + case definition_t::HTC_VIVE : bitrate = 32.0f; break; + case definition_t::HD1080 : bitrate = 12.0f; break; + case definition_t::HD720 : bitrate = 8.0f; break; case definition_t::SD576 : - case definition_t::SD480 : bitrate = 2.0f; break; - case definition_t::LD360 : bitrate = 1.0f; break; - default : bitrate = 8.0f; + case definition_t::SD480 : bitrate = 4.0f; break; + case definition_t::LD360 : bitrate = 2.0f; break; + default : bitrate = 16.0f; } bitrate *= 1000.0f*1000.0f; diff --git a/components/operators/src/operator.cpp b/components/operators/src/operator.cpp index f0e8868ab65516d7810e1177f140ef1133b39ec9..080d5c1707c5e7c578746ab7438ae2bf3ee9c35e 100644 --- a/components/operators/src/operator.cpp +++ b/components/operators/src/operator.cpp @@ -67,7 +67,7 @@ bool Graph::apply(FrameSet &in, FrameSet &out, cudaStream_t stream) { if (instance->enabled()) { try { - if (!instance->apply(in.frames[j], out.frames[j], stream_actual)) return false; + instance->apply(in.frames[j], out.frames[j], stream_actual); } catch (const std::exception &e) { LOG(ERROR) << "Operator exception: " << e.what(); } @@ -78,7 +78,7 @@ bool Graph::apply(FrameSet &in, FrameSet &out, cudaStream_t stream) { if (instance->enabled()) { try { - if (!instance->apply(in, out, stream_actual)) return false; + instance->apply(in, out, stream_actual); } catch (const std::exception &e) { LOG(ERROR) << "Operator exception: " << e.what(); } @@ -108,7 +108,11 @@ bool Graph::apply(Frame &in, Frame &out, cudaStream_t stream) { auto *instance = i.instances[0]; if (instance->enabled()) { - if (!instance->apply(in, out, stream_actual)) return false; + try { + instance->apply(in, out, stream_actual); + } catch (const std::exception &e) { + LOG(ERROR) << "Operator exception: " << e.what(); + } } } diff --git a/components/rgbd-sources/include/ftl/rgbd/frameset.hpp b/components/rgbd-sources/include/ftl/rgbd/frameset.hpp index 7fbb09b51b60fe54bdbf25954f062d2f66dae02e..92ac321ebd2a3915ab40a51af80ac52930f76ad0 100644 --- a/components/rgbd-sources/include/ftl/rgbd/frameset.hpp +++ b/components/rgbd-sources/include/ftl/rgbd/frameset.hpp @@ -129,7 +129,7 @@ class Builder : public Generator { std::list<FrameSet*> allocated_; // Keep memory allocations size_t head_; - ftl::rgbd::VideoCallback callback_; + ftl::rgbd::VideoCallback cb_; MUTEX mutex_; int mspf_; float latency_; @@ -156,6 +156,8 @@ class Builder : public Generator { ftl::rgbd::FrameSet *_findFrameset(int64_t ts); void _freeFrameset(ftl::rgbd::FrameSet *); + void _schedule(); + void _recordStats(float fps, float latency); }; diff --git a/components/rgbd-sources/src/frame.cpp b/components/rgbd-sources/src/frame.cpp index faaf78cf32993431a55b55abfd14416badbdcf91..bf40b5a50f7ac08b46a56e42a5222aab595a9d37 100644 --- a/components/rgbd-sources/src/frame.cpp +++ b/components/rgbd-sources/src/frame.cpp @@ -37,12 +37,14 @@ const cv::cuda::GpuMat &VideoData::as<cv::cuda::GpuMat>() const { template <> cv::Mat &VideoData::make<cv::Mat>() { isgpu = false; + encoded.clear(); return host; } template <> cv::cuda::GpuMat &VideoData::make<cv::cuda::GpuMat>() { isgpu = true; + encoded.clear(); return gpu; } diff --git a/components/rgbd-sources/src/frameset.cpp b/components/rgbd-sources/src/frameset.cpp index e98b5677cb48004c181c614ae31aed310460ed4a..48da2b5dc34b0399cc640485781c19b0d2e429b4 100644 --- a/components/rgbd-sources/src/frameset.cpp +++ b/components/rgbd-sources/src/frameset.cpp @@ -83,45 +83,6 @@ Builder::~Builder() { } } - -/*void Builder::push(int64_t timestamp, size_t ix, ftl::rgbd::Frame &frame) { - if (timestamp <= 0 || ix >= kMaxFramesInSet) return; - - UNIQUE_LOCK(mutex_, lk); - - //LOG(INFO) << "BUILDER PUSH: " << timestamp << ", " << ix << ", " << size_; - - // Size is determined by largest frame index received... note that size - // cannot therefore reduce. - if (ix >= size_) { - size_ = ix+1; - states_.resize(size_); - } - states_[ix] = frame.origin(); - - auto *fs = _findFrameset(timestamp); - - if (!fs) { - // Add new frameset - fs = _addFrameset(timestamp); - if (!fs) return; - } - - if (fs->frames.size() < size_) fs->frames.resize(size_); - - lk.unlock(); - SHARED_LOCK(fs->mtx, lk2); - - frame.swapTo(ftl::codecs::kAllChannels, fs->frames[ix]); - - ++fs->count; - - if (fs->mask & (1 << ix)) { - LOG(ERROR) << "Too many frames received for given timestamp: " << timestamp << " (source " << ix << ")"; - } - fs->mask |= (1 << ix); -}*/ - ftl::rgbd::Frame &Builder::get(int64_t timestamp, size_t ix) { if (timestamp <= 0 || ix >= kMaxFramesInSet) throw ftl::exception("Invalid frame timestamp or index"); @@ -156,37 +117,92 @@ ftl::rgbd::Frame &Builder::get(int64_t timestamp, size_t ix) { } void Builder::completed(int64_t ts, size_t ix) { - auto *fs = _findFrameset(ts); + ftl::rgbd::FrameSet *fs = nullptr; + + { + UNIQUE_LOCK(mutex_, lk); + fs = _findFrameset(ts); + } + if (fs && ix < fs->frames.size()) { - if (fs->mask & (1 << ix)) { - LOG(WARNING) << "Frame completed multiple times: " << ts << " (source " << ix << ")"; - return; + { + UNIQUE_LOCK(fs->mtx, lk2); + + // If already completed for given frame, then skip + if (fs->mask & (1 << ix)) return; + + states_[ix] = fs->frames[ix].origin(); + fs->mask |= (1 << ix); + ++fs->count; + } + + if (!fs->stale && static_cast<unsigned int>(fs->count) >= size_) { + //LOG(INFO) << "Frameset ready... " << fs->timestamp; + UNIQUE_LOCK(mutex_, lk); + _schedule(); } - states_[ix] = fs->frames[ix].origin(); - fs->mask |= (1 << ix); - ++fs->count; } else { LOG(ERROR) << "Completing frame that does not exist: " << ts << ":" << ix; } } +void Builder::_schedule() { + if (size_ == 0) return; + ftl::rgbd::FrameSet *fs = nullptr; + + //UNIQUE_LOCK(mutex_, lk); + if (jobs_ > 0) return; + fs = _getFrameset(); + + //LOG(INFO) << "Latency for " << name_ << " = " << (latency_*ftl::timer::getInterval()) << "ms"; + + if (fs) { + //UNIQUE_LOCK(fs->mtx, lk2); + // The buffers are invalid after callback so mark stale + fs->stale = true; + jobs_++; + //lk.unlock(); + + ftl::pool.push([this,fs](int) { + UNIQUE_LOCK(fs->mtx, lk2); + try { + if (cb_) cb_(*fs); + //LOG(INFO) << "Frameset processed (" << name_ << "): " << fs->timestamp; + } catch(std::exception &e) { + LOG(ERROR) << "Exception in frameset builder: " << e.what(); + } + + //fs->resetFull(); + + UNIQUE_LOCK(mutex_, lk); + _freeFrameset(fs); + + jobs_--; + _schedule(); + }); + } + +} + size_t Builder::size() { return size_; } void Builder::onFrameSet(const std::function<bool(ftl::rgbd::FrameSet &)> &cb) { - if (!cb) { + /*if (!cb) { main_id_.cancel(); return; - } + }*/ + + cb_ = cb; - if (main_id_.id() != -1) { + /*if (main_id_.id() != -1) { main_id_.cancel(); - } + }*/ // 3. Issue IO retrieve ad compute jobs before finding a valid // frame at required latency to pass to callback. - main_id_ = ftl::timer::add(ftl::timer::kTimerMain, [this,cb](int64_t ts) { + /*main_id_ = ftl::timer::add(ftl::timer::kTimerMain, [this,cb](int64_t ts) { //if (jobs_ > 0) LOG(ERROR) << "SKIPPING TIMER JOB " << ts; if (jobs_ > 0) return true; if (size_ == 0) return true; @@ -234,7 +250,7 @@ void Builder::onFrameSet(const std::function<bool(ftl::rgbd::FrameSet &)> &cb) { //if (jobs_ == 0) LOG(INFO) << "LAST JOB = Main"; return true; - }); + });*/ } ftl::rgbd::FrameState &Builder::state(size_t ix) { @@ -297,10 +313,11 @@ ftl::rgbd::FrameSet *Builder::_getFrameset() { int count = 0; // Merge all previous frames - for (; j!=framesets_.end(); j++) { + while (j!=framesets_.end()) { ++count; auto *f2 = *j; + //LOG(INFO) << "MERGE: " << f2->count; j = framesets_.erase(j); mergeFrameset(*f,*f2); _freeFrameset(f2); diff --git a/components/streams/src/netstream.cpp b/components/streams/src/netstream.cpp index cb0a648764f846dbb01c9f28cbeace88ddfd606f..f8857d5c8ec6ca6a8e65344d9438347f4855333b 100644 --- a/components/streams/src/netstream.cpp +++ b/components/streams/src/netstream.cpp @@ -80,9 +80,9 @@ bool Net::post(const ftl::codecs::StreamPacket &spkt, const ftl::codecs::Packet // Quality filter the packets if (pkt.bitrate > 0 && pkt.bitrate != client.quality) { - ++c; + //++c; LOG(INFO) << "Incorrect quality: " << (int)pkt.bitrate << " but requested " << (int)client.quality; - continue; + //continue; } try { diff --git a/components/streams/src/sender.cpp b/components/streams/src/sender.cpp index 11ababc855ca840ffcc1d5cd6f3a94eb26b10f7b..0592d24f67d68a30561c74e79ad017f82c4c129a 100644 --- a/components/streams/src/sender.cpp +++ b/components/streams/src/sender.cpp @@ -158,7 +158,7 @@ void Sender::post(const ftl::rgbd::FrameSet &fs) { const auto &packets = frame.getPackets(cc); if (packets.size() > 0) { if (packets.size() > 1) { - LOG(WARNING) << "Multi-packet send"; + LOG(WARNING) << "Multi-packet send: " << (int)cc; ftl::codecs::Packet pkt; mergeNALUnits(packets, pkt); stream_->post(spkt, pkt); @@ -256,6 +256,9 @@ void Sender::_encodeChannel(const ftl::rgbd::FrameSet &fs, Channel c, bool reset break; } + //cudaSafeCall(cudaStreamSynchronize(enc->stream())); + enc->stream().waitForCompletion(); + if (enc) { // FIXME: Timestamps may not always be aligned to interval. //if (do_inject || fs.timestamp % (10*ftl::timer::getInterval()) == 0) enc->reset(); @@ -266,7 +269,7 @@ void Sender::_encodeChannel(const ftl::rgbd::FrameSet &fs, Channel c, bool reset pkt.frame_count = count; pkt.codec = codec; pkt.definition = definition_t::Any; - pkt.bitrate = max_bitrate; + pkt.bitrate = (!lossless && ftl::codecs::isFloatChannel(c)) ? max_bitrate : max_bitrate/2; pkt.flags = 0; if (!lossless && ftl::codecs::isFloatChannel(c)) pkt.flags = ftl::codecs::kFlagFloat | ftl::codecs::kFlagMappedDepth;