diff --git a/applications/gui/src/src_window.cpp b/applications/gui/src/src_window.cpp index 8b4090b7c47849d97ad4a290b3f501f0fe4f406a..ba9c4d93bbda87e265d57214e69317ac60e06c8b 100644 --- a/applications/gui/src/src_window.cpp +++ b/applications/gui/src/src_window.cpp @@ -25,6 +25,8 @@ #include "ftl/operators/mask.hpp" #include "ftl/operators/antialiasing.hpp" #include <ftl/operators/smoothing.hpp> +#include <ftl/operators/disparity.hpp> +#include <ftl/operators/detectandtrack.hpp> #include <nlohmann/json.hpp> @@ -44,9 +46,15 @@ using std::string; using std::vector; using ftl::config::json_t; -static ftl::rgbd::Generator *createSourceGenerator(const std::vector<ftl::rgbd::Source*> &srcs) { +static ftl::rgbd::Generator *createSourceGenerator(ftl::Configurable *root, const std::vector<ftl::rgbd::Source*> &srcs) { auto *grp = new ftl::rgbd::Group(); + /*auto pipeline = ftl::config::create<ftl::operators::Graph>(root, "pipeline"); + pipeline->append<ftl::operators::DetectAndTrack>("facedetection")->value("enabled", false); + pipeline->append<ftl::operators::ArUco>("aruco")->value("enabled", false); + pipeline->append<ftl::operators::DepthChannel>("depth"); // Ensure there is a depth channel + grp->addPipeline(pipeline);*/ + for (auto s : srcs) { s->setChannel(Channel::Depth); grp->addSource(s); @@ -143,7 +151,7 @@ SourceWindow::SourceWindow(ftl::gui::Screen *screen) // Create a vector of all input RGB-Depth sources if (screen->root()->getConfig()["sources"].size() > 0) { devices = ftl::createArray<Source>(screen->root(), "sources", screen->control()->getNet()); - auto *gen = createSourceGenerator(devices); + auto *gen = createSourceGenerator(screen->root(), devices); gen->onFrameSet([this, ftl_count](ftl::rgbd::FrameSet &fs) { fs.id = ftl_count; // Set a frameset id to something unique. return _processFrameset(fs, false); @@ -209,6 +217,8 @@ void SourceWindow::_checkFrameSets(int id) { auto *p = ftl::config::create<ftl::operators::Graph>(screen_->root(), "pre_filters"); //pre_pipeline_->append<ftl::operators::HFSmoother>("hfnoise"); //pre_pipeline_->append<ftl::operators::ColourChannels>("colour"); // Convert BGR to BGRA + p->append<ftl::operators::DetectAndTrack>("facedetection")->value("enabled", false); + p->append<ftl::operators::ArUco>("aruco")->value("enabled", false); p->append<ftl::operators::CrossSupport>("cross"); p->append<ftl::operators::DiscontinuityMask>("discontinuity"); p->append<ftl::operators::CullDiscontinuity>("remove_discontinuity"); diff --git a/applications/vision/src/main.cpp b/applications/vision/src/main.cpp index 5cc9c175b6b421f53d438454c48aaa539b27daf4..fde5bf127e39999da6d3429b4ec1c6cc041b8433 100644 --- a/applications/vision/src/main.cpp +++ b/applications/vision/src/main.cpp @@ -22,6 +22,7 @@ #include <ftl/master.hpp> #include <nlohmann/json.hpp> #include <ftl/operators/disparity.hpp> +#include <ftl/operators/detectandtrack.hpp> #include <ftl/streams/netstream.hpp> #include <ftl/streams/sender.hpp> @@ -86,6 +87,8 @@ static void run(ftl::Configurable *root) { }); auto pipeline = ftl::config::create<ftl::operators::Graph>(root, "pipeline"); + pipeline->append<ftl::operators::DetectAndTrack>("facedetection")->value("enabled", false); + pipeline->append<ftl::operators::ArUco>("aruco")->value("enabled", false); pipeline->append<ftl::operators::DepthChannel>("depth"); // Ensure there is a depth channel grp->addPipeline(pipeline); diff --git a/components/operators/include/ftl/operators/detectandtrack.hpp b/components/operators/include/ftl/operators/detectandtrack.hpp index f4d9e47e3889169c7c14c4992c2baeeec0e89d54..76d478730435456a74fe5ee37db15fbc3c114fd4 100644 --- a/components/operators/include/ftl/operators/detectandtrack.hpp +++ b/components/operators/include/ftl/operators/detectandtrack.hpp @@ -44,6 +44,8 @@ class DetectAndTrack : public ftl::operators::Operator { bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t stream) override; + void wait(cudaStream_t) override; + protected: bool init(); @@ -51,6 +53,7 @@ class DetectAndTrack : public ftl::operators::Operator { bool track(const cv::Mat &im); private: + std::future<bool> job_; int createTracker(const cv::Mat &im, const cv::Rect2d &obj); @@ -114,10 +117,13 @@ class ArUco : public ftl::operators::Operator { inline Operator::Type type() const override { return Operator::Type::OneToOne; } bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t stream) override; + void wait(cudaStream_t) override; + ftl::codecs::Channel channel_in_; ftl::codecs::Channel channel_out_; private: + std::future<bool> job_; bool debug_; bool estimate_pose_; float marker_size_; diff --git a/components/operators/include/ftl/operators/operator.hpp b/components/operators/include/ftl/operators/operator.hpp index c8522cb9889b8f2c7a1de3291174917431ab4985..8dee5f7b987aba6e87364a277d53ffe1e0b2d901 100644 --- a/components/operators/include/ftl/operators/operator.hpp +++ b/components/operators/include/ftl/operators/operator.hpp @@ -46,6 +46,14 @@ class Operator { inline bool enabled() const { return enabled_; } inline void enabled(bool e) { enabled_ = e; } + /** + * In case this operator operates async, force a wait for complete when + * this is called. Useful for CPU processing in parallel to GPU. A CUDA + * stream is passed that corresponds to the pipeline that called wait, + * allowing any GPU parallel streams to insert a wait event. + */ + virtual void wait(cudaStream_t) {} + inline ftl::Configurable *config() const { return config_; } private: @@ -115,6 +123,15 @@ class Graph : public ftl::Configurable { cudaStream_t getStream() const { return stream_; } + /** + * Make sure all async operators have also completed. This is automatically + * called by `apply` so should not be needed unless an `applyAsync` is + * added in the future. The stream passed is the primary pipeline stream, + * which may be either `getStream()` or the stream argument passed to the + * `apply` method called. + */ + bool waitAll(cudaStream_t); + private: std::list<ftl::operators::detail::OperatorNode> operators_; std::map<std::string, ftl::Configurable*> configs_; diff --git a/components/operators/src/aruco.cpp b/components/operators/src/aruco.cpp index b80067108d00c5d154e207d4f14ceba697deb140..18beb6638d3772f7aae44682f60b549ec386d0dc 100644 --- a/components/operators/src/aruco.cpp +++ b/components/operators/src/aruco.cpp @@ -34,45 +34,73 @@ ArUco::ArUco(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) { bool ArUco::apply(Frame &in, Frame &out, cudaStream_t stream) { if (!in.hasChannel(channel_in_)) { return false; } - in.download(channel_in_); - Mat im = in.get<Mat>(channel_in_); - Mat K = in.getLeftCamera().getCameraMatrix(); - Mat dist = cv::Mat::zeros(cv::Size(5, 1), CV_64FC1); + Frame *inptr = ∈ + Frame *outptr = &out; - std::vector<std::vector<cv::Point2f>> corners; - std::vector<int> ids; + job_ = std::move(ftl::pool.push([this,inptr,outptr,stream](int id) { + Frame &in = *inptr; + Frame &out = *outptr; - cv::aruco::detectMarkers( im, dictionary_, - corners, ids, params_, cv::noArray(), K); + auto cvstream = cv::cuda::StreamAccessor::wrapStream(stream); + //in.download(channel_in_); - std::vector<Vec3d> rvecs; - std::vector<Vec3d> tvecs; + //Mat im = in.get<Mat>(channel_in_); + // FIXME: Use internal stream here. + Mat im; // = in.fastDownload(channel_in_, cv::cuda::Stream::Null()); + cv::cvtColor(in.fastDownload(channel_in_, cv::cuda::Stream::Null()), im, cv::COLOR_BGRA2BGR); - if (estimate_pose_) { - cv::aruco::estimatePoseSingleMarkers(corners, marker_size_, K, dist, rvecs, tvecs); - } + Mat K = in.getLeftCamera().getCameraMatrix(); + Mat dist = cv::Mat::zeros(cv::Size(5, 1), CV_64FC1); + + std::vector<std::vector<cv::Point2f>> corners; + std::vector<int> ids; + + cv::aruco::detectMarkers( im, dictionary_, + corners, ids, params_, cv::noArray(), K); + + std::vector<Vec3d> rvecs; + std::vector<Vec3d> tvecs; - vector<Transformation> result; - for (size_t i = 0; i < rvecs.size(); i++) { if (estimate_pose_) { - result.push_back(ftl::codecs::Transformation(ids[i], rvecs[i], tvecs[i])); + cv::aruco::estimatePoseSingleMarkers(corners, marker_size_, K, dist, rvecs, tvecs); } - } - out.create(channel_out_, result); + vector<Transformation> result; + for (size_t i = 0; i < rvecs.size(); i++) { + if (estimate_pose_) { + result.push_back(ftl::codecs::Transformation(ids[i], rvecs[i], tvecs[i])); + } + } - if (debug_) { - cv::aruco::drawDetectedMarkers(im, corners, ids); - if (estimate_pose_) { - for (size_t i = 0; i < rvecs.size(); i++) { - cv::aruco::drawAxis(im, K, dist, rvecs[i], tvecs[i], marker_size_); + out.create(channel_out_, result); + + if (debug_) { + cv::aruco::drawDetectedMarkers(im, corners, ids); + if (estimate_pose_) { + for (size_t i = 0; i < rvecs.size(); i++) { + cv::aruco::drawAxis(im, K, dist, rvecs[i], tvecs[i], marker_size_); + } } } - } - // TODO: should be uploaded by operator which requires data on GPU - in.upload(channel_in_); + // TODO: should be uploaded by operator which requires data on GPU + //in.upload(channel_in_); + if (debug_) { + if (in.isGPU(channel_in_)) { + cv::cvtColor(im, im, cv::COLOR_BGR2BGRA); + out.get<cv::cuda::GpuMat>(channel_in_).upload(im); + } else cv::cvtColor(im, in.get<cv::Mat>(channel_in_), cv::COLOR_BGR2BGRA); + } + return true; + })); return true; -} \ No newline at end of file +} + +void ArUco::wait(cudaStream_t s) { + if (job_.valid()) { + job_.wait(); + job_.get(); + } +} diff --git a/components/operators/src/detectandtrack.cpp b/components/operators/src/detectandtrack.cpp index 5d7c629d699b7761424785970df99b490cddb3dd..5ea48eff0cdd202aa99d6aee27b55a978453f707 100644 --- a/components/operators/src/detectandtrack.cpp +++ b/components/operators/src/detectandtrack.cpp @@ -2,6 +2,7 @@ #include "loguru.hpp" #include "ftl/operators/detectandtrack.hpp" +#include <ftl/exception.hpp> using std::string; using std::vector; @@ -22,9 +23,13 @@ DetectAndTrack::DetectAndTrack(ftl::Configurable *cfg) : ftl::operators::Operato } bool DetectAndTrack::init() { - fname_ = config()->value<string>("filename", ""); + fname_ = config()->value<string>("filename", FTL_LOCAL_DATA_ROOT "/haarcascades/haarcascade_frontalface_default.xml"); debug_ = config()->value<bool>("debug", false); + config()->on("debug", [this](const ftl::config::Event &e) { + debug_ = config()->value<bool>("debug", false); + }); + detect_n_frames_ = config()->value<int>("n_frames", 10); detect_n_frames_ = detect_n_frames_ < 0.0 ? 0.0 : detect_n_frames_; @@ -69,7 +74,7 @@ bool DetectAndTrack::init() { } channel_in_ = ftl::codecs::Channel::Colour; - channel_out_ = ftl::codecs::Channel::Data; + channel_out_ = ftl::codecs::Channel::Faces; id_max_ = 0; bool retval = false; @@ -175,46 +180,81 @@ bool DetectAndTrack::apply(Frame &in, Frame &out, cudaStream_t stream) { return false; } - in.download(channel_in_); - Mat im = in.get<Mat>(channel_in_); + Frame *inptr = ∈ + Frame *outptr = &out; - track(im); + job_ = std::move(ftl::pool.push([this,inptr,outptr,stream](int id) { + Frame &in = *inptr; + Frame &out = *outptr; - if ((n_frame_++ % detect_n_frames_ == 0) && (tracked_.size() < max_tracked_)) { - if (im.channels() == 1) { - gray_ = im; - } - else if (im.channels() == 4) { - cv::cvtColor(im, gray_, cv::COLOR_BGRA2GRAY); - } - else if (im.channels() == 3) { - cv::cvtColor(im, gray_, cv::COLOR_BGR2GRAY); + auto cvstream = cv::cuda::StreamAccessor::wrapStream(stream); + + //in.download(channel_in_); + //Mat im = in.get<Mat>(channel_in_); + Mat im; // TODO: Keep this as an internal buffer? Perhaps page locked. + + // FIXME: Use internal stream here. + cv::cvtColor(in.fastDownload(channel_in_, cv::cuda::Stream::Null()), im, cv::COLOR_BGRA2BGR); + + if (im.empty()) { + throw FTL_Error("Empty image in face detection"); } - else { - LOG(ERROR) << "unsupported number of channels in input image"; - return false; + + //cv::cvtColor(im, im, cv::COLOR_BGRA2BGR); + + track(im); + + if ((n_frame_++ % detect_n_frames_ == 0) && (tracked_.size() < max_tracked_)) { + if (im.channels() == 1) { + gray_ = im; + } + else if (im.channels() == 4) { + cv::cvtColor(im, gray_, cv::COLOR_BGRA2GRAY); + } + else if (im.channels() == 3) { + cv::cvtColor(im, gray_, cv::COLOR_BGR2GRAY); + } + else { + LOG(ERROR) << "unsupported number of channels in input image"; + return false; + } + + detect(gray_); } - detect(gray_); - } + std::vector<Rect2d> result; + result.reserve(tracked_.size()); + for (auto const &tracked : tracked_) { + result.push_back(tracked.object); - std::vector<Rect2d> result; - result.reserve(tracked_.size()); - for (auto const &tracked : tracked_) { - result.push_back(tracked.object); + if (debug_) { + cv::putText(im, "#" + std::to_string(tracked.id), + Point2i(tracked.object.x+5, tracked.object.y+tracked.object.height-5), + cv::FONT_HERSHEY_COMPLEX_SMALL, 1.0, cv::Scalar(0,0,255)); + + cv::rectangle(im, tracked.object, cv::Scalar(0, 0, 255), 1); + } + } + out.create(channel_out_, result); + //in.upload(channel_in_); + // FIXME: This is a bad idea. if (debug_) { - cv::putText(im, "#" + std::to_string(tracked.id), - Point2i(tracked.object.x+5, tracked.object.y+tracked.object.height-5), - cv::FONT_HERSHEY_COMPLEX_SMALL, 1.0, cv::Scalar(0,0,255)); - - cv::rectangle(im, tracked.object, cv::Scalar(0, 0, 255), 1); + if (in.isGPU(channel_in_)) { + cv::cvtColor(im, im, cv::COLOR_BGR2BGRA); + out.get<cv::cuda::GpuMat>(channel_in_).upload(im); + } else cv::cvtColor(im, in.get<cv::Mat>(channel_in_), cv::COLOR_BGR2BGRA); } - } - out.create(channel_out_, result); - // TODO: should be uploaded by operator which requires data on GPU - in.upload(channel_in_); + return true; + })); return true; } + +void DetectAndTrack::wait(cudaStream_t s) { + if (job_.valid()) { + job_.wait(); + job_.get(); + } +} diff --git a/components/operators/src/operator.cpp b/components/operators/src/operator.cpp index 9b87fe1d0ae0b0312852bf19b90d62c94d247de3..89a4c34c6f377524ba6e8577585dd66951e0df5a 100644 --- a/components/operators/src/operator.cpp +++ b/components/operators/src/operator.cpp @@ -45,6 +45,7 @@ bool Graph::apply(FrameSet &in, FrameSet &out, cudaStream_t stream) { if (!value("enabled", true)) return false; auto stream_actual = (stream == 0) ? stream_ : stream; + bool success = true; if (in.frames.size() != out.frames.size()) return false; @@ -70,11 +71,12 @@ bool Graph::apply(FrameSet &in, FrameSet &out, cudaStream_t stream) { instance->apply(in.frames[j], out.frames[j], stream_actual); } catch (const std::exception &e) { LOG(ERROR) << "Operator exception for '" << instance->config()->getID() << "': " << e.what(); - cudaSafeCall(cudaStreamSynchronize(stream_actual)); - return false; + success = false; + break; } } } + if (!success) break; } else if (i.instances[0]->type() == Operator::Type::ManyToMany) { auto *instance = i.instances[0]; @@ -83,18 +85,33 @@ bool Graph::apply(FrameSet &in, FrameSet &out, cudaStream_t stream) { instance->apply(in, out, stream_actual); } catch (const std::exception &e) { LOG(ERROR) << "Operator exception for '" << instance->config()->getID() << "': " << e.what(); - cudaSafeCall(cudaStreamSynchronize(stream_actual)); - return false; + success = false; + break; } } } } + success = waitAll(stream_actual) && success; + if (stream == 0) { cudaSafeCall(cudaStreamSynchronize(stream_actual)); - //cudaSafeCall( cudaGetLastError() ); } + return success; +} + +bool Graph::waitAll(cudaStream_t stream) { + for (auto &i : operators_) { + for (auto *j : i.instances) { + try { + j->wait(stream); + } catch (std::exception &e) { + LOG(ERROR) << "Operator exception for '" << j->config()->getID() << "': " << e.what(); + return false; + } + } + } return true; } @@ -102,6 +119,7 @@ bool Graph::apply(Frame &in, Frame &out, cudaStream_t stream) { if (!value("enabled", true)) return false; auto stream_actual = (stream == 0) ? stream_ : stream; + bool success = true; for (auto &i : operators_) { // Make sure there are enough instances @@ -116,18 +134,19 @@ bool Graph::apply(Frame &in, Frame &out, cudaStream_t stream) { instance->apply(in, out, stream_actual); } catch (const std::exception &e) { LOG(ERROR) << "Operator exception for '" << instance->config()->getID() << "': " << e.what(); - cudaSafeCall(cudaStreamSynchronize(stream_actual)); - return false; + success = false; + break; } } } + success = waitAll(stream_actual) && success; + if (stream == 0) { - cudaStreamSynchronize(stream_actual); - cudaSafeCall( cudaGetLastError() ); + cudaSafeCall(cudaStreamSynchronize(stream_actual)); } - return true; + return success; } ftl::Configurable *Graph::_append(ftl::operators::detail::ConstructionHelperBase *m) { diff --git a/components/rgbd-sources/include/ftl/rgbd/frame.hpp b/components/rgbd-sources/include/ftl/rgbd/frame.hpp index a2e7813bc4b7671ed71723f5a44a086901574484..652bd244151343fd2172dbe04e77c9dfae7133f6 100644 --- a/components/rgbd-sources/include/ftl/rgbd/frame.hpp +++ b/components/rgbd-sources/include/ftl/rgbd/frame.hpp @@ -35,6 +35,7 @@ struct VideoData { cv::cuda::GpuMat gpu; cv::Mat host; bool isgpu; + bool validhost; std::list<ftl::codecs::Packet> encoded; template <typename T> @@ -53,6 +54,7 @@ struct VideoData { }; inline void reset() { + validhost = false; encoded.clear(); } }; @@ -90,6 +92,15 @@ public: inline void download(const ftl::codecs::Channels<0> &c, cudaStream_t stream=0) { download(c, cv::cuda::StreamAccessor::wrapStream(stream)); }; inline void upload(const ftl::codecs::Channels<0> &c, cudaStream_t stream=0) { upload(c, cv::cuda::StreamAccessor::wrapStream(stream)); }; + /** + * Special case optional download. If a host memory version still exists, + * use that. Only download if no host version exists. This assumes that + * the GPU version has not been modified since the host version was created, + * in otherwords that both version are still the same. It also does not + * actually mark the channel as downloaded. + */ + cv::Mat &fastDownload(ftl::codecs::Channel c, cv::cuda::Stream stream); + /** * Get an existing CUDA texture object. */ diff --git a/components/rgbd-sources/src/frame.cpp b/components/rgbd-sources/src/frame.cpp index bf40b5a50f7ac08b46a56e42a5222aab595a9d37..c5200dbca2e3988400155a6708dfda0c449b106f 100644 --- a/components/rgbd-sources/src/frame.cpp +++ b/components/rgbd-sources/src/frame.cpp @@ -36,6 +36,7 @@ const cv::cuda::GpuMat &VideoData::as<cv::cuda::GpuMat>() const { template <> cv::Mat &VideoData::make<cv::Mat>() { + validhost = true; isgpu = false; encoded.clear(); return host; @@ -83,6 +84,7 @@ void Frame::download(Channels<0> c, cv::cuda::Stream stream) { for (size_t i=0u; i<Channels<0>::kMax; ++i) { if (c.has(i) && hasChannel(static_cast<Channel>(i)) && isGPU(static_cast<Channel>(i))) { auto &data = getData(static_cast<Channel>(i)); + data.validhost = true; data.gpu.download(data.host, stream); data.isgpu = false; } @@ -99,6 +101,21 @@ void Frame::upload(Channels<0> c, cv::cuda::Stream stream) { } } +cv::Mat &Frame::fastDownload(ftl::codecs::Channel c, cv::cuda::Stream stream) { + if (hasChannel(c)) { + auto &data = getData(static_cast<Channel>(c)); + if (!data.isgpu) return data.host; + + if (data.validhost && !data.host.empty()) return data.host; + + // TODO: Perhaps allocated page locked here? + data.gpu.download(data.host, stream); + data.validhost = true; + return data.host; + } + throw FTL_Error("Fast download channel does not exist: " << (int)c); +} + void Frame::pushPacket(ftl::codecs::Channel c, ftl::codecs::Packet &pkt) { if (hasChannel(c)) { auto &m1 = getData(c); diff --git a/components/rgbd-sources/src/sources/stereovideo/stereovideo.cpp b/components/rgbd-sources/src/sources/stereovideo/stereovideo.cpp index 141070f3917cd476fa09485cc268760e0bc62269..05e1221d36bc86bff88154567fd7afbbf34db91b 100644 --- a/components/rgbd-sources/src/sources/stereovideo/stereovideo.cpp +++ b/components/rgbd-sources/src/sources/stereovideo/stereovideo.cpp @@ -20,7 +20,6 @@ #include "ftl/operators/segmentation.hpp" #include "ftl/operators/disparity.hpp" #include "ftl/operators/mask.hpp" -#include "ftl/operators/detectandtrack.hpp" #include "ftl/threads.hpp" #include "calibrate.hpp" @@ -82,8 +81,6 @@ void StereoVideoSource::init(const string &file) { #ifdef HAVE_OPTFLOW pipeline_input_->append<ftl::operators::NVOpticalFlow>("optflow", Channel::Colour, Channel::Flow); #endif - pipeline_input_->append<ftl::operators::DetectAndTrack>("facedetection")->value("enabled", false); - pipeline_input_->append<ftl::operators::ArUco>("aruco")->value("enabled", false); pipeline_input_->append<ftl::operators::ColourChannels>("colour"); calib_ = ftl::create<Calibrate>(host_, "calibration", cv::Size(lsrc_->fullWidth(), lsrc_->fullHeight()), stream_);