diff --git a/applications/gui2/src/modules/calibration/extrinsic.cpp b/applications/gui2/src/modules/calibration/extrinsic.cpp index f5b35eed4fd85100e89adf31d1555cfb894ec7b5..916ce681f835d3fad75a865fd56681b0ed63cf2f 100644 --- a/applications/gui2/src/modules/calibration/extrinsic.cpp +++ b/applications/gui2/src/modules/calibration/extrinsic.cpp @@ -73,16 +73,17 @@ void ExtrinsicCalibration::start(unsigned int fsid, std::vector<FrameID> sources // stereo calibration auto cl = CameraID(id.frameset(), id.source(), Channel::Left); auto cr = CameraID(id.frameset(), id.source(), Channel::Right); - const auto& frame = (*fs_current_)[id.source()].cast<ftl::rgbd::Frame>(); - auto sz = cv::Size((int) frame.getLeftCamera().width, (int) frame.getLeftCamera().height); state_.cameras.push_back(cl); state_.cameras.push_back(cr); + + const auto& frame = (*fs_current_)[id.source()].cast<ftl::rgbd::Frame>(); + // NOTE: assumes left size is the same as right size! + auto sz = frame.getSize(); auto calibl = getCalibration(cl); calibl.intrinsic = CalibrationData::Intrinsic(calibl.intrinsic, sz); + auto calibr = getCalibration(cr); calibr.intrinsic = CalibrationData::Intrinsic(calibr.intrinsic, sz); - - // Scale intrinsics state_.calib.addStereoCamera(calibl, calibr); // Update rectification diff --git a/applications/gui2/src/modules/calibration/intrinsic.cpp b/applications/gui2/src/modules/calibration/intrinsic.cpp index 2fa2e9c54fcf996b95994d07a8b055fd9010473f..b91f74f56cf6eacc273368b18bc3332c9f625f3d 100644 --- a/applications/gui2/src/modules/calibration/intrinsic.cpp +++ b/applications/gui2/src/modules/calibration/intrinsic.cpp @@ -112,22 +112,10 @@ void IntrinsicCalibration::setChannel_(FrameSetPtr fs) { cv::Size size; if (state_->channel== Channel::Left) { - if(frame.has(Channel::LeftHighRes)) { - state_->channel_alt = Channel::LeftHighRes; - size = frame.get<cv::Mat>(state_->channel_alt).size(); - } - else { - size = frame.get<cv::Mat>(state_->channel_alt).size(); - } + size = frame.get<cv::Mat>(state_->channel_alt).size(); } else if (state_->channel== Channel::Right) { - if (frame.has(Channel::RightHighRes)) { - state_->channel_alt = Channel::RightHighRes; - size = frame.get<cv::Mat>(Channel::LeftHighRes).size(); - } - else { - size = frame.get<cv::Mat>(Channel::Left).size(); - } + size = frame.get<cv::Mat>(Channel::Left).size(); } try { diff --git a/applications/gui2/src/modules/calibration/stereo.cpp b/applications/gui2/src/modules/calibration/stereo.cpp index 052bf15572b0e97790cfbfed8ac7c4cf90f96230..148f8abcaa4d484201ec238a4440d01b59ca7700 100644 --- a/applications/gui2/src/modules/calibration/stereo.cpp +++ b/applications/gui2/src/modules/calibration/stereo.cpp @@ -110,7 +110,7 @@ void StereoCalibration::start(ftl::data::FrameID id) { try { auto& frame = (*fs)[state_->id.source()]; state_->calib = frame.get<CalibrationData>(Channel::CalibrationData); - state_->highres = frame.hasAll({Channel::LeftHighRes, Channel::RightHighRes}); + state_->highres = false; // TODO: Remove auto sizel = frame.get<cv::cuda::GpuMat>(channelLeft_()).size(); auto sizer = frame.get<cv::cuda::GpuMat>(channelLeft_()).size(); if (sizel != sizer) { @@ -251,18 +251,18 @@ ftl::rgbd::Frame& StereoCalibration::frame_() { } bool StereoCalibration::hasFrame() { - auto cleft = state_->highres ? Channel::LeftHighRes : Channel::Left; - auto cright = state_->highres ? Channel::RightHighRes : Channel::Right; + auto cleft = Channel::Left; + auto cright = Channel::Right; return (std::atomic_load(&fs_update_).get() != nullptr) && fs_update_->frames[state_->id.source()].hasAll({cleft, cright}); }; Channel StereoCalibration::channelLeft_() { - return (state_->highres ? Channel::LeftHighRes : Channel::Left); + return Channel::Left; } Channel StereoCalibration::channelRight_() { - return (state_->highres ? Channel::RightHighRes : Channel::Right); + return Channel::Right; } cv::cuda::GpuMat StereoCalibration::getLeft() { diff --git a/applications/gui2/src/modules/camera.cpp b/applications/gui2/src/modules/camera.cpp index bbe19415bead60124c157f4c21a20f7d33864b13..4219d7004e08fd895de8a75111b2ea262cf5e9e9 100644 --- a/applications/gui2/src/modules/camera.cpp +++ b/applications/gui2/src/modules/camera.cpp @@ -77,9 +77,12 @@ void Camera::update(double delta) { if (frame.has(Channel::Calibration)) { const auto &cam = rgbdf.getLeft(); + cv::Size s = rgbdf.getSize(); auto &jcam = mod->getJSON(StatisticsPanel::CAMERA_DETAILS); - jcam["Resolution"] = std::to_string(cam.width) + std::string("x") + std::to_string(cam.height); + jcam["D-Resolution"] = std::to_string(cam.width) + std::string("x") + std::to_string(cam.height); + jcam["C-Resolution"] = std::to_string(s.width) + std::string("x") + std::to_string(s.height); jcam["Focal"] = cam.fx; + jcam["Baseline"] = cam.baseline; jcam["Principle"] = std::to_string(int(cam.cx)) + std::string(",") + std::to_string(int(cam.cy)); } diff --git a/applications/gui2/src/views/camera.cpp b/applications/gui2/src/views/camera.cpp index 016ce424c9c6849620be0ed7a57637645f638da7..f63455ab930911d8634be7b649e7fffe3bd42ddc 100644 --- a/applications/gui2/src/views/camera.cpp +++ b/applications/gui2/src/views/camera.cpp @@ -83,9 +83,7 @@ RecordOptions::RecordOptions(nanogui::Widget *parent, Camera* ctrl) case Channel::Colour : case Channel::Colour2 : case Channel::Depth : - case Channel::Depth2 : - case Channel::ColourHighRes : - case Channel::Colour2HighRes : break; + case Channel::Depth2 : break; default: continue; } } diff --git a/applications/reconstruct/src/reconstruction.cpp b/applications/reconstruct/src/reconstruction.cpp index c55f1f5f1676a22cf1153078de48144e0d0a4b13..a0c69b754f5bdd5b7e3e6d463e42eaa94ed87d0d 100644 --- a/applications/reconstruct/src/reconstruction.cpp +++ b/applications/reconstruct/src/reconstruction.cpp @@ -28,8 +28,6 @@ Reconstruction::Reconstruction(nlohmann::json &config, const std::string name) : pipeline_->append<ftl::operators::DisparityToDepth>("calculate_depth")->value("enabled", false); pipeline_->append<ftl::operators::ColourChannels>("colour"); // Convert BGR to BGRA pipeline_->append<ftl::operators::ClipScene>("clipping")->value("enabled", false); - pipeline_->append<ftl::operators::DetectAndTrack>("facedetection")->value("enabled", false); - pipeline_->append<ftl::operators::ArUco>("aruco")->value("enabled", false); //pipeline_->append<ftl::operators::HFSmoother>("hfnoise"); // Remove high-frequency noise pipeline_->append<ftl::operators::Normals>("normals"); // Estimate surface normals //pipeline_->append<ftl::operators::SmoothChannel>("smoothing"); // Generate a smoothing channel @@ -43,7 +41,8 @@ Reconstruction::Reconstruction(nlohmann::json &config, const std::string name) : pipeline_->append<ftl::operators::VisCrossSupport>("viscross")->value("enabled", false); pipeline_->append<ftl::operators::MultiViewMLS>("mvmls"); pipeline_->append<ftl::operators::Poser>("poser")->value("enabled", false); - + pipeline_->append<ftl::operators::DetectAndTrack>("facedetection")->value("enabled", false); + pipeline_->append<ftl::operators::ArUco>("aruco")->value("enabled", false); //pipeline_->set("enabled", false); } @@ -75,7 +74,7 @@ bool Reconstruction::post(ftl::rgbd::FrameSet &fs) { /*for (size_t i=0; i<fs.frames.size(); ++i) { fs.frames[i].create<cv::cuda::GpuMat>(Channel::Depth); }*/ - + { //UNIQUE_LOCK(exchange_mtx_, lk); //if (new_frame_ == true) LOG(WARNING) << "Frame lost"; diff --git a/applications/reconstruct2/src/main.cpp b/applications/reconstruct2/src/main.cpp index 9e8961f8b1450c8a1f0fd1df827ce9e38776e7df..4fe250a905eb0105a5bd1a1e94fe7fbe67785697 100644 --- a/applications/reconstruct2/src/main.cpp +++ b/applications/reconstruct2/src/main.cpp @@ -66,8 +66,6 @@ static void run(ftl::Configurable *root) { pipeline->append<ftl::operators::DisparityToDepth>("calculate_depth")->value("enabled", false); pipeline->append<ftl::operators::ColourChannels>("colour"); // Convert BGR to BGRA pipeline->append<ftl::operators::ClipScene>("clipping")->value("enabled", false); - pipeline->append<ftl::operators::DetectAndTrack>("facedetection")->value("enabled", false); - pipeline->append<ftl::operators::ArUco>("aruco")->value("enabled", false); //pipeline_->append<ftl::operators::HFSmoother>("hfnoise"); // Remove high-frequency noise pipeline->append<ftl::operators::Normals>("normals"); // Estimate surface normals //pipeline_->append<ftl::operators::SmoothChannel>("smoothing"); // Generate a smoothing channel @@ -81,6 +79,8 @@ static void run(ftl::Configurable *root) { pipeline->append<ftl::operators::VisCrossSupport>("viscross")->value("enabled", false); pipeline->append<ftl::operators::MultiViewMLS>("mvmls"); pipeline->append<ftl::operators::Poser>("poser")->value("enabled", false); + pipeline->append<ftl::operators::DetectAndTrack>("facedetection")->value("enabled", false); + pipeline->append<ftl::operators::ArUco>("aruco")->value("enabled", false); }); // Add sources here @@ -118,7 +118,7 @@ static void run(ftl::Configurable *root) { });*/ auto *filter = feed->filter({Channel::Colour, Channel::Depth, Channel::AudioStereo}); - + //feed->lowLatencyMode(); feed->startStreaming(filter); diff --git a/applications/tools/middlebury_gen/src/main.cpp b/applications/tools/middlebury_gen/src/main.cpp index c1b71e5680f6034dcc0d98274217ca5197508bc4..db7e380ba9aa89af9ad3561bb6420e4460cff4b1 100644 --- a/applications/tools/middlebury_gen/src/main.cpp +++ b/applications/tools/middlebury_gen/src/main.cpp @@ -219,7 +219,7 @@ int main(int argc, char **argv) { ftl::rgbd::Frame &frame = dframe.cast<ftl::rgbd::Frame>(); frame.store(); - ftl::operators::DisparityToDepth disp2depth(ftl::create<ftl::Configurable>(root, "disparity")); + ftl::operators::DisparityToDepth disp2depth(nullptr, ftl::create<ftl::Configurable>(root, "disparity")); ftl::codecs::OpenCVEncoder encoder(ftl::codecs::definition_t::Any, ftl::codecs::definition_t::Any); diff --git a/applications/vision/src/main.cpp b/applications/vision/src/main.cpp index 93c7043e40785e0445a06ed2d28f78b32e6e6f28..a8c8d9006ea622cad8463c3ed0c45e986fe2f1c9 100644 --- a/applications/vision/src/main.cpp +++ b/applications/vision/src/main.cpp @@ -106,9 +106,9 @@ static void run(ftl::Configurable *root) { //LOG(INFO) << "LATENCY: " << float(latency)/1000.0f << "ms"; if (clock_adjust != 0) { - LOG(INFO) << "Clock adjustment: " << clock_adjust << ", latency=" << float(latency)/1000.0f << "ms"; + LOG(INFO) << "Clock adjustment: " << clock_adjust << ", latency=" << float(latency)/2000.0f << "ms"; ftl::timer::setClockAdjustment(clock_adjust); - } + } }); } catch (const std::exception &e) { LOG(ERROR) << "Ping failed, could not time sync: " << e.what(); @@ -142,7 +142,7 @@ static void run(ftl::Configurable *root) { } Source *source = nullptr; source = ftl::create<Source>(root, "source"); - + ftl::stream::Sender *sender = ftl::create<ftl::stream::Sender>(root, "sender"); ftl::stream::Net *outstream = ftl::create<ftl::stream::Net>(root, "stream", net); outstream->set("uri", root->value("uri", outstream->getID())); @@ -189,18 +189,12 @@ static void run(ftl::Configurable *root) { auto *pipeline = ftl::config::create<ftl::operators::Graph>(root, "pipeline"); pipeline->append<ftl::operators::DetectAndTrack>("facedetection")->value("enabled", false); - pipeline->append<ftl::operators::ArUco>("aruco")->value("enabled", false); pipeline->append<ftl::operators::DepthChannel>("depth"); // Ensure there is a depth channel - pipeline->append<ftl::operators::ClipScene>("clipping")->value("enabled", false); - - pipeline->restore("vision_pipeline", { - "clipping" - }); - - std::atomic_flag busy; - busy.clear(); + //pipeline->append<ftl::operators::ClipScene>("clipping")->value("enabled", false); + pipeline->restore("vision_pipeline", { "clipping" }); + pipeline->append<ftl::operators::ArUco>("aruco")->value("enabled", false); - auto h = creator->onFrameSet([sender,outstream,&stats_count,&latency,&frames,&stats_time,pipeline,&busy,&encodable,&previous_encodable](const ftl::data::FrameSetPtr &fs) { + auto h = creator->onFrameSet([sender,outstream,&stats_count,&latency,&frames,&stats_time,pipeline,&encodable,&previous_encodable](const ftl::data::FrameSetPtr &fs) { // Decide what to encode here, based upon what remote users select const auto sel = outstream->selectedNoExcept(fs->frameset()); @@ -222,21 +216,30 @@ static void run(ftl::Configurable *root) { fs->set(ftl::data::FSFlag::AUTO_SEND); - // Do all processing in another thread... - ftl::pool.push([sender,&stats_count,&latency,&frames,&stats_time,pipeline,&busy,fs](int id) mutable { - if (busy.test_and_set()) { - LOG(WARNING) << "Depth pipeline drop: " << fs->timestamp(); - fs->firstFrame().message(ftl::data::Message::Warning_PIPELINE_DROP, "Depth pipeline drop"); - return; - } - pipeline->apply(*fs, *fs); - busy.clear(); - + bool did_pipe = pipeline->apply(*fs, *fs, [fs,&frames,&latency]() { + if (fs->hasAnyChanged(Channel::Depth)) fs->flush(Channel::Depth); ++frames; latency += float(ftl::timer::get_time() - fs->timestamp()); + const_cast<ftl::data::FrameSetPtr&>(fs).reset(); + }); + + if (!did_pipe) { + LOG(WARNING) << "Depth pipeline drop: " << fs->timestamp(); + fs->firstFrame().message(ftl::data::Message::Warning_PIPELINE_DROP, "Depth pipeline drop"); + } + + + // Do some encoding (eg. colour) whilst pipeline runs + ftl::pool.push([fs,&stats_count,&latency,&frames,&stats_time](int id){ + if (fs->hasAnyChanged(Channel::Audio)) { + fs->flush(ftl::codecs::Channel::Audio); + } + + // Make sure upload has completed. + cudaSafeCall(cudaEventSynchronize(fs->frames[0].uploadEvent())); + // TODO: Try depth pipeline again here if failed first time. + fs->flush(ftl::codecs::Channel::Colour); - // Destruct frameset as soon as possible to send the data... - if (fs->hasAnyChanged(Channel::Depth)) fs->flush(Channel::Depth); const_cast<ftl::data::FrameSetPtr&>(fs).reset(); if (!quiet && --stats_count <= 0) { @@ -252,19 +255,14 @@ static void run(ftl::Configurable *root) { } }); - // Lock colour right now to encode in parallel, same for audio - ftl::pool.push([fs](int id){ fs->flush(ftl::codecs::Channel::Colour); }); - - if (fs->hasAnyChanged(Channel::Audio)) { - ftl::pool.push([fs](int id){ fs->flush(ftl::codecs::Channel::Audio); }); - } + const_cast<ftl::data::FrameSetPtr&>(fs).reset(); return true; }); // Start the timed generation of frames creator->start(); - + // Only now start listening for connections net->start(); @@ -274,7 +272,7 @@ static void run(ftl::Configurable *root) { ctrl.stop(); ftl::config::save(); - + net->shutdown(); ftl::pool.stop(); @@ -321,7 +319,7 @@ int main(int argc, char **argv) { // Use other GPU if available. //ftl::cuda::setDevice(ftl::cuda::deviceCount()-1); - + std::cout << "Loading..." << std::endl; run(root); diff --git a/components/calibration/include/ftl/calibration/optimize.hpp b/components/calibration/include/ftl/calibration/optimize.hpp index a92f0354d471ae0df6192c8ecc92879af2b4b54a..e6bb0b01173bc00e8ae6a44eac0057868c329491 100644 --- a/components/calibration/include/ftl/calibration/optimize.hpp +++ b/components/calibration/include/ftl/calibration/optimize.hpp @@ -219,6 +219,8 @@ public: /**/ int removeObservations(double threshold); + std::vector<cv::Point3d> points(); + protected: double* getCameraPtr(int i) { return cameras_.at(i)->data; } diff --git a/components/calibration/src/extrinsic.cpp b/components/calibration/src/extrinsic.cpp index 25c79e4ff7a7303a1986a96f87eed5f1c6c63613..5d5148b67732185ccf11580a16d12a38e8657043 100644 --- a/components/calibration/src/extrinsic.cpp +++ b/components/calibration/src/extrinsic.cpp @@ -462,7 +462,16 @@ void ExtrinsicCalibration::triangulate(unsigned int c1, unsigned int c2) { // implements least squares method described in H&Z p312 cv::triangulatePoints(P1, P2, pts1u, pts2u, out); // scalePoints() converts to non-homogenous coordinates and estimates scale - scalePoints(points().getObject(0), out, pointsw); + LOG(INFO) << "new scale: " << scalePoints(points().getObject(0), out, pointsw); + + /*for (int col = 0; col < out.cols; col++) { + CHECK_NE(out.at<double>(3, col), 0); + cv::Point3d p = cv::Point3d(out.at<double>(0, col), + out.at<double>(1, col), + out.at<double>(2, col)) + / out.at<double>(3, col); + pointsw.push_back(p); + }*/ points().setTriangulatedPoints(c1, c2, pointsw); } @@ -723,7 +732,7 @@ double ExtrinsicCalibration::optimize() { // TODO: desgin better check if (cv::norm(absdiff(px, py, pz)) > threshold_bad_) { n_points_bad++; - continue; + //continue; } ba.addPoint(vis, obs, p); @@ -760,7 +769,8 @@ double ExtrinsicCalibration::optimize() { for (const auto& t : prune_observations_) { n_removed += ba.removeObservations(t); if (float(n_removed)/float(n_points) > threhsold_warning_) { - LOG(WARNING) << "significant number of observations removed"; + LOG(WARNING) << "significant number (" << n_removed << " of " + << n_points << ") of observations removed"; break; } else { @@ -772,12 +782,17 @@ double ExtrinsicCalibration::optimize() { calib_optimized_.resize(calib_.size()); rmse_.resize(calib_.size()); + auto points_optimized = ba.points(); + double scale = optimizeScale(points_.getObject(0), points_optimized); + LOG(INFO) << "scale: " << scale; + for (unsigned int i = 0; i < cameras.size(); i++) { rmse_[i] = ba.reprojectionError(i); auto intr = cameras[i].intrinsic(); calib_optimized_[i] = calib_[i]; calib_optimized_[i].intrinsic.set(intr.matrix(), intr.distCoeffs.Mat(), intr.resolution); calib_optimized_[i].extrinsic.set(cameras[i].rvec(), cameras[i].tvec()); + calib_optimized_[i].extrinsic.tvec *= scale; } rmse_total_ = ba.reprojectionError(); diff --git a/components/calibration/src/optimize.cpp b/components/calibration/src/optimize.cpp index 73788380cb14d09f0eaa50d4791a5e52b6b25a6e..751fd546f67f1ece6d63497adffa9466f54a4e71 100644 --- a/components/calibration/src/optimize.cpp +++ b/components/calibration/src/optimize.cpp @@ -334,7 +334,7 @@ struct ScaleError { double ftl::calibration::optimizeScale(const vector<Point3d> &object_points, vector<Point3d> &points) { - // use exceptions instead + // throw exception instead CHECK_EQ(points.size() % object_points.size(), 0); CHECK_EQ(points.size() % 2, 0); @@ -684,6 +684,13 @@ int BundleAdjustment::removeObservations(double threshold) { return removed; } +std::vector<cv::Point3d> BundleAdjustment::points() { + std::vector<cv::Point3d> pts; + pts.reserve(points_.size()); + for (const auto& p : points_) { pts.push_back(p.point); } + return pts; +} + void BundleAdjustment::_reprojectionErrorSE(const int camera, double &error, double &npoints) const { error = 0.0; npoints = 0.0; diff --git a/components/codecs/include/ftl/codecs/channels.hpp b/components/codecs/include/ftl/codecs/channels.hpp index 90e3d3cec00f903844b658ef32d424c596069c0b..db0db3b679e96fd5993240b535ba1703e5ef6b04 100644 --- a/components/codecs/include/ftl/codecs/channels.hpp +++ b/components/codecs/include/ftl/codecs/channels.hpp @@ -31,12 +31,10 @@ enum struct Channel : int { Support2 = 14, // 8UC4 (currently) Segmentation = 15, // 32S? Normals2 = 16, // 16FC4 - ColourHighRes = 17, // 8UC3 or 8UC4 - LeftHighRes = 17, // 8UC3 or 8UC4 + UNUSED1 = 17, Disparity = 18, Smoothing = 19, // 32F - RightHighRes = 20, // 8UC3 or 8UC4 - Colour2HighRes = 20, + UNUSED2 = 20, Overlay = 21, // 8UC4 GroundTruth = 22, // 32F diff --git a/components/codecs/include/ftl/codecs/decoder.hpp b/components/codecs/include/ftl/codecs/decoder.hpp index 4af10e1186855da9b19be7bd8022405c8ac841bb..b649f63a8e67c76a6882bd489b4e2eba9d2970ec 100644 --- a/components/codecs/include/ftl/codecs/decoder.hpp +++ b/components/codecs/include/ftl/codecs/decoder.hpp @@ -33,17 +33,19 @@ void free(Decoder *&e); */ class Decoder { public: - Decoder() { cudaStreamCreate(&stream_); }; - virtual ~Decoder() { cudaStreamDestroy(stream_); }; + Decoder() { cudaStreamCreate(&stream_); cudaEventCreate(&event_); }; + virtual ~Decoder() { cudaStreamDestroy(stream_); cudaEventDestroy(event_); }; virtual bool decode(const ftl::codecs::Packet &pkt, cv::cuda::GpuMat &out)=0; virtual bool accepts(const ftl::codecs::Packet &)=0; cudaStream_t stream() { return stream_; } + cudaEvent_t event() { return event_; } protected: cudaStream_t stream_; + cudaEvent_t event_; }; } diff --git a/components/codecs/src/channels.cpp b/components/codecs/src/channels.cpp index cc30c51683d4947507dc9bec81bbca3159dcef95..458541d5f1b2ffe94166faf828efa9f827e2c983 100644 --- a/components/codecs/src/channels.cpp +++ b/components/codecs/src/channels.cpp @@ -27,10 +27,10 @@ static const std::unordered_map<Channel,ChannelInfo> info = { {Channel::Support2, {"Support2", CV_8UC4}}, {Channel::Segmentation, {"Segmentation", CV_8U}}, {Channel::Normals2, {"Normals Right", CV_32FC4}}, - {Channel::ColourHighRes, {"Left High-res", CV_8UC4}}, + {Channel::UNUSED1, {"Unused", CV_8UC4}}, {Channel::Disparity, {"Disparity", CV_16S}}, {Channel::Smoothing, {"Smoothing", CV_32F}}, - {Channel::Colour2HighRes, {"Right High-res", CV_8UC4}}, + {Channel::UNUSED2, {"Unused", CV_8UC4}}, {Channel::Overlay, {"Overlay", CV_8UC4}}, {Channel::GroundTruth, {"Ground Truth", CV_32F}}, diff --git a/components/common/cpp/include/ftl/cuda_common.hpp b/components/common/cpp/include/ftl/cuda_common.hpp index 5c3cc9f484ecbaf615cf44469e3c37b636a4f7e6..59053e4b2196779bfa4bb16e84431b00fd36594e 100644 --- a/components/common/cpp/include/ftl/cuda_common.hpp +++ b/components/common/cpp/include/ftl/cuda_common.hpp @@ -21,6 +21,8 @@ #define STRIDE_Y(I,N) int I = blockIdx.y * blockDim.y + threadIdx.y; I < N; I += blockDim.y * gridDim.y #define STRIDE_X(I,N) int I = blockIdx.x * blockDim.x + threadIdx.x; I < N; I += blockDim.x * gridDim.x +void cudaCallback(cudaStream_t stream, const std::function<void()> &cb); + namespace ftl { namespace cuda { diff --git a/components/common/cpp/include/ftl/threads.hpp b/components/common/cpp/include/ftl/threads.hpp index c40ed095b5075afe0b4df7409c48ace45b8328cc..1e60a20077da11dedc9fc026f6f9932a2c2d9609 100644 --- a/components/common/cpp/include/ftl/threads.hpp +++ b/components/common/cpp/include/ftl/threads.hpp @@ -7,7 +7,7 @@ #define POOL_SIZE 10 -//#define DEBUG_MUTEX +#define DEBUG_MUTEX #define MUTEX_TIMEOUT 2 #if defined DEBUG_MUTEX diff --git a/components/common/cpp/include/ftl/timer.hpp b/components/common/cpp/include/ftl/timer.hpp index 9ccb0b31fbfb282d5f1d072ffd5f73bec57d92c8..6530aeaddb843e7f56dce9dc334b5fa0e9f8d0d6 100644 --- a/components/common/cpp/include/ftl/timer.hpp +++ b/components/common/cpp/include/ftl/timer.hpp @@ -84,6 +84,19 @@ void setClockSlave(bool); */ ftl::Handle add(timerlevel_t, const std::function<bool(int64_t ts)> &); +/** + * Same as other add function except that a multiplier is given to indicate + * how often this should be triggered in numbers of ticks. + */ +ftl::Handle add(timerlevel_t, size_t multiplier, const std::function<bool(int64_t ts)> &); + +/** + * Same as other add function except that a period in seconds is given. Note that + * the period should be a multiple of frames otherwise it will not be accurate + * but will still work. + */ +ftl::Handle add(timerlevel_t, double seconds, const std::function<bool(int64_t ts)> &); + /** * Initiate the timer and optionally block the current process. */ diff --git a/components/common/cpp/src/cuda_common.cpp b/components/common/cpp/src/cuda_common.cpp index 273b540c4e2a640c86255b8ed040edde9d8cc40a..949a22704fa1e7454f5094d92296c93f7731190a 100644 --- a/components/common/cpp/src/cuda_common.cpp +++ b/components/common/cpp/src/cuda_common.cpp @@ -56,6 +56,18 @@ void ftl::cuda::setDevice() { cudaSafeCall(cudaSetDevice(dev_to_use)); } +static void _cudaCallback(void *ud) { + auto *cb = (std::function<void()>*)ud; + (*cb)(); + delete cb; +} + +// TODO: Move this to a common location +void cudaCallback(cudaStream_t stream, const std::function<void()> &cb) { + cudaSafeCall(cudaLaunchHostFunc(stream, _cudaCallback, (void*)(new std::function<void()>(cb)))); +} + + TextureObjectBase::~TextureObjectBase() { free(); } diff --git a/components/common/cpp/src/timer.cpp b/components/common/cpp/src/timer.cpp index ec947317836ed3eb9b26f0ea38625240b8db7e87..e49a955cf1af3acae8e70d7bcb448832146b7269 100644 --- a/components/common/cpp/src/timer.cpp +++ b/components/common/cpp/src/timer.cpp @@ -35,8 +35,8 @@ struct TimerJob { std::atomic_bool active=false; // TODO: (Nick) Implement richer forms of timer //bool paused; - //int multiplier; - //int countdown; + int multiplier=0; // Number of ticks before trigger + int counter=0; // Current tick counter std::string name; }; @@ -76,14 +76,21 @@ static void waitTimePoint() { UNIQUE_LOCK(mtx, lk); auto idle_job = jobs[kTimerIdle10].begin(); while (idle_job != jobs[kTimerIdle10].end() && msdelay >= 10 && sincelast != mspf) { - (*idle_job).active = true; - bool doremove = !(*idle_job).job.trigger(now); - - if (doremove) { - idle_job = jobs[kTimerIdle10].erase(idle_job); - LOG(INFO) << "Timer job removed"; + auto &job = *idle_job; + + if (++job.counter >= job.multiplier) { + job.counter = 0; + job.active = true; + bool doremove = !job.job.trigger(now); + + if (doremove) { + idle_job = jobs[kTimerIdle10].erase(idle_job); + LOG(INFO) << "Timer job removed"; + } else { + (*idle_job++).active = false; + } } else { - (*idle_job++).active = false; + ++idle_job; } now = get_time(); msdelay = mspf - (now % mspf); @@ -100,14 +107,21 @@ static void waitTimePoint() { UNIQUE_LOCK(mtx, lk); auto idle_job = jobs[kTimerIdle1].begin(); while (idle_job != jobs[kTimerIdle1].end() && msdelay >= 2 && sincelast != mspf) { - (*idle_job).active = true; - bool doremove = !(*idle_job).job.trigger(now); - - if (doremove) { - idle_job = jobs[kTimerIdle1].erase(idle_job); - LOG(INFO) << "Timer job removed"; + auto &job = *idle_job; + + if (++job.counter >= job.multiplier) { + job.counter = 0; + job.active = true; + bool doremove = !job.job.trigger(now); + + if (doremove) { + idle_job = jobs[kTimerIdle1].erase(idle_job); + LOG(INFO) << "Timer job removed"; + } else { + (*idle_job++).active = false; + } } else { - (*idle_job++).active = false; + ++idle_job; } now = get_time(); msdelay = mspf - (now % mspf); @@ -171,6 +185,32 @@ ftl::Handle ftl::timer::add(timerlevel_t l, const std::function<bool(int64_t ts) return h; } +ftl::Handle ftl::timer::add(timerlevel_t l, size_t multiplier, const std::function<bool(int64_t ts)> &f) { + if (l < 0 || l >= kTimerMAXLEVEL) return {}; + + UNIQUE_LOCK(mtx, lk); + int newid = last_id++; + auto &j = jobs[l].emplace_back(); + j.id = newid; + j.name = "NoName"; + j.multiplier = multiplier; + ftl::Handle h = j.job.on(f); + return h; +} + +ftl::Handle ftl::timer::add(timerlevel_t l, double seconds, const std::function<bool(int64_t ts)> &f) { + if (l < 0 || l >= kTimerMAXLEVEL) return {}; + + UNIQUE_LOCK(mtx, lk); + int newid = last_id++; + auto &j = jobs[l].emplace_back(); + j.id = newid; + j.name = "NoName"; + j.multiplier = int(seconds*1000.0 / double(getInterval())); + ftl::Handle h = j.job.on(f); + return h; +} + static void removeJob(int id) { UNIQUE_LOCK(mtx, lk); if (id < 0) return; diff --git a/components/common/cpp/test/timer_unit.cpp b/components/common/cpp/test/timer_unit.cpp index 2c7602646eced4f792e818214f6424fc71752a16..1f8f63b693b40f6cfbe60a80c50f51463456e1cb 100644 --- a/components/common/cpp/test/timer_unit.cpp +++ b/components/common/cpp/test/timer_unit.cpp @@ -133,6 +133,58 @@ TEST_CASE( "Timer::add() Idle10 job" ) { } } +TEST_CASE( "Timer::add() Idle10 job periodic" ) { + SECTION( "Quick idle job" ) { + bool didrun = false; + + ftl::timer::reset(); + + int count = 0; + auto rcc = ftl::timer::add(ftl::timer::kTimerIdle10, [&count](int64_t ts) { + ++count; + return true; + }); + + auto rc = ftl::timer::add(ftl::timer::kTimerIdle10, size_t(20), [&didrun](int64_t ts) { + didrun = true; + ftl::timer::stop(false); + return true; + }); + + REQUIRE( (rc.id() >= 0) ); + + ftl::timer::start(true); + REQUIRE( didrun == true ); + REQUIRE( count == 20 ); + } +} + +TEST_CASE( "Timer::add() Idle1 job periodic" ) { + SECTION( "Quick idle job" ) { + bool didrun = false; + + ftl::timer::reset(); + + int count = 0; + auto rcc = ftl::timer::add(ftl::timer::kTimerIdle1, [&count](int64_t ts) { + ++count; + return true; + }); + + auto rc = ftl::timer::add(ftl::timer::kTimerIdle1, size_t(20), [&didrun](int64_t ts) { + didrun = true; + ftl::timer::stop(false); + return true; + }); + + REQUIRE( (rc.id() >= 0) ); + + ftl::timer::start(true); + REQUIRE( didrun == true ); + REQUIRE( count == 20 ); + } +} + TEST_CASE( "Timer::add() Main job" ) { SECTION( "Quick main job" ) { bool didrun = false; diff --git a/components/net/cpp/include/ftl/net/universe.hpp b/components/net/cpp/include/ftl/net/universe.hpp index 3d3a4b3ffe175c048958219e1fa1477e0043e73b..8ee642fe615b1044c44535ee8f17d0911d90e669 100644 --- a/components/net/cpp/include/ftl/net/universe.hpp +++ b/components/net/cpp/include/ftl/net/universe.hpp @@ -212,8 +212,8 @@ class Universe : public ftl::Configurable { void removeCallback(ftl::net::callback_t cbid); - size_t getSendBufferSize() const { return send_size_; } - size_t getRecvBufferSize() const { return recv_size_; } + size_t getSendBufferSize(ftl::URI::scheme_t s); + size_t getRecvBufferSize(ftl::URI::scheme_t s); private: void _run(); diff --git a/components/net/cpp/src/peer.cpp b/components/net/cpp/src/peer.cpp index db544841e77cfd6629b2ffddd2745ff9949e0188..57fdab3a9540f01ee32bdb0ebc016dd6ffb5e53d 100644 --- a/components/net/cpp/src/peer.cpp +++ b/components/net/cpp/src/peer.cpp @@ -183,11 +183,11 @@ Peer::Peer(SOCKET s, Universe *u, Dispatcher *d) : sock_(s), can_reconnect_(fals #ifndef TEST_MOCKS int flags =1; if (setsockopt(s, IPPROTO_TCP, TCP_NODELAY, (const char *)&flags, sizeof(flags))) { LOG(ERROR) << "ERROR: setsocketopt(), TCP_NODELAY"; }; - int a = static_cast<int>(u->getRecvBufferSize()); + int a = static_cast<int>(u->getRecvBufferSize(scheme_)); if (setsockopt(s, SOL_SOCKET, SO_RCVBUF, (const char *)&a, sizeof(int)) == -1) { fprintf(stderr, "Error setting socket opts: %s\n", strerror(errno)); } - a = static_cast<int>(u->getSendBufferSize()); + a = static_cast<int>(u->getSendBufferSize(scheme_)); if (setsockopt(s, SOL_SOCKET, SO_SNDBUF, (const char *)&a, sizeof(int)) == -1) { fprintf(stderr, "Error setting socket opts: %s\n", strerror(errno)); } @@ -244,12 +244,12 @@ Peer::Peer(const char *pUri, Universe *u, Dispatcher *d) : can_reconnect_(true), scheme_ = uri.getProtocol(); if (uri.getProtocol() == URI::SCHEME_TCP) { - sock_ = tcpConnect(uri, u->getSendBufferSize(), u->getRecvBufferSize()); + sock_ = tcpConnect(uri, u->getSendBufferSize(scheme_), u->getRecvBufferSize(scheme_)); if (sock_ != INVALID_SOCKET) status_ = kConnecting; else status_ = kReconnecting; } else if (uri.getProtocol() == URI::SCHEME_WS) { LOG(INFO) << "Websocket connect " << uri.getPath(); - sock_ = tcpConnect(uri, u->getSendBufferSize(), u->getRecvBufferSize()); + sock_ = tcpConnect(uri, u->getSendBufferSize(scheme_), u->getRecvBufferSize(scheme_)); if (sock_ != INVALID_SOCKET) { if (!ws_connect(sock_, uri)) { LOG(ERROR) << "Websocket connection failed"; @@ -313,7 +313,7 @@ bool Peer::reconnect() { LOG(INFO) << "Reconnecting to " << uri_ << " ..."; if (scheme_ == URI::SCHEME_TCP) { - sock_ = tcpConnect(uri, universe_->getSendBufferSize(), universe_->getRecvBufferSize()); + sock_ = tcpConnect(uri, universe_->getSendBufferSize(scheme_), universe_->getRecvBufferSize(scheme_)); if (sock_ != INVALID_SOCKET) { status_ = kConnecting; is_waiting_ = true; @@ -322,7 +322,7 @@ bool Peer::reconnect() { return false; } } else if (scheme_ == URI::SCHEME_WS) { - sock_ = tcpConnect(uri, universe_->getSendBufferSize(), universe_->getRecvBufferSize()); + sock_ = tcpConnect(uri, universe_->getSendBufferSize(scheme_), universe_->getRecvBufferSize(scheme_)); if (sock_ != INVALID_SOCKET) { if (!ws_connect(sock_, uri)) { return false; diff --git a/components/net/cpp/src/universe.cpp b/components/net/cpp/src/universe.cpp index a36bf29ceb84d9dd178cefcfe96287e2c16cc9a9..809bba4af8f1ec7becf35f93904b2d52e5ac14e8 100644 --- a/components/net/cpp/src/universe.cpp +++ b/components/net/cpp/src/universe.cpp @@ -41,8 +41,10 @@ struct NetImplDetail { //#define TCP_SEND_BUFFER_SIZE (512*1024) //#define TCP_RECEIVE_BUFFER_SIZE (1024*1024*1) -#define TCP_SEND_BUFFER_SIZE (128*1024) // Was 256 -#define TCP_RECEIVE_BUFFER_SIZE (128*1024) // Was 256 +#define TCP_SEND_BUFFER_SIZE (512*1024) +#define TCP_RECEIVE_BUFFER_SIZE (1024*1024) +#define WS_SEND_BUFFER_SIZE (512*1024) +#define WS_RECEIVE_BUFFER_SIZE (512*1024) callback_t ftl::net::Universe::cbid__ = 0; @@ -68,8 +70,8 @@ Universe::Universe(nlohmann::json &config) : this_peer(ftl::net::this_peer), impl_(new ftl::net::NetImplDetail), phase_(0), - send_size_(value("tcp_send_buffer",TCP_SEND_BUFFER_SIZE)), - recv_size_(value("tcp_recv_buffer",TCP_RECEIVE_BUFFER_SIZE)), + //send_size_(value("tcp_send_buffer",TCP_SEND_BUFFER_SIZE)), + //recv_size_(value("tcp_recv_buffer",TCP_RECEIVE_BUFFER_SIZE)), periodic_time_(value("periodics", 1.0)), reconnect_attempts_(value("reconnect_attempts",50)), thread_(Universe::__start, this) { @@ -99,6 +101,18 @@ Universe::~Universe() { delete impl_; } +size_t Universe::getSendBufferSize(ftl::URI::scheme_t s) { + return (s == ftl::URI::scheme_t::SCHEME_WS) ? + value("ws_send_buffer",WS_SEND_BUFFER_SIZE) : + value("tcp_send_buffer",TCP_SEND_BUFFER_SIZE); +} + +size_t Universe::getRecvBufferSize(ftl::URI::scheme_t s) { + return (s == ftl::URI::scheme_t::SCHEME_WS) ? + value("ws_recv_buffer",WS_SEND_BUFFER_SIZE) : + value("tcp_recv_buffer",TCP_SEND_BUFFER_SIZE); +} + void Universe::start() { /*cpu_set_t cpus; CPU_ZERO(&cpus); diff --git a/components/net/cpp/test/peer_unit.cpp b/components/net/cpp/test/peer_unit.cpp index 6b2cd2bd2ca9606e24c0bb0a2851dff7bb78e1e8..57828f57c78260068227f6044fc5d7d65dabfe96 100644 --- a/components/net/cpp/test/peer_unit.cpp +++ b/components/net/cpp/test/peer_unit.cpp @@ -51,8 +51,8 @@ class Universe { callback_t onConnect(const std::function<void(Peer*)> &f) { return 0; } callback_t onDisconnect(const std::function<void(Peer*)> &f) { return 0; } - size_t getSendBufferSize() const { return 10*1024; } - size_t getRecvBufferSize() const { return 10*1024; } + size_t getSendBufferSize(ftl::URI::scheme_t s) const { return 10*1024; } + size_t getRecvBufferSize(ftl::URI::scheme_t s) const { return 10*1024; } }; } } diff --git a/components/operators/include/ftl/operators/antialiasing.hpp b/components/operators/include/ftl/operators/antialiasing.hpp index 295729bd361e6a17287b845460068a59e4d7555e..5548c08578e96879f36f2c3a11b281b78ca4bf43 100644 --- a/components/operators/include/ftl/operators/antialiasing.hpp +++ b/components/operators/include/ftl/operators/antialiasing.hpp @@ -12,7 +12,7 @@ namespace operators { */ class FXAA : public ftl::operators::Operator { public: - explicit FXAA(ftl::Configurable*); + explicit FXAA(ftl::operators::Graph *g, ftl::Configurable*); ~FXAA(); inline Operator::Type type() const override { return Operator::Type::OneToOne; } diff --git a/components/operators/include/ftl/operators/buffer.hpp b/components/operators/include/ftl/operators/buffer.hpp new file mode 100644 index 0000000000000000000000000000000000000000..424e2dea08e853416b8ca673a0f577eaf1c119a7 --- /dev/null +++ b/components/operators/include/ftl/operators/buffer.hpp @@ -0,0 +1,37 @@ +#ifndef _FTL_OPERATORS_BUFFERS_HPP_ +#define _FTL_OPERATORS_BUFFERS_HPP_ + +namespace ftl { +namespace operators { + +/** + * Similar to frame channels, but these are pipeline buffers that can be + * used from one operator to the next. + */ +enum class Buffer { + LowLeft = 0, // 8UC4 + Screen = 1, + Weights = 2, // short + Confidence = 3, // 32F + Contribution = 4, // 32F + Flow = 5, // 16SC2 + Flow2 = 6, // 16SC2 + Energy = 7, // 32F + Mask = 8, // 32U + Density = 9, // 32F + Support1 = 10, // 8UC4 (currently) + Support2 = 11, // 8UC4 (currently) + Segmentation = 12, // 32S? + Disparity = 13, + Smoothing = 14, // 32F + LowGrayLeft = 15, + LowGrayRight = 16, + GrayLeft = 17, + GrayRight = 18, + LowRight = 19 +}; + +} +} + +#endif \ No newline at end of file diff --git a/components/operators/include/ftl/operators/clipping.hpp b/components/operators/include/ftl/operators/clipping.hpp index 590e714c9eeca9713e56139aa874f7c0db0a472d..25d8b76ff7bdbfc99717b01652497395e5ff7ea1 100644 --- a/components/operators/include/ftl/operators/clipping.hpp +++ b/components/operators/include/ftl/operators/clipping.hpp @@ -12,7 +12,7 @@ namespace operators { */ class ClipScene : public ftl::operators::Operator { public: - explicit ClipScene(ftl::Configurable*); + explicit ClipScene(ftl::operators::Graph *g, ftl::Configurable*); ~ClipScene(); inline Operator::Type type() const override { return Operator::Type::ManyToMany; } diff --git a/components/operators/include/ftl/operators/colours.hpp b/components/operators/include/ftl/operators/colours.hpp index 788f7b4f50dec2472453d30ebf4351616de228b2..a54539a67f829a7d1e9aa6c6306bb76a707081fa 100644 --- a/components/operators/include/ftl/operators/colours.hpp +++ b/components/operators/include/ftl/operators/colours.hpp @@ -8,7 +8,7 @@ namespace operators { class ColourChannels : public ftl::operators::Operator { public: - explicit ColourChannels(ftl::Configurable *cfg); + explicit ColourChannels(ftl::operators::Graph *g, ftl::Configurable *cfg); ~ColourChannels(); inline Operator::Type type() const override { return Operator::Type::OneToOne; } diff --git a/components/operators/include/ftl/operators/cuda/mask.hpp b/components/operators/include/ftl/operators/cuda/mask.hpp index 8e136371879c451a9b03997998c46e7b50a8c943..54a1522b628e14e5b99b1fb823416347374e00fc 100644 --- a/components/operators/include/ftl/operators/cuda/mask.hpp +++ b/components/operators/include/ftl/operators/cuda/mask.hpp @@ -83,6 +83,11 @@ void cull_mask( unsigned int radius, cudaStream_t stream); +void show_mask( + ftl::cuda::TextureObject<uchar4> &colour, + ftl::cuda::TextureObject<uint8_t> &mask, + int id, uchar4 style, cudaStream_t stream); + } } diff --git a/components/operators/include/ftl/operators/depth.hpp b/components/operators/include/ftl/operators/depth.hpp index b7ceba8dab56c1f168c6f6fc3bf29e005ff07b33..14f46b12d12b3adf933bd3ea7bd67463b36f19f2 100644 --- a/components/operators/include/ftl/operators/depth.hpp +++ b/components/operators/include/ftl/operators/depth.hpp @@ -10,8 +10,8 @@ namespace operators { class DepthBilateralFilter : public::ftl::operators::Operator { public: - explicit DepthBilateralFilter(ftl::Configurable*); - DepthBilateralFilter(ftl::Configurable*, const std::tuple<ftl::codecs::Channel> &); + explicit DepthBilateralFilter(ftl::operators::Graph *g, ftl::Configurable*); + DepthBilateralFilter(ftl::operators::Graph *g, ftl::Configurable*, const std::tuple<ftl::codecs::Channel> &); ~DepthBilateralFilter() {}; diff --git a/components/operators/include/ftl/operators/detectandtrack.hpp b/components/operators/include/ftl/operators/detectandtrack.hpp index f6c5c869fc9583e5f1b2948806d57237be550ea3..2e5ce171359c6a6f6bc5b8d693b64677a602be38 100644 --- a/components/operators/include/ftl/operators/detectandtrack.hpp +++ b/components/operators/include/ftl/operators/detectandtrack.hpp @@ -39,7 +39,7 @@ namespace operators { */ class DetectAndTrack : public ftl::operators::Operator { public: - explicit DetectAndTrack(ftl::Configurable*); + explicit DetectAndTrack(ftl::operators::Graph *g, ftl::Configurable*); ~DetectAndTrack() {}; inline Operator::Type type() const override { return Operator::Type::OneToOne; } @@ -116,22 +116,19 @@ class DetectAndTrack : public ftl::operators::Operator { */ class ArUco : public ftl::operators::Operator { public: - explicit ArUco(ftl::Configurable*); + explicit ArUco(ftl::operators::Graph *g, ftl::Configurable*); ~ArUco() {}; inline Operator::Type type() const override { return Operator::Type::OneToOne; } bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t stream) override; - void wait(cudaStream_t) override; - ftl::codecs::Channel channel_in_; ftl::codecs::Channel channel_out_; private: - std::future<bool> job_; - bool debug_; bool estimate_pose_; float marker_size_; + cv::Mat tmp_; cv::Ptr<cv::aruco::Dictionary> dictionary_; cv::Ptr<cv::aruco::DetectorParameters> params_; diff --git a/components/operators/include/ftl/operators/disparity.hpp b/components/operators/include/ftl/operators/disparity.hpp index 62b1f00857a1d8971f61248cda857814d49fc24e..4aba9bbd8e179c86ddc0d6305fb14b3579a6fb23 100644 --- a/components/operators/include/ftl/operators/disparity.hpp +++ b/components/operators/include/ftl/operators/disparity.hpp @@ -8,6 +8,7 @@ #endif #include <opencv2/cudastereo.hpp> +#include <opencv2/cudafilters.hpp> #ifdef HAVE_LIBSGM #include <libsgm.h> @@ -19,7 +20,7 @@ namespace operators { class StereoDisparity : public ftl::operators::Operator { public: - explicit StereoDisparity(ftl::Configurable* cfg); + StereoDisparity(ftl::operators::Graph *g, ftl::Configurable* cfg); ~StereoDisparity(); inline Operator::Type type() const override { return Operator::Type::OneToOne; } @@ -44,7 +45,7 @@ private: */ class FixstarsSGM : public ftl::operators::Operator { public: - explicit FixstarsSGM(ftl::Configurable* cfg); + FixstarsSGM(ftl::operators::Graph *g, ftl::Configurable* cfg); ~FixstarsSGM(); inline Operator::Type type() const override { return Operator::Type::OneToOne; } @@ -57,11 +58,14 @@ class FixstarsSGM : public ftl::operators::Operator { bool updateParameters(); bool updateP2Parameters(); void computeP2(cudaStream_t &stream); + void _variance_mask(cv::InputArray in, cv::OutputArray out, int wsize, cv::cuda::Stream &cvstream); sgm::StereoSGM *ssgm_; cv::Size size_; cv::cuda::GpuMat lbw_; cv::cuda::GpuMat rbw_; + cv::cuda::GpuMat lbw_full_; + cv::cuda::GpuMat rbw_full_; cv::cuda::GpuMat disp_int_; cv::cuda::GpuMat P2_map_; @@ -69,6 +73,12 @@ class FixstarsSGM : public ftl::operators::Operator { cv::cuda::GpuMat weightsF_; cv::cuda::GpuMat edges_; cv::Ptr<cv::cuda::CannyEdgeDetector> canny_; + cv::Ptr<cv::cuda::Filter> filter_; + + cv::cuda::GpuMat im_; + cv::cuda::GpuMat im2_; + cv::cuda::GpuMat mean_; + cv::cuda::GpuMat mean2_; int P1_; int P2_; @@ -80,7 +90,7 @@ class FixstarsSGM : public ftl::operators::Operator { class DisparityBilateralFilter : public::ftl::operators::Operator { public: - explicit DisparityBilateralFilter(ftl::Configurable*); + DisparityBilateralFilter(ftl::operators::Graph *g, ftl::Configurable*); ~DisparityBilateralFilter() {}; @@ -91,6 +101,7 @@ class DisparityBilateralFilter : public::ftl::operators::Operator { cv::Ptr<cv::cuda::DisparityBilateralFilter> filter_; cv::cuda::GpuMat disp_int_; cv::cuda::GpuMat disp_int_result_; + cv::cuda::GpuMat rgb_; double scale_; int radius_; int iter_; @@ -102,8 +113,8 @@ class DisparityBilateralFilter : public::ftl::operators::Operator { */ class DisparityToDepth : public ftl::operators::Operator { public: - explicit DisparityToDepth(ftl::Configurable* cfg) : - ftl::operators::Operator(cfg) {} + DisparityToDepth(ftl::operators::Graph *g, ftl::Configurable* cfg) : + ftl::operators::Operator(g, cfg) {} ~DisparityToDepth() {}; inline Operator::Type type() const override { return Operator::Type::OneToOne; } @@ -117,7 +128,7 @@ class DisparityToDepth : public ftl::operators::Operator { */ class DepthChannel : public ftl::operators::Operator { public: - explicit DepthChannel(ftl::Configurable *cfg); + DepthChannel(ftl::operators::Graph *g, ftl::Configurable *cfg); ~DepthChannel(); inline Operator::Type type() const override { return Operator::Type::ManyToMany; } @@ -139,8 +150,8 @@ class DepthChannel : public ftl::operators::Operator { #ifdef HAVE_OPTFLOW class OpticalFlowTemporalSmoothing : public ftl::operators::Operator { public: - explicit OpticalFlowTemporalSmoothing(ftl::Configurable*); - OpticalFlowTemporalSmoothing(ftl::Configurable*, const std::tuple<ftl::codecs::Channel> ¶ms); + OpticalFlowTemporalSmoothing(ftl::operators::Graph *g, ftl::Configurable*); + OpticalFlowTemporalSmoothing(ftl::operators::Graph *g, ftl::Configurable*, const std::tuple<ftl::codecs::Channel> ¶ms); ~OpticalFlowTemporalSmoothing(); inline Operator::Type type() const override { return Operator::Type::OneToOne; } diff --git a/components/operators/include/ftl/operators/filling.hpp b/components/operators/include/ftl/operators/filling.hpp index 6de380213ba4fb69bbf966a87db11a7de81f0946..ed2b3f39a20a949fa28dfe61232d44d15f56affe 100644 --- a/components/operators/include/ftl/operators/filling.hpp +++ b/components/operators/include/ftl/operators/filling.hpp @@ -12,7 +12,7 @@ namespace operators { */ class ScanFieldFill : public ftl::operators::Operator { public: - explicit ScanFieldFill(ftl::Configurable*); + ScanFieldFill(ftl::operators::Graph *g, ftl::Configurable*); ~ScanFieldFill(); inline Operator::Type type() const override { return Operator::Type::OneToOne; } @@ -23,7 +23,7 @@ class ScanFieldFill : public ftl::operators::Operator { class CrossSupportFill : public ftl::operators::Operator { public: - explicit CrossSupportFill(ftl::Configurable*); + CrossSupportFill(ftl::operators::Graph *g, ftl::Configurable*); ~CrossSupportFill(); inline Operator::Type type() const override { return Operator::Type::OneToOne; } diff --git a/components/operators/include/ftl/operators/gt_analysis.hpp b/components/operators/include/ftl/operators/gt_analysis.hpp index a89230caa12e42fc05027506c6484be1bf348d91..cea34c9dba5477012bd2a620ebbbe0e079fdea51 100644 --- a/components/operators/include/ftl/operators/gt_analysis.hpp +++ b/components/operators/include/ftl/operators/gt_analysis.hpp @@ -14,7 +14,7 @@ namespace operators { */ class GTAnalysis : public ftl::operators::Operator { public: - explicit GTAnalysis(ftl::Configurable*); + GTAnalysis(ftl::operators::Graph *g, ftl::Configurable*); ~GTAnalysis(); inline Operator::Type type() const override { return Operator::Type::OneToOne; } diff --git a/components/operators/include/ftl/operators/mask.hpp b/components/operators/include/ftl/operators/mask.hpp index 294caf54f31d12fce9674dfcf51e204f3b2ade5d..7d877d6579c329f7af513840b7140f187dbf7eed 100644 --- a/components/operators/include/ftl/operators/mask.hpp +++ b/components/operators/include/ftl/operators/mask.hpp @@ -14,7 +14,7 @@ namespace operators { */ class DiscontinuityMask : public ftl::operators::Operator { public: - explicit DiscontinuityMask(ftl::Configurable*); + DiscontinuityMask(ftl::operators::Graph *g, ftl::Configurable*); ~DiscontinuityMask(); inline Operator::Type type() const override { return Operator::Type::OneToOne; } @@ -28,7 +28,7 @@ class DiscontinuityMask : public ftl::operators::Operator { */ class BorderMask : public ftl::operators::Operator { public: - explicit BorderMask(ftl::Configurable*); + BorderMask(ftl::operators::Graph *g, ftl::Configurable*); ~BorderMask(); inline Operator::Type type() const override { return Operator::Type::OneToOne; } @@ -37,12 +37,26 @@ class BorderMask : public ftl::operators::Operator { }; +/** + * Visualise a mask value + */ +class DisplayMask : public ftl::operators::Operator { + public: + DisplayMask(ftl::operators::Graph *g, ftl::Configurable*); + ~DisplayMask(); + + inline Operator::Type type() const override { return Operator::Type::OneToOne; } + + bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t stream) override; + +}; + /** * Remove depth values marked with the discontinuity mask. */ class CullDiscontinuity : public ftl::operators::Operator { public: - explicit CullDiscontinuity(ftl::Configurable*); + CullDiscontinuity(ftl::operators::Graph *g, ftl::Configurable*); ~CullDiscontinuity(); inline Operator::Type type() const override { return Operator::Type::OneToOne; } diff --git a/components/operators/include/ftl/operators/mvmls.hpp b/components/operators/include/ftl/operators/mvmls.hpp index 3e5c476d60728bea288f0e43a95a6bd00776ce1e..0c0d6f4afb9715814199304ca0b430ac094071c4 100644 --- a/components/operators/include/ftl/operators/mvmls.hpp +++ b/components/operators/include/ftl/operators/mvmls.hpp @@ -8,7 +8,7 @@ namespace operators { class MultiViewMLS : public ftl::operators::Operator { public: - explicit MultiViewMLS(ftl::Configurable*); + MultiViewMLS(ftl::operators::Graph *g, ftl::Configurable*); ~MultiViewMLS(); inline Operator::Type type() const override { return Operator::Type::ManyToMany; } diff --git a/components/operators/include/ftl/operators/normals.hpp b/components/operators/include/ftl/operators/normals.hpp index a5faaa17645612bce3ab9601638f92058fd4ba57..c4d1a0190d27e9f307474eeed3f2234ac99e2e23 100644 --- a/components/operators/include/ftl/operators/normals.hpp +++ b/components/operators/include/ftl/operators/normals.hpp @@ -12,7 +12,7 @@ namespace operators { */ class Normals : public ftl::operators::Operator { public: - explicit Normals(ftl::Configurable*); + Normals(ftl::operators::Graph *g, ftl::Configurable*); ~Normals(); inline Operator::Type type() const override { return Operator::Type::OneToOne; } @@ -29,7 +29,7 @@ class Normals : public ftl::operators::Operator { */ class NormalDot : public ftl::operators::Operator { public: - explicit NormalDot(ftl::Configurable*); + NormalDot(ftl::operators::Graph *g, ftl::Configurable*); ~NormalDot(); inline Operator::Type type() const override { return Operator::Type::OneToOne; } @@ -44,7 +44,7 @@ class NormalDot : public ftl::operators::Operator { */ class SmoothNormals : public ftl::operators::Operator { public: - explicit SmoothNormals(ftl::Configurable*); + SmoothNormals(ftl::operators::Graph *g, ftl::Configurable*); ~SmoothNormals(); inline Operator::Type type() const override { return Operator::Type::OneToOne; } diff --git a/components/operators/include/ftl/operators/operator.hpp b/components/operators/include/ftl/operators/operator.hpp index 566e2715b99041eed861f96e5f4a43499ebf1580..86c406caa2da23eb88f2c2493c7b2e6864fa0b74 100644 --- a/components/operators/include/ftl/operators/operator.hpp +++ b/components/operators/include/ftl/operators/operator.hpp @@ -8,10 +8,13 @@ #include <ftl/rgbd/frameset.hpp> #include <ftl/rgbd/source.hpp> #include <ftl/cuda_common.hpp> +#include <ftl/operators/buffer.hpp> namespace ftl { namespace operators { +class Graph; + /** * An abstract frame operator interface. Any kind of filter that operates on a * single frame should use this as a base class. An example of a filter would @@ -22,7 +25,7 @@ namespace operators { */ class Operator { public: - explicit Operator(ftl::Configurable *cfg); + Operator(Graph *pgraph, ftl::Configurable *cfg); virtual ~Operator(); enum class Type { @@ -58,9 +61,12 @@ class Operator { inline ftl::Configurable *config() const { return config_; } + inline Graph *graph() const { return graph_; } + private: bool enabled_; ftl::Configurable *config_; + Graph *graph_; }; namespace detail { @@ -68,7 +74,7 @@ namespace detail { struct ConstructionHelperBase { explicit ConstructionHelperBase(ftl::Configurable *cfg) : config(cfg) {} virtual ~ConstructionHelperBase() {} - virtual ftl::operators::Operator *make()=0; + virtual ftl::operators::Operator *make(Graph *g)=0; ftl::Configurable *config; }; @@ -77,8 +83,8 @@ template <typename T> struct ConstructionHelper : public ConstructionHelperBase { explicit ConstructionHelper(ftl::Configurable *cfg) : ConstructionHelperBase(cfg) {} ~ConstructionHelper() {} - ftl::operators::Operator *make() override { - return new T(config); + ftl::operators::Operator *make(Graph *g) override { + return new T(g, config); } }; @@ -88,8 +94,8 @@ struct ConstructionHelper2 : public ConstructionHelperBase { arguments_ = std::make_tuple(args...); } ~ConstructionHelper2() {} - ftl::operators::Operator *make() override { - return new T(config, arguments_); + ftl::operators::Operator *make(Graph *g) override { + return new T(g, config, arguments_); } private: @@ -119,11 +125,9 @@ class Graph : public ftl::Configurable { template <typename T, typename... ARGS> ftl::Configurable *append(const std::string &name, ARGS...); - bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t stream=0); - bool apply(ftl::rgbd::FrameSet &in, ftl::rgbd::FrameSet &out, cudaStream_t stream=0); - bool apply(ftl::rgbd::FrameSet &in, ftl::rgbd::Frame &out, cudaStream_t stream=0); - - cudaStream_t getStream() const { return stream_; } + bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, const std::function<void()> &cb=nullptr); + bool apply(ftl::rgbd::FrameSet &in, ftl::rgbd::FrameSet &out, const std::function<void()> &cb=nullptr); + bool apply(ftl::rgbd::FrameSet &in, ftl::rgbd::Frame &out, const std::function<void()> &cb=nullptr); /** * Make sure all async operators have also completed. This is automatically @@ -134,11 +138,20 @@ class Graph : public ftl::Configurable { */ bool waitAll(cudaStream_t); + inline cv::cuda::GpuMat &createBuffer(ftl::operators::Buffer b) { return createBuffer(b, 0); } + cv::cuda::GpuMat &createBuffer(ftl::operators::Buffer b, uint32_t fid); + + cv::cuda::GpuMat &getBuffer(ftl::operators::Buffer b, uint32_t fid); + + bool hasBuffer(ftl::operators::Buffer b, uint32_t fid) const; + private: std::list<ftl::operators::detail::OperatorNode> operators_; std::map<std::string, ftl::Configurable*> configs_; - cudaStream_t stream_; std::atomic_flag busy_; + std::unordered_map<uint32_t,cv::cuda::GpuMat> buffers_; + std::unordered_set<uint32_t> valid_buffers_; + std::function<void()> callback_; ftl::Configurable *_append(ftl::operators::detail::ConstructionHelperBase*); }; diff --git a/components/operators/include/ftl/operators/opticalflow.hpp b/components/operators/include/ftl/operators/opticalflow.hpp index 8ee77736dcba7d95becf381759266dfeb543cba3..21d8bbb200383b85a4f0c99a1f35e5e2e7b51385 100644 --- a/components/operators/include/ftl/operators/opticalflow.hpp +++ b/components/operators/include/ftl/operators/opticalflow.hpp @@ -12,8 +12,8 @@ namespace operators { */ class NVOpticalFlow : public ftl::operators::Operator { public: - explicit NVOpticalFlow(ftl::Configurable*); - NVOpticalFlow(ftl::Configurable*, const std::tuple<ftl::codecs::Channel,ftl::codecs::Channel,ftl::codecs::Channel,ftl::codecs::Channel> &channels); + NVOpticalFlow(ftl::operators::Graph *g, ftl::Configurable*); + NVOpticalFlow(ftl::operators::Graph *g, ftl::Configurable*, const std::tuple<ftl::codecs::Channel,ftl::codecs::Channel,ftl::codecs::Channel,ftl::codecs::Channel> &channels); ~NVOpticalFlow(); inline Operator::Type type() const override { return Operator::Type::OneToOne; } diff --git a/components/operators/include/ftl/operators/poser.hpp b/components/operators/include/ftl/operators/poser.hpp index 9c642e47730e3f6833087b1a484e1f16476e3017..8d762f8ba2ee371342adb8e20b07d359211d081e 100644 --- a/components/operators/include/ftl/operators/poser.hpp +++ b/components/operators/include/ftl/operators/poser.hpp @@ -15,7 +15,7 @@ namespace operators { */ class Poser : public ftl::operators::Operator { public: - explicit Poser(ftl::Configurable*); + Poser(ftl::operators::Graph *g, ftl::Configurable*); ~Poser(); inline Operator::Type type() const override { return Operator::Type::ManyToMany; } diff --git a/components/operators/include/ftl/operators/segmentation.hpp b/components/operators/include/ftl/operators/segmentation.hpp index d7447615c6d5230f997f9dab857b1ba824b22ce2..ec2f9e9a6d3ea8d683a6139a237849c82b8fe44a 100644 --- a/components/operators/include/ftl/operators/segmentation.hpp +++ b/components/operators/include/ftl/operators/segmentation.hpp @@ -11,7 +11,7 @@ namespace operators { */ class CrossSupport : public ftl::operators::Operator { public: - explicit CrossSupport(ftl::Configurable*); + CrossSupport(ftl::operators::Graph *g, ftl::Configurable*); ~CrossSupport(); inline Operator::Type type() const override { return Operator::Type::OneToOne; } @@ -25,7 +25,7 @@ class CrossSupport : public ftl::operators::Operator { */ class VisCrossSupport : public ftl::operators::Operator { public: - explicit VisCrossSupport(ftl::Configurable*); + VisCrossSupport(ftl::operators::Graph *g, ftl::Configurable*); ~VisCrossSupport(); inline Operator::Type type() const override { return Operator::Type::OneToOne; } diff --git a/components/operators/include/ftl/operators/smoothing.hpp b/components/operators/include/ftl/operators/smoothing.hpp index c42f801ed8ed19a34162b270ed9b36fbf3cdd1d8..db231cdd714a786df1dedf7ca54de06d2005ae4e 100644 --- a/components/operators/include/ftl/operators/smoothing.hpp +++ b/components/operators/include/ftl/operators/smoothing.hpp @@ -14,7 +14,7 @@ namespace operators { */ class HFSmoother : public ftl::operators::Operator { public: - explicit HFSmoother(ftl::Configurable*); + HFSmoother(ftl::operators::Graph *g, ftl::Configurable*); ~HFSmoother(); inline Operator::Type type() const override { return Operator::Type::OneToOne; } @@ -35,7 +35,7 @@ class HFSmoother : public ftl::operators::Operator { */ class SmoothChannel : public ftl::operators::Operator { public: - explicit SmoothChannel(ftl::Configurable*); + SmoothChannel(ftl::operators::Graph *g, ftl::Configurable*); ~SmoothChannel(); inline Operator::Type type() const override { return Operator::Type::OneToOne; } @@ -53,7 +53,7 @@ class SmoothChannel : public ftl::operators::Operator { */ class SimpleMLS : public ftl::operators::Operator { public: - explicit SimpleMLS(ftl::Configurable*); + SimpleMLS(ftl::operators::Graph *g, ftl::Configurable*); ~SimpleMLS(); inline Operator::Type type() const override { return Operator::Type::OneToOne; } @@ -70,7 +70,7 @@ class SimpleMLS : public ftl::operators::Operator { */ class ColourMLS : public ftl::operators::Operator { public: - explicit ColourMLS(ftl::Configurable*); + ColourMLS(ftl::operators::Graph *g, ftl::Configurable*); ~ColourMLS(); inline Operator::Type type() const override { return Operator::Type::OneToOne; } @@ -113,7 +113,7 @@ class ColourMLS : public ftl::operators::Operator { */ class AggreMLS : public ftl::operators::Operator { public: - explicit AggreMLS(ftl::Configurable*); + AggreMLS(ftl::operators::Graph *g, ftl::Configurable*); ~AggreMLS(); inline Operator::Type type() const override { return Operator::Type::OneToOne; } @@ -137,7 +137,7 @@ class AggreMLS : public ftl::operators::Operator { */ class AdaptiveMLS : public ftl::operators::Operator { public: - explicit AdaptiveMLS(ftl::Configurable*); + AdaptiveMLS(ftl::operators::Graph *g, ftl::Configurable*); ~AdaptiveMLS(); inline Operator::Type type() const override { return Operator::Type::OneToOne; } diff --git a/components/operators/include/ftl/operators/weighting.hpp b/components/operators/include/ftl/operators/weighting.hpp index 545b4c3b8380311aaccc29190a83626da28b8aa6..8256a08cb23d03d42b9ef18a7107a858a34ebb8b 100644 --- a/components/operators/include/ftl/operators/weighting.hpp +++ b/components/operators/include/ftl/operators/weighting.hpp @@ -22,7 +22,7 @@ namespace operators { */ class PixelWeights : public ftl::operators::Operator { public: - explicit PixelWeights(ftl::Configurable*); + PixelWeights(ftl::operators::Graph *g, ftl::Configurable*); ~PixelWeights(); inline Operator::Type type() const override { return Operator::Type::OneToOne; } @@ -33,7 +33,7 @@ class PixelWeights : public ftl::operators::Operator { class CullWeight : public ftl::operators::Operator { public: - explicit CullWeight(ftl::Configurable*); + CullWeight(ftl::operators::Graph *g, ftl::Configurable*); ~CullWeight(); inline Operator::Type type() const override { return Operator::Type::OneToOne; } @@ -44,7 +44,7 @@ class CullWeight : public ftl::operators::Operator { class DegradeWeight : public ftl::operators::Operator { public: - explicit DegradeWeight(ftl::Configurable*); + DegradeWeight(ftl::operators::Graph *g, ftl::Configurable*); ~DegradeWeight(); inline Operator::Type type() const override { return Operator::Type::OneToOne; } diff --git a/components/operators/src/antialiasing.cpp b/components/operators/src/antialiasing.cpp index 1e6ef8f6c2f2f33de83fb7d113f2bd60b6f1d1f8..bbd1760657e8a7a497c1af5d12a9a92415e990ac 100644 --- a/components/operators/src/antialiasing.cpp +++ b/components/operators/src/antialiasing.cpp @@ -4,7 +4,7 @@ using ftl::operators::FXAA; using ftl::codecs::Channel; -FXAA::FXAA(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) { +FXAA::FXAA(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg) { } diff --git a/components/operators/src/aruco.cpp b/components/operators/src/aruco.cpp index 02ed251e2e29ec26bbed50ae892dec489b0a368e..0d287356748e7eebd287e18d3e53639f3e6778d2 100644 --- a/components/operators/src/aruco.cpp +++ b/components/operators/src/aruco.cpp @@ -6,6 +6,7 @@ #include <opencv2/calib3d.hpp> #define LOGURU_REPLACE_GLOG 1 +#include <ftl/profiler.hpp> #include <loguru.hpp> using ftl::operators::ArUco; @@ -38,20 +39,12 @@ static Eigen::Matrix4d matrix(cv::Vec3d &rvec, cv::Vec3d &tvec) { return r; } -ArUco::ArUco(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) { +ArUco::ArUco(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg) { dictionary_ = cv::aruco::getPredefinedDictionary(cfg->value("dictionary", 0)); params_ = cv::aruco::DetectorParameters::create(); - - debug_ = cfg->value("debug", false); - //estimate_pose_ = cfg->value("estimate_pose", false); - //auto marker_size = cfg->get<float>("marker_size"); - //if (!marker_size || (*marker_size <= 0.0f)) { - // marker_size_ = 0.1f; - // estimate_pose_ = false; - //} - //else { - // marker_size_ = *marker_size; - //} + params_->cornerRefinementMethod = cv::aruco::CORNER_REFINE_CONTOUR; + params_->cornerRefinementMinAccuracy = 0.01; + params_->cornerRefinementMaxIterations = 20; channel_in_ = Channel::Colour; channel_out_ = Channel::Shapes3D; @@ -61,88 +54,48 @@ ArUco::ArUco(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) { }); } -bool ArUco::apply(Frame &in, Frame &out, cudaStream_t stream) { +bool ArUco::apply(Frame &in, Frame &out, cudaStream_t) { if (!in.hasChannel(channel_in_)) { return false; } - Frame *inptr = ∈ - Frame *outptr = &out; - estimate_pose_ = config()->value("estimate_pose", true); - debug_ = config()->value("debug", false); - marker_size_ = config()->value("marker_size",0.1f); - - job_ = std::move(ftl::pool.push([this,inptr,outptr,stream](int id) { - Frame &in = *inptr; - Frame &out = *outptr; - - auto cvstream = cv::cuda::StreamAccessor::wrapStream(stream); - //in.download(channel_in_); - - //Mat im = in.get<Mat>(channel_in_); - // FIXME: Use internal stream here. - Mat im; // = in.fastDownload(channel_in_, cv::cuda::Stream::Null()); - cv::cvtColor(in.get<cv::Mat>(channel_in_), im, cv::COLOR_BGRA2BGR); + marker_size_ = config()->value("marker_size", 0.1f); - Mat K = in.getLeftCamera().getCameraMatrix(); - Mat dist = cv::Mat::zeros(cv::Size(5, 1), CV_64FC1); + std::vector<Vec3d> rvecs; + std::vector<Vec3d> tvecs; + std::vector<std::vector<cv::Point2f>> corners; + std::vector<int> ids; - std::vector<std::vector<cv::Point2f>> corners; - std::vector<int> ids; + { + FTL_Profile("ArUco", 0.02); + cv::cvtColor(in.get<cv::Mat>(channel_in_), tmp_, cv::COLOR_BGRA2GRAY); - cv::aruco::detectMarkers( im, dictionary_, - corners, ids, params_, cv::noArray(), K); + const Mat K = in.getLeftCamera().getCameraMatrix(); + const Mat dist; - std::vector<Vec3d> rvecs; - std::vector<Vec3d> tvecs; + cv::aruco::detectMarkers(tmp_, dictionary_, + corners, ids, params_, cv::noArray(), K, dist); if (estimate_pose_) { cv::aruco::estimatePoseSingleMarkers(corners, marker_size_, K, dist, rvecs, tvecs); } + } - list<Shape3D> result; - if (out.hasChannel(channel_out_)) { - result = out.get<list<Shape3D>>(channel_out_); - } - - for (size_t i = 0; i < rvecs.size(); i++) { - if (estimate_pose_) { - auto &t = result.emplace_back(); - t.id = ids[i]; - t.type = ftl::codecs::Shape3DType::ARUCO; - t.pose = (in.getPose() * matrix(rvecs[i], tvecs[i])).cast<float>(); - t.size = Eigen::Vector3f(0.1f,0.1f,0.1f); - t.label = "Aruco"; - } - } - - out.create<list<Shape3D>>(channel_out_).list = result; - - if (debug_) { - cv::aruco::drawDetectedMarkers(im, corners, ids); - if (estimate_pose_) { - for (size_t i = 0; i < rvecs.size(); i++) { - cv::aruco::drawAxis(im, K, dist, rvecs[i], tvecs[i], marker_size_); - } - } - } + list<Shape3D> result; + if (out.hasChannel(channel_out_)) { + result = out.get<list<Shape3D>>(channel_out_); + } - // TODO: should be uploaded by operator which requires data on GPU - //in.upload(channel_in_); - if (debug_) { - //if (in.isGPU(channel_in_)) { - cv::cvtColor(im, im, cv::COLOR_BGR2BGRA); - out.set<cv::cuda::GpuMat>(channel_in_).upload(im); - //} else cv::cvtColor(im, in.get<cv::Mat>(channel_in_), cv::COLOR_BGR2BGRA); + for (size_t i = 0; i < rvecs.size(); i++) { + if (estimate_pose_) { + auto &t = result.emplace_back(); + t.id = ids[i]; + t.type = ftl::codecs::Shape3DType::ARUCO; + t.pose = (in.getPose() * matrix(rvecs[i], tvecs[i])).cast<float>(); + t.size = Eigen::Vector3f(1.0f, 1.0f, 0.0f)*marker_size_; + t.label = "Aruco-" + std::to_string(ids[i]); } - return true; - })); + } + out.create<list<Shape3D>>(channel_out_).list = result; return true; } - -void ArUco::wait(cudaStream_t s) { - if (job_.valid()) { - job_.wait(); - job_.get(); - } -} diff --git a/components/operators/src/clipping.cpp b/components/operators/src/clipping.cpp index fa0a5c7efebc2d7de8e705d639d8c6c1bb1aa48d..3cfce7d554dc81ec253f5248192d61a525205e5e 100644 --- a/components/operators/src/clipping.cpp +++ b/components/operators/src/clipping.cpp @@ -10,7 +10,7 @@ using ftl::operators::ClipScene; using ftl::codecs::Channel; using ftl::rgbd::Format; -ClipScene::ClipScene(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) { +ClipScene::ClipScene(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg) { } diff --git a/components/operators/src/colours.cpp b/components/operators/src/colours.cpp index ad48acd1fc8d08b05a778aa983400ca2cebebb39..2704d5763d496de808716c827ccd86ce1efc9c15 100644 --- a/components/operators/src/colours.cpp +++ b/components/operators/src/colours.cpp @@ -6,7 +6,7 @@ using ftl::operators::ColourChannels; using ftl::codecs::Channel; -ColourChannels::ColourChannels(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) { +ColourChannels::ColourChannels(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg) { } diff --git a/components/operators/src/depth.cpp b/components/operators/src/depth.cpp index df127ef9f360bf40f4e4a757e11b9095902dbce7..11a8d22430c436aa2002ee4afa9ecd6a27c0d4be 100644 --- a/components/operators/src/depth.cpp +++ b/components/operators/src/depth.cpp @@ -49,8 +49,8 @@ static void calc_space_weighted_filter(GpuMat& table_space, int win_size, float // ==== Depth Bilateral Filter ================================================= -DepthBilateralFilter::DepthBilateralFilter(ftl::Configurable* cfg) : - ftl::operators::Operator(cfg) { +DepthBilateralFilter::DepthBilateralFilter(ftl::operators::Graph *g, ftl::Configurable* cfg) : + ftl::operators::Operator(g, cfg) { scale_ = 16.0; radius_ = cfg->value("radius", 7); @@ -72,8 +72,8 @@ DepthBilateralFilter::DepthBilateralFilter(ftl::Configurable* cfg) : calc_space_weighted_filter(table_space_, radius_ * 2 + 1, radius_ + 1.0f); } -DepthBilateralFilter::DepthBilateralFilter(ftl::Configurable* cfg, const std::tuple<ftl::codecs::Channel> &p) : - ftl::operators::Operator(cfg) { +DepthBilateralFilter::DepthBilateralFilter(ftl::operators::Graph *g, ftl::Configurable* cfg, const std::tuple<ftl::codecs::Channel> &p) : + ftl::operators::Operator(g, cfg) { scale_ = 16.0; radius_ = cfg->value("radius", 7); @@ -124,7 +124,7 @@ bool DepthBilateralFilter::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, // ============================================================================= -DepthChannel::DepthChannel(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) { +DepthChannel::DepthChannel(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg) { pipe_ = nullptr; } @@ -144,7 +144,8 @@ void DepthChannel::_createPipeline(size_t size) { pipe_->append<ftl::operators::ColourChannels>("colour"); // Convert BGR to BGRA pipe_->append<ftl::operators::CrossSupport>("cross"); #ifdef HAVE_OPTFLOW - pipe_->append<ftl::operators::NVOpticalFlow>("optflow", Channel::Colour, Channel::Flow, Channel::Colour2, Channel::Flow2); + // FIXME: OpenCV Nvidia OptFlow has a horrible implementation that causes device syncs + //pipe_->append<ftl::operators::NVOpticalFlow>("optflow", Channel::Colour, Channel::Flow, Channel::Colour2, Channel::Flow2); //if (size == 1) pipe_->append<ftl::operators::OpticalFlowTemporalSmoothing>("optflow_filter", Channel::Disparity); #endif #ifdef HAVE_LIBSGM @@ -171,11 +172,7 @@ bool DepthChannel::apply(ftl::rgbd::FrameSet &in, ftl::rgbd::FrameSet &out, cuda rbuf_.resize(in.frames.size()); - if (in.frames.size() > 0) { - if (depth_size_.width == 0) { - depth_size_ = in.firstFrame().get<cv::cuda::GpuMat>(Channel::Colour).size(); - } - } + int valid_count = 0; for (size_t i=0; i<in.frames.size(); ++i) { if (!in.hasFrame(i)) continue; @@ -188,18 +185,23 @@ bool DepthChannel::apply(ftl::rgbd::FrameSet &in, ftl::rgbd::FrameSet &out, cuda if (!cdata.enabled) continue; } - _createPipeline(in.frames.size()); - const cv::cuda::GpuMat& left = f.get<cv::cuda::GpuMat>(Channel::Left); const cv::cuda::GpuMat& right = f.get<cv::cuda::GpuMat>(Channel::Right); + if (left.empty() || right.empty()) continue; + cv::cuda::GpuMat& depth = f.create<cv::cuda::GpuMat>(Channel::Depth); - depth.create(left.size(), CV_32FC1); - if (left.empty() || right.empty()) continue; - pipe_->apply(f, f, stream); + const auto &intrin = f.getLeft(); + depth.create(intrin.height, intrin.width, CV_32FC1); + ++valid_count; } } + if (valid_count > 0) { + _createPipeline(in.frames.size()); + pipe_->apply(in, out); + } + return true; } @@ -210,28 +212,21 @@ bool DepthChannel::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream auto &f = in; if (!f.hasChannel(Channel::Depth) && f.hasChannel(Channel::Right)) { - _createPipeline(1); + if (f.hasChannel(Channel::CalibrationData)) { + auto &cdata = f.get<ftl::calibration::CalibrationData>(Channel::CalibrationData); + if (!cdata.enabled) return true; + } const cv::cuda::GpuMat& left = f.get<cv::cuda::GpuMat>(Channel::Left); const cv::cuda::GpuMat& right = f.get<cv::cuda::GpuMat>(Channel::Right); - cv::cuda::GpuMat& depth = f.create<cv::cuda::GpuMat>(Channel::Depth); - depth.create(depth_size_, CV_32FC1); - if (left.empty() || right.empty()) return false; + + _createPipeline(1); - /*if (depth_size_ != left.size()) { - auto &col2 = f.create<cv::cuda::GpuMat>(Channel::ColourHighRes); - cv::cuda::resize(left, col2, depth_size_, 0.0, 0.0, cv::INTER_CUBIC, cvstream); - f.createTexture<uchar4>(Channel::ColourHighRes, true); - f.swapChannels(Channel::Colour, Channel::ColourHighRes); - } - - if (depth_size_ != right.size()) { - cv::cuda::resize(right, rbuf_[i], depth_size_, 0.0, 0.0, cv::INTER_CUBIC, cvstream); - cv::cuda::swap(right, rbuf_[i]); - }*/ + cv::cuda::GpuMat& depth = f.create<cv::cuda::GpuMat>(Channel::Depth); + depth.create(depth_size_, CV_32FC1); - pipe_->apply(f, f, stream); + pipe_->apply(f, f); } return true; diff --git a/components/operators/src/detectandtrack.cpp b/components/operators/src/detectandtrack.cpp index 4e9b1b7294bc168957bc63206d4601ef424f45e6..0b9e783e0419256b357a27970beb6fb6eb0e6604 100644 --- a/components/operators/src/detectandtrack.cpp +++ b/components/operators/src/detectandtrack.cpp @@ -20,7 +20,7 @@ using ftl::codecs::Channel; using ftl::rgbd::Frame; using ftl::operators::DetectAndTrack; -DetectAndTrack::DetectAndTrack(ftl::Configurable *cfg) : ftl::operators::Operator(cfg), detecting_(false) { +DetectAndTrack::DetectAndTrack(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg), detecting_(false) { init(); } diff --git a/components/operators/src/disparity/bilateral_filter.cpp b/components/operators/src/disparity/bilateral_filter.cpp index 7ffce560c635e884dbaad6aa128ebba5f353817e..425b8378a2c8bfc13ebb52f1c11c0d50efbe8449 100644 --- a/components/operators/src/disparity/bilateral_filter.cpp +++ b/components/operators/src/disparity/bilateral_filter.cpp @@ -4,15 +4,17 @@ #include <ftl/operators/cuda/disparity.hpp> #include <opencv2/cudaimgproc.hpp> +#include <opencv2/cudawarping.hpp> using cv::cuda::GpuMat; using cv::Size; using ftl::codecs::Channel; using ftl::operators::DisparityBilateralFilter; +using ftl::operators::Buffer; -DisparityBilateralFilter::DisparityBilateralFilter(ftl::Configurable* cfg) : - ftl::operators::Operator(cfg) { +DisparityBilateralFilter::DisparityBilateralFilter(ftl::operators::Graph *g, ftl::Configurable* cfg) : + ftl::operators::Operator(g, cfg) { scale_ = 16.0; n_disp_ = cfg->value("n_disp", 256); @@ -27,14 +29,16 @@ bool DisparityBilateralFilter::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out if (!in.hasChannel(Channel::Colour)) { throw FTL_Error("Joint Bilateral Filter is missing Colour"); return false; - } else if (!in.hasChannel(Channel::Disparity)) { + } + + if (!graph()->hasBuffer(Buffer::Disparity, in.source())) { // Have depth, so calculate disparity... if (in.hasChannel(Channel::Depth)) { // No disparity, so create it. const auto params = in.getLeftCamera(); const GpuMat &depth = in.get<GpuMat>(Channel::Depth); - GpuMat &disp = out.create<GpuMat>(Channel::Disparity); + GpuMat &disp = graph()->createBuffer(Buffer::Disparity, in.source()); disp.create(depth.size(), CV_32FC1); //LOG(ERROR) << "Calculated disparity from depth"; @@ -56,14 +60,31 @@ bool DisparityBilateralFilter::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out auto cvstream = cv::cuda::StreamAccessor::wrapStream(stream); const GpuMat &rgb = in.get<GpuMat>(Channel::Colour); - const GpuMat &disp_in = in.get<GpuMat>(Channel::Disparity); - GpuMat &disp_out = out.create<GpuMat>(Channel::Disparity); + //const GpuMat &disp_in = in.get<GpuMat>(Channel::Disparity); + //GpuMat &disp_out = out.create<GpuMat>(Channel::Disparity); + + GpuMat disp_in = graph()->getBuffer(Buffer::Disparity, in.source()); disp_int_.create(disp_in.size(), disp_in.type()); + GpuMat rgb_buf; + if (rgb.size() != disp_in.size()) { + if (graph()->hasBuffer(Buffer::LowLeft, in.source())) { + rgb_buf = graph()->getBuffer(Buffer::LowLeft, in.source()); + } else { + auto &t = graph()->createBuffer(Buffer::LowLeft, in.source()); + cv::cuda::resize(rgb, t, disp_in.size(), 0, 0, cv::INTER_LINEAR, cvstream); + rgb_buf = t; + } + } else { + rgb_buf = rgb; + } + + //LOG(INFO) << "DISP = " << disp_in.size() << "," << disp_in.type() << " - RGBBUF = " << rgb_buf.size() << "," << rgb_buf.type() << " - RGB = " << rgb.size() << "," << rgb.type(); + //disp_in.convertTo(disp_int_, CV_16SC1, scale_, cvstream); //cv::cuda::cvtColor(rgb, bw_, cv::COLOR_BGRA2GRAY, 0, cvstream); - filter_->apply(disp_in, rgb, disp_int_, cvstream); - cv::cuda::swap(disp_out, disp_int_); + filter_->apply(disp_in, rgb_buf, disp_int_, cvstream); + cv::cuda::swap(disp_in, disp_int_); //disp_int_result_.convertTo(disp_out, disp_in.type(), 1.0/scale_, cvstream); return true; } \ No newline at end of file diff --git a/components/operators/src/disparity/disparity_to_depth.cpp b/components/operators/src/disparity/disparity_to_depth.cpp index 1ffd157dead77ef4fcf8859903c4439955a70f0b..07146ccfc95705186ccf5706c071bac3aff9a3a2 100644 --- a/components/operators/src/disparity/disparity_to_depth.cpp +++ b/components/operators/src/disparity/disparity_to_depth.cpp @@ -3,17 +3,18 @@ using ftl::operators::DisparityToDepth; using ftl::codecs::Channel; +using ftl::operators::Buffer; using cv::cuda::GpuMat; bool DisparityToDepth::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t stream) { - if (!in.hasChannel(Channel::Disparity)) { + if (!graph()->hasBuffer(Buffer::Disparity, in.source())) { throw FTL_Error("Missing disparity before convert to depth"); } - const GpuMat &disp = in.get<GpuMat>(Channel::Disparity); + const GpuMat &disp = graph()->getBuffer(Buffer::Disparity, in.source()); const auto params = in.getLeftCamera().scaled(disp.cols, disp.rows); GpuMat &depth = out.create<GpuMat>(Channel::Depth); diff --git a/components/operators/src/disparity/fixstars_sgm.cpp b/components/operators/src/disparity/fixstars_sgm.cpp index 0274022737726a8d6a94c5bcc8124c82a8d4dd17..176dc8d10bf23ba91ebc3cf1976b7e2e08a96766 100644 --- a/components/operators/src/disparity/fixstars_sgm.cpp +++ b/components/operators/src/disparity/fixstars_sgm.cpp @@ -6,6 +6,7 @@ #include <opencv2/cudaimgproc.hpp> #include <opencv2/cudaarithm.hpp> #include <opencv2/cudafilters.hpp> +#include <opencv2/cudawarping.hpp> using cv::Size; using cv::cuda::GpuMat; @@ -15,29 +16,25 @@ using ftl::codecs::Channel; using ftl::rgbd::Frame; using ftl::rgbd::Source; using ftl::operators::FixstarsSGM; +using ftl::operators::Buffer; -static void variance_mask(cv::InputArray in, cv::OutputArray out, int wsize, cv::cuda::Stream &cvstream) { +void FixstarsSGM::_variance_mask(cv::InputArray in, cv::OutputArray out, int wsize, cv::cuda::Stream &cvstream) { if (in.isGpuMat() && out.isGpuMat()) { - cv::cuda::GpuMat im; - cv::cuda::GpuMat im2; - cv::cuda::GpuMat mean; - cv::cuda::GpuMat mean2; - - mean.create(in.size(), CV_32FC1); - mean2.create(in.size(), CV_32FC1); - im2.create(in.size(), CV_32FC1); - in.getGpuMat().convertTo(im, CV_32FC1, cvstream); - - cv::cuda::multiply(im, im, im2, 1.0, CV_32FC1, cvstream); - auto filter = cv::cuda::createBoxFilter(CV_32FC1, CV_32FC1, cv::Size(wsize,wsize)); - filter->apply(im, mean, cvstream); // E[X] - filter->apply(im2, mean2, cvstream); // E[X^2] - cv::cuda::multiply(mean, mean, mean, 1.0, -1, cvstream); // (E[X])^2 + mean_.create(in.size(), CV_32FC1); + mean2_.create(in.size(), CV_32FC1); + im2_.create(in.size(), CV_32FC1); + in.getGpuMat().convertTo(im_, CV_32FC1, cvstream); + + cv::cuda::multiply(im_, im_, im2_, 1.0, CV_32FC1, cvstream); + if (!filter_) filter_ = cv::cuda::createBoxFilter(CV_32FC1, CV_32FC1, cv::Size(wsize,wsize)); + filter_->apply(im_, mean_, cvstream); // E[X] + filter_->apply(im2_, mean2_, cvstream); // E[X^2] + cv::cuda::multiply(mean_, mean_, mean_, 1.0, -1, cvstream); // (E[X])^2 // NOTE: floating point accuracy in subtraction // (cv::cuda::createBoxFilter only supports float and 8 bit integer types) - cv::cuda::subtract(mean2, mean, out.getGpuMatRef(), cv::noArray(), -1, cvstream); // E[X^2] - (E[X])^2 + cv::cuda::subtract(mean2_, mean_, out.getGpuMatRef(), cv::noArray(), -1, cvstream); // E[X^2] - (E[X])^2 } else { throw std::exception(); /* todo CPU version */ } } @@ -56,8 +53,8 @@ void FixstarsSGM::computeP2(cudaStream_t &stream) { } } -FixstarsSGM::FixstarsSGM(ftl::Configurable* cfg) : - ftl::operators::Operator(cfg) { +FixstarsSGM::FixstarsSGM(ftl::operators::Graph *g, ftl::Configurable* cfg) : + ftl::operators::Operator(g, cfg) { ssgm_ = nullptr; size_ = Size(0, 0); @@ -180,27 +177,43 @@ bool FixstarsSGM::apply(Frame &in, Frame &out, cudaStream_t stream) { auto &l = in.get<GpuMat>(Channel::Left); const auto &r = in.get<GpuMat>(Channel::Right); - if (l.size() != size_) { - size_ = l.size(); + const auto &intrin = in.getLeft(); + + if (l.empty() || r.empty() || intrin.width == 0) { + LOG(ERROR) << "Missing data for Fixstars"; + return false; + } + + if (size_.width != intrin.width) { + size_ = cv::Size(intrin.width, intrin.height); if (!init()) { return false; } } - bool has_estimate = in.hasChannel(Channel::Disparity); - const auto &disp = (!has_estimate) ? out.create<ftl::rgbd::VideoFrame>(Channel::Disparity).createGPU(Format<short>(l.size())) : in.get<GpuMat>(Channel::Disparity); + bool has_estimate = graph()->hasBuffer(Buffer::Disparity, in.source()); //in.hasChannel(Channel::Disparity); + auto &disp = graph()->createBuffer(Buffer::Disparity, in.source()); + disp.create(size_, CV_16SC1); auto cvstream = cv::cuda::StreamAccessor::wrapStream(stream); - cv::cuda::cvtColor(l, lbw_, cv::COLOR_BGRA2GRAY, 0, cvstream); - cv::cuda::cvtColor(r, rbw_, cv::COLOR_BGRA2GRAY, 0, cvstream); + cv::cuda::cvtColor(l, lbw_full_, cv::COLOR_BGRA2GRAY, 0, cvstream); + cv::cuda::cvtColor(r, rbw_full_, cv::COLOR_BGRA2GRAY, 0, cvstream); + + if (l.size() != size_) { + cv::cuda::resize(lbw_full_, lbw_, size_, 0, 0, cv::INTER_CUBIC, cvstream); + cv::cuda::resize(rbw_full_, rbw_, size_, 0, 0, cv::INTER_CUBIC, cvstream); + } else { + lbw_ = lbw_full_; + rbw_ = rbw_full_; + } //cvstream.waitForCompletion(); computeP2(stream); bool use_variance = config()->value("use_variance", true); if (use_variance) { - variance_mask(lbw_, weightsF_, config()->value("var_wsize", 11), cvstream); + _variance_mask(lbw_, weightsF_, config()->value("var_wsize", 11), cvstream); float minweight = std::min(1.0f, std::max(0.0f, config()->value("var_minweight", 0.5f))); cv::cuda::normalize(weightsF_, weightsF_, minweight, 1.0, cv::NORM_MINMAX, -1, cv::noArray(), cvstream); - weightsF_.convertTo(weights_, CV_8UC1, 255.0f); + weightsF_.convertTo(weights_, CV_8UC1, 255.0f, cvstream); //if ((int)P2_map_.step != P2_map_.cols) LOG(ERROR) << "P2 map step error: " << P2_map_.cols << "," << P2_map_.step; ssgm_->execute(lbw_.data, rbw_.data, disp_int_.data, P2_map_.data, (uint8_t*) weights_.data, weights_.step1(), stream); @@ -224,7 +237,7 @@ bool FixstarsSGM::apply(Frame &in, Frame &out, cudaStream_t stream) { } if (config()->value("show_P2_map", false)) { - cv::cuda::cvtColor(P2_map_, out.get<GpuMat>(Channel::Colour), cv::COLOR_GRAY2BGRA); + cv::cuda::cvtColor(P2_map_, out.get<GpuMat>(Channel::Colour), cv::COLOR_GRAY2BGRA, 0, cvstream); } if (config()->value("show_rpe", false)) { ftl::cuda::show_rpe(disp, in.set<GpuMat>(Channel::Left), r, 100.0f, stream); diff --git a/components/operators/src/disparity/libstereo.cpp b/components/operators/src/disparity/libstereo.cpp index 8531dd98f0650abb4b8b83198999616dc4eb31f9..8229c845e59a6dbe0e963cae66599fb0c405280f 100644 --- a/components/operators/src/disparity/libstereo.cpp +++ b/components/operators/src/disparity/libstereo.cpp @@ -21,8 +21,8 @@ struct StereoDisparity::Impl { StereoCensusSgm sgm; }; -StereoDisparity::StereoDisparity(ftl::Configurable* cfg) : - ftl::operators::Operator(cfg), impl_(nullptr) { +StereoDisparity::StereoDisparity(ftl::operators::Graph *g, ftl::Configurable* cfg) : + ftl::operators::Operator(g, cfg), impl_(nullptr) { init(); } diff --git a/components/operators/src/disparity/opencv/disparity_bilateral_filter.cu b/components/operators/src/disparity/opencv/disparity_bilateral_filter.cu index c1dd611c00e6830232a01d6f1eb86b643cd477cb..9ed95f6eab0831f12b502a36858c837f37edd8ad 100644 --- a/components/operators/src/disparity/opencv/disparity_bilateral_filter.cu +++ b/components/operators/src/disparity/opencv/disparity_bilateral_filter.cu @@ -328,8 +328,8 @@ namespace ftl { namespace cuda { namespace device } - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); + //if (stream == 0) + // cudaSafeCall( cudaDeviceSynchronize() ); } // These are commented out since we don't use them and it slows compile diff --git a/components/operators/src/disparity/optflow_smoothing.cpp b/components/operators/src/disparity/optflow_smoothing.cpp index c69b0727beda8ceb22580609ed8f7181a22d40f9..d1205854ea385c0b8be40167926523c572c4af96 100644 --- a/components/operators/src/disparity/optflow_smoothing.cpp +++ b/components/operators/src/disparity/optflow_smoothing.cpp @@ -18,14 +18,14 @@ using std::vector; template<typename T> static bool inline isValidDisparity(T d) { return (0.0 < d) && (d < 256.0); } // TODO -OpticalFlowTemporalSmoothing::OpticalFlowTemporalSmoothing(ftl::Configurable* cfg, const std::tuple<ftl::codecs::Channel> ¶ms) : - ftl::operators::Operator(cfg) { +OpticalFlowTemporalSmoothing::OpticalFlowTemporalSmoothing(ftl::operators::Graph *g, ftl::Configurable* cfg, const std::tuple<ftl::codecs::Channel> ¶ms) : + ftl::operators::Operator(g, cfg) { channel_ = std::get<0>(params); _init(cfg); } -OpticalFlowTemporalSmoothing::OpticalFlowTemporalSmoothing(ftl::Configurable* cfg) : - ftl::operators::Operator(cfg) { +OpticalFlowTemporalSmoothing::OpticalFlowTemporalSmoothing(ftl::operators::Graph *g, ftl::Configurable* cfg) : + ftl::operators::Operator(g, cfg) { _init(cfg); } diff --git a/components/operators/src/filling.cpp b/components/operators/src/filling.cpp index 87298cc052cb8d48673a6c2ce13943c35a68caee..8b6b369b787f1e1ceab1c404af46629f4b146185 100644 --- a/components/operators/src/filling.cpp +++ b/components/operators/src/filling.cpp @@ -6,7 +6,7 @@ using ftl::operators::ScanFieldFill; using ftl::operators::CrossSupportFill; using ftl::codecs::Channel; -ScanFieldFill::ScanFieldFill(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) { +ScanFieldFill::ScanFieldFill(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg) { } @@ -29,7 +29,7 @@ bool ScanFieldFill::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStrea } -CrossSupportFill::CrossSupportFill(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) { +CrossSupportFill::CrossSupportFill(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg) { } diff --git a/components/operators/src/fusion/correspondence_depth.cu b/components/operators/src/fusion/correspondence_depth.cu index f7303cfc413f03b49bdc75510906a5cbd409b626..95d11b890c98d53c643fed9946b855bf0b6f3ca8 100644 --- a/components/operators/src/fusion/correspondence_depth.cu +++ b/components/operators/src/fusion/correspondence_depth.cu @@ -117,9 +117,16 @@ __global__ void corresponding_depth_kernel( if (depth1 > cam1.minDepth && depth1 < cam1.maxDepth && bestcost < 1.0f) { // Delay making the depth change until later. conf(pt) = bestadjust; - mask(pt) = mask(pt) | Mask::kMask_Correspondence; + auto m = mask(pt); + m &= ~Mask::kMask_Bad; + mask(pt) = m | Mask::kMask_Correspondence; screenOut(pt) = bestScreen; } + + if (depth1 > cam1.minDepth && depth1 < cam1.maxDepth && bestcost > 2.0f) { + auto m = mask(pt); + mask(pt) = (m & Mask::kMask_Correspondence) ? m : m | Mask::kMask_Bad; + } } } diff --git a/components/operators/src/fusion/correspondence_util.cu b/components/operators/src/fusion/correspondence_util.cu index 5145f69767866e66ed01518b6e492328f022f595..1887ee88fa7e182e9b6f4b725092b5a5e833c1ef 100644 --- a/components/operators/src/fusion/correspondence_util.cu +++ b/components/operators/src/fusion/correspondence_util.cu @@ -53,6 +53,8 @@ __global__ void show_cor_error_kernel( if (x < colour.width() && y < colour.height()) { short2 s1 = screen1.tex2D(x,y); + //colour(x,y) = make_uchar4(0,0,0,0); + if (s1.x >= 0 && s1.x < screen2.width() && s1.y < screen2.height()) { short2 s2 = screen2.tex2D(s1.x, s1.y); @@ -120,6 +122,8 @@ __global__ void show_depth_adjust_kernel( float a = adjust.tex2D(x,y); short2 s = screen.tex2D(x,y); + //colour(x,y) = make_uchar4(0,0,0,0); + if (s.x >= 0) { float ncG = min(1.0f, fabsf(a)/scale); float ncB = -max(-1.0f, min(0.0f, a/scale)); diff --git a/components/operators/src/fusion/mvmls.cpp b/components/operators/src/fusion/mvmls.cpp index bdb69b6e6fd53777ec38e248414d72fd05ad2e52..38328f33ea724a1b687163bc961c2ccec1d6b4fa 100644 --- a/components/operators/src/fusion/mvmls.cpp +++ b/components/operators/src/fusion/mvmls.cpp @@ -5,13 +5,14 @@ #include <ftl/cuda/normals.hpp> #include <opencv2/cudaarithm.hpp> +#include <opencv2/cudawarping.hpp> using ftl::operators::MultiViewMLS; using ftl::codecs::Channel; using cv::cuda::GpuMat; using ftl::rgbd::Format; -MultiViewMLS::MultiViewMLS(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) { +MultiViewMLS::MultiViewMLS(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg) { } @@ -94,6 +95,13 @@ bool MultiViewMLS::apply(ftl::rgbd::FrameSet &in, ftl::rgbd::FrameSet &out, cuda f.createTexture<short2>(Channel::Screen); f.set<GpuMat>(Channel::Confidence).setTo(cv::Scalar(0.0f), cvstream); + + if (show_adjustment || show_consistency) { + if (!f.hasChannel(Channel::Overlay)) { + auto &t = f.createTexture<uchar4>(Channel::Overlay, ftl::rgbd::Format<uchar4>(size)); + cudaMemset2DAsync(t.devicePtr(), t.pitch(), 0, t.width()*4, t.height(), stream); + } + } } //for (int iter=0; iter<iters; ++iter) { @@ -272,8 +280,8 @@ bool MultiViewMLS::apply(ftl::rgbd::FrameSet &in, ftl::rgbd::FrameSet &out, cuda );*/ if (show_consistency) { - ftl::cuda::show_cor_error(f1.getTexture<uchar4>(Channel::Colour), f1.getTexture<short2>(Channel::Screen), f2.getTexture<short2>(Channel::Screen), 5.0f, stream); - ftl::cuda::show_cor_error(f2.getTexture<uchar4>(Channel::Colour), f2.getTexture<short2>(Channel::Screen), f1.getTexture<short2>(Channel::Screen), 5.0f, stream); + ftl::cuda::show_cor_error(f1.getTexture<uchar4>(Channel::Overlay), f1.getTexture<short2>(Channel::Screen), f2.getTexture<short2>(Channel::Screen), 5.0f, stream); + ftl::cuda::show_cor_error(f2.getTexture<uchar4>(Channel::Overlay), f2.getTexture<short2>(Channel::Screen), f1.getTexture<short2>(Channel::Screen), 5.0f, stream); } /*ftl::cuda::remove_cor_error( @@ -297,8 +305,8 @@ bool MultiViewMLS::apply(ftl::rgbd::FrameSet &in, ftl::rgbd::FrameSet &out, cuda //} if (show_adjustment) { - ftl::cuda::show_depth_adjustment(f1.getTexture<uchar4>(Channel::Colour), f1.getTexture<short2>(Channel::Screen), f1.getTexture<float>(Channel::Confidence), 0.04f, stream); - ftl::cuda::show_depth_adjustment(f2.getTexture<uchar4>(Channel::Colour), f2.getTexture<short2>(Channel::Screen), f2.getTexture<float>(Channel::Confidence), 0.04f, stream); + ftl::cuda::show_depth_adjustment(f1.getTexture<uchar4>(Channel::Overlay), f1.getTexture<short2>(Channel::Screen), f1.getTexture<float>(Channel::Confidence), 0.04f, stream); + ftl::cuda::show_depth_adjustment(f2.getTexture<uchar4>(Channel::Overlay), f2.getTexture<short2>(Channel::Screen), f2.getTexture<float>(Channel::Confidence), 0.04f, stream); } //} //else { /*ftl::cuda::correspondence( @@ -439,13 +447,27 @@ bool MultiViewMLS::apply(ftl::rgbd::FrameSet &in, ftl::rgbd::FrameSet &out, cuda float thresh = (1.0f / f.getLeft().fx) * disconPixels; + const GpuMat &rgb = f.get<GpuMat>(Channel::Colour); + GpuMat rgb_buf; + if (rgb.size() != size) { + if (graph()->hasBuffer(Buffer::LowLeft, f.source())) { + rgb_buf = graph()->getBuffer(Buffer::LowLeft, f.source()); + } else { + rgb_buf = graph()->createBuffer(Buffer::LowLeft, f.source()); + cv::cuda::resize(rgb, rgb_buf, size, 0, 0, cv::INTER_LINEAR, cvstream); + } + } else { + rgb_buf = rgb; + } + ftl::cuda::mls_aggr_horiz( f.createTexture<uchar4>((f.hasChannel(Channel::Support2)) ? Channel::Support2 : Channel::Support1), f.createTexture<half4>(Channel::Normals), *normals_horiz_[i], f.createTexture<float>(Channel::Depth), *centroid_horiz_[i], - f.createTexture<uchar4>(Channel::Colour), + //f.createTexture<uchar4>(Channel::Colour), + rgb_buf, thresh, col_smooth, radius, diff --git a/components/operators/src/gt_analysis.cpp b/components/operators/src/gt_analysis.cpp index c977a814b0c0e5fd0c6881f80b7d43379c440712..d7f94d39946d58a877050c73371c178436a9e6f9 100644 --- a/components/operators/src/gt_analysis.cpp +++ b/components/operators/src/gt_analysis.cpp @@ -5,7 +5,7 @@ using ftl::operators::GTAnalysis; using ftl::codecs::Channel; using std::string; -GTAnalysis::GTAnalysis(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) { +GTAnalysis::GTAnalysis(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg) { cudaMalloc(&output_, sizeof(ftl::cuda::GTAnalysisData)); } diff --git a/components/operators/src/mask.cpp b/components/operators/src/mask.cpp index 274f2de718dfae78f9b148848a663e16a3bd1313..3a2e7d8331f7fa27aad4c06973cab46f9fe1d23d 100644 --- a/components/operators/src/mask.cpp +++ b/components/operators/src/mask.cpp @@ -4,10 +4,11 @@ using ftl::operators::DiscontinuityMask; using ftl::operators::BorderMask; using ftl::operators::CullDiscontinuity; +using ftl::operators::DisplayMask; using ftl::codecs::Channel; using ftl::rgbd::Format; -DiscontinuityMask::DiscontinuityMask(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) { +DiscontinuityMask::DiscontinuityMask(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg) { } @@ -31,9 +32,10 @@ bool DiscontinuityMask::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaS } if (!out.hasChannel(Channel::Mask)) { + cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream); auto &m = out.create<cv::cuda::GpuMat>(Channel::Mask); m.create(in.get<cv::cuda::GpuMat>(Channel::Depth).size(), CV_8UC1); - m.setTo(cv::Scalar(0)); + m.setTo(cv::Scalar(0), cvstream); } /*ftl::cuda::discontinuity( @@ -59,7 +61,7 @@ bool DiscontinuityMask::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaS -BorderMask::BorderMask(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) { +BorderMask::BorderMask(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg) { } @@ -87,7 +89,7 @@ bool BorderMask::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t -CullDiscontinuity::CullDiscontinuity(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) { +CullDiscontinuity::CullDiscontinuity(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg) { } @@ -113,4 +115,35 @@ bool CullDiscontinuity::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaS ); return true; -} \ No newline at end of file +} + + + +DisplayMask::DisplayMask(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg) { + +} + +DisplayMask::~DisplayMask() { + +} + +bool DisplayMask::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t stream) { + + if (!in.hasChannel(Channel::Mask)) { + return true; + } + + uint8_t mask = config()->value("mask", 0); + bool invert = config()->value("invert", false); + + auto &masktex = in.getTexture<uint8_t>(Channel::Mask); + + if (!in.hasChannel(Channel::Overlay)) { + auto &t = in.createTexture<uchar4>(Channel::Overlay, ftl::rgbd::Format<uchar4>(masktex.width(), masktex.height())); + cudaMemset2DAsync(t.devicePtr(), t.pitch(), 0, t.width()*4, t.height(), stream); + } + + ftl::cuda::show_mask(in.getTexture<uchar4>(Channel::Overlay), masktex, mask, make_uchar4(255,0,255,255), stream); + + return true; +} diff --git a/components/operators/src/mls.cu b/components/operators/src/mls.cu index ee3ce41255e1f24d3f6066e648f26e7caa501f0e..55813446ab7a62d0913b96d107187b2bb62f0543 100644 --- a/components/operators/src/mls.cu +++ b/components/operators/src/mls.cu @@ -223,6 +223,10 @@ __device__ inline int segmentID(int u, int v) { return 0; } +__device__ inline float4 make_float4(const uchar4 &v) { + return make_float4(float(v.x), float(v.y), float(v.z), float(v.w)); +} + /* * Smooth depth map using Moving Least Squares. This version uses colour * similarity weights to adjust the spatial smoothing factor. It is naive in @@ -237,7 +241,9 @@ __device__ inline int segmentID(int u, int v) { TextureObject<half4> normals_out, TextureObject<float> depth_in, // Virtual depth map TextureObject<float> depth_out, // Accumulated output - TextureObject<uchar4> colour_in, + //TextureObject<uchar4> colour_in, + const uchar4* __restrict__ colour_in, + int colour_pitch, float smoothing, float colour_smoothing, ftl::rgbd::Camera camera) { @@ -260,7 +266,8 @@ __device__ inline int segmentID(int u, int v) { } float3 X = camera.screenToCam((int)(x),(int)(y),d0); - float4 c0 = colour_in.tex2D((float)x+0.5f, (float)y+0.5f); + //float4 c0 = colour_in.tex2D((float)x+0.5f, (float)y+0.5f); + float4 c0 = make_float4(colour_in[x+y*colour_pitch]); // Neighbourhood uchar4 base = region.tex2D(x,y); @@ -274,7 +281,8 @@ __device__ inline int segmentID(int u, int v) { #pragma unroll for (int u=-RADIUS; u<=RADIUS; ++u) { - const float d = depth_in.tex2D(x+u, y+v); + if (x+u >= 0 && x+u < depth_in.width() && y+v >= 0 && y+v < depth_in.height()) { + const float d = depth_in.tex2D(x+u, y+v); //if (d > camera.minDepth && d < camera.maxDepth) { float w = (d <= camera.minDepth || d >= camera.maxDepth || u < -baseY.x || u > baseY.y || v < -base.z || v > base.z) ? 0.0f : 1.0f; @@ -286,7 +294,8 @@ __device__ inline int segmentID(int u, int v) { // FIXME: Ensure bad normals are removed by setting depth invalid //if (Ni.x+Ni.y+Ni.z == 0.0f) continue; - const float4 c = colour_in.tex2D(float(x+u) + 0.5f, float(y+v) + 0.5f); + //const float4 c = colour_in.tex2D(float(x+u) + 0.5f, float(y+v) + 0.5f); + const float4 c = make_float4(colour_in[x+u+(y+v)*colour_pitch]); w *= ftl::cuda::colourWeighting(c0,c,colour_smoothing); // Allow missing point to borrow z value @@ -300,7 +309,7 @@ __device__ inline int segmentID(int u, int v) { nX += Ni*w; contrib += w; //if (FILLING && w > 0.0f && v > -base.z+1 && v < base.w-1 && u > -baseY.x+1 && u < baseY.y-1) segment_check |= segmentID(u,v); - //} + } } } @@ -335,7 +344,8 @@ void ftl::cuda::colour_mls_smooth_csr( ftl::cuda::TextureObject<half4> &normals_out, ftl::cuda::TextureObject<float> &depth_in, ftl::cuda::TextureObject<float> &depth_out, - ftl::cuda::TextureObject<uchar4> &colour_in, + //ftl::cuda::TextureObject<uchar4> &colour_in, + const cv::cuda::GpuMat &colour_in, float smoothing, float colour_smoothing, bool filling, @@ -346,9 +356,9 @@ void ftl::cuda::colour_mls_smooth_csr( const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); if (filling) { - colour_mls_smooth_csr_kernel<true,5><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, depth_in, depth_out, colour_in, smoothing, colour_smoothing, camera); + colour_mls_smooth_csr_kernel<true,5><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, depth_in, depth_out, (uchar4*)colour_in.data, colour_in.step/4, smoothing, colour_smoothing, camera); } else { - colour_mls_smooth_csr_kernel<false,5><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, depth_in, depth_out, colour_in, smoothing, colour_smoothing, camera); + colour_mls_smooth_csr_kernel<false,5><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, depth_in, depth_out, (uchar4*)colour_in.data, colour_in.step/4, smoothing, colour_smoothing, camera); } cudaSafeCall( cudaGetLastError() ); @@ -593,7 +603,8 @@ void ftl::cuda::mls_aggr_horiz( ftl::cuda::TextureObject<half4> &normals_out, ftl::cuda::TextureObject<float> &depth_in, ftl::cuda::TextureObject<float4> ¢roid_out, - ftl::cuda::TextureObject<uchar4> &colour_in, + //ftl::cuda::TextureObject<uchar4> &colour_in, + const cv::cuda::GpuMat &colour_in, float smoothing, float colour_smoothing, int radius, @@ -607,13 +618,13 @@ void ftl::cuda::mls_aggr_horiz( const dim3 blockSize(THREADS_X, THREADS_Y); switch(radius) { - case 1: mls_aggr_horiz_kernel<1><<<gridSize, blockSize, 0, stream>>>(region.devicePtr(), region.pixelPitch(), normals_in.devicePtr(), normals_in.pixelPitch(), normals_out, depth_in.devicePtr(), depth_in.pixelPitch(), centroid_out, colour_in.devicePtr(), colour_in.pixelPitch(), smoothing, colour_smoothing, camera); break; - case 2: mls_aggr_horiz_kernel<2><<<gridSize, blockSize, 0, stream>>>(region.devicePtr(), region.pixelPitch(), normals_in.devicePtr(), normals_in.pixelPitch(), normals_out, depth_in.devicePtr(), depth_in.pixelPitch(), centroid_out, colour_in.devicePtr(), colour_in.pixelPitch(), smoothing, colour_smoothing, camera); break; - case 3: mls_aggr_horiz_kernel<3><<<gridSize, blockSize, 0, stream>>>(region.devicePtr(), region.pixelPitch(), normals_in.devicePtr(), normals_in.pixelPitch(), normals_out, depth_in.devicePtr(), depth_in.pixelPitch(), centroid_out, colour_in.devicePtr(), colour_in.pixelPitch(), smoothing, colour_smoothing, camera); break; - case 5: mls_aggr_horiz_kernel<5><<<gridSize, blockSize, 0, stream>>>(region.devicePtr(), region.pixelPitch(), normals_in.devicePtr(), normals_in.pixelPitch(), normals_out, depth_in.devicePtr(), depth_in.pixelPitch(), centroid_out, colour_in.devicePtr(), colour_in.pixelPitch(), smoothing, colour_smoothing, camera); break; - case 10: mls_aggr_horiz_kernel<10><<<gridSize, blockSize, 0, stream>>>(region.devicePtr(), region.pixelPitch(), normals_in.devicePtr(), normals_in.pixelPitch(), normals_out, depth_in.devicePtr(), depth_in.pixelPitch(), centroid_out, colour_in.devicePtr(), colour_in.pixelPitch(), smoothing, colour_smoothing, camera); break; - case 15: mls_aggr_horiz_kernel<15><<<gridSize, blockSize, 0, stream>>>(region.devicePtr(), region.pixelPitch(), normals_in.devicePtr(), normals_in.pixelPitch(), normals_out, depth_in.devicePtr(), depth_in.pixelPitch(), centroid_out, colour_in.devicePtr(), colour_in.pixelPitch(), smoothing, colour_smoothing, camera); break; - case 20: mls_aggr_horiz_kernel<20><<<gridSize, blockSize, 0, stream>>>(region.devicePtr(), region.pixelPitch(), normals_in.devicePtr(), normals_in.pixelPitch(), normals_out, depth_in.devicePtr(), depth_in.pixelPitch(), centroid_out, colour_in.devicePtr(), colour_in.pixelPitch(), smoothing, colour_smoothing, camera); break; + case 1: mls_aggr_horiz_kernel<1><<<gridSize, blockSize, 0, stream>>>(region.devicePtr(), region.pixelPitch(), normals_in.devicePtr(), normals_in.pixelPitch(), normals_out, depth_in.devicePtr(), depth_in.pixelPitch(), centroid_out, (uchar4*)colour_in.data, colour_in.step/4, smoothing, colour_smoothing, camera); break; + case 2: mls_aggr_horiz_kernel<2><<<gridSize, blockSize, 0, stream>>>(region.devicePtr(), region.pixelPitch(), normals_in.devicePtr(), normals_in.pixelPitch(), normals_out, depth_in.devicePtr(), depth_in.pixelPitch(), centroid_out, (uchar4*)colour_in.data, colour_in.step/4, smoothing, colour_smoothing, camera); break; + case 3: mls_aggr_horiz_kernel<3><<<gridSize, blockSize, 0, stream>>>(region.devicePtr(), region.pixelPitch(), normals_in.devicePtr(), normals_in.pixelPitch(), normals_out, depth_in.devicePtr(), depth_in.pixelPitch(), centroid_out, (uchar4*)colour_in.data, colour_in.step/4, smoothing, colour_smoothing, camera); break; + case 5: mls_aggr_horiz_kernel<5><<<gridSize, blockSize, 0, stream>>>(region.devicePtr(), region.pixelPitch(), normals_in.devicePtr(), normals_in.pixelPitch(), normals_out, depth_in.devicePtr(), depth_in.pixelPitch(), centroid_out, (uchar4*)colour_in.data, colour_in.step/4, smoothing, colour_smoothing, camera); break; + case 10: mls_aggr_horiz_kernel<10><<<gridSize, blockSize, 0, stream>>>(region.devicePtr(), region.pixelPitch(), normals_in.devicePtr(), normals_in.pixelPitch(), normals_out, depth_in.devicePtr(), depth_in.pixelPitch(), centroid_out, (uchar4*)colour_in.data, colour_in.step/4, smoothing, colour_smoothing, camera); break; + case 15: mls_aggr_horiz_kernel<15><<<gridSize, blockSize, 0, stream>>>(region.devicePtr(), region.pixelPitch(), normals_in.devicePtr(), normals_in.pixelPitch(), normals_out, depth_in.devicePtr(), depth_in.pixelPitch(), centroid_out, (uchar4*)colour_in.data, colour_in.step/4, smoothing, colour_smoothing, camera); break; + case 20: mls_aggr_horiz_kernel<20><<<gridSize, blockSize, 0, stream>>>(region.devicePtr(), region.pixelPitch(), normals_in.devicePtr(), normals_in.pixelPitch(), normals_out, depth_in.devicePtr(), depth_in.pixelPitch(), centroid_out, (uchar4*)colour_in.data, colour_in.step/4, smoothing, colour_smoothing, camera); break; default: return; } cudaSafeCall( cudaGetLastError() ); diff --git a/components/operators/src/normals.cpp b/components/operators/src/normals.cpp index 09618fa4805a4e171c59c32f37b54df3298d5daa..d4831c56204598cb8537ab984caf806c6830c628 100644 --- a/components/operators/src/normals.cpp +++ b/components/operators/src/normals.cpp @@ -8,7 +8,7 @@ using ftl::operators::SmoothNormals; using ftl::codecs::Channel; using ftl::rgbd::Format; -Normals::Normals(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) { +Normals::Normals(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg) { } @@ -39,7 +39,7 @@ bool Normals::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t st // ============================================================================= -NormalDot::NormalDot(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) { +NormalDot::NormalDot(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg) { } @@ -71,7 +71,7 @@ bool NormalDot::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t // ============================================================================= -SmoothNormals::SmoothNormals(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) { +SmoothNormals::SmoothNormals(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg) { } diff --git a/components/operators/src/nvopticalflow.cpp b/components/operators/src/nvopticalflow.cpp index 27a623b39485f0ac29ad1c914980db62f324d560..8c0d38a6937e08b1efba9f61601029b4bd9cad15 100644 --- a/components/operators/src/nvopticalflow.cpp +++ b/components/operators/src/nvopticalflow.cpp @@ -18,13 +18,13 @@ using ftl::operators::NVOpticalFlow; using cv::Size; using cv::cuda::GpuMat; -NVOpticalFlow::NVOpticalFlow(ftl::Configurable* cfg) : - ftl::operators::Operator(cfg), channel_in_{ftl::codecs::Channel::Colour,ftl::codecs::Channel::Colour2}, channel_out_{ftl::codecs::Channel::Flow,ftl::codecs::Channel::Flow2} { +NVOpticalFlow::NVOpticalFlow(ftl::operators::Graph *g, ftl::Configurable* cfg) : + ftl::operators::Operator(g, cfg), channel_in_{ftl::codecs::Channel::Colour,ftl::codecs::Channel::Colour2}, channel_out_{ftl::codecs::Channel::Flow,ftl::codecs::Channel::Flow2} { size_ = Size(0, 0); } -NVOpticalFlow::NVOpticalFlow(ftl::Configurable*cfg, const std::tuple<ftl::codecs::Channel,ftl::codecs::Channel,ftl::codecs::Channel,ftl::codecs::Channel> &channels) : ftl::operators::Operator(cfg) { +NVOpticalFlow::NVOpticalFlow(ftl::operators::Graph *g, ftl::Configurable*cfg, const std::tuple<ftl::codecs::Channel,ftl::codecs::Channel,ftl::codecs::Channel,ftl::codecs::Channel> &channels) : ftl::operators::Operator(g, cfg) { channel_in_[0] = std::get<0>(channels); channel_out_[0] = std::get<1>(channels); channel_in_[1] = std::get<2>(channels); diff --git a/components/operators/src/operator.cpp b/components/operators/src/operator.cpp index cdbeb0c00e9753bd8cb6d8ce21ba285b062505b5..4cc0f90598d49fc70eb6b48eec773a5733b6a459 100644 --- a/components/operators/src/operator.cpp +++ b/components/operators/src/operator.cpp @@ -10,7 +10,7 @@ using ftl::rgbd::FrameSet; using ftl::rgbd::Source; using ftl::codecs::Channel; -Operator::Operator(ftl::Configurable *config) : config_(config) { +Operator::Operator(ftl::operators::Graph *g, ftl::Configurable *config) : config_(config), graph_(g) { enabled_ = config_->value("enabled", true); config_->on("enabled", [this]() { @@ -35,7 +35,6 @@ bool Operator::apply(FrameSet &in, Frame &out, cudaStream_t stream) { Graph::Graph(nlohmann::json &config) : ftl::Configurable(config) { - cudaSafeCall( cudaStreamCreate(&stream_) ); busy_.clear(); } @@ -49,22 +48,43 @@ Graph::~Graph() { delete i; } } - cudaStreamDestroy(stream_); } -bool Graph::apply(FrameSet &in, FrameSet &out, cudaStream_t stream) { - if (!value("enabled", true)) return false; +cv::cuda::GpuMat &Graph::createBuffer(ftl::operators::Buffer b, uint32_t fid) { + if (fid > 32) throw FTL_Error("Too many frames for buffer"); + auto &v = buffers_[(uint32_t(b) << 8) + fid]; + valid_buffers_.insert((uint32_t(b) << 8) + fid); + return v; +} + +cv::cuda::GpuMat &Graph::getBuffer(ftl::operators::Buffer b, uint32_t fid) { + if (fid > 32) throw FTL_Error("Too many frames for buffer"); + if (!hasBuffer(b, fid)) throw FTL_Error("Buffer does not exist: " << int(b)); + auto &v = buffers_.at((uint32_t(b) << 8) + fid); + return v; +} + +bool Graph::hasBuffer(ftl::operators::Buffer b, uint32_t fid) const { + return valid_buffers_.count((uint32_t(b) << 8) + fid) > 0; +} + +bool Graph::apply(FrameSet &in, FrameSet &out, const std::function<void()> &cb) { + if (!value("enabled", true)) return true; + if (in.frames.size() < 1) return true; - auto stream_actual = (stream == 0) ? stream_ : stream; + auto stream_actual = in.frames[0].stream(); bool success = true; - if (in.frames.size() != out.frames.size()) return false; + if (in.frames.size() != out.frames.size()) return true; if (busy_.test_and_set()) { LOG(ERROR) << "Pipeline already in use: " << in.timestamp(); + //if (cb) cb(); return false; } + valid_buffers_.clear(); + for (auto &f : out.frames) { if (!f.hasOwn(Channel::Pipelines)) f.create<std::list<std::string>>(Channel::Pipelines); auto pls = f.set<std::list<std::string>>(Channel::Pipelines); @@ -73,7 +93,7 @@ bool Graph::apply(FrameSet &in, FrameSet &out, cudaStream_t stream) { for (auto &i : operators_) { if (i.instances.size() < 1) { - i.instances.push_back(i.maker->make()); + i.instances.push_back(i.maker->make(this)); } if (i.instances[0]->type() == Operator::Type::OneToOne) { @@ -82,7 +102,7 @@ bool Graph::apply(FrameSet &in, FrameSet &out, cudaStream_t stream) { //i.instances.push_back(i.maker->make()); //} if (in.frames.size() > 1 && i.instances.size() < 2 && !i.instances[0]->isMemoryHeavy()) { - i.instances.push_back(i.maker->make()); + i.instances.push_back(i.maker->make(this)); } for (size_t j=0; j<in.frames.size(); ++j) { @@ -94,6 +114,7 @@ bool Graph::apply(FrameSet &in, FrameSet &out, cudaStream_t stream) { if (instance->enabled()) { try { instance->apply(in.frames[j].cast<ftl::rgbd::Frame>(), out.frames[j].cast<ftl::rgbd::Frame>(), stream_actual); + //cudaSafeCall(cudaStreamSynchronize(stream_actual)); } catch (const std::exception &e) { LOG(ERROR) << "Operator exception for '" << instance->config()->getID() << "': " << e.what(); in.frames[j].message(ftl::data::Message::Error_OPERATOR_EXCEPTION, "Operator exception"); @@ -109,6 +130,7 @@ bool Graph::apply(FrameSet &in, FrameSet &out, cudaStream_t stream) { if (instance->enabled()) { try { instance->apply(in, out, stream_actual); + //cudaSafeCall(cudaStreamSynchronize(stream_actual)); } catch (const std::exception &e) { LOG(ERROR) << "Operator exception for '" << instance->config()->getID() << "': " << e.what(); if (in.frames.size() > 0) in.frames[0].message(ftl::data::Message::Error_OPERATOR_EXCEPTION, "Operator exception"); @@ -121,12 +143,17 @@ bool Graph::apply(FrameSet &in, FrameSet &out, cudaStream_t stream) { success = waitAll(stream_actual) && success; - if (stream == 0) { - cudaSafeCall(cudaStreamSynchronize(stream_actual)); + if (cb) { + cudaCallback(stream_actual, [this,cb]() { + busy_.clear(); + ftl::pool.push([cb](int id) { cb(); }); + }); + } else { + //cudaSafeCall(cudaStreamSynchronize(stream_actual)); + busy_.clear(); } - busy_.clear(); - return success; + return true; } bool Graph::waitAll(cudaStream_t stream) { @@ -143,17 +170,20 @@ bool Graph::waitAll(cudaStream_t stream) { return true; } -bool Graph::apply(Frame &in, Frame &out, cudaStream_t stream) { - if (!value("enabled", true)) return false; +bool Graph::apply(Frame &in, Frame &out, const std::function<void()> &cb) { + if (!value("enabled", true)) return true; - auto stream_actual = (stream == 0) ? stream_ : stream; + auto stream_actual = in.stream(); bool success = true; if (busy_.test_and_set()) { LOG(ERROR) << "Pipeline already in use: " << in.timestamp(); + //if (cb) cb(); return false; } + valid_buffers_.clear(); + if (!out.hasOwn(Channel::Pipelines)) out.create<std::list<std::string>>(Channel::Pipelines); auto pls = out.set<std::list<std::string>>(Channel::Pipelines); pls = getID(); @@ -161,7 +191,7 @@ bool Graph::apply(Frame &in, Frame &out, cudaStream_t stream) { for (auto &i : operators_) { // Make sure there are enough instances if (i.instances.size() < 1) { - i.instances.push_back(i.maker->make()); + i.instances.push_back(i.maker->make(this)); } auto *instance = i.instances[0]; @@ -169,6 +199,7 @@ bool Graph::apply(Frame &in, Frame &out, cudaStream_t stream) { if (instance->enabled()) { try { instance->apply(in, out, stream_actual); + //cudaSafeCall(cudaStreamSynchronize(stream_actual)); } catch (const std::exception &e) { LOG(ERROR) << "Operator exception for '" << instance->config()->getID() << "': " << e.what(); success = false; @@ -180,12 +211,18 @@ bool Graph::apply(Frame &in, Frame &out, cudaStream_t stream) { success = waitAll(stream_actual) && success; - if (stream == 0) { - cudaSafeCall(cudaStreamSynchronize(stream_actual)); + if (cb) { + cudaCallback(stream_actual, [this,cb]() { + busy_.clear(); + ftl::pool.push([cb](int id) { cb(); }); + }); + } else { + //cudaSafeCall(cudaStreamSynchronize(stream_actual)); + busy_.clear(); } - busy_.clear(); - return success; + //busy_.clear(); + return true; } ftl::Configurable *Graph::_append(ftl::operators::detail::ConstructionHelperBase *m) { diff --git a/components/operators/src/poser.cpp b/components/operators/src/poser.cpp index a03682767163c03b089ec985342ca1f846e19996..8c748e041651947ab4e2ce39fb1efabcaa4a02c5 100644 --- a/components/operators/src/poser.cpp +++ b/components/operators/src/poser.cpp @@ -12,7 +12,7 @@ static SHARED_MUTEX smtx; std::unordered_map<std::string,ftl::operators::Poser::PoseState> Poser::pose_db__; std::unordered_map<int,std::list<ftl::codecs::Shape3D*>> Poser::fs_shapes__; -Poser::Poser(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) { +Poser::Poser(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg) { } diff --git a/components/operators/src/segmentation.cpp b/components/operators/src/segmentation.cpp index 91a314da2a534acf43a7d5d007872c5064a2c98c..e256dc091a5c7c701729ce005aaf4e11f3f76591 100644 --- a/components/operators/src/segmentation.cpp +++ b/components/operators/src/segmentation.cpp @@ -1,11 +1,15 @@ #include <ftl/operators/segmentation.hpp> #include "segmentation_cuda.hpp" +#include <opencv2/cudawarping.hpp> + +#include <loguru.hpp> using ftl::operators::CrossSupport; using ftl::operators::VisCrossSupport; using ftl::codecs::Channel; +using cv::cuda::GpuMat; -CrossSupport::CrossSupport(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) { +CrossSupport::CrossSupport(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg) { } @@ -21,6 +25,27 @@ bool CrossSupport::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream return false; } + auto cvstream = cv::cuda::StreamAccessor::wrapStream(stream); + + const auto &intrin = in.getLeft(); + cv::Size size(intrin.width, intrin.height); + + const GpuMat &rgb = in.get<GpuMat>(Channel::Colour); + if (rgb.empty()) return false; + + GpuMat rgb_buf; + if (rgb.size() != size) { + if (graph()->hasBuffer(Buffer::LowLeft, in.source())) { + rgb_buf = graph()->getBuffer(Buffer::LowLeft, in.source()); + } else { + auto &t = graph()->createBuffer(Buffer::LowLeft, in.source()); + cv::cuda::resize(rgb, t, size, 0, 0, cv::INTER_LINEAR, cvstream); + rgb_buf = t; + } + } else { + rgb_buf = rgb; + } + if (use_mask && !in.hasChannel(Channel::Support2)) { if (!in.hasChannel(Channel::Mask)) { out.message(ftl::data::Message::Warning_MISSING_CHANNEL, "Missing Mask channel in Support operator"); @@ -28,15 +53,15 @@ bool CrossSupport::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream } ftl::cuda::support_region( in.createTexture<uint8_t>(Channel::Mask), - out.createTexture<uchar4>(Channel::Support2, ftl::rgbd::Format<uchar4>(in.get<cv::cuda::GpuMat>(Channel::Colour).size())), + out.createTexture<uchar4>(Channel::Support2, ftl::rgbd::Format<uchar4>(rgb_buf.size())), config()->value("v_max", 5), config()->value("h_max", 5), config()->value("symmetric", false), stream ); } else if (!in.hasChannel(Channel::Support1)) { ftl::cuda::support_region( - in.createTexture<uchar4>(Channel::Colour), - out.createTexture<uchar4>(Channel::Support1, ftl::rgbd::Format<uchar4>(in.get<cv::cuda::GpuMat>(Channel::Colour).size())), + rgb_buf, + out.createTexture<uchar4>(Channel::Support1, ftl::rgbd::Format<uchar4>(rgb_buf.size())), config()->value("tau", 10.0f), config()->value("v_max", 5), config()->value("h_max", 5), @@ -50,7 +75,7 @@ bool CrossSupport::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream -VisCrossSupport::VisCrossSupport(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) { +VisCrossSupport::VisCrossSupport(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg) { } diff --git a/components/operators/src/segmentation.cu b/components/operators/src/segmentation.cu index de1268931a7b91bb09f0e3dc6385a8d9c7a4a9d8..c8f780363622c00292aebea375e05eb98fb722c3 100644 --- a/components/operators/src/segmentation.cu +++ b/components/operators/src/segmentation.cu @@ -25,20 +25,20 @@ __device__ inline float cross<float>(float p1, float p2) { } template <typename T, bool SYM> -__device__ uchar4 calculate_support_region(const TextureObject<T> &img, int x, int y, float tau, int v_max, int h_max) { +__device__ uchar4 calculate_support_region(const T* __restrict__ img, int width, int height, int pitch, int x, int y, float tau, int v_max, int h_max) { int x_min = max(0, x - h_max); - int x_max = min(img.width()-1, static_cast<unsigned int>(x + h_max)); + int x_max = min(width-1, static_cast<unsigned int>(x + h_max)); int y_min = max(0, y - v_max); - int y_max = min(img.height()-1, static_cast<unsigned int>(y + v_max)); + int y_max = min(height-1, static_cast<unsigned int>(y + v_max)); uchar4 result = make_uchar4(0, 0, 0, 0); - auto colour = img.tex2D((float)x+0.5f,(float)y+0.5f); + auto colour = img[x+y*pitch]; auto prev_colour = colour; int u; for (u=x-1; u >= x_min; --u) { - auto next_colour = img.tex2D((float)u+0.5f,(float)y+0.5f); + auto next_colour = img[u+y*pitch]; if (cross(prev_colour, next_colour) > tau) { result.x = x - u - 1; break; @@ -49,7 +49,7 @@ __device__ uchar4 calculate_support_region(const TextureObject<T> &img, int x, i prev_colour = colour; for (u=x+1; u <= x_max; ++u) { - auto next_colour = img.tex2D((float)u+0.5f,(float)y+0.5f); + auto next_colour = img[u+y*pitch]; if (cross(prev_colour, next_colour) > tau) { result.y = u - x - 1; break; @@ -61,7 +61,7 @@ __device__ uchar4 calculate_support_region(const TextureObject<T> &img, int x, i int v; prev_colour = colour; for (v=y-1; v >= y_min; --v) { - auto next_colour = img.tex2D((float)x+0.5f,(float)v+0.5f); + auto next_colour = img[x+v*pitch]; if (cross(prev_colour, next_colour) > tau) { result.z = y - v - 1; break; @@ -72,7 +72,7 @@ __device__ uchar4 calculate_support_region(const TextureObject<T> &img, int x, i prev_colour = colour; for (v=y+1; v <= y_max; ++v) { - auto next_colour = img.tex2D((float)x+0.5f,(float)v+0.5f); + auto next_colour = img[x+v*pitch]; if (cross(prev_colour, next_colour) > tau) { result.w = v - y - 1; break; @@ -91,19 +91,19 @@ __device__ uchar4 calculate_support_region(const TextureObject<T> &img, int x, i return result; } -__device__ uchar4 calculate_support_region(const TextureObject<uint8_t> &img, int x, int y, int v_max, int h_max) { +__device__ uchar4 calculate_support_region(const uint8_t* __restrict__ img, int width, int height, int pitch, int x, int y, int v_max, int h_max) { int x_min = max(0, x - h_max); - int x_max = min(img.width()-1, static_cast<unsigned int>(x + h_max)); + int x_max = min(width-1, static_cast<unsigned int>(x + h_max)); int y_min = max(0, y - v_max); - int y_max = min(img.height()-1, static_cast<unsigned int>(y + v_max)); + int y_max = min(height-1, static_cast<unsigned int>(y + v_max)); uchar4 result = make_uchar4(0, 0, 0, 0); - Mask m1(img.tex2D(x,y)); + Mask m1(img[x+y*pitch]); int u; for (u=x-1; u >= x_min; --u) { - Mask m2(img.tex2D(u,y)); + Mask m2(img[u+y*pitch]); if (m2.isDiscontinuity()) { result.x = x - u - 1; break; @@ -112,7 +112,7 @@ __device__ uchar4 calculate_support_region(const TextureObject<uint8_t> &img, in if (u < x_min) result.x = x - x_min; for (u=x+1; u <= x_max; ++u) { - Mask m2(img.tex2D(u,y)); + Mask m2(img[u+y*pitch]); if (m2.isDiscontinuity()) { result.y = u - x - 1; break; @@ -122,7 +122,7 @@ __device__ uchar4 calculate_support_region(const TextureObject<uint8_t> &img, in int v; for (v=y-1; v >= y_min; --v) { - Mask m2(img.tex2D(x,v)); + Mask m2(img[x+v*pitch]); if (m2.isDiscontinuity()) { result.z = y - v - 1; break; @@ -131,7 +131,7 @@ __device__ uchar4 calculate_support_region(const TextureObject<uint8_t> &img, in if (v < y_min) result.z = y - y_min; for (v=y+1; v <= y_max; ++v) { - Mask m2(img.tex2D(x,v)); + Mask m2(img[x+v*pitch]); if (m2.isDiscontinuity()) { result.w = v - y - 1; break; @@ -150,26 +150,26 @@ __device__ uchar4 calculate_support_region(const TextureObject<uint8_t> &img, in } template <typename T, bool SYM> -__global__ void support_region_kernel(TextureObject<T> img, TextureObject<uchar4> region, float tau, int v_max, int h_max) { +__global__ void support_region_kernel(const T* __restrict__ img, int width, int height, int pitch, TextureObject<uchar4> region, float tau, int v_max, int h_max) { const int x = blockIdx.x*blockDim.x + threadIdx.x; const int y = blockIdx.y*blockDim.y + threadIdx.y; - if (x < 0 || y < 0 || x >= img.width() || y >= img.height()) return; + if (x < 0 || y < 0 || x >= width || y >= height) return; - region(x,y) = calculate_support_region<T,SYM>(img, x, y, tau, v_max, h_max); + region(x,y) = calculate_support_region<T,SYM>(img, width, height, pitch, x, y, tau, v_max, h_max); } -__global__ void support_region_kernel(TextureObject<uint8_t> img, TextureObject<uchar4> region, int v_max, int h_max) { +__global__ void support_region_kernel(const uint8_t* __restrict__ img, int width, int height, int pitch, TextureObject<uchar4> region, int v_max, int h_max) { const int x = blockIdx.x*blockDim.x + threadIdx.x; const int y = blockIdx.y*blockDim.y + threadIdx.y; - if (x < 0 || y < 0 || x >= img.width() || y >= img.height()) return; + if (x < 0 || y < 0 || x >= width || y >= height) return; - region(x,y) = calculate_support_region(img, x, y, v_max, h_max); + region(x,y) = calculate_support_region(img, width, height, pitch, x, y, v_max, h_max); } void ftl::cuda::support_region( - ftl::cuda::TextureObject<uchar4> &colour, + const cv::cuda::GpuMat &colour, ftl::cuda::TextureObject<uchar4> ®ion, float tau, int v_max, @@ -180,8 +180,8 @@ void ftl::cuda::support_region( const dim3 gridSize((region.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (region.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); - if (sym) support_region_kernel<uchar4, true><<<gridSize, blockSize, 0, stream>>>(colour, region, tau, v_max, h_max); - else support_region_kernel<uchar4, false><<<gridSize, blockSize, 0, stream>>>(colour, region, tau, v_max, h_max); + if (sym) support_region_kernel<uchar4, true><<<gridSize, blockSize, 0, stream>>>((uchar4*)colour.data, colour.cols, colour.rows, colour.step/4, region, tau, v_max, h_max); + else support_region_kernel<uchar4, false><<<gridSize, blockSize, 0, stream>>>((uchar4*)colour.data, colour.cols, colour.rows, colour.step/4, region, tau, v_max, h_max); cudaSafeCall( cudaGetLastError() ); @@ -202,7 +202,7 @@ void ftl::cuda::support_region( const dim3 gridSize((region.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (region.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); - support_region_kernel<float, true><<<gridSize, blockSize, 0, stream>>>(depth, region, tau, v_max, h_max); + support_region_kernel<float, true><<<gridSize, blockSize, 0, stream>>>(depth.devicePtr(), depth.width(), depth.height(), depth.pixelPitch(), region, tau, v_max, h_max); cudaSafeCall( cudaGetLastError() ); @@ -222,7 +222,7 @@ void ftl::cuda::support_region( const dim3 gridSize((region.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (region.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); - support_region_kernel<<<gridSize, blockSize, 0, stream>>>(mask, region, v_max, h_max); + support_region_kernel<<<gridSize, blockSize, 0, stream>>>(mask.devicePtr(), mask.width(), mask.height(), mask.pixelPitch(), region, v_max, h_max); cudaSafeCall( cudaGetLastError() ); diff --git a/components/operators/src/segmentation_cuda.hpp b/components/operators/src/segmentation_cuda.hpp index 445ed7e6d0f2624808232e66d834a360c6d798e8..49faa4e87fb58df03da7f5332c5313e7f35121c1 100644 --- a/components/operators/src/segmentation_cuda.hpp +++ b/components/operators/src/segmentation_cuda.hpp @@ -7,7 +7,7 @@ namespace ftl { namespace cuda { void support_region( - ftl::cuda::TextureObject<uchar4> &colour, + const cv::cuda::GpuMat &colour, ftl::cuda::TextureObject<uchar4> ®ion, float tau, int v_max, int h_max, bool sym, cudaStream_t stream); diff --git a/components/operators/src/smoothing.cpp b/components/operators/src/smoothing.cpp index 0690744f84a508520b6e02089d8d2e19a08b14fd..347b6b7754b49363be98578a94e1d049b6ffec1a 100644 --- a/components/operators/src/smoothing.cpp +++ b/components/operators/src/smoothing.cpp @@ -18,7 +18,7 @@ using ftl::codecs::Channel; using ftl::rgbd::Format; using cv::cuda::GpuMat; -HFSmoother::HFSmoother(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) { +HFSmoother::HFSmoother(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg) { } @@ -71,7 +71,7 @@ bool HFSmoother::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t // ====== Smoothing Channel ==================================================== -SmoothChannel::SmoothChannel(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) { +SmoothChannel::SmoothChannel(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg) { } @@ -135,7 +135,7 @@ bool SmoothChannel::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStrea // ===== MLS =================================================================== -SimpleMLS::SimpleMLS(ftl::Configurable *cfg) : ftl::operators::Operator(cfg), temp_(ftl::data::Frame::make_standalone()) { +SimpleMLS::SimpleMLS(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg), temp_(ftl::data::Frame::make_standalone()) { } @@ -184,7 +184,7 @@ bool SimpleMLS::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t -ColourMLS::ColourMLS(ftl::Configurable *cfg) : ftl::operators::Operator(cfg), temp_(ftl::data::Frame::make_standalone()) { +ColourMLS::ColourMLS(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg), temp_(ftl::data::Frame::make_standalone()) { } @@ -206,6 +206,23 @@ bool ColourMLS::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t } auto &temp = temp_.cast<ftl::rgbd::Frame>(); + auto size = in.get<GpuMat>(Channel::Depth).size(); + + auto cvstream = cv::cuda::StreamAccessor::wrapStream(stream); + + const GpuMat &rgb = in.get<GpuMat>(Channel::Colour); + GpuMat rgb_buf; + if (rgb.size() != size) { + if (graph()->hasBuffer(Buffer::LowLeft, in.source())) { + rgb_buf = graph()->getBuffer(Buffer::LowLeft, in.source()); + } else { + auto &t = graph()->createBuffer(Buffer::LowLeft, in.source()); + cv::cuda::resize(rgb, t, size, 0, 0, cv::INTER_LINEAR, cvstream); + rgb_buf = t; + } + } else { + rgb_buf = rgb; + } // FIXME: Assume in and out are the same frame. for (int i=0; i<iters; ++i) { @@ -229,7 +246,8 @@ bool ColourMLS::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t temp.createTexture<half4>(Channel::Normals, ftl::rgbd::Format<half4>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), in.createTexture<float>(Channel::Depth), temp.createTexture<float>(Channel::Depth, ftl::rgbd::Format<float>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), - in.createTexture<uchar4>(Channel::Colour), + //in.createTexture<uchar4>(Channel::Colour), + rgb_buf, thresh, col_smooth, filling, @@ -250,7 +268,7 @@ bool ColourMLS::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t // ====== Aggregating MLS ====================================================== -AggreMLS::AggreMLS(ftl::Configurable *cfg) : ftl::operators::Operator(cfg), temp_(ftl::data::Frame::make_standalone()) { +AggreMLS::AggreMLS(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg), temp_(ftl::data::Frame::make_standalone()) { temp_.store(); } @@ -282,6 +300,22 @@ bool AggreMLS::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t s normals_horiz_.create(size.height, size.width); centroid_vert_.create(size.width, size.height); + auto cvstream = cv::cuda::StreamAccessor::wrapStream(stream); + + const GpuMat &rgb = in.get<GpuMat>(Channel::Colour); + GpuMat rgb_buf; + if (rgb.size() != size) { + if (graph()->hasBuffer(Buffer::LowLeft, in.source())) { + rgb_buf = graph()->getBuffer(Buffer::LowLeft, in.source()); + } else { + auto &t = graph()->createBuffer(Buffer::LowLeft, in.source()); + cv::cuda::resize(rgb, t, size, 0, 0, cv::INTER_LINEAR, cvstream); + rgb_buf = t; + } + } else { + rgb_buf = rgb; + } + // FIXME: Assume in and out are the same frame. for (int i=0; i<iters; ++i) { @@ -292,7 +326,8 @@ bool AggreMLS::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t s normals_horiz_, in.createTexture<float>(Channel::Depth), centroid_horiz_, - in.createTexture<uchar4>(Channel::Colour), + //in.createTexture<uchar4>(Channel::Colour), + rgb_buf, thresh, col_smooth, radius, @@ -333,7 +368,8 @@ bool AggreMLS::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t s temp.createTexture<half4>(Channel::Normals, ftl::rgbd::Format<half4>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), in.createTexture<float>(Channel::Depth), temp.createTexture<float>(Channel::Depth, ftl::rgbd::Format<float>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), - in.createTexture<uchar4>(Channel::Colour), + //in.createTexture<uchar4>(Channel::Colour), + rgb_buf, thresh, col_smooth, false, @@ -352,7 +388,7 @@ bool AggreMLS::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t s // ====== Adaptive MLS ========================================================= -AdaptiveMLS::AdaptiveMLS(ftl::Configurable *cfg) : ftl::operators::Operator(cfg), temp_(ftl::data::Frame::make_standalone()) { +AdaptiveMLS::AdaptiveMLS(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg), temp_(ftl::data::Frame::make_standalone()) { } diff --git a/components/operators/src/smoothing_cuda.hpp b/components/operators/src/smoothing_cuda.hpp index 5454d91e2aea43c49ae806524a98bc0cc85ef086..c44c7787b9fe8dd18d6e148d96c8d71803ba368c 100644 --- a/components/operators/src/smoothing_cuda.hpp +++ b/components/operators/src/smoothing_cuda.hpp @@ -35,7 +35,8 @@ void colour_mls_smooth_csr( ftl::cuda::TextureObject<half4> &normals_out, ftl::cuda::TextureObject<float> &depth_in, ftl::cuda::TextureObject<float> &depth_out, - ftl::cuda::TextureObject<uchar4> &colour_in, + //ftl::cuda::TextureObject<uchar4> &colour_in, + const cv::cuda::GpuMat &colour_in, float smoothing, float colour_smoothing, bool filling, @@ -56,7 +57,8 @@ void mls_aggr_horiz( ftl::cuda::TextureObject<half4> &normals_out, ftl::cuda::TextureObject<float> &depth_in, ftl::cuda::TextureObject<float4> ¢roid_out, - ftl::cuda::TextureObject<uchar4> &colour_in, + //ftl::cuda::TextureObject<uchar4> &colour_in, + const cv::cuda::GpuMat &colour_in, float smoothing, float colour_smoothing, int radius, diff --git a/components/operators/src/weighting.cpp b/components/operators/src/weighting.cpp index 5864f8caf87ee241b690e23072e26e0e2f3ecb26..33f1a922d7ec8306899db071c43fd5eeac94f4e8 100644 --- a/components/operators/src/weighting.cpp +++ b/components/operators/src/weighting.cpp @@ -9,7 +9,7 @@ using ftl::operators::CullWeight; using ftl::operators::DegradeWeight; using ftl::codecs::Channel; -PixelWeights::PixelWeights(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) { +PixelWeights::PixelWeights(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg) { } @@ -45,9 +45,10 @@ bool PixelWeights::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream Channel dchan = (in.hasChannel(Channel::Depth)) ? Channel::Depth : Channel::GroundTruth; if (!out.hasChannel(Channel::Mask)) { + cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream); auto &m = out.create<cv::cuda::GpuMat>(Channel::Mask); m.create(in.get<cv::cuda::GpuMat>(dchan).size(), CV_8UC1); - m.setTo(cv::Scalar(0)); + m.setTo(cv::Scalar(0), cvstream); } if (output_normals) { @@ -76,7 +77,7 @@ bool PixelWeights::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream return true; } -CullWeight::CullWeight(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) { +CullWeight::CullWeight(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg) { } @@ -102,7 +103,7 @@ bool CullWeight::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t -DegradeWeight::DegradeWeight(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) { +DegradeWeight::DegradeWeight(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg) { } diff --git a/components/renderers/cpp/src/CUDARender.cpp b/components/renderers/cpp/src/CUDARender.cpp index 1ed37cca6563ad40729bf81d68a985ce3e832ee0..dab897d5696f78ebffbec097485778bebd57c03b 100644 --- a/components/renderers/cpp/src/CUDARender.cpp +++ b/components/renderers/cpp/src/CUDARender.cpp @@ -99,7 +99,8 @@ CUDARender::CUDARender(nlohmann::json &config) : ftl::render::FSRenderer(config) } } - cudaSafeCall(cudaStreamCreate(&stream_)); + //cudaSafeCall(cudaStreamCreate(&stream_)); + stream_ = 0; last_frame_ = -1; temp_.store(); @@ -249,6 +250,9 @@ void CUDARender::_mesh(ftl::rgbd::Frame &out, const Eigen::Matrix4d &t, cudaStre int valid_count = 0; + // FIXME: Is it possible to remember previously if there should be depth? + bool use_depth = scene_->anyHasChannel(Channel::Depth) || scene_->anyHasChannel(Channel::GroundTruth); + // For each source depth map for (size_t i=0; i < scene_->frames.size(); ++i) { //if (!scene_->hasFrame(i)) continue; @@ -260,6 +264,11 @@ void CUDARender::_mesh(ftl::rgbd::Frame &out, const Eigen::Matrix4d &t, cudaStre continue; } + // We have the needed depth data? + if (use_depth && !f.hasOwn(Channel::Depth) && !f.hasOwn(Channel::GroundTruth)) { + continue; + } + ++valid_count; //auto pose = MatrixConversion::toCUDA(t.cast<float>() * f.getPose().cast<float>()); @@ -272,20 +281,22 @@ void CUDARender::_mesh(ftl::rgbd::Frame &out, const Eigen::Matrix4d &t, cudaStre auto &screenbuffer = _getScreenBuffer(bufsize); // Calculate and save virtual view screen position of each source pixel - if (f.hasChannel(Channel::Depth)) { - ftl::cuda::screen_coord( - f.createTexture<float>(Channel::Depth), - depthbuffer, - screenbuffer, - params_, transform, f.getLeftCamera(), stream - ); - } else if (f.hasChannel(Channel::GroundTruth)) { - ftl::cuda::screen_coord( - f.createTexture<float>(Channel::GroundTruth), - depthbuffer, - screenbuffer, - params_, transform, f.getLeftCamera(), stream - ); + if (use_depth) { + if (f.hasChannel(Channel::Depth)) { + ftl::cuda::screen_coord( + f.createTexture<float>(Channel::Depth), + depthbuffer, + screenbuffer, + params_, transform, f.getLeftCamera(), stream + ); + } else if (f.hasChannel(Channel::GroundTruth)) { + ftl::cuda::screen_coord( + f.createTexture<float>(Channel::GroundTruth), + depthbuffer, + screenbuffer, + params_, transform, f.getLeftCamera(), stream + ); + } } else { // Constant depth version ftl::cuda::screen_coord( @@ -510,6 +521,8 @@ void CUDARender::begin(ftl::rgbd::Frame &out, ftl::codecs::Channel chan) { throw FTL_Error("Already rendering"); } + stream_ = out.stream(); + out_ = &out; const auto &camera = out.getLeftCamera(); cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream_); diff --git a/components/renderers/cpp/src/clipping.cu b/components/renderers/cpp/src/clipping.cu index 016e08855e6e042c90662a98d675d9e38ce344bb..d73e7a50bf2000ae6e2ba074280a21a0c7e880da 100644 --- a/components/renderers/cpp/src/clipping.cu +++ b/components/renderers/cpp/src/clipping.cu @@ -27,12 +27,15 @@ __global__ void clipping_kernel(ftl::cuda::TextureObject<float> depth, ftl::cuda const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; - if (x < depth.width() && y < depth.height()) { - float d = depth(x,y); - float4 p = make_float4(camera.screenToCam(x,y,d), 0.0f); + const float sx = float(x) / float(colour.width()) * float(depth.width()); + const float sy = float(y) / float(colour.height()) * float(depth.height()); + + if (sx >= 0.0f && sx < depth.width() && sy < depth.height() && sy >= 0.0f) { + float d = depth(sx,sy); + float4 p = make_float4(camera.screenToCam(sx,sy,d), 0.0f); if (d <= camera.minDepth || d >= camera.maxDepth || isClipped(p, clip)) { - depth(x,y) = 0.0f; + depth(sx,sy) = 0.0f; colour(x,y) = make_uchar4(0,0,0,0); } } @@ -54,7 +57,7 @@ void ftl::cuda::clipping(ftl::cuda::TextureObject<float> &depth, const ftl::rgbd::Camera &camera, const ClipSpace &clip, cudaStream_t stream) { - const dim3 gridSize((depth.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 gridSize((colour.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (colour.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); clipping_kernel<<<gridSize, blockSize, 0, stream>>>(depth, colour, camera, clip); diff --git a/components/renderers/cpp/src/colouriser.cpp b/components/renderers/cpp/src/colouriser.cpp index 574073d378414068adf66143c6beb95926e65b54..a8399d016b3229fe04d009d49ca9b902f4e6e68e 100644 --- a/components/renderers/cpp/src/colouriser.cpp +++ b/components/renderers/cpp/src/colouriser.cpp @@ -2,6 +2,7 @@ #include "splatter_cuda.hpp" #include <ftl/cuda/colour_cuda.hpp> #include <ftl/cuda/normals.hpp> +#include <ftl/operators/cuda/mask.hpp> #include <opencv2/cudaarithm.hpp> #include <opencv2/cudaimgproc.hpp> @@ -119,9 +120,7 @@ TextureObject<uchar4> &Colouriser::colourise(ftl::rgbd::Frame &f, Channel c, cud } switch (c) { - case Channel::Overlay : return f.createTexture<uchar4>(c); - case Channel::ColourHighRes : - case Channel::RightHighRes : + case Channel::Overlay : case Channel::Colour : case Channel::Colour2 : return _processColour(f,c,stream); case Channel::GroundTruth : diff --git a/components/renderers/cpp/src/splatter_cuda.hpp b/components/renderers/cpp/src/splatter_cuda.hpp index e1021651ce1989cbf06b0a4b00fc04a1d457b844..41088cc226e34f4916c6919d8edc465906dbd499 100644 --- a/components/renderers/cpp/src/splatter_cuda.hpp +++ b/components/renderers/cpp/src/splatter_cuda.hpp @@ -165,11 +165,6 @@ namespace cuda { uchar4 bad_colour, cudaStream_t stream); - void show_mask( - ftl::cuda::TextureObject<uchar4> &colour, - ftl::cuda::TextureObject<uint8_t> &mask, - int id, uchar4 style, cudaStream_t stream); - void merge_convert_depth( ftl::cuda::TextureObject<int> &d1, ftl::cuda::TextureObject<float> &d2, diff --git a/components/rgbd-sources/include/ftl/rgbd/frame.hpp b/components/rgbd-sources/include/ftl/rgbd/frame.hpp index e3a77ce2fab9af14b2b3b1f6ebb0c34a348bf9a4..99cbd5a1a9e8eeb207bc57d9b161d9dc514971d5 100644 --- a/components/rgbd-sources/include/ftl/rgbd/frame.hpp +++ b/components/rgbd-sources/include/ftl/rgbd/frame.hpp @@ -65,6 +65,7 @@ class VideoFrame { const cv::Mat &getCPU() const; const cv::cuda::GpuMat &getGPU() const; + /// gets cv::Mat for cv::Mat &setCPU(); cv::cuda::GpuMat &setGPU(); @@ -95,6 +96,8 @@ class Frame : public ftl::data::Frame { ftl::rgbd::Camera &setRight(); Eigen::Matrix4d &setPose(); + cv::Size getSize(ftl::codecs::Channel c=ftl::codecs::Channel::Left) const; + ftl::calibration::CalibrationData& setCalibration(); const ftl::calibration::CalibrationData& getCalibration() const; diff --git a/components/rgbd-sources/src/frame.cpp b/components/rgbd-sources/src/frame.cpp index a4ace9e46eca90c70fa2910645d5513496cf4fe9..f6379f977a1fd936c2910dbc0a0c8d03b8fdedd1 100644 --- a/components/rgbd-sources/src/frame.cpp +++ b/components/rgbd-sources/src/frame.cpp @@ -85,6 +85,7 @@ const cv::cuda::GpuMat &VideoFrame::getGPU() const { } cv::Mat &VideoFrame::setCPU() { + validhost = true; return host; } @@ -96,6 +97,7 @@ cv::cuda::GpuMat &VideoFrame::setGPU() { void ftl::rgbd::Frame::upload(ftl::codecs::Channel c) { auto &vframe = set<VideoFrame>(c); const auto &cpumat = vframe.getCPU(); + LOG(WARNING) << "Sync Upload: " << int(c); vframe.createGPU().upload(cpumat); } @@ -114,6 +116,18 @@ unsigned int ftl::rgbd::Frame::getOpenGL(ftl::codecs::Channel c) const { return vframe.getOpenGL(); } +cv::Size ftl::rgbd::Frame::getSize(ftl::codecs::Channel c) const { + if (hasChannel(c)) { + const auto &f = get<VideoFrame>(c); + if (f.isGPU()) { + return f.getGPU().size(); + } else { + return f.getCPU().size(); + } + } else { + throw FTL_Error("Channel does not exists: " << int(c)); + } +} const ftl::rgbd::Camera &ftl::rgbd::Frame::getLeftCamera() const { return std::get<0>(this->get<std::tuple<ftl::rgbd::Camera, ftl::codecs::Channel, int>>(ftl::codecs::Channel::Calibration)); diff --git a/components/rgbd-sources/src/sources/stereovideo/device.hpp b/components/rgbd-sources/src/sources/stereovideo/device.hpp index f575ea3aec6b23ccbfe755690e3e6bc4a94e7b6d..0b7d4effb35643ae0fddc90db3240da6d938775b 100644 --- a/components/rgbd-sources/src/sources/stereovideo/device.hpp +++ b/components/rgbd-sources/src/sources/stereovideo/device.hpp @@ -35,16 +35,11 @@ class Device : public Configurable { //virtual const std::vector<DeviceDetails> &listDevices()=0; virtual bool grab()=0; - virtual bool get(ftl::rgbd::Frame &frame, cv::cuda::GpuMat &l, cv::cuda::GpuMat &r, cv::cuda::GpuMat &h_l, cv::Mat &h_r, StereoRectification *c, cv::cuda::Stream &stream)=0; + virtual bool get(ftl::rgbd::Frame &frame, StereoRectification *c, cv::cuda::Stream &stream)=0; virtual unsigned int width() const =0; virtual unsigned int height() const =0; - virtual unsigned int fullWidth() const =0; - virtual unsigned int fullHeight() const =0; - - inline bool hasHigherRes() const { return fullWidth() != width(); } - virtual double getTimestamp() const =0; virtual bool isStereo() const =0; diff --git a/components/rgbd-sources/src/sources/stereovideo/opencv.cpp b/components/rgbd-sources/src/sources/stereovideo/opencv.cpp index 4ce893c6b0b6e1b9ab25de43c3789ad8e3b40cf6..a781765a821df181ff8e34381d7ffd70d7ede103 100644 --- a/components/rgbd-sources/src/sources/stereovideo/opencv.cpp +++ b/components/rgbd-sources/src/sources/stereovideo/opencv.cpp @@ -140,14 +140,9 @@ OpenCVDevice::OpenCVDevice(nlohmann::json &config, bool stereo) width_ = frame.cols; height_ = frame.rows; - dwidth_ = value("depth_width", width_); - float aspect = float(height_) / float(width_); - dheight_ = value("depth_height", std::min(uint32_t(aspect*float(dwidth_)), height_)) & 0xFFFe; - // Allocate page locked host memory for fast GPU transfer - left_hm_ = cv::cuda::HostMem(dheight_, dwidth_, CV_8UC4); - right_hm_ = cv::cuda::HostMem(dheight_, dwidth_, CV_8UC4); - hres_hm_ = cv::cuda::HostMem(height_, width_, CV_8UC4); + left_hm_ = cv::cuda::HostMem(height_, width_, CV_8UC4); + right_hm_ = cv::cuda::HostMem(height_, width_, CV_8UC4); interpolation_ = value("inter_cubic", true) ? cv::INTER_CUBIC : cv::INTER_LINEAR; on("inter_cubic", [this](){ @@ -332,47 +327,34 @@ bool OpenCVDevice::grab() { return true; } -bool OpenCVDevice::get(ftl::rgbd::Frame &frame, cv::cuda::GpuMat &l_out, cv::cuda::GpuMat &r_out, - cv::cuda::GpuMat &l_hres_out, cv::Mat &r_hres_out, StereoRectification *c, cv::cuda::Stream &stream) { +bool OpenCVDevice::get(ftl::rgbd::Frame &frame, StereoRectification *c, cv::cuda::Stream &stream) { Mat l, r ,hres; // Use page locked memory l = left_hm_.createMatHeader(); r = right_hm_.createMatHeader(); - hres = hres_hm_.createMatHeader(); - - Mat &lfull = (!hasHigherRes()) ? l : hres; - Mat &rfull = (!hasHigherRes()) ? r : rtmp_; if (!camera_a_) return false; - //std::future<bool> future_b; if (camera_b_) { - //future_b = std::move(ftl::pool.push([this,&rfull,&r,c,&r_out,&r_hres_out,&stream](int id) { - if (!camera_b_->retrieve(frame_r_)) { - LOG(ERROR) << "Unable to read frame from camera B"; - return false; - } else { - cv::cvtColor(frame_r_, rtmp2_, cv::COLOR_BGR2BGRA); - - //if (stereo_) { - c->rectify(rtmp2_, rfull, Channel::Right); - - if (hasHigherRes()) { - // TODO: Use threads? - cv::resize(rfull, r, r.size(), 0.0, 0.0, interpolation_); - r_hres_out = rfull; - } - else { - r_hres_out = Mat(); - } - //} - - r_out.upload(r, stream); - } - //return true; - //})); + if (!camera_b_->retrieve(frame_r_)) { + LOG(ERROR) << "Unable to read frame from camera B"; + return false; + } + else { + cv::cvtColor(frame_r_, rtmp_, cv::COLOR_BGR2BGRA); + + //if (stereo_) { + c->rectify(rtmp_, r, Channel::Right); + //} + + auto& f_right = frame.create<ftl::rgbd::VideoFrame>(Channel::Right); + cv::cuda::GpuMat& r_out = f_right.createGPU(); + cv::Mat &r_host = f_right.setCPU(); + r_out.upload(r, stream); + r.copyTo(r_host); + } } if (camera_b_) { @@ -383,11 +365,8 @@ bool OpenCVDevice::get(ftl::rgbd::Frame &frame, cv::cuda::GpuMat &l_out, cv::cud return false; } - /*if (camera_b_ && !camera_b_->retrieve(rfull)) { - LOG(ERROR) << "Unable to read frame from camera B"; - return false; - }*/ - } else { + } + else { if (!camera_a_->read(frame_l_)) { LOG(ERROR) << "Unable to read frame from camera A"; return false; @@ -396,32 +375,19 @@ bool OpenCVDevice::get(ftl::rgbd::Frame &frame, cv::cuda::GpuMat &l_out, cv::cud if (stereo_) { cv::cvtColor(frame_l_, ltmp_, cv::COLOR_BGR2BGRA); - //FTL_Profile("Rectification", 0.01); - //c->rectifyStereo(lfull, rfull); - c->rectify(ltmp_, lfull, Channel::Left); - - // Need to resize - //if (hasHigherRes()) { - // TODO: Use threads? - // cv::resize(rfull, r, r.size(), 0.0, 0.0, interpolation_); - //} - } else { - cv::cvtColor(frame_l_, lfull, cv::COLOR_BGR2BGRA); + c->rectify(ltmp_, l, Channel::Left); } - - if (hasHigherRes()) { - //FTL_Profile("Frame Resize", 0.01); - cv::resize(lfull, l, l.size(), 0.0, 0.0, interpolation_); - l_hres_out.upload(hres, stream); - } else { - l_hres_out = cv::cuda::GpuMat(); + else { + cv::cvtColor(frame_l_, l, cv::COLOR_BGR2BGRA); } { - //FTL_Profile("Upload", 0.05); + auto& f_left = frame.create<ftl::rgbd::VideoFrame>(Channel::Left); + cv::cuda::GpuMat& l_out = f_left.createGPU(); + cv::Mat &l_host = f_left.setCPU(); l_out.upload(l, stream); + l.copyTo(l_host); } - //r_out.upload(r, stream); if (!frame.hasChannel(Channel::Thumbnail)) { cv::Mat thumb; @@ -431,11 +397,6 @@ bool OpenCVDevice::get(ftl::rgbd::Frame &frame, cv::cuda::GpuMat &l_out, cv::cud cv::imencode(".jpg", thumb, thumbdata, params); } - //if (camera_b_) { - //FTL_Profile("WaitCamB", 0.05); - //future_b.wait(); - //} - return true; } diff --git a/components/rgbd-sources/src/sources/stereovideo/opencv.hpp b/components/rgbd-sources/src/sources/stereovideo/opencv.hpp index 37389e63f286458a1eeb4f8180c534a98e556941..607360591b168b15c3d1b452f1c039b3827cb2f6 100644 --- a/components/rgbd-sources/src/sources/stereovideo/opencv.hpp +++ b/components/rgbd-sources/src/sources/stereovideo/opencv.hpp @@ -20,13 +20,10 @@ class OpenCVDevice : public ftl::rgbd::detail::Device { static std::vector<DeviceDetails> listDevices(); bool grab() override; - bool get(ftl::rgbd::Frame &frame, cv::cuda::GpuMat &l, cv::cuda::GpuMat &r, cv::cuda::GpuMat &h_l, cv::Mat &h_r, StereoRectification *c, cv::cuda::Stream &stream) override; + bool get(ftl::rgbd::Frame &frame, StereoRectification *c, cv::cuda::Stream &stream) override; - unsigned int width() const override { return dwidth_; } - unsigned int height() const override { return dheight_; } - - unsigned int fullWidth() const override { return width_; } - unsigned int fullHeight() const override { return height_; } + unsigned int width() const override { return width_; } + unsigned int height() const override { return height_; } double getTimestamp() const override; diff --git a/components/rgbd-sources/src/sources/stereovideo/pylon.cpp b/components/rgbd-sources/src/sources/stereovideo/pylon.cpp index 57fea9555d845c29bd1c87b5f5a041c7012f31e4..b9690e2d8fed449b57552d08e147d7c3fa1e717d 100644 --- a/components/rgbd-sources/src/sources/stereovideo/pylon.cpp +++ b/components/rgbd-sources/src/sources/stereovideo/pylon.cpp @@ -94,17 +94,17 @@ PylonDevice::PylonDevice(nlohmann::json &config) } // Choose a good default depth res - width_ = value("depth_width", std::min(1280u,fullwidth_)) & 0xFFFe; - float aspect = float(fullheight_) / float(fullwidth_); - height_ = value("depth_height", std::min(uint32_t(aspect*float(width_)), fullheight_)) & 0xFFFe; + //width_ = value("depth_width", std::min(1280u,fullwidth_)) & 0xFFFe; + //float aspect = float(fullheight_) / float(fullwidth_); + //height_ = value("depth_height", std::min(uint32_t(aspect*float(width_)), fullheight_)) & 0xFFFe; - LOG(INFO) << "Depth resolution: " << width_ << "x" << height_; + //LOG(INFO) << "Depth resolution: " << width_ << "x" << height_; // Allocate page locked host memory for fast GPU transfer - left_hm_ = cv::cuda::HostMem(height_, width_, CV_8UC4); - right_hm_ = cv::cuda::HostMem(height_, width_, CV_8UC4); - hres_hm_ = cv::cuda::HostMem(fullheight_, fullwidth_, CV_8UC4); - rtmp_.create(fullheight_, fullwidth_, CV_8UC4); + left_hm_ = cv::cuda::HostMem(fullheight_, fullwidth_, CV_8UC4); + right_hm_ = cv::cuda::HostMem(fullheight_, fullwidth_, CV_8UC4); + //hres_hm_ = cv::cuda::HostMem(fullheight_, fullwidth_, CV_8UC4); + //rtmp_.create(fullheight_, fullwidth_, CV_8UC4); on("exposure", [this]() { if (lcam_->GetDeviceInfo().GetModelName() != "Emulation") { @@ -122,10 +122,29 @@ PylonDevice::PylonDevice(nlohmann::json &config) interpolation_ = value("inter_cubic", true) ? cv::INTER_CUBIC : cv::INTER_LINEAR; }); + + monitor_ = true; + temperature_monitor_ = ftl::timer::add(ftl::timer::timerlevel_t::kTimerIdle1, 10.0, [this](int64_t ts) { + float temperature = (rcam_) ? std::max(lcam_->DeviceTemperature(), rcam_->DeviceTemperature()) : lcam_->DeviceTemperature(); + + LOG_IF(WARNING, temperature > 53.0) + << "Camera temperature over 50C (value: " << temperature << ")"; + + // TODO: check actual temperature status. + if (temperature > 65.0) { + LOG(FATAL) << "Cameras are overheating"; + } + + return true; + }); } PylonDevice::~PylonDevice() { + monitor_ = false; + temperature_monitor_.cancel(); + lcam_->Close(); + rcam_->Close(); } static std::vector<ftl::rgbd::detail::DeviceDetails> pylon_devices; @@ -226,24 +245,14 @@ bool PylonDevice::_retrieveFrames(Pylon::CGrabResultPtr &result, Pylon::CBaslerU return true; } -bool PylonDevice::get(ftl::rgbd::Frame &frame, cv::cuda::GpuMat &l_out, cv::cuda::GpuMat &r_out, cv::cuda::GpuMat &h_l, cv::Mat &h_r, StereoRectification *c, cv::cuda::Stream &stream) { +bool PylonDevice::get(ftl::rgbd::Frame &frame, StereoRectification *c, cv::cuda::Stream &stream) { if (!isReady()) return false; - Mat l, r ,hres; + Mat l, r; // Use page locked memory l = left_hm_.createMatHeader(); r = right_hm_.createMatHeader(); - hres = hres_hm_.createMatHeader(); - - Mat &lfull = (!hasHigherRes()) ? l : hres; - Mat &rfull = (!hasHigherRes()) ? r : rtmp_; - - //ftl::cuda::setDevice(); - - //int dev; - //cudaGetDevice(&dev); - //LOG(INFO) << "Current cuda device = " << dev; if (isStereo()) { auto lcount = lcam_->NumReadyBuffers.GetValue(); @@ -283,50 +292,42 @@ bool PylonDevice::get(ftl::rgbd::Frame &frame, cv::cuda::GpuMat &l_out, cv::cuda try { FTL_Profile("Frame Retrieve", 0.005); - //std::future<bool> future_b; bool res_r = false; + if (rcam_) { - //future_b = std::move(ftl::pool.push([this,&rfull,&r,&l,c,&r_out,&h_r,&stream](int id) { - Pylon::CGrabResultPtr result_right; - - if (_retrieveFrames(result_right, rcam_)) { - - cv::Mat wrap_right( - result_right->GetHeight(), - result_right->GetWidth(), - CV_8UC1, - (uint8_t*)result_right->GetBuffer()); - - { - FTL_Profile("Bayer Colour (R)", 0.005); - cv::cvtColor(wrap_right, rtmp2_, cv::COLOR_BayerRG2BGRA); - } - - //if (isStereo()) { - FTL_Profile("Rectify and Resize (R)", 0.005); - c->rectify(rtmp2_, rfull, Channel::Right); - - if (hasHigherRes()) { - cv::resize(rfull, r, r.size(), 0.0, 0.0, interpolation_); - h_r = rfull; - } - else { - h_r = Mat(); - } - //} - - r_out.upload(r, stream); - res_r = true; + Pylon::CGrabResultPtr result_right; + + if (_retrieveFrames(result_right, rcam_)) { + + cv::Mat wrap_right( + result_right->GetHeight(), + result_right->GetWidth(), + CV_8UC1, + (uint8_t*)result_right->GetBuffer()); + + { + FTL_Profile("Bayer Colour (R)", 0.005); + cv::cvtColor(wrap_right, rtmp_, cv::COLOR_BayerRG2BGRA); } - //})); + + { + FTL_Profile("Rectify (R)", 0.005); + c->rectify(rtmp_, r, Channel::Right); + } + + auto& f_right = frame.create<ftl::rgbd::VideoFrame>(Channel::Right); + cv::cuda::GpuMat& r_out = f_right.createGPU(); + cv::Mat &r_host = f_right.setCPU(); + + r_out.upload(r, stream); + r.copyTo(r_host); + res_r = true; + } } Pylon::CGrabResultPtr result_left; if (!_retrieveFrames(result_left, lcam_)) { - if (rcam_) { - //future_b.wait(); - } return false; } @@ -339,28 +340,24 @@ bool PylonDevice::get(ftl::rgbd::Frame &frame, cv::cuda::GpuMat &l_out, cv::cuda { FTL_Profile("Bayer Colour (L)", 0.005); if (isStereo()) cv::cvtColor(wrap_left, ltmp_, cv::COLOR_BayerRG2BGRA); - else cv::cvtColor(wrap_left, lfull, cv::COLOR_BayerRG2BGRA); + else cv::cvtColor(wrap_left, l, cv::COLOR_BayerRG2BGRA); } { - FTL_Profile("Rectify and Resize (L)", 0.005); + FTL_Profile("Rectify (L)", 0.005); if (isStereo()) { - c->rectify(ltmp_, lfull, Channel::Left); - } - - if (hasHigherRes()) { - cv::resize(lfull, l, l.size(), 0.0, 0.0, interpolation_); - h_l.upload(hres, stream); - } else { - h_l = cv::cuda::GpuMat(); + c->rectify(ltmp_, l, Channel::Left); } } + auto& f_left = frame.create<ftl::rgbd::VideoFrame>(Channel::Left); + cv::cuda::GpuMat& l_out = f_left.createGPU(); + cv::Mat &l_host = f_left.setCPU(); + l_out.upload(l, stream); + l.copyTo(l_host); if (rcam_) { - //future_b.wait(); - //if (!future_b.get()) return false; if (!res_r) return false; } diff --git a/components/rgbd-sources/src/sources/stereovideo/pylon.hpp b/components/rgbd-sources/src/sources/stereovideo/pylon.hpp index 707e8f437dd971d71ed5ec95ed28592b2f9145be..096d504acf71c97e7600ed86b5ea9e811bd54a00 100644 --- a/components/rgbd-sources/src/sources/stereovideo/pylon.hpp +++ b/components/rgbd-sources/src/sources/stereovideo/pylon.hpp @@ -22,13 +22,10 @@ class PylonDevice : public ftl::rgbd::detail::Device { static std::vector<DeviceDetails> listDevices(); bool grab() override; - bool get(ftl::rgbd::Frame &frame, cv::cuda::GpuMat &l, cv::cuda::GpuMat &r, cv::cuda::GpuMat &h_l, cv::Mat &h_r, StereoRectification *c, cv::cuda::Stream &stream) override; + bool get(ftl::rgbd::Frame &frame, StereoRectification *c, cv::cuda::Stream &stream) override; - unsigned int width() const override { return width_; } - unsigned int height() const override { return height_; }; - - unsigned int fullWidth() const override { return fullwidth_; } - unsigned int fullHeight() const override { return fullheight_; } + unsigned int width() const override { return fullwidth_; } + unsigned int height() const override { return fullheight_; } double getTimestamp() const override { return 0.0; } @@ -45,8 +42,6 @@ class PylonDevice : public ftl::rgbd::detail::Device { cv::Mat tmp_; uint32_t fullwidth_; uint32_t fullheight_; - uint32_t width_; - uint32_t height_; std::string name_; std::string serial_; int left_fail_=0; @@ -55,12 +50,13 @@ class PylonDevice : public ftl::rgbd::detail::Device { cv::cuda::HostMem left_hm_; cv::cuda::HostMem right_hm_; - cv::cuda::HostMem hres_hm_; cv::Mat rtmp_; - cv::Mat rtmp2_; cv::Mat ltmp_; int interpolation_; + std::atomic_bool monitor_; + ftl::Handle temperature_monitor_; + void _configureCamera(Pylon::CBaslerUniversalInstantCamera *cam); bool _retrieveFrames(Pylon::CGrabResultPtr &result, Pylon::CBaslerUniversalInstantCamera *cam); }; diff --git a/components/rgbd-sources/src/sources/stereovideo/stereovideo.cpp b/components/rgbd-sources/src/sources/stereovideo/stereovideo.cpp index aade34c89124470862ed3bc7cd7a72c506f777f2..192697d2ac240ad434e3a21b9ad35ff816cfffe4 100644 --- a/components/rgbd-sources/src/sources/stereovideo/stereovideo.cpp +++ b/components/rgbd-sources/src/sources/stereovideo/stereovideo.cpp @@ -74,6 +74,9 @@ ftl::rgbd::detail::Device::~Device() { StereoVideoSource::StereoVideoSource(ftl::rgbd::Source *host) : ftl::rgbd::BaseSourceImpl(host), ready_(false) { + + cudaSafeCall( cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking) ); + auto uri = host->get<std::string>("uri"); if (uri) { init(*uri); @@ -86,12 +89,15 @@ StereoVideoSource::StereoVideoSource(ftl::rgbd::Source *host) StereoVideoSource::StereoVideoSource(ftl::rgbd::Source *host, const string &file) : ftl::rgbd::BaseSourceImpl(host), ready_(false) { + cudaSafeCall( cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking) ); init(file); } StereoVideoSource::~StereoVideoSource() { + cudaStreamDestroy(stream_); + delete lsrc_; - if (pipeline_input_) delete pipeline_input_; + //if (pipeline_input_) delete pipeline_input_; } bool StereoVideoSource::supported(const std::string &dev) { @@ -143,15 +149,21 @@ void StereoVideoSource::init(const string &file) { if (!lsrc_) return; // throw? - color_size_ = cv::Size(lsrc_->width(), lsrc_->height()); + cv::Size size_full = cv::Size(lsrc_->width(), lsrc_->height()); - pipeline_input_ = ftl::config::create<ftl::operators::Graph>(host_, "input"); - #ifdef HAVE_OPTFLOW - pipeline_input_->append<ftl::operators::NVOpticalFlow>("optflow"); - #endif - pipeline_input_->append<ftl::operators::ColourChannels>("colour"); + // Choose a good default depth res + int w = lsrc_->value("depth_width", std::min(1280,size_full.width)) & 0xFFFe; + float aspect = float(size_full.height) / float(size_full.width); + int h = lsrc_->value("depth_height", std::min(int(aspect*float(w)), size_full.height)) & 0xFFFe; + + depth_size_ = cv::Size(w, h); + + //pipeline_input_ = ftl::config::create<ftl::operators::Graph>(host_, "input"); + //#ifdef HAVE_OPTFLOW + //pipeline_input_->append<ftl::operators::NVOpticalFlow>("optflow"); + //#endif + //pipeline_input_->append<ftl::operators::ColourChannels>("colour"); - cv::Size size_full = cv::Size(lsrc_->fullWidth(), lsrc_->fullHeight()); rectification_ = std::unique_ptr<StereoRectification> (ftl::create<StereoRectification>(host_, "rectification", size_full)); @@ -248,11 +260,11 @@ void StereoVideoSource::updateParameters(ftl::rgbd::Frame &frame) { cv::cv2eigen(calibration_.origin * rectification_->getPose(Channel::Left), pose); frame.setPose() = pose; - cv::Mat K = rectification_->cameraMatrix(color_size_); + cv::Mat K = rectification_->cameraMatrix(depth_size_); float fx = static_cast<float>(K.at<double>(0,0)); float baseline = static_cast<float>(rectification_->baseline()); - float doff = rectification_->doff(color_size_); + float doff = rectification_->doff(depth_size_); double d_resolution = this->host_->getConfig().value<double>("depth_resolution", 0.0); float min_depth = this->host_->getConfig().value<double>("min_depth", 0.45); @@ -270,8 +282,8 @@ void StereoVideoSource::updateParameters(ftl::rgbd::Frame &frame) { static_cast<float>(K.at<double>(1,1)), // Fy static_cast<float>(-K.at<double>(0,2)), // Cx static_cast<float>(-K.at<double>(1,2)), // Cy - (unsigned int) color_size_.width, - (unsigned int) color_size_.height, + (unsigned int) depth_size_.width, + (unsigned int) depth_size_.height, min_depth, max_depth, baseline, @@ -288,16 +300,16 @@ void StereoVideoSource::updateParameters(ftl::rgbd::Frame &frame) { Eigen::Matrix4d pose; auto& params = frame.setLeft(); - params.cx = -(color_size_.width / 2.0); - params.cy = -(color_size_.height / 2.0); + params.cx = -(depth_size_.width / 2.0); + params.cy = -(depth_size_.height / 2.0); params.fx = 700.0; params.fy = 700.0; params.maxDepth = host_->value("size", 1.0f); params.minDepth = 0.0f; params.doffs = 0.0; params.baseline = 0.1f; - params.width = color_size_.width; - params.height = color_size_.height;; + params.width = depth_size_.width; + params.height = depth_size_.height;; float offsetz = host_->value("offset_z", 0.0f); //state_.setPose(matrix(cv::Vec3d(0.0, 3.14159, 0.0), cv::Vec3d(0.0,0.0,params_.maxDepth+offsetz))); @@ -326,6 +338,7 @@ void StereoVideoSource::updateParameters(ftl::rgbd::Frame &frame) { bool StereoVideoSource::capture(int64_t ts) { cap_status_ = lsrc_->grab(); + if (!cap_status_) LOG(WARNING) << "Capture failed"; return cap_status_; } @@ -353,31 +366,12 @@ bool StereoVideoSource::retrieve(ftl::rgbd::Frame &frame) { do_update_params_ = false; } - cv::cuda::GpuMat gpu_dummy; - cv::Mat dummy; - auto &hres = (lsrc_->hasHigherRes()) ? frame.create<cv::cuda::GpuMat>(Channel::ColourHighRes) : gpu_dummy; - auto &hres_r = (lsrc_->hasHigherRes()) ? frame.create<cv::Mat>(Channel::RightHighRes) : dummy; - - if (lsrc_->isStereo()) { - cv::cuda::GpuMat &left = frame.create<cv::cuda::GpuMat>(Channel::Left); - cv::cuda::GpuMat &right = frame.create<cv::cuda::GpuMat>(Channel::Right); - if (!lsrc_->get(frame, left, right, hres, hres_r, rectification_.get(), stream2_)) { - frame.remove(Channel::Left); - frame.remove(Channel::Right); - } - } - else { - cv::cuda::GpuMat &left = frame.create<cv::cuda::GpuMat>(Channel::Left); - cv::cuda::GpuMat right; - if (!lsrc_->get(frame, left, right, hres, hres_r, rectification_.get(), stream2_)) { - frame.remove(Channel::Left); - } - } + auto cvstream = cv::cuda::StreamAccessor::wrapStream(frame.stream()); - //LOG(INFO) << "Channel size: " << hres.size(); + lsrc_->get(frame, rectification_.get(), cvstream); - pipeline_input_->apply(frame, frame, cv::cuda::StreamAccessor::getStream(stream2_)); - stream2_.waitForCompletion(); + cudaSafeCall(cudaEventRecord(frame.uploadEvent(), frame.stream())); + // FIXME: Currently possible that previous upload not finished return true; } diff --git a/components/rgbd-sources/src/sources/stereovideo/stereovideo.hpp b/components/rgbd-sources/src/sources/stereovideo/stereovideo.hpp index dd08f4f68baf5809a7c0eecf3911c58e658d452d..abd8652432eb6b9dbd8e876ef3fd03f86a791495 100644 --- a/components/rgbd-sources/src/sources/stereovideo/stereovideo.hpp +++ b/components/rgbd-sources/src/sources/stereovideo/stereovideo.hpp @@ -42,7 +42,7 @@ private: int64_t capts_; - cv::Size color_size_; + //cv::Size color_size_; cv::Size depth_size_; ftl::operators::Graph *pipeline_input_=nullptr; @@ -55,10 +55,8 @@ private: bool do_update_params_ = false; bool cap_status_ = false; - cv::cuda::Stream stream_; - cv::cuda::Stream stream2_; - cv::Mat mask_l_; + cudaStream_t stream_; ftl::Handle calibration_change_; std::string fname_calib_; diff --git a/components/streams/include/ftl/streams/feed.hpp b/components/streams/include/ftl/streams/feed.hpp index ac141a045950c5e266199fe8a1145641d82314cb..4aab46ca29d8106d955022529b7699e152511c22 100644 --- a/components/streams/include/ftl/streams/feed.hpp +++ b/components/streams/include/ftl/streams/feed.hpp @@ -235,6 +235,7 @@ private: void _createPipeline(uint32_t fsid); ftl::operators::Graph* _addPipeline(uint32_t fsid); + void _dispatch(const ftl::data::FrameSetPtr &fs); void _beginRecord(Filter *f); void _stopRecording(); diff --git a/components/streams/include/ftl/streams/receiver.hpp b/components/streams/include/ftl/streams/receiver.hpp index 7febc9e416794b762f7df9a02b0f385840c071a3..07f90efeec27fb76a24f43aa09532038e4c19b67 100644 --- a/components/streams/include/ftl/streams/receiver.hpp +++ b/components/streams/include/ftl/streams/receiver.hpp @@ -93,6 +93,7 @@ class Receiver : public ftl::Configurable, public ftl::data::Generator { InternalVideoStates &_getVideoFrame(const ftl::codecs::StreamPacket &spkt, int ix=0); InternalAudioStates &_getAudioFrame(const ftl::codecs::StreamPacket &spkt, int ix=0); void _finishPacket(ftl::streams::LockedFrameSet &fs, size_t fix); + void _terminateVideoPacket(const ftl::codecs::StreamPacket &spkt, const ftl::codecs::Packet &pkt); }; } diff --git a/components/streams/src/feed.cpp b/components/streams/src/feed.cpp index 878497ac584577d53bfde1ecf9f3cf26eeac50f6..6822813cb30c7bddb12c39c6384ad89ac130913f 100644 --- a/components/streams/src/feed.cpp +++ b/components/streams/src/feed.cpp @@ -179,44 +179,26 @@ Feed::Feed(nlohmann::json &config, ftl::net::Universe*net) : SHARED_LOCK(mtx_, lk); if (pre_pipelines_.count(fs->frameset()) == 1) { - pipeline = pre_pipelines_[fs->frameset()]; //->apply(*fs, *fs, 0); + pipeline = pre_pipelines_[fs->frameset()]; } lk.unlock(); - if (pipeline) pipeline->apply(*fs, *fs, 0); + bool did_pipe = false; - lk.lock(); - - std::atomic_store(&latest_.at(fs->frameset()), fs); - - if (fs->hasAnyChanged(Channel::Thumbnail)) { - _saveThumbnail(fs); - } + if (pipeline) { + bool did_pipe = pipeline->apply(*fs, *fs, [this,fs]() { + _dispatch(fs); + }); - for (auto* filter : filters_) { - // TODO: smarter update (update only when changed) instead of - // filter->channels_available_ = fs->channels(); - - if (filter->sources().empty()) { - //filter->channels_available_ = fs->channels(); - filter->handler_.triggerParallel(fs); - } - else { - // TODO: process partial/complete sets here (drop), that is - // intersection filter->sources() and fs->sources() is - // same as filter->sources(). - - // TODO: reverse map source ids required here? - for (const auto& src : filter->sources()) { - //if (fs->hasFrame(src)) { - if (fs->frameset() == src) { - //filter->channels_available_ = fs->channels(); - filter->handler_.triggerParallel(fs); - break; - } - } + if (!did_pipe) { + LOG(WARNING) << "Feed Pipeline dropped"; + ftl::pool.push([this,fs](int id) { + _dispatch(fs); + }); } + } else { + _dispatch(fs); } return true; @@ -270,6 +252,41 @@ Feed::~Feed() { } } +void Feed::_dispatch(const ftl::data::FrameSetPtr &fs) { + SHARED_LOCK(mtx_, lk); + + std::atomic_store(&latest_.at(fs->frameset()), fs); + + if (fs->hasAnyChanged(Channel::Thumbnail)) { + _saveThumbnail(fs); + } + + for (auto* filter : filters_) { + // TODO: smarter update (update only when changed) instead of + // filter->channels_available_ = fs->channels(); + + if (filter->sources().empty()) { + //filter->channels_available_ = fs->channels(); + filter->handler_.triggerParallel(fs); + } + else { + // TODO: process partial/complete sets here (drop), that is + // intersection filter->sources() and fs->sources() is + // same as filter->sources(). + + // TODO: reverse map source ids required here? + for (const auto& src : filter->sources()) { + //if (fs->hasFrame(src)) { + if (fs->frameset() == src) { + //filter->channels_available_ = fs->channels(); + filter->handler_.triggerParallel(fs); + break; + } + } + } + } +} + void Feed::_saveThumbnail(const ftl::data::FrameSetPtr& fs) { // TODO: Put thumb somewhere here... } @@ -457,8 +474,6 @@ void Feed::_createPipeline(uint32_t fsid) { p->append<ftl::operators::DepthChannel>("depth")->value("enabled", false); p->append<ftl::operators::ClipScene>("clipping")->value("enabled", false); p->append<ftl::operators::ColourChannels>("colour"); // Convert BGR to BGRA - p->append<ftl::operators::DetectAndTrack>("facedetection")->value("enabled", false); - p->append<ftl::operators::ArUco>("aruco")->value("enabled", false); //p->append<ftl::operators::HFSmoother>("hfnoise"); p->append<ftl::operators::CrossSupport>("cross"); p->append<ftl::operators::PixelWeights>("weights"); @@ -468,8 +483,11 @@ void Feed::_createPipeline(uint32_t fsid) { p->append<ftl::operators::BorderMask>("border_mask"); p->append<ftl::operators::CullDiscontinuity>("remove_discontinuity"); p->append<ftl::operators::MultiViewMLS>("mvmls")->value("enabled", false); + p->append<ftl::operators::DisplayMask>("display_mask")->value("enabled", false); p->append<ftl::operators::Poser>("poser")->value("enabled", true); p->append<ftl::operators::GTAnalysis>("gtanalyse"); + p->append<ftl::operators::DetectAndTrack>("facedetection")->value("enabled", false); + p->append<ftl::operators::ArUco>("aruco")->value("enabled", false); } } @@ -632,7 +650,7 @@ std::set<ftl::stream::SourceInfo> Feed::recentSources() { std::vector<std::string> Feed::availableDeviceSources() { std::vector<std::string> results; - if (ftl::rgbd::Source::supports("device:pylon")) results.emplace_back("device:pylon"); + //if (ftl::rgbd::Source::supports("device:pylon")) results.emplace_back("device:pylon"); if (ftl::rgbd::Source::supports("device:camera")) results.emplace_back("device:camera"); if (ftl::rgbd::Source::supports("device:stereo")) results.emplace_back("device:stereo"); if (ftl::rgbd::Source::supports("device:screen")) results.emplace_back("device:screen"); diff --git a/components/streams/src/filestream.cpp b/components/streams/src/filestream.cpp index 3d71fffb3508c9b3ed192a03059e5beba2cfe67b..35b48d217d4fceb7c289646c90af376e78365d14 100644 --- a/components/streams/src/filestream.cpp +++ b/components/streams/src/filestream.cpp @@ -235,7 +235,7 @@ bool File::tick(int64_t ts) { // Check buffer first for frames already read { - UNIQUE_LOCK(data_mutex_, dlk); + //UNIQUE_LOCK(data_mutex_, dlk); if (data_.size() > 0) has_data = true; if (needs_endframe_) { @@ -245,7 +245,7 @@ bool File::tick(int64_t ts) { size_t frame_count = 0; - for (auto i = data_.begin(); i != data_.end(); ++i) { + for (auto i = data_.begin(); i != data_.end(); ) { if (std::get<0>(*i).timestamp <= timestamp_) { auto &spkt = std::get<0>(*i); auto &pkt = std::get<1>(*i); @@ -268,7 +268,10 @@ bool File::tick(int64_t ts) { } } - ftl::pool.push([this,i](int id) { + auto j = i; + ++i; + + ftl::pool.push([this,i=j](int id) { auto &spkt = std::get<0>(*i); auto &pkt = std::get<1>(*i); diff --git a/components/streams/src/netstream.cpp b/components/streams/src/netstream.cpp index 62864077276916106d17c53bb3bfc5ce051b03b2..fcce08aecabd2db49c86e69652483a68e7b34b24 100644 --- a/components/streams/src/netstream.cpp +++ b/components/streams/src/netstream.cpp @@ -69,9 +69,9 @@ Net::Net(nlohmann::json &config, ftl::net::Universe *net) : Stream(config), acti abr_->setMaxRate(static_cast<uint8_t>(std::max(0, std::min(255, value("max_bitrate", 200))))); }); - abr_enabled_ = value("abr_enabled", true); + abr_enabled_ = value("abr_enabled", false); on("abr_enabled", [this]() { - abr_enabled_ = value("abr_enabled", true); + abr_enabled_ = value("abr_enabled", false); bitrate_ = (abr_enabled_) ? abr_->current() : static_cast<uint8_t>(std::max(0, std::min(255, value("bitrate", 64)))); diff --git a/components/streams/src/receiver.cpp b/components/streams/src/receiver.cpp index c0cdbf04f890d5beea81b1873059a1257cb7e610..b53fc49a7beea2f4521dbc60757c02fbdf1cafde 100644 --- a/components/streams/src/receiver.cpp +++ b/components/streams/src/receiver.cpp @@ -247,6 +247,15 @@ namespace sgm { } } +void Receiver::_terminateVideoPacket(const StreamPacket &spkt, const Packet &pkt) { + auto &build = builder(spkt.streamID); + auto fs = build.get(spkt.timestamp, spkt.frame_number+pkt.frame_count-1); + if (fs) { + fs->localTimestamp = spkt.localTimestamp; + _finishPacket(fs, spkt.frame_number); + } +} + void Receiver::_processVideo(const StreamPacket &spkt, const Packet &pkt) { FTL_Profile("VideoPacket", 0.02); @@ -257,6 +266,7 @@ void Receiver::_processVideo(const StreamPacket &spkt, const Packet &pkt) { if (tx == 0 || ty == 0) { LOG(ERROR) << "No Packets"; + _terminateVideoPacket(spkt, pkt); return; } @@ -268,6 +278,7 @@ void Receiver::_processVideo(const StreamPacket &spkt, const Packet &pkt) { auto *decoder = ividstate.decoders[channum]; if (!decoder) { LOG(ERROR) << "No frame decoder available"; + _terminateVideoPacket(spkt, pkt); return; } @@ -276,10 +287,12 @@ void Receiver::_processVideo(const StreamPacket &spkt, const Packet &pkt) { FTL_Profile("Decode", 0.015); if (!decoder->decode(pkt, surface)) { LOG(ERROR) << "Decode failed on channel " << (int)spkt.channel; + _terminateVideoPacket(spkt, pkt); return; } } catch (std::exception &e) { LOG(ERROR) << "Decode failed for " << spkt.timestamp << ": " << e.what(); + _terminateVideoPacket(spkt, pkt); return; } @@ -288,6 +301,7 @@ void Receiver::_processVideo(const StreamPacket &spkt, const Packet &pkt) { if (width == 0 || height == 0) { LOG(ERROR) << "Invalid decoded size: " << surface.cols << "x" << surface.rows << " (" << tx << "," << ty << ")"; + _terminateVideoPacket(spkt, pkt); return; } @@ -295,6 +309,7 @@ void Receiver::_processVideo(const StreamPacket &spkt, const Packet &pkt) { if (surface.type() != cvtype) { LOG(ERROR) << "Invalid video format received"; + _terminateVideoPacket(spkt, pkt); return; } @@ -339,7 +354,15 @@ void Receiver::_processVideo(const StreamPacket &spkt, const Packet &pkt) { } // Must ensure all processing is finished before completing a frame. - cudaSafeCall(cudaStreamSynchronize(decoder->stream())); + //cudaSafeCall(cudaStreamSynchronize(decoder->stream())); + + cudaSafeCall(cudaEventRecord(decoder->event(), decoder->stream())); + //for (int i=0; i<pkt.frame_count; ++i) { + // cudaSafeCall(cudaStreamWaitEvent(fs->frames[spkt.frame_number+i].stream(), decoder->event(), 0)); + //} + + // For now, always add to frame 0 stream + cudaSafeCall(cudaStreamWaitEvent(fs->frames[0].stream(), decoder->event(), 0)); fs->localTimestamp = spkt.localTimestamp; diff --git a/components/streams/src/renderers/openvr_render.cpp b/components/streams/src/renderers/openvr_render.cpp index b4a190c652ce20c94a735f9c0513477ac2a7db61..a448206e2812f07bb8359dfd040e55b2f9577bec 100644 --- a/components/streams/src/renderers/openvr_render.cpp +++ b/components/streams/src/renderers/openvr_render.cpp @@ -361,8 +361,8 @@ bool OpenVRRender::retrieve(ftl::data::Frame &frame_out) { texture1_.make(width, height, ftl::utility::GLTexture::Type::BGRA); texture2_.make(width, height, ftl::utility::GLTexture::Type::BGRA); - rgbdframe.create<cv::cuda::GpuMat>(Channel::Colour) = texture1_.map(renderer_->getCUDAStream()); - rgbdframe.create<cv::cuda::GpuMat>(Channel::Colour2) = texture2_.map(renderer_->getCUDAStream()); + rgbdframe.create<cv::cuda::GpuMat>(Channel::Colour) = texture1_.map(rgbdframe.stream()); + rgbdframe.create<cv::cuda::GpuMat>(Channel::Colour2) = texture2_.map(rgbdframe.stream()); rgbdframe.create<cv::cuda::GpuMat>(Channel::Depth).create(height, width, CV_32F); rgbdframe.createTexture<float>(Channel::Depth); @@ -521,7 +521,7 @@ bool OpenVRRender::retrieve(ftl::data::Frame &frame_out) { post_pipe_->append<ftl::operators::GTAnalysis>("gtanalyse"); } - post_pipe_->apply(rgbdframe, rgbdframe, renderer_->getCUDAStream()); + post_pipe_->apply(rgbdframe, rgbdframe); if (host_->value("enable_touch", false)) { ftl::render::collision2touch(rgbdframe, renderer_->getCollisions(), sets, my_id_, host_->value("touch_min", 0.01f), host_->value("touch_max", 0.05f)); diff --git a/components/streams/src/renderers/screen_render.cpp b/components/streams/src/renderers/screen_render.cpp index 3bd7d8c927cf93fee9fb3a724c9d0406300df5f2..4556cfaec9505e9924b3d49c0013e2630920a2e3 100644 --- a/components/streams/src/renderers/screen_render.cpp +++ b/components/streams/src/renderers/screen_render.cpp @@ -88,6 +88,7 @@ bool ScreenRender::retrieve(ftl::data::Frame &frame_out) { my_id_ = frame_out.frameset(); auto sets = filter_->getLatestFrameSets(); bool data_only = host_->value("data_only", false); + bool blend_overlay = host_->value("blend_overlay", false); if (sets.size() > 0) { ftl::rgbd::Frame &rgbdframe = frame_out.cast<ftl::rgbd::Frame>(); @@ -140,10 +141,13 @@ bool ScreenRender::retrieve(ftl::data::Frame &frame_out) { pose.setIdentity(); if (s->hasChannel(Channel::Pose)) pose = s->cast<ftl::rgbd::Frame>().getPose(); - if (!data_only) renderer_->submit( - s.get(), - ftl::codecs::Channels<0>(ftl::codecs::Channel::Colour), - pose); + if (!data_only) { + if (blend_overlay) { + renderer_->submit(s.get(), ftl::codecs::Channels<0>(Channel::Colour) + Channel::Overlay, pose); + } else { + renderer_->submit(s.get(), ftl::codecs::Channels<0>(Channel::Colour), pose); + } + } } if (!data_only) renderer_->render(); @@ -221,7 +225,8 @@ bool ScreenRender::retrieve(ftl::data::Frame &frame_out) { post_pipe_->append<ftl::operators::GTAnalysis>("gtanalyse"); } - post_pipe_->apply(rgbdframe, rgbdframe, 0); + post_pipe_->apply(rgbdframe, rgbdframe); + cudaSafeCall(cudaStreamSynchronize(rgbdframe.stream())); if (host_->value("enable_touch", false)) { ftl::render::collision2touch(rgbdframe, renderer_->getCollisions(), sets, my_id_, host_->value("touch_min", 0.01f), host_->value("touch_max", 0.05f)); diff --git a/components/streams/src/sender.cpp b/components/streams/src/sender.cpp index a47e9854a38784802e0b26ee344a20c790b77ac9..b4d9d409323cb7a1e8d66d46c4e0e8458398eaf5 100644 --- a/components/streams/src/sender.cpp +++ b/components/streams/src/sender.cpp @@ -490,19 +490,12 @@ void Sender::_encodeVideoChannel(ftl::data::FrameSet &fs, Channel c, bool reset) int encoder_number = 0; while (offset < fs.frames.size()) { Channel cc = c; - //if ((cc == Channel::Colour) && fs.firstFrame().hasChannel(Channel::ColourHighRes)) { - // cc = Channel::ColourHighRes; - //} - - //if ((cc == Channel::Right) && fs.firstFrame().hasChannel(Channel::RightHighRes)) { - // cc = Channel::RightHighRes; - // fs.frames[offset].upload(cc); - //} - if (!fs.frames[offset].hasChannel(cc)) { - offset++; - continue; - } + // FIXME: Don't change tile layout when channel temporarily drops. + //if (!fs.frames[offset].hasChannel(cc)) { + // offset++; + // continue; + //} StreamPacket spkt; spkt.version = 5; @@ -539,15 +532,12 @@ void Sender::_encodeVideoChannel(ftl::data::FrameSet &fs, Channel c, bool reset) //} } - int count = _generateTiles(fs, offset, cc, enc->stream(), is_stereo); + int count = (fs.frames.size() == 1) ? 1 : _generateTiles(fs, offset, cc, enc->stream(), is_stereo); if (count <= 0) { LOG(ERROR) << "Could not generate tiles."; break; } - //cudaSafeCall(cudaStreamSynchronize(enc->stream())); - //enc->stream().waitForCompletion(); - if (enc) { if (reset) enc->reset(); @@ -562,8 +552,15 @@ void Sender::_encodeVideoChannel(ftl::data::FrameSet &fs, Channel c, bool reset) //if (static_cast<size_t>(fs.count) < fs.frames.size()) pkt.flags |= ftl::codecs::kFlagPartial; // Choose correct region of interest into the surface. - cv::Rect roi = _generateROI(fs, cc, offset, is_stereo); - cv::cuda::GpuMat sroi = tile.surface(roi); + //cv::Rect roi = _generateROI(fs, cc, offset, is_stereo); + cv::cuda::GpuMat sroi; + + if (fs.frames.size() > 1) { + cv::Rect roi = _generateROI(fs, cc, offset, is_stereo); + sroi = tile.surface(roi); + } else { + sroi = fs.frames[0].get<cv::cuda::GpuMat>(cc); + } FTL_Profile("Encoder",0.02); @@ -637,8 +634,6 @@ void Sender::_encodeAudioChannel(ftl::data::FrameSet &fs, Channel c, bool reset) } void Sender::_encodeDataChannel(ftl::data::FrameSet &fs, Channel c, bool reset) { - int i=0; - // TODO: Pack all frames into a single packet for (auto &f : fs.frames) { StreamPacket spkt; @@ -786,7 +781,7 @@ int Sender::_generateTiles(const ftl::rgbd::FrameSet &fs, int offset, Channel c, } else { cv::Rect roi((count % tx)*rwidth, (count / tx)*rheight, rwidth, rheight); cv::cuda::GpuMat sroi = surface.surface(roi); - sroi.setTo(cv::Scalar(0)); + sroi.setTo(cv::Scalar(0), stream); } ++count; diff --git a/components/streams/test/receiver_unit.cpp b/components/streams/test/receiver_unit.cpp index fcdeafc1a819319b35d777498c31b2a7438bad7a..29bf0b241f1f4563202987cc3656e377cf889acd 100644 --- a/components/streams/test/receiver_unit.cpp +++ b/components/streams/test/receiver_unit.cpp @@ -364,7 +364,7 @@ TEST_CASE( "Receiver sync bugs" ) { auto h = receiver->onFrameSet([&count,&ts,&haswrongchan](const ftl::data::FrameSetPtr& fs) { ts = fs->timestamp(); - haswrongchan = fs->frames[0].hasChannel(Channel::ColourHighRes); + haswrongchan = fs->frames[0].hasChannel(Channel::Overlay); ++count; @@ -373,7 +373,7 @@ TEST_CASE( "Receiver sync bugs" ) { try { stream.post(spkt, pkt); } catch(...) {} spkt.timestamp = 10; - spkt.channel = Channel::ColourHighRes; + spkt.channel = Channel::Overlay; try { stream.postEnd(spkt, pkt, 3); } catch(...) {} spkt.timestamp = 20; spkt.channel = Channel::Colour2; diff --git a/components/streams/test/sender_unit.cpp b/components/streams/test/sender_unit.cpp index 753fde010b94071152cc646a90d3f745a2e8b461..11c44ae33bad5ac61bbb01f90a45c6002372868f 100644 --- a/components/streams/test/sender_unit.cpp +++ b/components/streams/test/sender_unit.cpp @@ -210,13 +210,13 @@ TEST_CASE( "Sender::post() video frames" ) { REQUIRE( count == 1 ); REQUIRE( spkt.version == 5 ); REQUIRE( spkt.timestamp == 1000 ); - REQUIRE( (int)spkt.frame_number == 1 ); + REQUIRE( (int)spkt.frame_number == 0 ); REQUIRE( spkt.streamID == 0 ); REQUIRE( spkt.channel == Channel::Depth ); REQUIRE( pkt.codec == codec_t::HEVC ); REQUIRE( pkt.data.size() > 0 ); REQUIRE( pkt.flags == (ftl::codecs::kFlagFloat | ftl::codecs::kFlagMappedDepth) ); - REQUIRE( pkt.frame_count == 3 ); + REQUIRE( pkt.frame_count == 4 ); REQUIRE( ftl::codecs::hevc::validNAL(pkt.data.data(), pkt.data.size()) ); } diff --git a/components/structures/include/ftl/data/new_frame.hpp b/components/structures/include/ftl/data/new_frame.hpp index 94c539086e6a24802cb4ca980345a36987c8be47..ebbceed9061a380e4ade9c5b6b011a163c3887b3 100644 --- a/components/structures/include/ftl/data/new_frame.hpp +++ b/components/structures/include/ftl/data/new_frame.hpp @@ -20,6 +20,8 @@ #include <ftl/handle.hpp> #include <ftl/data/messages.hpp> +#include <cuda_runtime.h> + template<typename T> struct is_list : public std::false_type {}; template<typename T> @@ -630,6 +632,14 @@ class Frame { inline FrameMode mode() const { return mode_; } + // ==== CUDA Functions ===================================================== + + cudaStream_t stream(); + + cudaEvent_t uploadEvent(); + + cudaEvent_t pipeEvent(); + // ==== Wrapper functions ================================================== void message(ftl::data::Message code, const std::string &msg); @@ -681,6 +691,9 @@ class Frame { FrameStatus status_; FrameMode mode_ = FrameMode::PRIMARY; uint64_t available_ = 0; + cudaStream_t stream_=0; + cudaEvent_t upload_event_=0; + cudaEvent_t pipe_event_=0; inline void restart(int64_t ts) { timestamp_ = ts; diff --git a/components/structures/include/ftl/data/new_frameset.hpp b/components/structures/include/ftl/data/new_frameset.hpp index 8dcc4b3b27f332139e5c18b9d42312cc2af9ccba..6eafe9cb59a4c7d0f53033ba8e903c6ed9e55207 100644 --- a/components/structures/include/ftl/data/new_frameset.hpp +++ b/components/structures/include/ftl/data/new_frameset.hpp @@ -134,6 +134,8 @@ class FrameSet : public ftl::data::Frame { */ bool hasAnyChanged(ftl::codecs::Channel) const; + bool anyHasChannel(ftl::codecs::Channel) const; + private: std::atomic<int> flags_; }; diff --git a/components/structures/src/frameset.cpp b/components/structures/src/frameset.cpp index cba77a2136cb6222f6fea9f2d6f77a2debb5eb2a..e8cfd9404535e1fdfe410596fc92cf35a83e39ec 100644 --- a/components/structures/src/frameset.cpp +++ b/components/structures/src/frameset.cpp @@ -78,6 +78,13 @@ bool ftl::data::FrameSet::hasAnyChanged(ftl::codecs::Channel c) const { return false; } +bool ftl::data::FrameSet::anyHasChannel(ftl::codecs::Channel c) const { + for (size_t i=0; i<frames.size(); ++i) { + if (frames[i].hasOwn(c)) return true; + } + return false; +} + void FrameSet::store() { if (status() != ftl::data::FrameStatus::CREATED) throw FTL_Error("Cannot store frameset multiple times"); diff --git a/components/structures/src/new_frame.cpp b/components/structures/src/new_frame.cpp index 576896702e3e524639d59f903341fce1bc3c3916..36a1c7d06cc59ea9a710bb6a0035fdb6951a9c70 100644 --- a/components/structures/src/new_frame.cpp +++ b/components/structures/src/new_frame.cpp @@ -1,6 +1,7 @@ #include <ftl/data/new_frame.hpp> #include <ftl/data/framepool.hpp> #include <ftl/timer.hpp> +#include <ftl/cuda_common.hpp> using ftl::data::Frame; using ftl::data::Session; @@ -84,6 +85,27 @@ Frame::~Frame() { } }; +cudaStream_t Frame::stream() { + if (stream_ == 0) { + cudaSafeCall( cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking) ); + } + return stream_; +} + +cudaEvent_t Frame::uploadEvent() { + if (upload_event_ == 0) { + cudaSafeCall( cudaEventCreate(&upload_event_) ); + } + return upload_event_; +} + +cudaEvent_t Frame::pipeEvent() { + if (pipe_event_ == 0) { + cudaSafeCall( cudaEventCreate(&pipe_event_) ); + } + return pipe_event_; +} + bool ftl::data::Frame::hasAll(const std::unordered_set<ftl::codecs::Channel> &cs) { for (auto &a : cs) { if (!has(a)) return false; @@ -346,6 +368,12 @@ void Frame::moveTo(Frame &f) { f.changed_ = std::move(changed_); f.packet_rx = (int)packet_rx; f.packet_tx = (int)packet_tx; + f.stream_ = stream_; + f.upload_event_ = upload_event_; + f.pipe_event_ = pipe_event_; + stream_ = 0; + pipe_event_ = 0; + upload_event_ = 0; status_ = FrameStatus::RELEASED; } diff --git a/lib/libsgm/src/path_aggregation.cu b/lib/libsgm/src/path_aggregation.cu index 16567de556520b10a8fcbb3560bcd667ab1fb7b6..e5019ec3a67ddb1113ce2a7f1a873983359e5fb4 100644 --- a/lib/libsgm/src/path_aggregation.cu +++ b/lib/libsgm/src/path_aggregation.cu @@ -29,6 +29,7 @@ PathAggregation<MAX_DISPARITY>::PathAggregation() cudaStreamCreate(&m_streams[i]); cudaEventCreate(&m_events[i]); } + cudaEventCreate(&m_event); } template <size_t MAX_DISPARITY> @@ -38,6 +39,7 @@ PathAggregation<MAX_DISPARITY>::~PathAggregation(){ cudaStreamDestroy(m_streams[i]); cudaEventDestroy(m_events[i]); } + cudaEventDestroy(m_event); } template <size_t MAX_DISPARITY> @@ -58,7 +60,13 @@ void PathAggregation<MAX_DISPARITY>::enqueue( m_cost_buffer = DeviceBuffer<cost_type>(buffer_size); } const size_t buffer_step = width * height * MAX_DISPARITY; - cudaStreamSynchronize(stream); + //cudaStreamSynchronize(stream); + cudaEventRecord(m_event, stream); + + for(unsigned int i = 0; i < NUM_PATHS; ++i){ + cudaStreamWaitEvent(m_streams[i], m_event, 0); + } + path_aggregation::enqueue_aggregate_up2down_path<MAX_DISPARITY>( m_cost_buffer.data() + 0 * buffer_step, left, right, width, height, p1, p2, p2_pitch, w, w_pitch, m_streams[0]); diff --git a/lib/libsgm/src/path_aggregation.hpp b/lib/libsgm/src/path_aggregation.hpp index 0b019a3b556fb92969acff843ad7431bc3d57b0e..7df96996da46711f7ddcb76dcbc44132ca9f194c 100644 --- a/lib/libsgm/src/path_aggregation.hpp +++ b/lib/libsgm/src/path_aggregation.hpp @@ -31,6 +31,7 @@ private: DeviceBuffer<cost_type> m_cost_buffer; cudaStream_t m_streams[NUM_PATHS]; cudaEvent_t m_events[NUM_PATHS]; + cudaEvent_t m_event; public: PathAggregation();