diff --git a/applications/vision/src/main.cpp b/applications/vision/src/main.cpp
index a8c8d9006ea622cad8463c3ed0c45e986fe2f1c9..4f370c1a0f15bf7e2abb7e5b7ad188d778144f0e 100644
--- a/applications/vision/src/main.cpp
+++ b/applications/vision/src/main.cpp
@@ -188,11 +188,11 @@ static void run(ftl::Configurable *root) {
 	root->on("quiet", quiet, false);
 
 	auto *pipeline = ftl::config::create<ftl::operators::Graph>(root, "pipeline");
+	pipeline->append<ftl::operators::ArUco>("aruco")->value("enabled", false);
 	pipeline->append<ftl::operators::DetectAndTrack>("facedetection")->value("enabled", false);
 	pipeline->append<ftl::operators::DepthChannel>("depth");  // Ensure there is a depth channel
 	//pipeline->append<ftl::operators::ClipScene>("clipping")->value("enabled", false);
 	pipeline->restore("vision_pipeline", { "clipping" });
-	pipeline->append<ftl::operators::ArUco>("aruco")->value("enabled", false);
 
 	auto h = creator->onFrameSet([sender,outstream,&stats_count,&latency,&frames,&stats_time,pipeline,&encodable,&previous_encodable](const ftl::data::FrameSetPtr &fs) {
diff --git a/components/operators/include/ftl/operators/detectandtrack.hpp b/components/operators/include/ftl/operators/detectandtrack.hpp
index 2e5ce171359c6a6f6bc5b8d693b64677a602be38..0d8b063d34cc2a9059d6cabe54d6c25484015891 100644
--- a/components/operators/include/ftl/operators/detectandtrack.hpp
+++ b/components/operators/include/ftl/operators/detectandtrack.hpp
@@ -121,6 +121,7 @@ class ArUco : public ftl::operators::Operator {
 	inline Operator::Type type() const override { return Operator::Type::OneToOne; }
 
 	bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t stream) override;
+	virtual void wait(cudaStream_t) override;
 
 	ftl::codecs::Channel channel_in_;
 	ftl::codecs::Channel channel_out_;
@@ -129,6 +130,7 @@ class ArUco : public ftl::operators::Operator {
 	bool estimate_pose_;
 	float marker_size_;
 	cv::Mat tmp_;
+	std::future<void> job_;
 
 	cv::Ptr<cv::aruco::Dictionary> dictionary_;
 	cv::Ptr<cv::aruco::DetectorParameters> params_;
diff --git a/components/operators/src/aruco.cpp b/components/operators/src/aruco.cpp
index 0d287356748e7eebd287e18d3e53639f3e6778d2..17df4d0eb3d89711df2ead4c00545753183d0bba 100644
--- a/components/operators/src/aruco.cpp
+++ b/components/operators/src/aruco.cpp
@@ -40,17 +40,24 @@ static Eigen::Matrix4d matrix(cv::Vec3d &rvec, cv::Vec3d &tvec) {
 }
 
 ArUco::ArUco(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg) {
-	dictionary_ = cv::aruco::getPredefinedDictionary(cfg->value("dictionary", 0));
+	dictionary_ = cv::aruco::getPredefinedDictionary(
+		cfg->value("dictionary", int(cv::aruco::DICT_4X4_50)));
 	params_ = cv::aruco::DetectorParameters::create();
 	params_->cornerRefinementMethod = cv::aruco::CORNER_REFINE_CONTOUR;
-	params_->cornerRefinementMinAccuracy = 0.01;
-	params_->cornerRefinementMaxIterations = 20;
+	params_->cornerRefinementMinAccuracy = 0.05;
+	params_->cornerRefinementMaxIterations = 10;
+
+	// defaults are 3, 23, 10; skip the smallest threshold window size for speed
+	params_->adaptiveThreshWinSizeMin = 13;
+	params_->adaptiveThreshWinSizeMax = 23;
+	params_->adaptiveThreshWinSizeStep = 10;
 
 	channel_in_ = Channel::Colour;
 	channel_out_ = Channel::Shapes3D;
 
 	cfg->on("dictionary", [this,cfg]() {
-		dictionary_ = cv::aruco::getPredefinedDictionary(cfg->value("dictionary", 0));
+		dictionary_ = cv::aruco::getPredefinedDictionary(
+			cfg->value("dictionary", 0));
 	});
 }
 
@@ -60,42 +67,50 @@ bool ArUco::apply(Frame &in, Frame &out, cudaStream_t) {
 	estimate_pose_ = config()->value("estimate_pose", true);
 	marker_size_ = config()->value("marker_size", 0.1f);
 
-	std::vector<Vec3d> rvecs;
-	std::vector<Vec3d> tvecs;
-	std::vector<std::vector<cv::Point2f>> corners;
-	std::vector<int> ids;
+	job_ = ftl::pool.push([this, &in, &out](int) {
+		std::vector<Vec3d> rvecs;
+		std::vector<Vec3d> tvecs;
+		std::vector<std::vector<cv::Point2f>> corners;
+		std::vector<int> ids;
 
-	{
-		FTL_Profile("ArUco", 0.02);
-		cv::cvtColor(in.get<cv::Mat>(channel_in_), tmp_, cv::COLOR_BGRA2GRAY);
+		{
+			FTL_Profile("ArUco", 0.02);
+			cv::cvtColor(in.get<cv::Mat>(channel_in_), tmp_, cv::COLOR_BGRA2GRAY);
 
-		const Mat K = in.getLeftCamera().getCameraMatrix();
-		const Mat dist;
+			const Mat K = in.getLeftCamera().getCameraMatrix(tmp_.size());
+			const Mat dist;
 
-		cv::aruco::detectMarkers(tmp_, dictionary_,
-			corners, ids, params_, cv::noArray(), K, dist);
+			cv::aruco::detectMarkers(tmp_, dictionary_,
+				corners, ids, params_, cv::noArray(), K, dist);
 
-		if (estimate_pose_) {
-			cv::aruco::estimatePoseSingleMarkers(corners, marker_size_, K, dist, rvecs, tvecs);
+			if (estimate_pose_) {
+				cv::aruco::estimatePoseSingleMarkers(corners, marker_size_, K, dist, rvecs, tvecs);
+			}
 		}
-	}
 
-	list<Shape3D> result;
-	if (out.hasChannel(channel_out_)) {
-		result = out.get<list<Shape3D>>(channel_out_);
-	}
+		list<Shape3D> result;
+		if (out.hasChannel(channel_out_)) {
+			result = out.get<list<Shape3D>>(channel_out_);
+		}
 
-	for (size_t i = 0; i < rvecs.size(); i++) {
-		if (estimate_pose_) {
-			auto &t = result.emplace_back();
-			t.id = ids[i];
-			t.type = ftl::codecs::Shape3DType::ARUCO;
-			t.pose = (in.getPose() * matrix(rvecs[i], tvecs[i])).cast<float>();
-			t.size = Eigen::Vector3f(1.0f, 1.0f, 0.0f)*marker_size_;
-			t.label = "Aruco-" + std::to_string(ids[i]);
+		for (size_t i = 0; i < rvecs.size(); i++) {
+			if (estimate_pose_) {
+				auto &t = result.emplace_back();
+				t.id = ids[i];
+				t.type = ftl::codecs::Shape3DType::ARUCO;
+				t.pose = (in.getPose() * matrix(rvecs[i], tvecs[i])).cast<float>();
+				t.size = Eigen::Vector3f(1.0f, 1.0f, 0.0f)*marker_size_;
+				t.label = "Aruco-" + std::to_string(ids[i]);
+			}
 		}
-	}
 
-	out.create<list<Shape3D>>(channel_out_).list = result;
+		out.create<list<Shape3D>>(channel_out_).list = result;
+	});
 
 	return true;
 }
+
+void ArUco::wait(cudaStream_t) {
+	if (job_.valid()) {
+		job_.wait();
+	}
+}
\ No newline at end of file
diff --git a/components/rgbd-sources/include/ftl/rgbd/camera.hpp b/components/rgbd-sources/include/ftl/rgbd/camera.hpp
index 3698a32ca04e0c6856df3b1f731c3725ba23b69e..1b47187e981f3d95a23e4506746a9099d8fa351f 100644
--- a/components/rgbd-sources/include/ftl/rgbd/camera.hpp
+++ b/components/rgbd-sources/include/ftl/rgbd/camera.hpp
@@ -64,14 +64,14 @@ struct __align__(16) Camera {
 	/**
 	 * Convert screen plus depth into camera coordinates.
 	 */
-	__host__ __device__ float3 screenToCam(int ux, int uy, float depth) const; 
+	__host__ __device__ float3 screenToCam(int ux, int uy, float depth) const;
 
 	//Eigen::Vector4f eigenScreenToCam(int ux, int uy, float depth) const;
 
 	/**
 	 * Convert screen plus depth into camera coordinates.
 	 */
-	__host__ __device__ float3 screenToCam(uint ux, uint uy, float depth) const; 
+	__host__ __device__ float3 screenToCam(uint ux, uint uy, float depth) const;
 
 	/**
 	 * Convert screen plus depth into camera coordinates.
@@ -86,8 +86,8 @@ struct __align__(16) Camera {
 	 * Make a camera struct from a configurable.
 	 */
 	static Camera from(ftl::Configurable*);
-	
-	cv::Mat getCameraMatrix() const;
+
+	cv::Mat getCameraMatrix(const cv::Size& sz={0, 0}) const;
 	#endif
 };
@@ -104,18 +104,18 @@ inline float3 ftl::rgbd::Camera::project<ftl::rgbd::Projection::EQUIRECTANGULAR>
 	//inverse formula for spherical projection, reference Szeliski book "Computer Vision: Algorithms and Applications" p439.
 	const float theta = atan2(ray3d.y,sqrt(ray3d.x*ray3d.x+ray3d.z*ray3d.z));
 	const float phi = atan2(ray3d.x, ray3d.z);
-	
+
 	const float pi = 3.14159265f;
 
 	//get 2D point on equirectangular map
-	float x_sphere = (((phi*width)/pi+width)/2.0f); 
+	float x_sphere = (((phi*width)/pi+width)/2.0f);
 	float y_sphere = (theta+ pi/2.0f)*height/pi;
 
 	return make_float3(x_sphere,y_sphere, l);
 };
 
 template <>
 __device__ __host__
-inline float3 ftl::rgbd::Camera::unproject<ftl::rgbd::Projection::EQUIRECTANGULAR>(const float3 &equi) const { 
+inline float3 ftl::rgbd::Camera::unproject<ftl::rgbd::Projection::EQUIRECTANGULAR>(const float3 &equi) const {
 	const float pi = 3.14159265f;
 
 	float phi = (equi.x * 2.0f - float(width)) * pi / float(width);
@@ -131,7 +131,7 @@ inline float3 ftl::rgbd::Camera::unproject<ftl::rgbd::Projection::EQUIRECTANGULA
 template <> __device__ __host__
 inline float3 ftl::rgbd::Camera::project<ftl::rgbd::Projection::PERSPECTIVE>(const float3 &pos) const {
 	return make_float3(
-		static_cast<float>(pos.x*fx/pos.z - cx), 
+		static_cast<float>(pos.x*fx/pos.z - cx),
 		static_cast<float>(pos.y*fy/pos.z - cy),
 		pos.z
 	);
@@ -147,7 +147,7 @@ inline float3 ftl::rgbd::Camera::unproject<ftl::rgbd::Projection::PERSPECTIVE>(c
 template <> __device__ __host__
 inline float3 ftl::rgbd::Camera::project<ftl::rgbd::Projection::ORTHOGRAPHIC>(const float3 &pos) const {
 	return make_float3(
-		static_cast<float>(pos.x*fx - cx), 
+		static_cast<float>(pos.x*fx - cx),
 		static_cast<float>(pos.y*fy - cy),
 		pos.z
 	);
@@ -163,7 +163,7 @@ inline float3 ftl::rgbd::Camera::unproject<ftl::rgbd::Projection::ORTHOGRAPHIC>(
 template <> __device__ __host__
 inline float2 ftl::rgbd::Camera::camToScreen<float2>(const float3 &pos) const {
 	return make_float2(
-		static_cast<float>(pos.x*fx/pos.z - cx), 
+		static_cast<float>(pos.x*fx/pos.z - cx),
 		static_cast<float>(pos.y*fy/pos.z - cy)
 	);
 }
diff --git a/components/rgbd-sources/src/camera.cpp b/components/rgbd-sources/src/camera.cpp
index a4f540ffdbdd7a57fc187d3ad584d08d5e70cb34..ef4c084cc8754fcef287589a95ac7c4250a1f7e1 100644
--- a/components/rgbd-sources/src/camera.cpp
+++ b/components/rgbd-sources/src/camera.cpp
@@ -35,13 +35,18 @@ Camera Camera::from(ftl::Configurable *cfg) {
 	return r;
 }
 
-cv::Mat Camera::getCameraMatrix() const {
-	cv::Mat K = cv::Mat::eye(cv::Size(3, 3), CV_64FC1);
-	K.at<double>(0,0) = fx;
-	K.at<double>(0,2) = -cx;
-	K.at<double>(1,1) = fy;
-	K.at<double>(1,2) = -cy;
-	return K;
+cv::Mat Camera::getCameraMatrix(const cv::Size& sz) const {
+	if (sz == cv::Size{0, 0}) {
+		cv::Mat K = cv::Mat::eye(cv::Size(3, 3), CV_64FC1);
+		K.at<double>(0,0) = fx;
+		K.at<double>(0,2) = -cx;
+		K.at<double>(1,1) = fy;
+		K.at<double>(1,2) = -cy;
+		return K;
+	}
+	else {
+		return scaled(sz.width, sz.height).getCameraMatrix();
+	}
 }
 
 /*
diff --git a/components/rgbd-sources/src/sources/stereovideo/rectification.cpp b/components/rgbd-sources/src/sources/stereovideo/rectification.cpp
index 39e91193c8f46b6ab2bcd31191e9d78338677b08..6e1ef7df3d1384fbc7cefd5ca218b9a8d7c7f4c6 100644
--- a/components/rgbd-sources/src/sources/stereovideo/rectification.cpp
+++ b/components/rgbd-sources/src/sources/stereovideo/rectification.cpp
@@ -196,10 +196,10 @@ cv::Mat StereoRectification::cameraMatrix(Channel c) {
 	}
 	else {
 		if (c == Channel::Left) {
-			return calib_left_.intrinsic.matrix();
+			return CalibrationData::Intrinsic(calib_left_.intrinsic, image_resolution_).matrix();
 		}
 		else if (c == Channel::Right) {
-			return calib_right_.intrinsic.matrix();
+			return CalibrationData::Intrinsic(calib_right_.intrinsic, image_resolution_).matrix();
 		}
 	}
 	throw ftl::exception("Invalid channel, expected Left or Right");
diff --git a/components/streams/src/renderers/openvr_render.cpp b/components/streams/src/renderers/openvr_render.cpp
index a448206e2812f07bb8359dfd040e55b2f9577bec..5c299083ebb2a8d421e674ac31b725014e80e117 100644
--- a/components/streams/src/renderers/openvr_render.cpp
+++ b/components/streams/src/renderers/openvr_render.cpp
@@ -329,6 +329,11 @@ bool OpenVRRender::retrieve(ftl::data::Frame &frame_out) {
 
 	if (headset_origin.size() > 0) {
 		ftl::operators::Poser::get(headset_origin, horigin);
+		double headset_offset = host_->value("headset_offset_z", 0.0);
+		// move the origin along its local z-axis by the configured offset
+		Eigen::Vector3d offset =
+			horigin.block<3, 3>(0, 0)*Eigen::Vector3d(0.0, 0.0, headset_offset);
+		horigin.block<3, 1>(0, 3) -= offset;
 	}
 
 	Eigen::Matrix4d new_pose = horigin*viewPose.inverse();
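
The aruco.cpp change moves marker detection off the pipeline thread: apply() now only schedules a job on ftl::pool and stores the returned std::future (the new job_ member in detectandtrack.hpp, which requires <future>), and the new ArUco::wait() override blocks until that job completes. Below is a minimal, self-contained sketch of the same future-based pattern, using std::async as a stand-in for ftl::pool.push(), whose exact signature is not shown in this patch; the AsyncDetector class and its members are hypothetical.

#include <future>
#include <iostream>
#include <numeric>
#include <vector>

// Hypothetical stand-in for the ArUco operator: apply() schedules work
// asynchronously, wait() blocks until the posted job has finished.
class AsyncDetector {
public:
	void apply(const std::vector<int> &frame) {
		// std::async stands in for ftl::pool.push(). Like the real code,
		// the lambda captures the frame by reference, so wait() must run
		// before the frame is recycled.
		job_ = std::async(std::launch::async, [this, &frame]() {
			// Placeholder for cv::aruco::detectMarkers() and pose estimation.
			result_ = std::accumulate(frame.begin(), frame.end(), 0L);
		});
	}

	void wait() {
		if (job_.valid()) job_.wait();  // mirrors ArUco::wait()
	}

	long result() const { return result_; }

private:
	std::future<void> job_;
	long result_ = 0;
};

int main() {
	std::vector<int> frame{1, 2, 3, 4};
	AsyncDetector d;
	d.apply(frame);  // returns immediately, detection runs in the background
	d.wait();        // the graph must call wait() before consuming outputs
	std::cout << d.result() << "\n";  // prints 10
}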
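getCameraMatrix() now takes an optional target size and defers to scaled() when one is given; the ArUco operator uses this to request intrinsics at tmp_.size(), so the K passed to detectMarkers() matches the resolution of the image actually being processed rather than the calibration resolution. Assuming scaled() rescales the intrinsics proportionally (its implementation is not part of this patch), the behaviour is equivalent to the sketch below; scaledCameraMatrix and the example numbers are illustrative only.

#include <opencv2/core.hpp>
#include <iostream>

// Hypothetical equivalent of Camera::getCameraMatrix(sz): scale fx, fy,
// cx, cy by the ratio of target to calibration resolution. Note the
// negated principal point, matching the convention in camera.cpp.
static cv::Mat scaledCameraMatrix(double fx, double fy, double cx, double cy,
                                  cv::Size calib, cv::Size target) {
	const double sx = double(target.width) / double(calib.width);
	const double sy = double(target.height) / double(calib.height);
	cv::Mat K = cv::Mat::eye(3, 3, CV_64FC1);
	K.at<double>(0, 0) = fx * sx;
	K.at<double>(0, 2) = -cx * sx;
	K.at<double>(1, 1) = fy * sy;
	K.at<double>(1, 2) = -cy * sy;
	return K;
}

int main() {
	// Intrinsics calibrated at 1280x720, requested at 640x360: every
	// entry except the bottom row halves.
	std::cout << scaledCameraMatrix(700.0, 700.0, -640.0, -360.0,
	                                {1280, 720}, {640, 360}) << "\n";
}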
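The openvr_render.cpp hunk translates the headset origin along its own (rotated) z-axis rather than the world z-axis, which is why the offset vector is premultiplied by the rotation block before being subtracted from the translation column. The same arithmetic in isolation, with an illustrative pose and offset value:

#include <Eigen/Dense>
#include <iostream>

int main() {
	// A pose rotated 90 degrees about Y: its local +z axis points along
	// world +x. The values here are illustrative only.
	const double pi = 3.14159265358979323846;
	Eigen::Matrix4d horigin = Eigen::Matrix4d::Identity();
	horigin.block<3, 3>(0, 0) =
		Eigen::AngleAxisd(pi / 2.0, Eigen::Vector3d::UnitY()).toRotationMatrix();

	// Same arithmetic as the patch, with headset_offset_z = 0.5.
	const double headset_offset = 0.5;
	Eigen::Vector3d offset =
		horigin.block<3, 3>(0, 0) * Eigen::Vector3d(0.0, 0.0, headset_offset);
	horigin.block<3, 1>(0, 3) -= offset;

	// The origin moved along world x, not world z: prints -0.5 0 0
	// (up to rounding).
	std::cout << horigin.block<3, 1>(0, 3).transpose() << "\n";
}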