From 01a3de22e00acca4eedccf791bdb4bccdad17b21 Mon Sep 17 00:00:00 2001 From: Nicolas Pope <nicolas.pope@utu.fi> Date: Thu, 6 Feb 2020 13:40:47 +0200 Subject: [PATCH] Window virtual camera effect --- applications/gui/src/camera.cpp | 286 ++++++++++-------- applications/gui/src/overlay.cpp | 63 +++- applications/gui/src/overlay.hpp | 8 + applications/gui/src/statsimage.cpp | 72 +++++ applications/gui/src/statsimage.hpp | 47 +++ .../codecs/include/ftl/codecs/codecs.hpp | 1 + .../codecs/include/ftl/codecs/faces.hpp | 25 ++ .../include/ftl/codecs/transformation.hpp | 4 +- components/codecs/src/bitrates.cpp | 1 + .../include/ftl/operators/detectandtrack.hpp | 7 +- components/operators/src/detectandtrack.cpp | 85 +++++- .../sources/realsense/realsense_source.cpp | 44 ++- .../src/sources/stereovideo/local.cpp | 44 ++- 13 files changed, 504 insertions(+), 183 deletions(-) create mode 100644 applications/gui/src/statsimage.cpp create mode 100644 applications/gui/src/statsimage.hpp create mode 100644 components/codecs/include/ftl/codecs/faces.hpp diff --git a/applications/gui/src/camera.cpp b/applications/gui/src/camera.cpp index f7aa5d74e..a3fee9a9f 100644 --- a/applications/gui/src/camera.cpp +++ b/applications/gui/src/camera.cpp @@ -8,7 +8,10 @@ #include <ftl/operators/antialiasing.hpp> +#include <ftl/codecs/faces.hpp> + #include "overlay.hpp" +#include "statsimage.hpp" #define LOGURU_REPLACE_GLOG 1 #include <loguru.hpp> @@ -26,110 +29,8 @@ using ftl::codecs::Channel; using ftl::codecs::Channels; using cv::cuda::GpuMat; -// TODO(Nick) MOVE -class StatisticsImage { -private: - cv::Mat data_; // CV_32FC3, channels: m, s, f - cv::Size size_; // image size - float n_; // total number of samples - -public: - explicit StatisticsImage(cv::Size size); - StatisticsImage(cv::Size size, float max_f); - - /* @brief reset all statistics to 0 - */ - void reset(); - - /* @brief update statistics with new values - */ - void update(const cv::Mat &in); - - /* @brief variance (depth) - */ - void getVariance(cv::Mat &out); - - /* @brief standard deviation (depth) - */ - void getStdDev(cv::Mat &out); - - /* @brief mean value (depth) - */ - void getMean(cv::Mat &out); - - /* @brief percent of samples having valid depth value - */ - void getValidRatio(cv::Mat &out); -}; - -StatisticsImage::StatisticsImage(cv::Size size) : - StatisticsImage(size, std::numeric_limits<float>::infinity()) {} - -StatisticsImage::StatisticsImage(cv::Size size, float max_f) { - size_ = size; - n_ = 0.0f; - data_ = cv::Mat(size, CV_32FC3, cv::Scalar(0.0, 0.0, 0.0)); - - // TODO - if (!std::isinf(max_f)) { - LOG(WARNING) << "TODO: max_f_ not used. Values calculated for all samples"; - } -} - -void StatisticsImage::reset() { - n_ = 0.0f; - data_ = cv::Scalar(0.0, 0.0, 0.0); -} - -void StatisticsImage::update(const cv::Mat &in) { - DCHECK(in.type() == CV_32F); - DCHECK(in.size() == size_); - - n_ = n_ + 1.0f; - - // Welford's Method - for (int row = 0; row < in.rows; row++) { - float* ptr_data = data_.ptr<float>(row); - const float* ptr_in = in.ptr<float>(row); - - for (int col = 0; col < in.cols; col++, ptr_in++) { - float x = *ptr_in; - float &m = *ptr_data++; - float &s = *ptr_data++; - float &f = *ptr_data++; - float m_prev = m; - - if (!ftl::rgbd::isValidDepth(x)) continue; - - f = f + 1.0f; - m = m + (x - m) / f; - s = s + (x - m) * (x - m_prev); - } - } -} -void StatisticsImage::getVariance(cv::Mat &out) { - std::vector<cv::Mat> channels(3); - cv::split(data_, channels); - cv::divide(channels[1], channels[2], out); -} - -void StatisticsImage::getStdDev(cv::Mat &out) { - getVariance(out); - cv::sqrt(out, out); -} - -void StatisticsImage::getMean(cv::Mat &out) { - std::vector<cv::Mat> channels(3); - cv::split(data_, channels); - out = channels[0]; -} - -void StatisticsImage::getValidRatio(cv::Mat &out) { - std::vector<cv::Mat> channels(3); - cv::split(data_, channels); - cv::divide(channels[2], n_, out); -} +static int vcamcount = 0; static Eigen::Affine3d create_rotation_matrix(float ax, float ay, float az) { Eigen::Affine3d rx = @@ -141,8 +42,6 @@ static Eigen::Affine3d create_rotation_matrix(float ax, float ay, float az) { return rz * rx * ry; } -static int vcamcount = 0; - ftl::gui::Camera::Camera(ftl::gui::Screen *screen, int fsmask, int fid, ftl::codecs::Channel c) : screen_(screen), fsmask_(fsmask), fid_(fid), channel_(c),channels_(0u) { eye_ = Eigen::Vector3d(0.0f, 0.0f, 0.0f); neye_ = Eigen::Vector4d(0.0f, 0.0f, 0.0f, 0.0f); @@ -218,11 +117,103 @@ ftl::gui::Camera::Camera(ftl::gui::Screen *screen, int fsmask, int fid, ftl::cod } } +/*template<class T> +static Eigen::Matrix<T,4,4> lookAt +( + Eigen::Matrix<T,3,1> const & eye, + Eigen::Matrix<T,3,1> const & center, + Eigen::Matrix<T,3,1> const & up +) +{ + typedef Eigen::Matrix<T,4,4> Matrix4; + typedef Eigen::Matrix<T,3,1> Vector3; + + Vector3 f = (center - eye).normalized(); + Vector3 u = up.normalized(); + Vector3 s = f.cross(u).normalized(); + u = s.cross(f); + + Matrix4 res; + res << s.x(),s.y(),s.z(),-s.dot(eye), + u.x(),u.y(),u.z(),-u.dot(eye), + -f.x(),-f.y(),-f.z(),f.dot(eye), + 0,0,0,1; + + return res; +}*/ + +/*static float4 screenProjection( + const Eigen::Vector3f &pa, // Screen corner 1 + const Eigen::Vector3f &pb, // Screen corner 2 + const Eigen::Vector3f &pc, // Screen corner 3 + const Eigen::Vector3f &pe // Eye position + ) { + + Eigen::Vector3f va, vb, vc; + Eigen::Vector3f vr, vu, vn; + + float l, r, b, t, d; + + // Compute an orthonormal basis for the screen. + + //subtract(vr, pb, pa); + //subtract(vu, pc, pa); + vr = pb - pa; + vu = pc - pa; + + //normalize(vr); + //normalize(vu); + //cross_product(vn, vr, vu); + //normalize(vn); + vr.normalize(); + vu.normalize(); + vn = vr.cross(vu); + vn.normalize(); + + // Compute the screen corner vectors. + + //subtract(va, pa, pe); + //subtract(vb, pb, pe); + //subtract(vc, pc, pe); + va = pa - pe; + vb = pb - pe; + vc = pc - pe; + + // Find the distance from the eye to screen plane. + + //d = -dot_product(va, vn); + d = -va.dot(vn); + + // Find the extent of the perpendicular projection. + + //l = dot_product(vr, va) * n / d; + //r = dot_product(vr, vb) * n / d; + //b = dot_product(vu, va) * n / d; + //t = dot_product(vu, vc) * n / d; + + float n = d; + l = vr.dot(va) * n / d; + r = vr.dot(vb) * n / d; + b = vu.dot(va) * n / d; + t = vu.dot(vc) * n / d; + + //return nanogui::frustum(l,r,b,t,n,f); + return make_float4(l,r,b,t); +}*/ + ftl::gui::Camera::~Camera() { //delete writer_; //delete fileout_; } +static Eigen::Vector3f cudaToEigen(const float3 &v) { + Eigen::Vector3f e; + e[0] = v.x; + e[1] = v.y; + e[2] = v.z; + return e; +} + void ftl::gui::Camera::draw(std::vector<ftl::rgbd::FrameSet*> &fss) { if (fid_ != 255) return; //if (fsid_ >= fss.size()) return; @@ -234,28 +225,65 @@ void ftl::gui::Camera::draw(std::vector<ftl::rgbd::FrameSet*> &fss) { //state_.getLeft().fy = state_.getLeft().fx; _draw(fss); - for (auto *fset : fss) { - for (const auto &f : fset->frames) { - if (f.hasChannel(Channel::Data)) { - std::vector<cv::Rect2d> data; - f.get(Channel::Data, data); - - for (auto &d : data) { - cv::Mat over_depth; - over_depth.create(im1_.size(), CV_32F); - - Eigen::Matrix4d fakepose = Eigen::Matrix4d::Identity().inverse() * state_.getPose(); - ftl::rgbd::Camera fakecam; - fakecam.width = 1280; - fakecam.height = 720; - fakecam.fx = 700.0; - fakecam.cx = -d.x; - fakecam.cy = -(720.0-d.y); - - state_.getLeft().cx = -d.x; - state_.getLeft().cy = -(state_.getLeft().height-d.y); - - ftl::overlay::drawCamera(state_.getLeft(), im1_, over_depth, fakecam, fakepose, cv::Scalar(0,0,255,255), 0.2,screen_->root()->value("show_frustrum", false)); + if (renderer_->value("window_effect", false)) { + for (auto *fset : fss) { + for (const auto &f : fset->frames) { + if (f.hasChannel(Channel::Faces)) { + std::vector<ftl::codecs::Face> data; + f.get(Channel::Faces, data); + + if (data.size() > 0) { + auto &d = *data.rbegin(); + + //cv::Mat over_depth; + //over_depth.create(im1_.size(), CV_32F); + + auto cam = ftl::rgbd::Camera::from(intrinsics_); + + float screenWidth = intrinsics_->value("screen_size", 1.2f); // In meters + float screenHeight = (9.0f/16.0f) * screenWidth; + + //Eigen::Vector3f pc(-screenWidth/2.0f,-screenHeight/2.0f,0.0); + Eigen::Vector3f pa(-screenWidth/2.0f,screenHeight/2.0f,0.0); + //Eigen::Vector3f pb(screenWidth/2.0f,screenHeight/2.0f,0.0); + + float screenDistance = (d.depth > cam.minDepth && d.depth < cam.maxDepth) ? d.depth : intrinsics_->value("screen_dist_default", 1.2f); // Face distance from screen in meters + + auto pos = f.getLeft().screenToCam(d.box.x+(d.box.width/2), d.box.y+(d.box.height/2), -screenDistance); + Eigen::Vector3f eye; + eye[0] = pos.x; + eye[1] = -pos.y; + eye[2] = pos.z; + + Eigen::Translation3f trans(eye); + Eigen::Affine3f t(trans); + Eigen::Matrix4f viewPose = t.matrix(); + + // Top Left norm dist + Eigen::Vector3f tl = (pa - eye); + Eigen::Vector3f princ = tl; + + Eigen::Matrix4d windowPose; + windowPose.setIdentity(); + + Eigen::Matrix4d fakepose = viewPose.cast<double>().inverse() * state_.getPose(); + ftl::rgbd::Camera fakecam = cam; + fakecam.fx = screenDistance; //eye.norm(); + fakecam.fy = fakecam.fx; + fakecam.cx = (princ[0]) * fakecam.fx / princ[2] / screenWidth * cam.width; + fakecam.cy = ((princ[1]) * fakecam.fy / princ[2] / screenHeight * cam.height) - cam.height; + fakecam.fx = fakecam.fx / screenWidth * cam.width; + fakecam.fy = fakecam.fy / screenHeight * cam.height; + + // Use face tracked window pose for virtual camera + state_.getLeft() = fakecam; + transform_ix_ = fset->id; // Disable keyboard/mouse pose setting + state_.setPose(transforms_[transform_ix_] * viewPose.cast<double>()); + + //ftl::overlay::draw3DLine(state_.getLeft(), im1_, over_depth, state_.getPose().inverse() * Eigen::Vector4d(eye[0],eye[1],eye[2],1), state_.getPose().inverse() * Eigen::Vector4d(0,0,0,1), cv::Scalar(0,0,255,0)); + //ftl::overlay::drawRectangle(state_.getLeft(), im1_, over_depth, windowPose.inverse() * state_.getPose(), cv::Scalar(255,0,0,0), screenWidth, screenHeight); + //ftl::overlay::drawCamera(state_.getLeft(), im1_, over_depth, fakecam, fakepose, cv::Scalar(255,0,255,255), 1.0,screen_->root()->value("show_frustrum", false)); + } } } } @@ -646,9 +674,9 @@ const GLTexture &ftl::gui::Camera::captureFrame() { Eigen::Matrix4d viewPose = t.matrix() * rotmat_; if (isVirtual()) { - if (transform_ix_ < 0) { + if (transform_ix_ == -1) { state_.setPose(viewPose); - } else { + } else if (transform_ix_ >= 0) { transforms_[transform_ix_] = viewPose; } } diff --git a/applications/gui/src/overlay.cpp b/applications/gui/src/overlay.cpp index f5460febc..341f2daf6 100644 --- a/applications/gui/src/overlay.cpp +++ b/applications/gui/src/overlay.cpp @@ -73,6 +73,35 @@ void ftl::overlay::drawPoseBox( draw3DLine(cam, colour, depth, p011, p111, linecolour); } +void ftl::overlay::drawRectangle( + const ftl::rgbd::Camera &cam, + cv::Mat &colour, + cv::Mat &depth, + const Eigen::Matrix4d &pose, + const cv::Scalar &linecolour, + double width, double height) { + + double width2 = width/2.0; + double height2 = height/2.0; + + Eigen::Vector4d p001 = pose.inverse() * Eigen::Vector4d(width2,height2,0,1); + Eigen::Vector4d p011 = pose.inverse() * Eigen::Vector4d(width2,-height2,0,1); + Eigen::Vector4d p111 = pose.inverse() * Eigen::Vector4d(-width2,-height2,0,1); + Eigen::Vector4d p101 = pose.inverse() * Eigen::Vector4d(-width2,height2,0,1); + + p001 /= p001[3]; + p011 /= p011[3]; + p111 /= p111[3]; + p101 /= p101[3]; + + if (p001[2] < 0.1 || p011[2] < 0.1 || p111[2] < 0.1 || p101[2] < 0.1) return; + + draw3DLine(cam, colour, depth, p001, p011, linecolour); + draw3DLine(cam, colour, depth, p001, p101, linecolour); + draw3DLine(cam, colour, depth, p101, p111, linecolour); + draw3DLine(cam, colour, depth, p011, p111, linecolour); +} + void ftl::overlay::drawPoseCone( const ftl::rgbd::Camera &cam, cv::Mat &colour, @@ -130,11 +159,15 @@ void ftl::overlay::drawCamera( double principx = (((static_cast<double>(params.width) / 2.0) + params.cx) / static_cast<double>(params.fx)) * scale; double principy = (((static_cast<double>(params.height) / 2.0) + params.cy) / static_cast<double>(params.fx)) * scale; - Eigen::Vector4d p110 = pose.inverse() * Eigen::Vector4d(-width2,-height2,scale,1); - Eigen::Vector4d p100 = pose.inverse() * Eigen::Vector4d(-width2,height2,scale,1); - Eigen::Vector4d p010 = pose.inverse() * Eigen::Vector4d(width2,-height2,scale,1); - Eigen::Vector4d p000 = pose.inverse() * Eigen::Vector4d(width2,height2,scale,1); - Eigen::Vector4d origin = pose.inverse() * Eigen::Vector4d(principx,principy,0,1); + auto ptcoord = params.screenToCam(0,0,scale); + Eigen::Vector4d p110 = pose.inverse() * Eigen::Vector4d(ptcoord.x,ptcoord.y,ptcoord.z,1); + ptcoord = params.screenToCam(0,params.height,scale); + Eigen::Vector4d p100 = pose.inverse() * Eigen::Vector4d(ptcoord.x,ptcoord.y,ptcoord.z,1); + ptcoord = params.screenToCam(params.width,0,scale); + Eigen::Vector4d p010 = pose.inverse() * Eigen::Vector4d(ptcoord.x,ptcoord.y,ptcoord.z,1); + ptcoord = params.screenToCam(params.width,params.height,scale); + Eigen::Vector4d p000 = pose.inverse() * Eigen::Vector4d(ptcoord.x,ptcoord.y,ptcoord.z,1); + Eigen::Vector4d origin = pose.inverse() * Eigen::Vector4d(0,0,0,1); p110 /= p110[3]; p100 /= p100[3]; @@ -158,10 +191,22 @@ void ftl::overlay::drawCamera( if (frustrum) { const double fscale = 16.0; - Eigen::Vector4d f110 = pose.inverse() * Eigen::Vector4d(-width2*fscale,-height2*fscale,scale*fscale,1); - Eigen::Vector4d f100 = pose.inverse() * Eigen::Vector4d(-width2*fscale,height2*fscale,scale*fscale,1); - Eigen::Vector4d f010 = pose.inverse() * Eigen::Vector4d(width2*fscale,-height2*fscale,scale*fscale,1); - Eigen::Vector4d f000 = pose.inverse() * Eigen::Vector4d(width2*fscale,height2*fscale,scale*fscale,1); + ptcoord = params.screenToCam(0,0,fscale); + Eigen::Vector4d f110 = pose.inverse() * Eigen::Vector4d(ptcoord.x,ptcoord.y,ptcoord.z,1); + ptcoord = params.screenToCam(0,params.height,fscale); + Eigen::Vector4d f100 = pose.inverse() * Eigen::Vector4d(ptcoord.x,ptcoord.y,ptcoord.z,1); + ptcoord = params.screenToCam(params.width,0,fscale); + Eigen::Vector4d f010 = pose.inverse() * Eigen::Vector4d(ptcoord.x,ptcoord.y,ptcoord.z,1); + ptcoord = params.screenToCam(params.width,params.height,fscale); + Eigen::Vector4d f000 = pose.inverse() * Eigen::Vector4d(ptcoord.x,ptcoord.y,ptcoord.z,1); + + f110 /= f110[3]; + f100 /= f100[3]; + f010 /= f010[3]; + f000 /= f000[3]; + + if (f110[2] < 0.1 || f100[2] < 0.1 || f010[2] < 0.1 || f000[2] < 0.1) return; + draw3DLine(vcam, colour, depth, f000, p000, cv::Scalar(0,255,0,0)); draw3DLine(vcam, colour, depth, f010, p010, cv::Scalar(0,255,0,0)); draw3DLine(vcam, colour, depth, f100, p100, cv::Scalar(0,255,0,0)); diff --git a/applications/gui/src/overlay.hpp b/applications/gui/src/overlay.hpp index d9ce66572..9aff7628e 100644 --- a/applications/gui/src/overlay.hpp +++ b/applications/gui/src/overlay.hpp @@ -36,6 +36,14 @@ void drawPoseBox( const cv::Scalar &linecolour, double size); +void drawRectangle( + const ftl::rgbd::Camera &cam, + cv::Mat &colour, + cv::Mat &depth, + const Eigen::Matrix4d &pose, + const cv::Scalar &linecolour, + double width, double height); + void drawPoseCone( const ftl::rgbd::Camera &cam, cv::Mat &colour, diff --git a/applications/gui/src/statsimage.cpp b/applications/gui/src/statsimage.cpp new file mode 100644 index 000000000..dc8179fdc --- /dev/null +++ b/applications/gui/src/statsimage.cpp @@ -0,0 +1,72 @@ +#include "statsimage.hpp" + +using ftl::gui::StatisticsImage; + +StatisticsImage::StatisticsImage(cv::Size size) : + StatisticsImage(size, std::numeric_limits<float>::infinity()) {} + +StatisticsImage::StatisticsImage(cv::Size size, float max_f) { + size_ = size; + n_ = 0.0f; + data_ = cv::Mat(size, CV_32FC3, cv::Scalar(0.0, 0.0, 0.0)); + + // TODO + if (!std::isinf(max_f)) { + LOG(WARNING) << "TODO: max_f_ not used. Values calculated for all samples"; + } +} + +void StatisticsImage::reset() { + n_ = 0.0f; + data_ = cv::Scalar(0.0, 0.0, 0.0); +} + +void StatisticsImage::update(const cv::Mat &in) { + DCHECK(in.type() == CV_32F); + DCHECK(in.size() == size_); + + n_ = n_ + 1.0f; + + // Welford's Method + for (int row = 0; row < in.rows; row++) { + float* ptr_data = data_.ptr<float>(row); + const float* ptr_in = in.ptr<float>(row); + + for (int col = 0; col < in.cols; col++, ptr_in++) { + float x = *ptr_in; + float &m = *ptr_data++; + float &s = *ptr_data++; + float &f = *ptr_data++; + float m_prev = m; + + if (!ftl::rgbd::isValidDepth(x)) continue; + + f = f + 1.0f; + m = m + (x - m) / f; + s = s + (x - m) * (x - m_prev); + } + } +} + +void StatisticsImage::getVariance(cv::Mat &out) { + std::vector<cv::Mat> channels(3); + cv::split(data_, channels); + cv::divide(channels[1], channels[2], out); +} + +void StatisticsImage::getStdDev(cv::Mat &out) { + getVariance(out); + cv::sqrt(out, out); +} + +void StatisticsImage::getMean(cv::Mat &out) { + std::vector<cv::Mat> channels(3); + cv::split(data_, channels); + out = channels[0]; +} + +void StatisticsImage::getValidRatio(cv::Mat &out) { + std::vector<cv::Mat> channels(3); + cv::split(data_, channels); + cv::divide(channels[2], n_, out); +} diff --git a/applications/gui/src/statsimage.hpp b/applications/gui/src/statsimage.hpp new file mode 100644 index 000000000..c796bfb74 --- /dev/null +++ b/applications/gui/src/statsimage.hpp @@ -0,0 +1,47 @@ +#ifndef _FTL_GUI_STATISTICSIMAGE_HPP_ +#define _FTL_GUI_STATISTICSIMAGE_HPP_ + +#include <opencv2/core/mat.hpp> + +namespace ftl { +namespace gui { + +class StatisticsImage { +private: + cv::Mat data_; // CV_32FC3, channels: m, s, f + cv::Size size_; // image size + float n_; // total number of samples + +public: + explicit StatisticsImage(cv::Size size); + StatisticsImage(cv::Size size, float max_f); + + /* @brief reset all statistics to 0 + */ + void reset(); + + /* @brief update statistics with new values + */ + void update(const cv::Mat &in); + + /* @brief variance (depth) + */ + void getVariance(cv::Mat &out); + + /* @brief standard deviation (depth) + */ + void getStdDev(cv::Mat &out); + + /* @brief mean value (depth) + */ + void getMean(cv::Mat &out); + + /* @brief percent of samples having valid depth value + */ + void getValidRatio(cv::Mat &out); +}; + +} +} + +#endif // _FTL_GUI_STATISTICSIMAGE_HPP_ \ No newline at end of file diff --git a/components/codecs/include/ftl/codecs/codecs.hpp b/components/codecs/include/ftl/codecs/codecs.hpp index 49097ba92..58002ac42 100644 --- a/components/codecs/include/ftl/codecs/codecs.hpp +++ b/components/codecs/include/ftl/codecs/codecs.hpp @@ -59,6 +59,7 @@ enum struct definition_t : uint8_t { Any = 7, HTC_VIVE = 8, + OLD_SKOOL = 9, // TODO: Add audio definitions diff --git a/components/codecs/include/ftl/codecs/faces.hpp b/components/codecs/include/ftl/codecs/faces.hpp new file mode 100644 index 000000000..5f17e3bad --- /dev/null +++ b/components/codecs/include/ftl/codecs/faces.hpp @@ -0,0 +1,25 @@ +#ifndef _FTL_CODECS_FACE_HPP_ +#define _FTL_CODECS_FACE_HPP_ + +#include <opencv2/core/mat.hpp> + +#include <ftl/utility/msgpack.hpp> + +namespace ftl { +namespace codecs { +struct Face { + Face() {}; + //Face(const int &id, const cv::Vec3d &rvec, const cv::Vec3d &tvec) : + // id(id), rvec(rvec), tvec(tvec) {} + + int id; + cv::Rect2d box; + float depth; + + MSGPACK_DEFINE_ARRAY(id, box, depth); +}; + +} +} + +#endif diff --git a/components/codecs/include/ftl/codecs/transformation.hpp b/components/codecs/include/ftl/codecs/transformation.hpp index b1ff85374..dfc8ef099 100644 --- a/components/codecs/include/ftl/codecs/transformation.hpp +++ b/components/codecs/include/ftl/codecs/transformation.hpp @@ -10,8 +10,8 @@ namespace ftl { namespace codecs { struct Transformation { - explicit Transformation() {}; - explicit Transformation(const int &id, const cv::Vec3d &rvec, const cv::Vec3d &tvec) : + Transformation() {}; + Transformation(const int &id, const cv::Vec3d &rvec, const cv::Vec3d &tvec) : id(id), rvec(rvec), tvec(tvec) {} int id; diff --git a/components/codecs/src/bitrates.cpp b/components/codecs/src/bitrates.cpp index 4d5fe63f3..b72b437d9 100644 --- a/components/codecs/src/bitrates.cpp +++ b/components/codecs/src/bitrates.cpp @@ -23,6 +23,7 @@ static const Resolution resolutions[] = { 640, 360, // LD360 0, 0, // ANY 1852, 2056, // HTC_VIVE + 640, 480, // Old school 4:3 0, 0 }; diff --git a/components/operators/include/ftl/operators/detectandtrack.hpp b/components/operators/include/ftl/operators/detectandtrack.hpp index 76d478730..f6c5c869f 100644 --- a/components/operators/include/ftl/operators/detectandtrack.hpp +++ b/components/operators/include/ftl/operators/detectandtrack.hpp @@ -6,6 +6,8 @@ #include <opencv2/tracking.hpp> #include <opencv2/aruco.hpp> +#include <ftl/threads.hpp> + namespace ftl { namespace operators { @@ -54,6 +56,9 @@ class DetectAndTrack : public ftl::operators::Operator { private: std::future<bool> job_; + std::future<bool> detect_job_; + bool detecting_; + MUTEX mtx_; int createTracker(const cv::Mat &im, const cv::Rect2d &obj); @@ -69,7 +74,7 @@ class DetectAndTrack : public ftl::operators::Operator { cv::Ptr<cv::Tracker> tracker; int fail_count; }; - std::vector<Object> tracked_; + std::list<Object> tracked_; // 0: KCF, 1: CSRT int tracker_type_; diff --git a/components/operators/src/detectandtrack.cpp b/components/operators/src/detectandtrack.cpp index 5ea48eff0..0f2e0dd06 100644 --- a/components/operators/src/detectandtrack.cpp +++ b/components/operators/src/detectandtrack.cpp @@ -3,6 +3,7 @@ #include "loguru.hpp" #include "ftl/operators/detectandtrack.hpp" #include <ftl/exception.hpp> +#include <ftl/codecs/faces.hpp> using std::string; using std::vector; @@ -15,10 +16,11 @@ using cv::Rect2d; using cv::Point2i; using cv::Point2d; +using ftl::codecs::Channel; using ftl::rgbd::Frame; using ftl::operators::DetectAndTrack; -DetectAndTrack::DetectAndTrack(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) { +DetectAndTrack::DetectAndTrack(ftl::Configurable *cfg) : ftl::operators::Operator(cfg), detecting_(false) { init(); } @@ -61,7 +63,7 @@ bool DetectAndTrack::init() { if (min_size_[1] > max_size_[1]) { min_size_[1] = max_size_[1]; } update_bounding_box_ = config()->value("update_bounding_box", false); - std::string tracker_type = config()->value("tracker_type", std::string("KCF")); + std::string tracker_type = config()->value("tracker_type", std::string("CSRT")); if (tracker_type == "CSRT") { tracker_type_ = 1; } @@ -112,6 +114,7 @@ int DetectAndTrack::createTracker(const Mat &im, const Rect2d &obj) { int id = id_max_++; tracker->init(im, obj); + tracked_.push_back({ id, obj, tracker, 0 }); return id; } @@ -121,13 +124,33 @@ bool DetectAndTrack::detect(const Mat &im) { Size max_size(im.size().width*max_size_[0], im.size().height*max_size_[1]); vector<Rect> objects; + vector<int> rejectLevels; + vector<double> levelWeights; - classifier_.detectMultiScale(im, objects, - scalef_, min_neighbors_, 0, min_size, max_size); + classifier_.detectMultiScale(im, objects, rejectLevels, levelWeights, + scalef_, min_neighbors_, 0, min_size, max_size, true); - LOG(INFO) << "Cascade classifier found " << objects.size() << " objects"; + if (objects.size() > 0) LOG(INFO) << "Cascade classifier found " << objects.size() << " objects"; + + double best = -100.0; + int best_ix = -1; + for (size_t i=0; i<levelWeights.size(); ++i) { + if (levelWeights[i] > best) { + best = levelWeights[i]; + best_ix = i; + } + } + + UNIQUE_LOCK(mtx_, lk); - for (const Rect2d &obj : objects) { + // Assume failure unless matched + for (auto &tracker : tracked_) { + tracker.fail_count++; + } + + //for (int i=0; i<objects.size(); ++i) { + if (best_ix >= 0) { + const Rect2d &obj = objects[best_ix]; Point2d c = center(obj); bool found = false; @@ -137,12 +160,17 @@ bool DetectAndTrack::detect(const Mat &im) { tracker.object = obj; tracker.tracker->init(im, obj); } + + // Matched so didn't fail really. + tracker.fail_count = 0; found = true; break; } } + //LOG(INFO) << "Face confidence: " << levelWeights[best_ix]; + if (!found && (tracked_.size() < max_tracked_)) { createTracker(im, obj); } @@ -152,8 +180,9 @@ bool DetectAndTrack::detect(const Mat &im) { } bool DetectAndTrack::track(const Mat &im) { + UNIQUE_LOCK(mtx_, lk); for (auto it = tracked_.begin(); it != tracked_.end();) { - if (!it->tracker->update(im, it->object)) { + /*if (!it->tracker->update(im, it->object)) { it->fail_count++; } else { @@ -161,9 +190,15 @@ bool DetectAndTrack::track(const Mat &im) { } if (it->fail_count > max_fail_) { - tracked_.erase(it); + it = tracked_.erase(it); + } + else { it++; }*/ + + if (it->fail_count >= max_fail_ || !it->tracker->update(im, it->object)) { + it = tracked_.erase(it); + } else { + ++it; } - else { it++; } } return true; @@ -204,9 +239,13 @@ bool DetectAndTrack::apply(Frame &in, Frame &out, cudaStream_t stream) { track(im); - if ((n_frame_++ % detect_n_frames_ == 0) && (tracked_.size() < max_tracked_)) { + if (!detecting_) { // && tracked_.size() < max_tracked_) { + //if (detect_job_.valid()) detect_job_.get(); // To throw any exceptions + detecting_ = true; + + //if ((n_frame_++ % detect_n_frames_ == 0) && (tracked_.size() < max_tracked_)) { if (im.channels() == 1) { - gray_ = im; + gray_ = im; // FIXME: Needs to be a copy } else if (im.channels() == 4) { cv::cvtColor(im, gray_, cv::COLOR_BGRA2GRAY); @@ -219,13 +258,31 @@ bool DetectAndTrack::apply(Frame &in, Frame &out, cudaStream_t stream) { return false; } - detect(gray_); + ftl::pool.push([this](int id) { + try { + detect(gray_); + } catch (std::exception &e) { + LOG(ERROR) << "Exception in face detection: " << e.what(); + } + detecting_ = false; + return true; + }); + //} + } + + cv::Mat depth; + if (in.hasChannel(Channel::Depth)) { + depth = in.fastDownload(Channel::Depth, cv::cuda::Stream::Null()); } - std::vector<Rect2d> result; + std::vector<ftl::codecs::Face> result; result.reserve(tracked_.size()); for (auto const &tracked : tracked_) { - result.push_back(tracked.object); + auto &face = result.emplace_back(); + face.box = tracked.object; + face.id = tracked.id; + if (depth.empty()) face.depth = 0.0f; + else face.depth = depth.at<float>(cv::Point(face.box.x+face.box.width/2, face.box.y+face.box.height/2)); if (debug_) { cv::putText(im, "#" + std::to_string(tracked.id), diff --git a/components/rgbd-sources/src/sources/realsense/realsense_source.cpp b/components/rgbd-sources/src/sources/realsense/realsense_source.cpp index 68c1a9acf..3198b53e8 100644 --- a/components/rgbd-sources/src/sources/realsense/realsense_source.cpp +++ b/components/rgbd-sources/src/sources/realsense/realsense_source.cpp @@ -9,7 +9,7 @@ using ftl::codecs::Channel; using cv::cuda::GpuMat; RealsenseSource::RealsenseSource(ftl::rgbd::Source *host) - : ftl::rgbd::detail::Source(host), align_to_depth_(RS2_STREAM_DEPTH) { + : ftl::rgbd::detail::Source(host), align_to_depth_(RS2_STREAM_COLOR) { capabilities_ = kCapVideo; rs2::config cfg; @@ -32,10 +32,12 @@ RealsenseSource::RealsenseSource(ftl::rgbd::Source *host) params_.cy = -intrin.ppy; params_.fx = intrin.fx; params_.fy = intrin.fy; - params_.maxDepth = 11.0; + params_.maxDepth = 3.0; params_.minDepth = 0.1; params_.doffs = 0.0; + state_.getLeft() = params_; + LOG(INFO) << "Realsense Intrinsics: " << params_.fx << "," << params_.fy << " - " << params_.cx << "," << params_.cy << " - " << params_.width; } @@ -44,23 +46,41 @@ RealsenseSource::~RealsenseSource() { } bool RealsenseSource::compute(int n, int b) { + frame_.reset(); + frame_.setOrigin(&state_); + rs2::frameset frames; if (!pipe_.poll_for_frames(&frames)) return false; //wait_for_frames(); //std::this_thread::sleep_for(std::chrono::milliseconds(10000)); - frames = align_to_depth_.process(frames); + if (host_->value("colour_only", false)) { + auto cframe = frames.get_color_frame(); + int w = cframe.get_width(); + int h = cframe.get_height(); + + if (params_.width != w) { + params_.width = w; + params_.height = h; + state_.getLeft() = params_; + } + + cv::Mat tmp_rgb(cv::Size(w, h), CV_8UC4, (void*)cframe.get_data(), cv::Mat::AUTO_STEP); + frame_.create<GpuMat>(Channel::Colour).upload(tmp_rgb); + } else { + frames = align_to_depth_.process(frames); - rs2::depth_frame depth = frames.get_depth_frame(); - float w = depth.get_width(); - float h = depth.get_height(); - rscolour_ = frames.first(RS2_STREAM_COLOR); //.get_color_frame(); + rs2::depth_frame depth = frames.get_depth_frame(); + float w = depth.get_width(); + float h = depth.get_height(); + rscolour_ = frames.first(RS2_STREAM_COLOR); //.get_color_frame(); - cv::Mat tmp_depth(cv::Size((int)w, (int)h), CV_16UC1, (void*)depth.get_data(), depth.get_stride_in_bytes()); - tmp_depth.convertTo(tmp_depth, CV_32FC1, scale_); - frame_.get<GpuMat>(Channel::Depth).upload(tmp_depth); - cv::Mat tmp_rgb(cv::Size(w, h), CV_8UC4, (void*)rscolour_.get_data(), cv::Mat::AUTO_STEP); - frame_.get<GpuMat>(Channel::Colour).upload(tmp_rgb); + cv::Mat tmp_depth(cv::Size((int)w, (int)h), CV_16UC1, (void*)depth.get_data(), depth.get_stride_in_bytes()); + tmp_depth.convertTo(tmp_depth, CV_32FC1, scale_); + frame_.create<GpuMat>(Channel::Depth).upload(tmp_depth); + cv::Mat tmp_rgb(cv::Size(w, h), CV_8UC4, (void*)rscolour_.get_data(), cv::Mat::AUTO_STEP); + frame_.create<GpuMat>(Channel::Colour).upload(tmp_rgb); + } host_->notify(timestamp_, frame_); return true; diff --git a/components/rgbd-sources/src/sources/stereovideo/local.cpp b/components/rgbd-sources/src/sources/stereovideo/local.cpp index eca500f79..b03428df0 100644 --- a/components/rgbd-sources/src/sources/stereovideo/local.cpp +++ b/components/rgbd-sources/src/sources/stereovideo/local.cpp @@ -68,13 +68,16 @@ LocalSource::LocalSource(nlohmann::json &config) stereo_ = true; } + LOG(INFO) << "Video backend: " << camera_a_->getBackendName(); + LOG(INFO) << "Video defaults: " << camera_a_->get(cv::CAP_PROP_FRAME_WIDTH) << "x" << camera_a_->get(cv::CAP_PROP_FRAME_HEIGHT) << " @ " << camera_a_->get(cv::CAP_PROP_FPS); + camera_a_->set(cv::CAP_PROP_FRAME_WIDTH, value("width", 640)); camera_a_->set(cv::CAP_PROP_FRAME_HEIGHT, value("height", 480)); camera_a_->set(cv::CAP_PROP_FPS, 1000 / ftl::timer::getInterval()); //camera_a_->set(cv::CAP_PROP_BUFFERSIZE, 0); // Has no effect Mat frame; - camera_a_->grab(); + if (!camera_a_->grab()) LOG(ERROR) << "Could not grab a video frame"; camera_a_->retrieve(frame); LOG(INFO) << "Video size : " << frame.cols << "x" << frame.rows; width_ = frame.cols; @@ -221,13 +224,15 @@ LocalSource::LocalSource(nlohmann::json &config, const string &vid) bool LocalSource::grab() { if (!camera_a_) return false; - if (!camera_a_->grab()) { - LOG(ERROR) << "Unable to grab from camera A"; - return false; - } - if (camera_b_ && !camera_b_->grab()) { - LOG(ERROR) << "Unable to grab from camera B"; - return false; + if (camera_b_) { + if (!camera_a_->grab()) { + LOG(ERROR) << "Unable to grab from camera A"; + return false; + } + if (camera_b_ && !camera_b_->grab()) { + LOG(ERROR) << "Unable to grab from camera B"; + return false; + } } return true; @@ -246,15 +251,22 @@ bool LocalSource::get(cv::cuda::GpuMat &l_out, cv::cuda::GpuMat &r_out, cv::cuda if (!camera_a_) return false; - // TODO: Use threads here? - if (!camera_a_->retrieve(lfull)) { - LOG(ERROR) << "Unable to read frame from camera A"; - return false; - } + if (camera_b_) { + // TODO: Use threads here? + if (!camera_a_->retrieve(lfull)) { + LOG(ERROR) << "Unable to read frame from camera A"; + return false; + } - if (camera_b_ && !camera_b_->retrieve(rfull)) { - LOG(ERROR) << "Unable to read frame from camera B"; - return false; + if (camera_b_ && !camera_b_->retrieve(rfull)) { + LOG(ERROR) << "Unable to read frame from camera B"; + return false; + } + } else { + if (!camera_a_->read(lfull)) { + LOG(ERROR) << "Unable to read frame from camera A"; + return false; + } } if (stereo_) { -- GitLab