diff --git a/applications/gui/src/camera.cpp b/applications/gui/src/camera.cpp index a2b9ef5b8c68ccc98e2a5ef29a19e8153180650b..24fb0aedb1acd40ce50debdf19054472c039e867 100644 --- a/applications/gui/src/camera.cpp +++ b/applications/gui/src/camera.cpp @@ -48,11 +48,11 @@ static Eigen::Affine3d create_rotation_matrix(float ax, float ay, float az) { ftl::gui::Camera::Camera(ftl::gui::Screen *screen, int fsmask, int fid, ftl::codecs::Channel c) : screen_(screen), fsmask_(fsmask), fid_(fid), texture1_(GLTexture::Type::BGRA), texture2_(GLTexture::Type::BGRA), depth1_(GLTexture::Type::Float), channel_(c),channels_(0u) { - + eye_ = Eigen::Vector3d::Zero(); neye_ = Eigen::Vector4d::Zero(); rotmat_.setIdentity(); - + //up_ = Eigen::Vector3f(0,1.0f,0); lerpSpeed_ = 0.999f; sdepth_ = false; @@ -98,7 +98,7 @@ ftl::gui::Camera::Camera(ftl::gui::Screen *screen, int fsmask, int fid, ftl::cod }); intrinsics_ = ftl::create<ftl::Configurable>(renderer_, "intrinsics"); - + state_.getLeft() = ftl::rgbd::Camera::from(intrinsics_); state_.getRight() = state_.getLeft(); @@ -389,7 +389,7 @@ void ftl::gui::Camera::_draw(std::vector<ftl::rgbd::FrameSet*> &fss) { if (!post_pipe_) { post_pipe_ = ftl::config::create<ftl::operators::Graph>(screen_->root(), "post_filters"); post_pipe_->append<ftl::operators::FXAA>("fxaa"); - post_pipe_->append<ftl::operators::GTAnalysis>("gtanal"); + post_pipe_->append<ftl::operators::GTAnalysis>("gtanalyse"); } post_pipe_->apply(frame_, frame_, 0); @@ -587,14 +587,14 @@ bool ftl::gui::Camera::setVR(bool on) { state_.getLeft().height = size_y; state_.getRight().width = size_x; state_.getRight().height = size_y; - + intrinsic = getCameraMatrix(screen_->getVR(), vr::Eye_Left); CHECK(intrinsic(0, 2) < 0 && intrinsic(1, 2) < 0); state_.getLeft().fx = intrinsic(0,0); state_.getLeft().fy = intrinsic(0,0); state_.getLeft().cx = intrinsic(0,2); state_.getLeft().cy = intrinsic(1,2); - + intrinsic = getCameraMatrix(screen_->getVR(), vr::Eye_Right); CHECK(intrinsic(0, 2) < 0 && intrinsic(1, 2) < 0); state_.getRight().fx = intrinsic(0,0); @@ -671,7 +671,7 @@ const void ftl::gui::Camera::captureFrame() { if (screen_->isVR()) { #ifdef HAVE_OPENVR - + vr::VRCompositor()->SetTrackingSpace(vr::TrackingUniverseStanding); vr::VRCompositor()->WaitGetPoses(rTrackedDevicePose_, vr::k_unMaxTrackedDeviceCount, NULL, 0 ); @@ -679,12 +679,12 @@ const void ftl::gui::Camera::captureFrame() { { Eigen::Matrix4d eye_l = ConvertSteamVRMatrixToMatrix4( vr::VRSystem()->GetEyeToHeadTransform(vr::Eye_Left)); - + //Eigen::Matrix4d eye_r = ConvertSteamVRMatrixToMatrix4( // vr::VRSystem()->GetEyeToHeadTransform(vr::Eye_Left)); float baseline_in = 2.0 * eye_l(0, 3); - + if (baseline_in != baseline_) { baseline_ = baseline_in; //src_->set("baseline", baseline_); @@ -698,17 +698,17 @@ const void ftl::gui::Camera::captureFrame() { vreye[0] = pose(0, 3); vreye[1] = -pose(1, 3); vreye[2] = -pose(2, 3); - + // NOTE: If modified, should be verified with VR headset! Eigen::Matrix3d R; R = Eigen::AngleAxisd(ea[0], Eigen::Vector3d::UnitX()) * Eigen::AngleAxisd(-ea[1], Eigen::Vector3d::UnitY()) * - Eigen::AngleAxisd(-ea[2], Eigen::Vector3d::UnitZ()); - + Eigen::AngleAxisd(-ea[2], Eigen::Vector3d::UnitZ()); + //double rd = 180.0 / 3.141592; //LOG(INFO) << "rotation x: " << ea[0] *rd << ", y: " << ea[1] * rd << ", z: " << ea[2] * rd; // pose.block<3, 3>(0, 0) = R; - + rotmat_.block(0, 0, 3, 3) = R; // TODO: Apply a rotation to orient also @@ -720,7 +720,7 @@ const void ftl::gui::Camera::captureFrame() { Eigen::Translation3d trans(eye_ + vreye); Eigen::Affine3d t(trans); viewPose = t.matrix() * rotmat_; - + } else { //LOG(ERROR) << "No VR Pose"; } diff --git a/applications/gui/src/src_window.cpp b/applications/gui/src/src_window.cpp index b3b71fabfd77187444df545c8fe6d389e0c954ad..d397285fe6fdbf90415a39b325e1a4146bd6eabe 100644 --- a/applications/gui/src/src_window.cpp +++ b/applications/gui/src/src_window.cpp @@ -55,7 +55,7 @@ using std::vector; using ftl::config::json_t; static ftl::rgbd::Generator *createSourceGenerator(ftl::Configurable *root, const std::vector<ftl::rgbd::Source*> &srcs) { - + auto *grp = new ftl::rgbd::Group(); /*auto pipeline = ftl::config::create<ftl::operators::Graph>(root, "pipeline"); pipeline->append<ftl::operators::DetectAndTrack>("facedetection")->value("enabled", false); @@ -75,7 +75,7 @@ SourceWindow::SourceWindow(ftl::gui::Screen *screen) setLayout(new nanogui::BoxLayout(nanogui::Orientation::Vertical, nanogui::Alignment::Fill, 20, 5)); using namespace nanogui; - + new Label(this, "Select Camera","sans-bold",20); // FIXME: Reallocating the vector may currently causes thread issues since @@ -209,7 +209,7 @@ bool SourceWindow::_processFrameset(ftl::rgbd::FrameSet &fs, bool fromstream) { } if (!paused_) { - if (!fs.test(ftl::data::FSFlag::PARTIAL) || !screen_->root()->value("drop_partial_framesets", false)) { + if (!fs.test(ftl::data::FSFlag::PARTIAL) || !screen_->root()->value("drop_partial_framesets", false)) { // Enforce interpolated colour and GPU upload for (size_t i=0; i<fs.frames.size(); ++i) { if (!fs.hasFrame(i)) continue; @@ -274,7 +274,7 @@ void SourceWindow::_checkFrameSets(size_t id) { p->append<ftl::operators::CullDiscontinuity>("remove_discontinuity"); p->append<ftl::operators::MultiViewMLS>("mvmls")->value("enabled", false); p->append<ftl::operators::Poser>("poser")->value("enabled", true); - p->append<ftl::operators::GTAnalysis>("gtanal"); + p->append<ftl::operators::GTAnalysis>("gtanalyse"); pre_pipelines_.push_back(p); framesets_.push_back(new ftl::rgbd::FrameSet); diff --git a/components/operators/include/ftl/operators/cuda/disparity.hpp b/components/operators/include/ftl/operators/cuda/disparity.hpp index e18fcd4fe972b82706fdefecb285546e345c2778..de4648f6f4c4d2dfe8681aea6a96d7fbc40b40af 100644 --- a/components/operators/include/ftl/operators/cuda/disparity.hpp +++ b/components/operators/include/ftl/operators/cuda/disparity.hpp @@ -3,11 +3,13 @@ namespace ftl { namespace cuda { -void disparity_to_depth(const cv::cuda::GpuMat &disparity, cv::cuda::GpuMat &depth, - const ftl::rgbd::Camera &c, cudaStream_t &stream); +template<typename T_in, typename T_out> +void disparity_to_depth(const cv::cuda::GpuMat &depth, cv::cuda::GpuMat &disparity, + const ftl::rgbd::Camera &c, float scale, cudaStream_t &stream); -void depth_to_disparity(cv::cuda::GpuMat &disparity, const cv::cuda::GpuMat &depth, - const ftl::rgbd::Camera &c, cudaStream_t &stream); +template<typename T_in, typename T_out> +void depth_to_disparity(const cv::cuda::GpuMat &depth, cv::cuda::GpuMat &disparity, + const ftl::rgbd::Camera &c, float scale, cudaStream_t &stream); void remove_occlusions(cv::cuda::GpuMat &depth, const cv::cuda::GpuMat &depthR, const ftl::rgbd::Camera &c, cudaStream_t stream); diff --git a/components/operators/include/ftl/operators/cuda/gt.hpp b/components/operators/include/ftl/operators/cuda/gt.hpp index cd317a615e21e5dedf30c6d53f054e5564a075fc..e98600e6d7f7d4bb00d47cf9ecaeadd1bbca89f8 100644 --- a/components/operators/include/ftl/operators/cuda/gt.hpp +++ b/components/operators/include/ftl/operators/cuda/gt.hpp @@ -8,33 +8,43 @@ namespace ftl { namespace cuda { struct GTAnalysisData { - int invalid; // Count of invalid (missing depth) - int bad; // Count bad (above x disparity error) - float totalerror; // Summed disparity error (of valid values) - int masked; // Count of pixels masked. + int valid; // number of pixels with valid pixels + int missing; // number of missing non-masked pixels + int missing_masked; // number of missing masked pixels + int masked; // number of pixels masked (in gt) + + int good; // number of good pixels (valid value and error within min/max threshold) + float err; // sum of absolute error + float err_sq; // sum of squared error }; void gt_analysis( ftl::cuda::TextureObject<uchar4> &colour, ftl::cuda::TextureObject<float> &depth, ftl::cuda::TextureObject<float> >, + ftl::cuda::TextureObject<uchar> &mask, ftl::cuda::GTAnalysisData *out, const ftl::rgbd::Camera &cam, - float threshold, - float outmax, + float t_min, + float t_max, + uchar4 colour_value, + bool use_disparity, cudaStream_t stream ); void gt_analysis( ftl::cuda::TextureObject<float> &depth, ftl::cuda::TextureObject<float> >, + ftl::cuda::TextureObject<uchar> &mask, ftl::cuda::GTAnalysisData *out, const ftl::rgbd::Camera &cam, - float threshold, + float t_min, + float t_max, + bool use_disparity, cudaStream_t stream ); } } -#endif \ No newline at end of file +#endif \ No newline at end of file diff --git a/components/operators/src/disparity/bilateral_filter.cpp b/components/operators/src/disparity/bilateral_filter.cpp index 0f515ed41b842df11099927542498ae861dc1df9..28f54675e123b34177d39623bddc24eeb9a55efe 100644 --- a/components/operators/src/disparity/bilateral_filter.cpp +++ b/components/operators/src/disparity/bilateral_filter.cpp @@ -13,7 +13,7 @@ using ftl::operators::DisparityBilateralFilter; DisparityBilateralFilter::DisparityBilateralFilter(ftl::Configurable* cfg) : ftl::operators::Operator(cfg) { - + scale_ = 16.0; n_disp_ = cfg->value("n_disp", 256); radius_ = cfg->value("radius", 4); @@ -39,7 +39,7 @@ bool DisparityBilateralFilter::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out //LOG(ERROR) << "Calculated disparity from depth"; - ftl::cuda::depth_to_disparity(disp, depth, params, stream); + ftl::cuda::depth_to_disparity<float, short>(depth, disp, params, 16.0f, stream); } else { throw FTL_Error("Joint Bilateral Filter is missing Disparity and Depth"); return false; diff --git a/components/operators/src/disparity/disp2depth.cu b/components/operators/src/disparity/disp2depth.cu index cc023c6c2b551043042ae93cc13830aec307bc98..c0011adf4d92ea9d30a4f56e1ad9ccbe4c06b976 100644 --- a/components/operators/src/disparity/disp2depth.cu +++ b/components/operators/src/disparity/disp2depth.cu @@ -8,58 +8,70 @@ #define PINF __int_as_float(0x7f800000) #endif -__global__ void d2d_kernel(cv::cuda::PtrStepSz<short> disp, cv::cuda::PtrStepSz<float> depth, - ftl::rgbd::Camera cam) { +template<typename T_in, typename T_out> +__global__ void d2d_kernel(cv::cuda::PtrStepSz<T_in> disp, cv::cuda::PtrStepSz<T_out> depth, + const ftl::rgbd::Camera cam, const float scale) { for (STRIDE_Y(v,disp.rows)) { for (STRIDE_X(u,disp.cols)) { short d = disp(v,u); - depth(v,u) = (d == 0) ? 0.0f : ((cam.baseline*cam.fx) / ((float(d)/16.0f) - cam.doffs)); + depth(v,u) = (d == 0) ? 0.0f : ((cam.baseline*cam.fx) / ((float(d)*scale) - cam.doffs)); } } } namespace ftl { namespace cuda { + + template<typename T_in, typename T_out> void disparity_to_depth(const cv::cuda::GpuMat &disparity, cv::cuda::GpuMat &depth, - const ftl::rgbd::Camera &c, cudaStream_t &stream) { + const ftl::rgbd::Camera &c, float scale, cudaStream_t &stream) { dim3 grid(1,1,1); dim3 threads(128, 1, 1); grid.x = cv::cuda::device::divUp(disparity.cols, 128); grid.y = cv::cuda::device::divUp(disparity.rows, 1); - d2d_kernel<<<grid, threads, 0, stream>>>( - disparity, depth, c); + d2d_kernel<T_in, T_out><<<grid, threads, 0, stream>>>( + disparity, depth, c, scale); cudaSafeCall( cudaGetLastError() ); } + + template void disparity_to_depth<short, float>(const cv::cuda::GpuMat&, cv::cuda::GpuMat&, const ftl::rgbd::Camera&, float, cudaStream_t&); + template void disparity_to_depth<float, float>(const cv::cuda::GpuMat&, cv::cuda::GpuMat&, const ftl::rgbd::Camera&, float, cudaStream_t&); } } //============================================================================== -__global__ void d2drev_kernel(cv::cuda::PtrStepSz<short> disp, cv::cuda::PtrStepSz<float> depth, - ftl::rgbd::Camera cam) { +template<typename T_in, typename T_out> +__global__ void d2drev_kernel(cv::cuda::PtrStepSz<T_in> disp, cv::cuda::PtrStepSz<T_out> depth, + const ftl::rgbd::Camera cam, const float scale) { -for (STRIDE_Y(v,disp.rows)) { -for (STRIDE_X(u,disp.cols)) { - float d = depth(v,u); - float disparity = (d > cam.maxDepth || d < cam.minDepth) ? 0.0f : ((cam.baseline*cam.fx) / d) + cam.doffs; - disp(v,u) = short(disparity*16.0f); -} -} + for (STRIDE_Y(v,disp.rows)) { + for (STRIDE_X(u,disp.cols)) { + float d = depth(v,u); + float disparity = (d > cam.maxDepth || d < cam.minDepth) ? 0.0f : ((cam.baseline*cam.fx) / d) + cam.doffs; + disp(v,u) = T_out(disparity*scale); + }} } namespace ftl { namespace cuda { -void depth_to_disparity(cv::cuda::GpuMat &disparity, const cv::cuda::GpuMat &depth, - const ftl::rgbd::Camera &c, cudaStream_t &stream) { - dim3 grid(1,1,1); - dim3 threads(128, 1, 1); - grid.x = cv::cuda::device::divUp(disparity.cols, 128); - grid.y = cv::cuda::device::divUp(disparity.rows, 1); - d2drev_kernel<<<grid, threads, 0, stream>>>( - disparity, depth, c); - cudaSafeCall( cudaGetLastError() ); -} + + template<typename T_in, typename T_out> + void depth_to_disparity(const cv::cuda::GpuMat &depth, cv::cuda::GpuMat &disparity, + const ftl::rgbd::Camera &c, float scale, cudaStream_t &stream) { + dim3 grid(1,1,1); + dim3 threads(128, 1, 1); + grid.x = cv::cuda::device::divUp(disparity.cols, 128); + grid.y = cv::cuda::device::divUp(disparity.rows, 1); + d2drev_kernel<T_in, T_out><<<grid, threads, 0, stream>>>( + disparity, depth, c, scale); + cudaSafeCall( cudaGetLastError() ); + } + + template void depth_to_disparity<float, float>(const cv::cuda::GpuMat&, cv::cuda::GpuMat&, const ftl::rgbd::Camera&, float, cudaStream_t&); + template void depth_to_disparity<float, short>(const cv::cuda::GpuMat&, cv::cuda::GpuMat&, const ftl::rgbd::Camera&, float, cudaStream_t&); + } } diff --git a/components/operators/src/disparity/disparity_to_depth.cpp b/components/operators/src/disparity/disparity_to_depth.cpp index 5dbe97b335e4d40d06cd61f63bac37f1d2a2d215..1ffd157dead77ef4fcf8859903c4439955a70f0b 100644 --- a/components/operators/src/disparity/disparity_to_depth.cpp +++ b/components/operators/src/disparity/disparity_to_depth.cpp @@ -8,7 +8,7 @@ using cv::cuda::GpuMat; bool DisparityToDepth::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t stream) { - + if (!in.hasChannel(Channel::Disparity)) { throw FTL_Error("Missing disparity before convert to depth"); } @@ -19,6 +19,6 @@ bool DisparityToDepth::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, GpuMat &depth = out.create<GpuMat>(Channel::Depth); depth.create(disp.size(), CV_32FC1); - ftl::cuda::disparity_to_depth(disp, depth, params, stream); + ftl::cuda::disparity_to_depth<short, float>(disp, depth, params, 1.0f/16.0f, stream); return true; } \ No newline at end of file diff --git a/components/operators/src/gt_analysis.cpp b/components/operators/src/gt_analysis.cpp index d4a29a5894580bdaf6bb0839ea9b11fb058c776c..066f04d94c1ec96c2a01d8094e99cf86a66136bf 100644 --- a/components/operators/src/gt_analysis.cpp +++ b/components/operators/src/gt_analysis.cpp @@ -15,56 +15,95 @@ GTAnalysis::~GTAnalysis() { template <typename T> std::string to_string_with_precision(const T a_value, const int n = 6) { - std::ostringstream out; - out.precision(n); - out << std::fixed << a_value; - return out.str(); + std::ostringstream out; + out.precision(n); + out << std::fixed << a_value; + return out.str(); +} + +struct Options { + float t_min; + float t_max; + uchar4 colour; +}; + +static const std::vector<Options> options_disparity = { + {-INFINITY, INFINITY, {0,0,224,255}}, + {-INFINITY, 2.0, {66,174,255,255}}, + {-INFINITY, 1.0, {16,192,16,255}}, + {-INFINITY, 0.25, {64,255,64,255}}, +}; + +static const std::vector<Options> options_depth = { + {-INFINITY, INFINITY, {0,0,224,255}}, + {-INFINITY, 0.1, {66,174,255,255}}, + {-INFINITY, 0.025, {16,192,16,255}}, + {-INFINITY, 0.01, {64,255,64,255}}, +}; + +static void report(std::vector<std::string> &msgs, const ftl::cuda::GTAnalysisData &data, + const Options &o, float npixels, const std::string &unit="", float scale=1.0f) { + + msgs.push_back( "(" + to_string_with_precision(o.t_min, 2) + + "," + to_string_with_precision(o.t_max, 2) + "] "); + msgs.push_back("valid: " + to_string_with_precision(100.0f*data.good/data.valid, 1) + "%, " + + "all: " + to_string_with_precision(100.0f*data.good/npixels, 1) + "%"); + msgs.push_back( "RMS: " + + to_string_with_precision(sqrt(data.err_sq/data.good) * scale, 2) + + (unit.empty() ? "" : " " + unit)); } bool GTAnalysis::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t stream) { - if (in.hasChannel(Channel::Depth) && in.hasChannel(Channel::GroundTruth)) { + + if (!in.hasChannel(Channel::Depth) || !in.hasChannel(Channel::GroundTruth)) { + return true; + } + + std::vector<std::string> msgs; + if (in.hasChannel(Channel::Messages)) { in.get(Channel::Messages, msgs); } + + bool use_disp = config()->value("use_disparity", true); + auto &dmat = in.get<cv::cuda::GpuMat>(Channel::Depth); + const float npixels = dmat.rows * dmat.cols; + ftl::cuda::GTAnalysisData err; + + for (const auto &o : (use_disp ? options_disparity : options_depth)) { if (config()->value("show_colour", false)) { ftl::cuda::gt_analysis( in.createTexture<uchar4>(Channel::Colour), in.createTexture<float>(Channel::Depth), in.createTexture<float>(Channel::GroundTruth), + in.createTexture<uchar>(Channel::Mask), output_, in.getLeft(), - config()->value("bad_threshold", 2.0f), - config()->value("viz_threshold", 5.0f), + o.t_min, + o.t_max, + o.colour, + use_disp, stream ); - } else { + } + else { ftl::cuda::gt_analysis( in.createTexture<float>(Channel::Depth), in.createTexture<float>(Channel::GroundTruth), + in.createTexture<uchar>(Channel::Mask), output_, in.getLeft(), - config()->value("bad_threshold", 2.0f), + o.t_min, + o.t_max, + use_disp, stream ); } - ftl::cuda::GTAnalysisData anal; - cudaMemcpy(&anal, output_, sizeof(anal), cudaMemcpyDeviceToHost); - - auto &dmat = in.get<cv::cuda::GpuMat>(Channel::Depth); - int totalvalid = dmat.cols*dmat.rows - anal.invalid - anal.masked; - //int totaltested = dmat.cols*dmat.rows - anal.masked; - - float pbad = float(anal.bad) / float(totalvalid) * 100.0f; - float pinvalid = float(anal.invalid) / float(dmat.cols*dmat.rows - anal.masked) * 100.0f; - float avgerr = anal.totalerror / float(totalvalid) * 100.0f; - - std::vector<std::string> msgs; - if (in.hasChannel(Channel::Messages)) in.get(Channel::Messages, msgs); - - msgs.push_back(string("Bad %: ") + to_string_with_precision(pbad, 1)); - msgs.push_back(string("Invalid %: ") + to_string_with_precision(pinvalid,1)); - msgs.push_back(string("Avg Error: ") + to_string_with_precision(avgerr, 2)); - - in.create(Channel::Messages, msgs); + cudaMemcpy(&err, output_, sizeof(err), cudaMemcpyDeviceToHost); + msgs.push_back(" "); + if (use_disp) { report(msgs, err, o, npixels, "px", 1.0); } + else { report(msgs, err, o, npixels, "mm", 1000.0); } } + in.create(Channel::Messages, msgs); + return true; } diff --git a/components/operators/src/gt_analysis.cu b/components/operators/src/gt_analysis.cu index 53b76cd6332f6c00878be70c187d5cefdaa930a8..e3e524057868947c4220f7cd157ac2c939c4784b 100644 --- a/components/operators/src/gt_analysis.cu +++ b/components/operators/src/gt_analysis.cu @@ -6,8 +6,8 @@ #define FULL_MASK 0xffffffff -template <bool COLOUR> -__global__ void gt_anal_kernel( +template <bool DISPARITY, bool VISUALISE> +__global__ void gt_analysis_kernel( uchar4* __restrict__ colour, int cpitch, int width, @@ -16,95 +16,131 @@ __global__ void gt_anal_kernel( int dpitch, const float* __restrict__ gt, int gpitch, + const uchar* __restrict__ mask, + int mpitch, ftl::cuda::GTAnalysisData *out, ftl::rgbd::Camera cam, - float threshold, - float outmax + float t_min, + float t_max, + uchar4 colour_value ) { - - __shared__ int sinvalid; - __shared__ int sbad; + __shared__ int svalid; + __shared__ int smissing; + __shared__ int smissing_masked; __shared__ int smasked; + __shared__ int sgood; __shared__ float serr; + __shared__ float serr_sq; if (threadIdx.x == 0 && threadIdx.y == 0) { - sinvalid = 0; - sbad = 0; + svalid = 0; + smissing = 0; + smissing_masked = 0; smasked = 0; + sgood = 0; serr = 0.0f; + serr_sq = 0.0f; } __syncthreads(); const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; - int invalid = 0; - int bad = 0; + int valid = 0; + int missing = 0; + int missing_masked = 0; int masked = 0; + int good = 0; float err = 0.0f; + float err_sq = 0.0f; const float numer = cam.baseline*cam.fx; if (x < width) { const float* __restrict__ gt_ptr = gt+x; const float* __restrict__ d_ptr = depth+x; + const uchar* __restrict__ m_ptr = mask+x; for (STRIDE_Y(y, height)) { // TODO: Verify gt and depth pitch are same float gtval = gt_ptr[y*dpitch]; float dval = d_ptr[y*dpitch]; - const int tmasked = (gtval > cam.minDepth && gtval < cam.maxDepth) ? 0 : 1; - const int tinvalid = (tmasked == 0 && (dval <= cam.minDepth || dval >= cam.maxDepth)) ? 1 : 0; + const int tmasked = (m_ptr[y*mpitch] == 0) ? 0 : 1; + const int tinvalid = (dval <= cam.minDepth || dval >= cam.maxDepth) ? 1 : 0; + const int tgtinvalid = (gtval > cam.minDepth && gtval < cam.maxDepth) ? 0 : 1; - uchar4 c = make_uchar4((tinvalid==1)?255:0,0,0,255); + if (tinvalid == 0 && tgtinvalid == 0) { + // if there is valid value in both (gt and depth) + valid += 1; - // Convert both to disparity... - if (tinvalid == 0 && tmasked == 0) { - dval = (numer / dval); - gtval = (numer / gtval); + if (DISPARITY) { + dval = (numer / dval); + gtval = (numer / gtval); + } const float e = fabsf(dval-gtval); - bad += (e >= threshold) ? 1 : 0; - err += e; - if (COLOUR) { - float nerr = min(1.0f, e / outmax); - c.z = min(255.0f, 255.0f * nerr); + if ((t_min < e) && (e <= t_max)) { + good += 1; + err += e; + err_sq += e*e; + + if (VISUALISE) { colour[x+y*cpitch] = colour_value; } } } + else if (tinvalid == 0 && tmasked == 1 && tgtinvalid == 1) { + // masked and not missing (but no gt value) + if (VISUALISE) { colour[x+y*cpitch] = {192, 0, 192, 255}; } // magenta + } + else if (tinvalid == 1 && (tmasked == 1 || tgtinvalid == 1)) { + // missing and (masked or missing gt) + if (VISUALISE) { colour[x+y*cpitch] = {0, 0, 0, 255}; } // black + missing_masked += 1; + } + else if (tinvalid == 1) { + // missing value (not masked) + if (VISUALISE) { colour[x+y*cpitch] = {224, 32, 32, 255}; } // blue + missing += 1; + } - invalid += tinvalid; - masked += tmasked; - - if (COLOUR) colour[x+y*cpitch] = c; + masked += (tmasked == 1 || tgtinvalid == 1) ? 1 : 0; } } // Warp aggregate #pragma unroll for (int i = WARP_SIZE/2; i > 0; i /= 2) { - bad += __shfl_xor_sync(FULL_MASK, bad, i, WARP_SIZE); - invalid += __shfl_xor_sync(FULL_MASK, invalid, i, WARP_SIZE); + valid += __shfl_xor_sync(FULL_MASK, valid, i, WARP_SIZE); + missing += __shfl_xor_sync(FULL_MASK, missing, i, WARP_SIZE); + missing_masked += __shfl_xor_sync(FULL_MASK, missing_masked, i, WARP_SIZE); masked += __shfl_xor_sync(FULL_MASK, masked, i, WARP_SIZE); + good += __shfl_xor_sync(FULL_MASK, good, i, WARP_SIZE); err += __shfl_xor_sync(FULL_MASK, err, i, WARP_SIZE); + err_sq += __shfl_xor_sync(FULL_MASK, err_sq, i, WARP_SIZE); } // Block aggregate if (threadIdx.x % WARP_SIZE == 0) { - atomicAdd(&serr, err); - atomicAdd(&sbad, bad); - atomicAdd(&sinvalid, invalid); + atomicAdd(&svalid, valid); + atomicAdd(&smissing, missing); + atomicAdd(&smissing_masked, missing_masked); atomicAdd(&smasked, masked); + atomicAdd(&sgood, good); + atomicAdd(&serr, err); + atomicAdd(&serr_sq, err_sq); } __syncthreads(); // Global aggregate if (threadIdx.x == 0 && threadIdx.y == 0) { - atomicAdd(&out->totalerror, serr); - atomicAdd(&out->bad, sbad); - atomicAdd(&out->invalid, sinvalid); + atomicAdd(&out->valid, svalid); + atomicAdd(&out->missing, smissing); + atomicAdd(&out->missing_masked, smissing_masked); atomicAdd(&out->masked, smasked); + atomicAdd(&out->good, sgood); + atomicAdd(&out->err, serr); + atomicAdd(&out->err_sq, serr_sq); } } @@ -112,10 +148,13 @@ void ftl::cuda::gt_analysis( ftl::cuda::TextureObject<uchar4> &colour, ftl::cuda::TextureObject<float> &depth, ftl::cuda::TextureObject<float> >, + ftl::cuda::TextureObject<uchar> &mask, ftl::cuda::GTAnalysisData *out, const ftl::rgbd::Camera &cam, - float threshold, - float outmax, + float t_min, + float t_max, + uchar4 colour_value, + bool use_disparity, cudaStream_t stream ) { static constexpr int THREADS_X = 128; @@ -126,21 +165,45 @@ void ftl::cuda::gt_analysis( cudaMemsetAsync(out, 0, sizeof(ftl::cuda::GTAnalysisData), stream); - gt_anal_kernel<true><<<gridSize, blockSize, 0, stream>>>( - colour.devicePtr(), - colour.pixelPitch(), - colour.width(), - colour.height(), - depth.devicePtr(), - depth.pixelPitch(), - gt.devicePtr(), - gt.pixelPitch(), - out, - cam, - threshold, - outmax - ); - cudaSafeCall( cudaGetLastError() ); + if (use_disparity) { + gt_analysis_kernel<true, true><<<gridSize, blockSize, 0, stream>>>( + colour.devicePtr(), + colour.pixelPitch(), + colour.width(), + colour.height(), + depth.devicePtr(), + depth.pixelPitch(), + gt.devicePtr(), + gt.pixelPitch(), + mask.devicePtr(), + mask.pixelPitch(), + out, + cam, + t_min, + t_max, + colour_value + ); + } + else { + gt_analysis_kernel<false, true><<<gridSize, blockSize, 0, stream>>>( + colour.devicePtr(), + colour.pixelPitch(), + colour.width(), + colour.height(), + depth.devicePtr(), + depth.pixelPitch(), + gt.devicePtr(), + gt.pixelPitch(), + mask.devicePtr(), + mask.pixelPitch(), + out, + cam, + t_min, + t_max, + colour_value + ); + } + cudaSafeCall(cudaGetLastError()); #ifdef _DEBUG cudaSafeCall(cudaDeviceSynchronize()); @@ -150,9 +213,12 @@ void ftl::cuda::gt_analysis( void ftl::cuda::gt_analysis( ftl::cuda::TextureObject<float> &depth, ftl::cuda::TextureObject<float> >, + ftl::cuda::TextureObject<uchar> &mask, ftl::cuda::GTAnalysisData *out, const ftl::rgbd::Camera &cam, - float threshold, + float t_min, + float t_max, + bool use_disparity, cudaStream_t stream ) { static constexpr int THREADS_X = 128; @@ -163,20 +229,45 @@ void ftl::cuda::gt_analysis( cudaMemsetAsync(out, 0, sizeof(ftl::cuda::GTAnalysisData), stream); - gt_anal_kernel<false><<<gridSize, blockSize, 0, stream>>>( - nullptr, - 0, - depth.width(), - depth.height(), - depth.devicePtr(), - depth.pixelPitch(), - gt.devicePtr(), - gt.pixelPitch(), - out, - cam, - threshold, - 1.0f - ); + if (use_disparity) { + gt_analysis_kernel<true, false><<<gridSize, blockSize, 0, stream>>>( + nullptr, + 0, + depth.width(), + depth.height(), + depth.devicePtr(), + depth.pixelPitch(), + gt.devicePtr(), + gt.pixelPitch(), + mask.devicePtr(), + mask.pixelPitch(), + out, + cam, + t_min, + t_max, + {0,0,0,0} + ); + } + else { + gt_analysis_kernel<false, false><<<gridSize, blockSize, 0, stream>>>( + nullptr, + 0, + depth.width(), + depth.height(), + depth.devicePtr(), + depth.pixelPitch(), + gt.devicePtr(), + gt.pixelPitch(), + mask.devicePtr(), + mask.pixelPitch(), + out, + cam, + t_min, + t_max, + {0,0,0,0} + ); + } + cudaSafeCall( cudaGetLastError() ); #ifdef _DEBUG