Skip to content
Snippets Groups Projects
Commit 7cb907f8 authored by Nicolas Pope's avatar Nicolas Pope
Browse files

Merge branch 'feature/gt' into 'master'

Ground truth

See merge request nicolas.pope/ftl!295
parents 28d0b1ac 64219909
No related branches found
No related tags found
1 merge request!295Ground truth
Pipeline #23221 passed
......@@ -48,11 +48,11 @@ static Eigen::Affine3d create_rotation_matrix(float ax, float ay, float az) {
ftl::gui::Camera::Camera(ftl::gui::Screen *screen, int fsmask, int fid, ftl::codecs::Channel c)
: screen_(screen), fsmask_(fsmask), fid_(fid), texture1_(GLTexture::Type::BGRA), texture2_(GLTexture::Type::BGRA), depth1_(GLTexture::Type::Float), channel_(c),channels_(0u) {
eye_ = Eigen::Vector3d::Zero();
neye_ = Eigen::Vector4d::Zero();
rotmat_.setIdentity();
//up_ = Eigen::Vector3f(0,1.0f,0);
lerpSpeed_ = 0.999f;
sdepth_ = false;
......@@ -98,7 +98,7 @@ ftl::gui::Camera::Camera(ftl::gui::Screen *screen, int fsmask, int fid, ftl::cod
});
intrinsics_ = ftl::create<ftl::Configurable>(renderer_, "intrinsics");
state_.getLeft() = ftl::rgbd::Camera::from(intrinsics_);
state_.getRight() = state_.getLeft();
......@@ -389,7 +389,7 @@ void ftl::gui::Camera::_draw(std::vector<ftl::rgbd::FrameSet*> &fss) {
if (!post_pipe_) {
post_pipe_ = ftl::config::create<ftl::operators::Graph>(screen_->root(), "post_filters");
post_pipe_->append<ftl::operators::FXAA>("fxaa");
post_pipe_->append<ftl::operators::GTAnalysis>("gtanal");
post_pipe_->append<ftl::operators::GTAnalysis>("gtanalyse");
}
post_pipe_->apply(frame_, frame_, 0);
......@@ -587,14 +587,14 @@ bool ftl::gui::Camera::setVR(bool on) {
state_.getLeft().height = size_y;
state_.getRight().width = size_x;
state_.getRight().height = size_y;
intrinsic = getCameraMatrix(screen_->getVR(), vr::Eye_Left);
CHECK(intrinsic(0, 2) < 0 && intrinsic(1, 2) < 0);
state_.getLeft().fx = intrinsic(0,0);
state_.getLeft().fy = intrinsic(0,0);
state_.getLeft().cx = intrinsic(0,2);
state_.getLeft().cy = intrinsic(1,2);
intrinsic = getCameraMatrix(screen_->getVR(), vr::Eye_Right);
CHECK(intrinsic(0, 2) < 0 && intrinsic(1, 2) < 0);
state_.getRight().fx = intrinsic(0,0);
......@@ -671,7 +671,7 @@ const void ftl::gui::Camera::captureFrame() {
if (screen_->isVR()) {
#ifdef HAVE_OPENVR
vr::VRCompositor()->SetTrackingSpace(vr::TrackingUniverseStanding);
vr::VRCompositor()->WaitGetPoses(rTrackedDevicePose_, vr::k_unMaxTrackedDeviceCount, NULL, 0 );
......@@ -679,12 +679,12 @@ const void ftl::gui::Camera::captureFrame() {
{
Eigen::Matrix4d eye_l = ConvertSteamVRMatrixToMatrix4(
vr::VRSystem()->GetEyeToHeadTransform(vr::Eye_Left));
//Eigen::Matrix4d eye_r = ConvertSteamVRMatrixToMatrix4(
// vr::VRSystem()->GetEyeToHeadTransform(vr::Eye_Left));
float baseline_in = 2.0 * eye_l(0, 3);
if (baseline_in != baseline_) {
baseline_ = baseline_in;
//src_->set("baseline", baseline_);
......@@ -698,17 +698,17 @@ const void ftl::gui::Camera::captureFrame() {
vreye[0] = pose(0, 3);
vreye[1] = -pose(1, 3);
vreye[2] = -pose(2, 3);
// NOTE: If modified, should be verified with VR headset!
Eigen::Matrix3d R;
R = Eigen::AngleAxisd(ea[0], Eigen::Vector3d::UnitX()) *
Eigen::AngleAxisd(-ea[1], Eigen::Vector3d::UnitY()) *
Eigen::AngleAxisd(-ea[2], Eigen::Vector3d::UnitZ());
Eigen::AngleAxisd(-ea[2], Eigen::Vector3d::UnitZ());
//double rd = 180.0 / 3.141592;
//LOG(INFO) << "rotation x: " << ea[0] *rd << ", y: " << ea[1] * rd << ", z: " << ea[2] * rd;
// pose.block<3, 3>(0, 0) = R;
rotmat_.block(0, 0, 3, 3) = R;
// TODO: Apply a rotation to orient also
......@@ -720,7 +720,7 @@ const void ftl::gui::Camera::captureFrame() {
Eigen::Translation3d trans(eye_ + vreye);
Eigen::Affine3d t(trans);
viewPose = t.matrix() * rotmat_;
} else {
//LOG(ERROR) << "No VR Pose";
}
......
......@@ -55,7 +55,7 @@ using std::vector;
using ftl::config::json_t;
static ftl::rgbd::Generator *createSourceGenerator(ftl::Configurable *root, const std::vector<ftl::rgbd::Source*> &srcs) {
auto *grp = new ftl::rgbd::Group();
/*auto pipeline = ftl::config::create<ftl::operators::Graph>(root, "pipeline");
pipeline->append<ftl::operators::DetectAndTrack>("facedetection")->value("enabled", false);
......@@ -75,7 +75,7 @@ SourceWindow::SourceWindow(ftl::gui::Screen *screen)
setLayout(new nanogui::BoxLayout(nanogui::Orientation::Vertical, nanogui::Alignment::Fill, 20, 5));
using namespace nanogui;
new Label(this, "Select Camera","sans-bold",20);
// FIXME: Reallocating the vector may currently causes thread issues since
......@@ -209,7 +209,7 @@ bool SourceWindow::_processFrameset(ftl::rgbd::FrameSet &fs, bool fromstream) {
}
if (!paused_) {
if (!fs.test(ftl::data::FSFlag::PARTIAL) || !screen_->root()->value("drop_partial_framesets", false)) {
if (!fs.test(ftl::data::FSFlag::PARTIAL) || !screen_->root()->value("drop_partial_framesets", false)) {
// Enforce interpolated colour and GPU upload
for (size_t i=0; i<fs.frames.size(); ++i) {
if (!fs.hasFrame(i)) continue;
......@@ -274,7 +274,7 @@ void SourceWindow::_checkFrameSets(size_t id) {
p->append<ftl::operators::CullDiscontinuity>("remove_discontinuity");
p->append<ftl::operators::MultiViewMLS>("mvmls")->value("enabled", false);
p->append<ftl::operators::Poser>("poser")->value("enabled", true);
p->append<ftl::operators::GTAnalysis>("gtanal");
p->append<ftl::operators::GTAnalysis>("gtanalyse");
pre_pipelines_.push_back(p);
framesets_.push_back(new ftl::rgbd::FrameSet);
......
......@@ -3,11 +3,13 @@
namespace ftl {
namespace cuda {
void disparity_to_depth(const cv::cuda::GpuMat &disparity, cv::cuda::GpuMat &depth,
const ftl::rgbd::Camera &c, cudaStream_t &stream);
template<typename T_in, typename T_out>
void disparity_to_depth(const cv::cuda::GpuMat &depth, cv::cuda::GpuMat &disparity,
const ftl::rgbd::Camera &c, float scale, cudaStream_t &stream);
void depth_to_disparity(cv::cuda::GpuMat &disparity, const cv::cuda::GpuMat &depth,
const ftl::rgbd::Camera &c, cudaStream_t &stream);
template<typename T_in, typename T_out>
void depth_to_disparity(const cv::cuda::GpuMat &depth, cv::cuda::GpuMat &disparity,
const ftl::rgbd::Camera &c, float scale, cudaStream_t &stream);
void remove_occlusions(cv::cuda::GpuMat &depth, const cv::cuda::GpuMat &depthR,
const ftl::rgbd::Camera &c, cudaStream_t stream);
......
......@@ -8,33 +8,43 @@ namespace ftl {
namespace cuda {
struct GTAnalysisData {
int invalid; // Count of invalid (missing depth)
int bad; // Count bad (above x disparity error)
float totalerror; // Summed disparity error (of valid values)
int masked; // Count of pixels masked.
int valid; // number of pixels with valid pixels
int missing; // number of missing non-masked pixels
int missing_masked; // number of missing masked pixels
int masked; // number of pixels masked (in gt)
int good; // number of good pixels (valid value and error within min/max threshold)
float err; // sum of absolute error
float err_sq; // sum of squared error
};
void gt_analysis(
ftl::cuda::TextureObject<uchar4> &colour,
ftl::cuda::TextureObject<float> &depth,
ftl::cuda::TextureObject<float> &gt,
ftl::cuda::TextureObject<uchar> &mask,
ftl::cuda::GTAnalysisData *out,
const ftl::rgbd::Camera &cam,
float threshold,
float outmax,
float t_min,
float t_max,
uchar4 colour_value,
bool use_disparity,
cudaStream_t stream
);
void gt_analysis(
ftl::cuda::TextureObject<float> &depth,
ftl::cuda::TextureObject<float> &gt,
ftl::cuda::TextureObject<uchar> &mask,
ftl::cuda::GTAnalysisData *out,
const ftl::rgbd::Camera &cam,
float threshold,
float t_min,
float t_max,
bool use_disparity,
cudaStream_t stream
);
}
}
#endif
\ No newline at end of file
#endif
\ No newline at end of file
......@@ -13,7 +13,7 @@ using ftl::operators::DisparityBilateralFilter;
DisparityBilateralFilter::DisparityBilateralFilter(ftl::Configurable* cfg) :
ftl::operators::Operator(cfg) {
scale_ = 16.0;
n_disp_ = cfg->value("n_disp", 256);
radius_ = cfg->value("radius", 4);
......@@ -39,7 +39,7 @@ bool DisparityBilateralFilter::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out
//LOG(ERROR) << "Calculated disparity from depth";
ftl::cuda::depth_to_disparity(disp, depth, params, stream);
ftl::cuda::depth_to_disparity<float, short>(depth, disp, params, 16.0f, stream);
} else {
throw FTL_Error("Joint Bilateral Filter is missing Disparity and Depth");
return false;
......
......@@ -8,58 +8,70 @@
#define PINF __int_as_float(0x7f800000)
#endif
__global__ void d2d_kernel(cv::cuda::PtrStepSz<short> disp, cv::cuda::PtrStepSz<float> depth,
ftl::rgbd::Camera cam) {
template<typename T_in, typename T_out>
__global__ void d2d_kernel(cv::cuda::PtrStepSz<T_in> disp, cv::cuda::PtrStepSz<T_out> depth,
const ftl::rgbd::Camera cam, const float scale) {
for (STRIDE_Y(v,disp.rows)) {
for (STRIDE_X(u,disp.cols)) {
short d = disp(v,u);
depth(v,u) = (d == 0) ? 0.0f : ((cam.baseline*cam.fx) / ((float(d)/16.0f) - cam.doffs));
depth(v,u) = (d == 0) ? 0.0f : ((cam.baseline*cam.fx) / ((float(d)*scale) - cam.doffs));
}
}
}
namespace ftl {
namespace cuda {
template<typename T_in, typename T_out>
void disparity_to_depth(const cv::cuda::GpuMat &disparity, cv::cuda::GpuMat &depth,
const ftl::rgbd::Camera &c, cudaStream_t &stream) {
const ftl::rgbd::Camera &c, float scale, cudaStream_t &stream) {
dim3 grid(1,1,1);
dim3 threads(128, 1, 1);
grid.x = cv::cuda::device::divUp(disparity.cols, 128);
grid.y = cv::cuda::device::divUp(disparity.rows, 1);
d2d_kernel<<<grid, threads, 0, stream>>>(
disparity, depth, c);
d2d_kernel<T_in, T_out><<<grid, threads, 0, stream>>>(
disparity, depth, c, scale);
cudaSafeCall( cudaGetLastError() );
}
template void disparity_to_depth<short, float>(const cv::cuda::GpuMat&, cv::cuda::GpuMat&, const ftl::rgbd::Camera&, float, cudaStream_t&);
template void disparity_to_depth<float, float>(const cv::cuda::GpuMat&, cv::cuda::GpuMat&, const ftl::rgbd::Camera&, float, cudaStream_t&);
}
}
//==============================================================================
__global__ void d2drev_kernel(cv::cuda::PtrStepSz<short> disp, cv::cuda::PtrStepSz<float> depth,
ftl::rgbd::Camera cam) {
template<typename T_in, typename T_out>
__global__ void d2drev_kernel(cv::cuda::PtrStepSz<T_in> disp, cv::cuda::PtrStepSz<T_out> depth,
const ftl::rgbd::Camera cam, const float scale) {
for (STRIDE_Y(v,disp.rows)) {
for (STRIDE_X(u,disp.cols)) {
float d = depth(v,u);
float disparity = (d > cam.maxDepth || d < cam.minDepth) ? 0.0f : ((cam.baseline*cam.fx) / d) + cam.doffs;
disp(v,u) = short(disparity*16.0f);
}
}
for (STRIDE_Y(v,disp.rows)) {
for (STRIDE_X(u,disp.cols)) {
float d = depth(v,u);
float disparity = (d > cam.maxDepth || d < cam.minDepth) ? 0.0f : ((cam.baseline*cam.fx) / d) + cam.doffs;
disp(v,u) = T_out(disparity*scale);
}}
}
namespace ftl {
namespace cuda {
void depth_to_disparity(cv::cuda::GpuMat &disparity, const cv::cuda::GpuMat &depth,
const ftl::rgbd::Camera &c, cudaStream_t &stream) {
dim3 grid(1,1,1);
dim3 threads(128, 1, 1);
grid.x = cv::cuda::device::divUp(disparity.cols, 128);
grid.y = cv::cuda::device::divUp(disparity.rows, 1);
d2drev_kernel<<<grid, threads, 0, stream>>>(
disparity, depth, c);
cudaSafeCall( cudaGetLastError() );
}
template<typename T_in, typename T_out>
void depth_to_disparity(const cv::cuda::GpuMat &depth, cv::cuda::GpuMat &disparity,
const ftl::rgbd::Camera &c, float scale, cudaStream_t &stream) {
dim3 grid(1,1,1);
dim3 threads(128, 1, 1);
grid.x = cv::cuda::device::divUp(disparity.cols, 128);
grid.y = cv::cuda::device::divUp(disparity.rows, 1);
d2drev_kernel<T_in, T_out><<<grid, threads, 0, stream>>>(
disparity, depth, c, scale);
cudaSafeCall( cudaGetLastError() );
}
template void depth_to_disparity<float, float>(const cv::cuda::GpuMat&, cv::cuda::GpuMat&, const ftl::rgbd::Camera&, float, cudaStream_t&);
template void depth_to_disparity<float, short>(const cv::cuda::GpuMat&, cv::cuda::GpuMat&, const ftl::rgbd::Camera&, float, cudaStream_t&);
}
}
......
......@@ -8,7 +8,7 @@ using cv::cuda::GpuMat;
bool DisparityToDepth::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out,
cudaStream_t stream) {
if (!in.hasChannel(Channel::Disparity)) {
throw FTL_Error("Missing disparity before convert to depth");
}
......@@ -19,6 +19,6 @@ bool DisparityToDepth::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out,
GpuMat &depth = out.create<GpuMat>(Channel::Depth);
depth.create(disp.size(), CV_32FC1);
ftl::cuda::disparity_to_depth(disp, depth, params, stream);
ftl::cuda::disparity_to_depth<short, float>(disp, depth, params, 1.0f/16.0f, stream);
return true;
}
\ No newline at end of file
......@@ -15,56 +15,95 @@ GTAnalysis::~GTAnalysis() {
template <typename T>
std::string to_string_with_precision(const T a_value, const int n = 6) {
std::ostringstream out;
out.precision(n);
out << std::fixed << a_value;
return out.str();
std::ostringstream out;
out.precision(n);
out << std::fixed << a_value;
return out.str();
}
struct Options {
float t_min;
float t_max;
uchar4 colour;
};
static const std::vector<Options> options_disparity = {
{-INFINITY, INFINITY, {0,0,224,255}},
{-INFINITY, 2.0, {66,174,255,255}},
{-INFINITY, 1.0, {16,192,16,255}},
{-INFINITY, 0.25, {64,255,64,255}},
};
static const std::vector<Options> options_depth = {
{-INFINITY, INFINITY, {0,0,224,255}},
{-INFINITY, 0.1, {66,174,255,255}},
{-INFINITY, 0.025, {16,192,16,255}},
{-INFINITY, 0.01, {64,255,64,255}},
};
static void report(std::vector<std::string> &msgs, const ftl::cuda::GTAnalysisData &data,
const Options &o, float npixels, const std::string &unit="", float scale=1.0f) {
msgs.push_back( "(" + to_string_with_precision(o.t_min, 2)
+ "," + to_string_with_precision(o.t_max, 2) + "] ");
msgs.push_back("valid: " + to_string_with_precision(100.0f*data.good/data.valid, 1) + "%, "
+ "all: " + to_string_with_precision(100.0f*data.good/npixels, 1) + "%");
msgs.push_back( "RMS: "
+ to_string_with_precision(sqrt(data.err_sq/data.good) * scale, 2)
+ (unit.empty() ? "" : " " + unit));
}
bool GTAnalysis::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t stream) {
if (in.hasChannel(Channel::Depth) && in.hasChannel(Channel::GroundTruth)) {
if (!in.hasChannel(Channel::Depth) || !in.hasChannel(Channel::GroundTruth)) {
return true;
}
std::vector<std::string> msgs;
if (in.hasChannel(Channel::Messages)) { in.get(Channel::Messages, msgs); }
bool use_disp = config()->value("use_disparity", true);
auto &dmat = in.get<cv::cuda::GpuMat>(Channel::Depth);
const float npixels = dmat.rows * dmat.cols;
ftl::cuda::GTAnalysisData err;
for (const auto &o : (use_disp ? options_disparity : options_depth)) {
if (config()->value("show_colour", false)) {
ftl::cuda::gt_analysis(
in.createTexture<uchar4>(Channel::Colour),
in.createTexture<float>(Channel::Depth),
in.createTexture<float>(Channel::GroundTruth),
in.createTexture<uchar>(Channel::Mask),
output_,
in.getLeft(),
config()->value("bad_threshold", 2.0f),
config()->value("viz_threshold", 5.0f),
o.t_min,
o.t_max,
o.colour,
use_disp,
stream
);
} else {
}
else {
ftl::cuda::gt_analysis(
in.createTexture<float>(Channel::Depth),
in.createTexture<float>(Channel::GroundTruth),
in.createTexture<uchar>(Channel::Mask),
output_,
in.getLeft(),
config()->value("bad_threshold", 2.0f),
o.t_min,
o.t_max,
use_disp,
stream
);
}
ftl::cuda::GTAnalysisData anal;
cudaMemcpy(&anal, output_, sizeof(anal), cudaMemcpyDeviceToHost);
auto &dmat = in.get<cv::cuda::GpuMat>(Channel::Depth);
int totalvalid = dmat.cols*dmat.rows - anal.invalid - anal.masked;
//int totaltested = dmat.cols*dmat.rows - anal.masked;
float pbad = float(anal.bad) / float(totalvalid) * 100.0f;
float pinvalid = float(anal.invalid) / float(dmat.cols*dmat.rows - anal.masked) * 100.0f;
float avgerr = anal.totalerror / float(totalvalid) * 100.0f;
std::vector<std::string> msgs;
if (in.hasChannel(Channel::Messages)) in.get(Channel::Messages, msgs);
msgs.push_back(string("Bad %: ") + to_string_with_precision(pbad, 1));
msgs.push_back(string("Invalid %: ") + to_string_with_precision(pinvalid,1));
msgs.push_back(string("Avg Error: ") + to_string_with_precision(avgerr, 2));
in.create(Channel::Messages, msgs);
cudaMemcpy(&err, output_, sizeof(err), cudaMemcpyDeviceToHost);
msgs.push_back(" ");
if (use_disp) { report(msgs, err, o, npixels, "px", 1.0); }
else { report(msgs, err, o, npixels, "mm", 1000.0); }
}
in.create(Channel::Messages, msgs);
return true;
}
......@@ -6,8 +6,8 @@
#define FULL_MASK 0xffffffff
template <bool COLOUR>
__global__ void gt_anal_kernel(
template <bool DISPARITY, bool VISUALISE>
__global__ void gt_analysis_kernel(
uchar4* __restrict__ colour,
int cpitch,
int width,
......@@ -16,95 +16,131 @@ __global__ void gt_anal_kernel(
int dpitch,
const float* __restrict__ gt,
int gpitch,
const uchar* __restrict__ mask,
int mpitch,
ftl::cuda::GTAnalysisData *out,
ftl::rgbd::Camera cam,
float threshold,
float outmax
float t_min,
float t_max,
uchar4 colour_value
) {
__shared__ int sinvalid;
__shared__ int sbad;
__shared__ int svalid;
__shared__ int smissing;
__shared__ int smissing_masked;
__shared__ int smasked;
__shared__ int sgood;
__shared__ float serr;
__shared__ float serr_sq;
if (threadIdx.x == 0 && threadIdx.y == 0) {
sinvalid = 0;
sbad = 0;
svalid = 0;
smissing = 0;
smissing_masked = 0;
smasked = 0;
sgood = 0;
serr = 0.0f;
serr_sq = 0.0f;
}
__syncthreads();
const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
int invalid = 0;
int bad = 0;
int valid = 0;
int missing = 0;
int missing_masked = 0;
int masked = 0;
int good = 0;
float err = 0.0f;
float err_sq = 0.0f;
const float numer = cam.baseline*cam.fx;
if (x < width) {
const float* __restrict__ gt_ptr = gt+x;
const float* __restrict__ d_ptr = depth+x;
const uchar* __restrict__ m_ptr = mask+x;
for (STRIDE_Y(y, height)) {
// TODO: Verify gt and depth pitch are same
float gtval = gt_ptr[y*dpitch];
float dval = d_ptr[y*dpitch];
const int tmasked = (gtval > cam.minDepth && gtval < cam.maxDepth) ? 0 : 1;
const int tinvalid = (tmasked == 0 && (dval <= cam.minDepth || dval >= cam.maxDepth)) ? 1 : 0;
const int tmasked = (m_ptr[y*mpitch] == 0) ? 0 : 1;
const int tinvalid = (dval <= cam.minDepth || dval >= cam.maxDepth) ? 1 : 0;
const int tgtinvalid = (gtval > cam.minDepth && gtval < cam.maxDepth) ? 0 : 1;
uchar4 c = make_uchar4((tinvalid==1)?255:0,0,0,255);
if (tinvalid == 0 && tgtinvalid == 0) {
// if there is valid value in both (gt and depth)
valid += 1;
// Convert both to disparity...
if (tinvalid == 0 && tmasked == 0) {
dval = (numer / dval);
gtval = (numer / gtval);
if (DISPARITY) {
dval = (numer / dval);
gtval = (numer / gtval);
}
const float e = fabsf(dval-gtval);
bad += (e >= threshold) ? 1 : 0;
err += e;
if (COLOUR) {
float nerr = min(1.0f, e / outmax);
c.z = min(255.0f, 255.0f * nerr);
if ((t_min < e) && (e <= t_max)) {
good += 1;
err += e;
err_sq += e*e;
if (VISUALISE) { colour[x+y*cpitch] = colour_value; }
}
}
else if (tinvalid == 0 && tmasked == 1 && tgtinvalid == 1) {
// masked and not missing (but no gt value)
if (VISUALISE) { colour[x+y*cpitch] = {192, 0, 192, 255}; } // magenta
}
else if (tinvalid == 1 && (tmasked == 1 || tgtinvalid == 1)) {
// missing and (masked or missing gt)
if (VISUALISE) { colour[x+y*cpitch] = {0, 0, 0, 255}; } // black
missing_masked += 1;
}
else if (tinvalid == 1) {
// missing value (not masked)
if (VISUALISE) { colour[x+y*cpitch] = {224, 32, 32, 255}; } // blue
missing += 1;
}
invalid += tinvalid;
masked += tmasked;
if (COLOUR) colour[x+y*cpitch] = c;
masked += (tmasked == 1 || tgtinvalid == 1) ? 1 : 0;
}
}
// Warp aggregate
#pragma unroll
for (int i = WARP_SIZE/2; i > 0; i /= 2) {
bad += __shfl_xor_sync(FULL_MASK, bad, i, WARP_SIZE);
invalid += __shfl_xor_sync(FULL_MASK, invalid, i, WARP_SIZE);
valid += __shfl_xor_sync(FULL_MASK, valid, i, WARP_SIZE);
missing += __shfl_xor_sync(FULL_MASK, missing, i, WARP_SIZE);
missing_masked += __shfl_xor_sync(FULL_MASK, missing_masked, i, WARP_SIZE);
masked += __shfl_xor_sync(FULL_MASK, masked, i, WARP_SIZE);
good += __shfl_xor_sync(FULL_MASK, good, i, WARP_SIZE);
err += __shfl_xor_sync(FULL_MASK, err, i, WARP_SIZE);
err_sq += __shfl_xor_sync(FULL_MASK, err_sq, i, WARP_SIZE);
}
// Block aggregate
if (threadIdx.x % WARP_SIZE == 0) {
atomicAdd(&serr, err);
atomicAdd(&sbad, bad);
atomicAdd(&sinvalid, invalid);
atomicAdd(&svalid, valid);
atomicAdd(&smissing, missing);
atomicAdd(&smissing_masked, missing_masked);
atomicAdd(&smasked, masked);
atomicAdd(&sgood, good);
atomicAdd(&serr, err);
atomicAdd(&serr_sq, err_sq);
}
__syncthreads();
// Global aggregate
if (threadIdx.x == 0 && threadIdx.y == 0) {
atomicAdd(&out->totalerror, serr);
atomicAdd(&out->bad, sbad);
atomicAdd(&out->invalid, sinvalid);
atomicAdd(&out->valid, svalid);
atomicAdd(&out->missing, smissing);
atomicAdd(&out->missing_masked, smissing_masked);
atomicAdd(&out->masked, smasked);
atomicAdd(&out->good, sgood);
atomicAdd(&out->err, serr);
atomicAdd(&out->err_sq, serr_sq);
}
}
......@@ -112,10 +148,13 @@ void ftl::cuda::gt_analysis(
ftl::cuda::TextureObject<uchar4> &colour,
ftl::cuda::TextureObject<float> &depth,
ftl::cuda::TextureObject<float> &gt,
ftl::cuda::TextureObject<uchar> &mask,
ftl::cuda::GTAnalysisData *out,
const ftl::rgbd::Camera &cam,
float threshold,
float outmax,
float t_min,
float t_max,
uchar4 colour_value,
bool use_disparity,
cudaStream_t stream
) {
static constexpr int THREADS_X = 128;
......@@ -126,21 +165,45 @@ void ftl::cuda::gt_analysis(
cudaMemsetAsync(out, 0, sizeof(ftl::cuda::GTAnalysisData), stream);
gt_anal_kernel<true><<<gridSize, blockSize, 0, stream>>>(
colour.devicePtr(),
colour.pixelPitch(),
colour.width(),
colour.height(),
depth.devicePtr(),
depth.pixelPitch(),
gt.devicePtr(),
gt.pixelPitch(),
out,
cam,
threshold,
outmax
);
cudaSafeCall( cudaGetLastError() );
if (use_disparity) {
gt_analysis_kernel<true, true><<<gridSize, blockSize, 0, stream>>>(
colour.devicePtr(),
colour.pixelPitch(),
colour.width(),
colour.height(),
depth.devicePtr(),
depth.pixelPitch(),
gt.devicePtr(),
gt.pixelPitch(),
mask.devicePtr(),
mask.pixelPitch(),
out,
cam,
t_min,
t_max,
colour_value
);
}
else {
gt_analysis_kernel<false, true><<<gridSize, blockSize, 0, stream>>>(
colour.devicePtr(),
colour.pixelPitch(),
colour.width(),
colour.height(),
depth.devicePtr(),
depth.pixelPitch(),
gt.devicePtr(),
gt.pixelPitch(),
mask.devicePtr(),
mask.pixelPitch(),
out,
cam,
t_min,
t_max,
colour_value
);
}
cudaSafeCall(cudaGetLastError());
#ifdef _DEBUG
cudaSafeCall(cudaDeviceSynchronize());
......@@ -150,9 +213,12 @@ void ftl::cuda::gt_analysis(
void ftl::cuda::gt_analysis(
ftl::cuda::TextureObject<float> &depth,
ftl::cuda::TextureObject<float> &gt,
ftl::cuda::TextureObject<uchar> &mask,
ftl::cuda::GTAnalysisData *out,
const ftl::rgbd::Camera &cam,
float threshold,
float t_min,
float t_max,
bool use_disparity,
cudaStream_t stream
) {
static constexpr int THREADS_X = 128;
......@@ -163,20 +229,45 @@ void ftl::cuda::gt_analysis(
cudaMemsetAsync(out, 0, sizeof(ftl::cuda::GTAnalysisData), stream);
gt_anal_kernel<false><<<gridSize, blockSize, 0, stream>>>(
nullptr,
0,
depth.width(),
depth.height(),
depth.devicePtr(),
depth.pixelPitch(),
gt.devicePtr(),
gt.pixelPitch(),
out,
cam,
threshold,
1.0f
);
if (use_disparity) {
gt_analysis_kernel<true, false><<<gridSize, blockSize, 0, stream>>>(
nullptr,
0,
depth.width(),
depth.height(),
depth.devicePtr(),
depth.pixelPitch(),
gt.devicePtr(),
gt.pixelPitch(),
mask.devicePtr(),
mask.pixelPitch(),
out,
cam,
t_min,
t_max,
{0,0,0,0}
);
}
else {
gt_analysis_kernel<false, false><<<gridSize, blockSize, 0, stream>>>(
nullptr,
0,
depth.width(),
depth.height(),
depth.devicePtr(),
depth.pixelPitch(),
gt.devicePtr(),
gt.pixelPitch(),
mask.devicePtr(),
mask.pixelPitch(),
out,
cam,
t_min,
t_max,
{0,0,0,0}
);
}
cudaSafeCall( cudaGetLastError() );
#ifdef _DEBUG
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment