diff --git a/applications/calibration-multi/src/main.cpp b/applications/calibration-multi/src/main.cpp index dbea22842d4b0a70546187f48e161def746af207..7c92ea9eaffcf68f1af97c47e1255d95b5a533cc 100644 --- a/applications/calibration-multi/src/main.cpp +++ b/applications/calibration-multi/src/main.cpp @@ -395,37 +395,6 @@ void runCameraCalibration( ftl::Configurable* root, const size_t n_cameras = n_sources * 2; size_t reference_camera = 0; - { - auto camera = sources[0]->parameters(); - params.size = Size(camera.width, camera.height); - LOG(INFO) << "Camera resolution: " << params.size; - } - - params.idx_cameras.resize(n_cameras); - std::iota(params.idx_cameras.begin(), params.idx_cameras.end(), 0); - - // TODO: parameter for calibration target type - auto calib = MultiCameraCalibrationNew( n_cameras, reference_camera, - params.size, CalibrationTarget(0.250) - ); - - for (size_t i = 0; i < n_sources; i++) { - auto camera_r = sources[i]->parameters(Channel::Right); - auto camera_l = sources[i]->parameters(Channel::Left); - - CHECK(params.size == Size(camera_r.width, camera_r.height)); - CHECK(params.size == Size(camera_l.width, camera_l.height)); - - Mat K; - K = createCameraMatrix(camera_r); - LOG(INFO) << "K[" << 2 * i + 1 << "] = \n" << K; - calib.setCameraParameters(2 * i + 1, K); - - K = createCameraMatrix(camera_l); - LOG(INFO) << "K[" << 2 * i << "] = \n" << K; - calib.setCameraParameters(2 * i, K); - } - ftl::rgbd::Group group; for (Source* src : sources) { src->setChannel(Channel::Right); @@ -442,6 +411,8 @@ void runCameraCalibration( ftl::Configurable* root, mutex.lock(); bool good = true; for (size_t i = 0; i < frames.sources.size(); i ++) { + frames.frames[i].download(Channel::Left+Channel::Right); + if (frames.frames[i].get<cv::Mat>(Channel::Left).empty()) good = false; if (frames.frames[i].get<cv::Mat>(Channel::Right).empty()) good = false; if (frames.frames[i].get<cv::Mat>(Channel::Left).channels() != 3) good = false; // ASSERT @@ -455,6 +426,43 @@ void runCameraCalibration( ftl::Configurable* root, mutex.unlock(); return true; }); + + for (auto &source : sources) { + while (!source->isReady()) { + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + } + } + + { + auto camera = sources[0]->parameters(); + params.size = Size(camera.width, camera.height); + LOG(INFO) << "Camera resolution: " << params.size; + } + + params.idx_cameras.resize(n_cameras); + std::iota(params.idx_cameras.begin(), params.idx_cameras.end(), 0); + + // TODO: parameter for calibration target type + auto calib = MultiCameraCalibrationNew( n_cameras, reference_camera, + params.size, CalibrationTarget(0.250) + ); + + for (size_t i = 0; i < n_sources; i++) { + auto camera_r = sources[i]->parameters(Channel::Right); + auto camera_l = sources[i]->parameters(); + + CHECK(params.size == Size(camera_r.width, camera_r.height)); + CHECK(params.size == Size(camera_l.width, camera_l.height)); + + Mat K; + K = createCameraMatrix(camera_r); + LOG(INFO) << "K[" << 2 * i + 1 << "] = \n" << K; + calib.setCameraParameters(2 * i + 1, K); + + K = createCameraMatrix(camera_l); + LOG(INFO) << "K[" << 2 * i << "] = \n" << K; + calib.setCameraParameters(2 * i, K); + } int iter = 0; Mat show; diff --git a/applications/gui/src/camera.cpp b/applications/gui/src/camera.cpp index debb8e73d5cf6f1f1bc1d685bb7298350014614c..ae0d5627a94a84afb819a9b1daf62b6bddacccae 100644 --- a/applications/gui/src/camera.cpp +++ b/applications/gui/src/camera.cpp @@ -293,6 +293,7 @@ void ftl::gui::Camera::setChannel(Channel c) { case Channel::Flow: case Channel::Confidence: case Channel::Normals: + case Channel::ColourNormals: case Channel::Right: src_->setChannel(c); break; @@ -464,7 +465,7 @@ const GLTexture &ftl::gui::Camera::captureFrame() { break; //case Channel::Flow: - case Channel::Normals: + case Channel::ColourNormals: case Channel::Right: if (im2_.rows == 0 || im2_.type() != CV_8UC3) { break; } texture2_.update(im2_); diff --git a/applications/gui/src/media_panel.cpp b/applications/gui/src/media_panel.cpp index b3ea1a6afbbc8f2a31bf294e85a85564fdffe1cb..800940199e2488ca1b11bd723498059efd63d374 100644 --- a/applications/gui/src/media_panel.cpp +++ b/applications/gui/src/media_panel.cpp @@ -192,7 +192,7 @@ MediaPanel::MediaPanel(ftl::gui::Screen *screen) : nanogui::Window(screen, ""), button->setCallback([this]() { ftl::gui::Camera *cam = screen_->activeCamera(); if (cam) { - cam->setChannel(Channel::Normals); + cam->setChannel(Channel::ColourNormals); } }); diff --git a/applications/gui/src/screen.cpp b/applications/gui/src/screen.cpp index a9f4cc023265ba427a0be451ee3337fe834528bc..c359e227d9be39ab98df2cd8e816323b6a0ecbef 100644 --- a/applications/gui/src/screen.cpp +++ b/applications/gui/src/screen.cpp @@ -48,11 +48,13 @@ namespace { constexpr char const *const defaultImageViewFragmentShader = R"(#version 330 - uniform sampler2D image; + uniform sampler2D image1; + uniform sampler2D image2; + uniform float blendAmount; out vec4 color; in vec2 uv; void main() { - color = texture(image, uv); + color = blendAmount * texture(image1, uv) + (1.0 - blendAmount) * texture(image2, uv); })"; } @@ -70,6 +72,20 @@ ftl::gui::Screen::Screen(ftl::Configurable *proot, ftl::net::Universe *pnet, ftl has_vr_ = vr::VR_IsHmdPresent(); #endif + zoom_ = root_->value("zoom", 1.0f); + root_->on("zoom", [this](const ftl::config::Event &e) { + zoom_ = root_->value("zoom", 1.0f); + }); + + pos_x_ = root_->value("position_x", 0.0f); + root_->on("position_x", [this](const ftl::config::Event &e) { + pos_x_ = root_->value("position_x", 0.0f); + }); + pos_y_ = root_->value("position_y", 0.0f); + root_->on("position_y", [this](const ftl::config::Event &e) { + pos_y_ = root_->value("position_y", 0.0f); + }); + setSize(Vector2i(1280,720)); toolbuttheme = new Theme(*theme()); @@ -352,11 +368,27 @@ void ftl::gui::Screen::setActiveCamera(ftl::gui::Camera *cam) { } } +bool ftl::gui::Screen::scrollEvent(const Eigen::Vector2i &p, const Eigen::Vector2f &rel) { + if (nanogui::Screen::scrollEvent(p, rel)) { + return true; + } else { + zoom_ += zoom_ * 0.1f * rel[1]; + return true; + } +} + bool ftl::gui::Screen::mouseMotionEvent(const Eigen::Vector2i &p, const Eigen::Vector2i &rel, int button, int modifiers) { if (nanogui::Screen::mouseMotionEvent(p, rel, button, modifiers)) { return true; } else { - if (camera_) camera_->mouseMovement(rel[0], rel[1], button); + if (camera_) { + if (button == 1) { + camera_->mouseMovement(rel[0], rel[1], button); + } else if (button == 2) { + pos_x_ += rel[0]; + pos_y_ += -rel[1]; + } + } } return true; // TODO: return statement was missing; is true correct? } @@ -365,7 +397,10 @@ bool ftl::gui::Screen::mouseButtonEvent(const nanogui::Vector2i &p, int button, if (nanogui::Screen::mouseButtonEvent(p, button, down, modifiers)) { return true; } else { - if (camera_ && down) { + if (!camera_) return false; + + //ol movable = camera_->source()->hasCapabilities(ftl::rgbd::kCapMovable); + if (button == 0 && down) { Eigen::Vector2f screenSize = size().cast<float>(); auto mScale = (screenSize.cwiseQuotient(imageSize).minCoeff()); Eigen::Vector2f scaleFactor = mScale * imageSize.cwiseQuotient(screenSize); @@ -389,8 +424,10 @@ bool ftl::gui::Screen::mouseButtonEvent(const nanogui::Vector2i &p, int button, //lookPoint_ = Eigen::Vector3f(worldPos[0],worldPos[1],worldPos[2]); //LOG(INFO) << "Depth at click = " << -camPos[2]; return true; + } else { + } - return false; + return false; } } @@ -444,9 +481,9 @@ void ftl::gui::Screen::draw(NVGcontext *ctx) { #endif if (mImageID < std::numeric_limits<unsigned int>::max() && imageSize[0] > 0) { - auto mScale = (screenSize.cwiseQuotient(imageSize).minCoeff()); + auto mScale = (screenSize.cwiseQuotient(imageSize).minCoeff()) * zoom_; Vector2f scaleFactor = mScale * imageSize.cwiseQuotient(screenSize); - Vector2f positionInScreen(0.0f, 0.0f); + Vector2f positionInScreen(pos_x_, pos_y_); auto mOffset = (screenSize - (screenSize.cwiseProduct(scaleFactor))) / 2; Vector2f positionAfterOffset = positionInScreen + mOffset; Vector2f imagePosition = positionAfterOffset.cwiseQuotient(screenSize); @@ -457,8 +494,12 @@ void ftl::gui::Screen::draw(NVGcontext *ctx) { size().x() * r, size().y() * r);*/ mShader.bind(); glActiveTexture(GL_TEXTURE0); - glBindTexture(GL_TEXTURE_2D, mImageID); - mShader.setUniform("image", 0); + glBindTexture(GL_TEXTURE_2D, leftEye_); + glActiveTexture(GL_TEXTURE1); + glBindTexture(GL_TEXTURE_2D, (camera_->getRight().isValid()) ? rightEye_ : leftEye_); + mShader.setUniform("image1", 0); + mShader.setUniform("image2", 1); + mShader.setUniform("blendAmount", (camera_->getChannel() != ftl::codecs::Channel::Left) ? root_->value("blending", 0.5f) : 1.0f); mShader.setUniform("scaleFactor", scaleFactor); mShader.setUniform("position", imagePosition); mShader.drawIndexed(GL_TRIANGLES, 0, 2); diff --git a/applications/gui/src/screen.hpp b/applications/gui/src/screen.hpp index a0ac39d064875bdc5be81e526a95ac9269617c25..a195f4ccdd45500c534039709ba6762fdd640b22 100644 --- a/applications/gui/src/screen.hpp +++ b/applications/gui/src/screen.hpp @@ -29,6 +29,7 @@ class Screen : public nanogui::Screen { ~Screen(); bool mouseMotionEvent(const Eigen::Vector2i &p, const Eigen::Vector2i &rel, int button, int modifiers); + bool scrollEvent(const Eigen::Vector2i &p, const Eigen::Vector2f &rel); bool mouseButtonEvent(const nanogui::Vector2i &p, int button, bool down, int modifiers); bool keyboardEvent(int key, int scancode, int action, int modifiers); @@ -92,6 +93,9 @@ class Screen : public nanogui::Screen { ftl::Configurable *root_; std::string status_; ftl::gui::Camera *camera_; + float zoom_; + float pos_x_; + float pos_y_; GLuint leftEye_; GLuint rightEye_; diff --git a/applications/reconstruct/src/ilw/correspondence.cu b/applications/reconstruct/src/ilw/correspondence.cu index 8fb9d94b75df5a5d2687fa81da01d999f2bcfeeb..9fb440ae41bf6e1515d72759207154fa8224a588 100644 --- a/applications/reconstruct/src/ilw/correspondence.cu +++ b/applications/reconstruct/src/ilw/correspondence.cu @@ -65,7 +65,7 @@ __global__ void correspondence_energy_vector_kernel( const float3 world1 = pose1 * cam1.screenToCam(x,y,depth1); - const uchar4 colour1 = c1.tex2D(x, y); + const auto colour1 = c1.tex2D((float)x+0.5f, (float)y+0.5f); float bestdepth = 0.0f; float bestweight = 0.0f; @@ -91,17 +91,17 @@ __global__ void correspondence_energy_vector_kernel( // Calculate adjusted depth 3D point in camera 2 space const float3 worldPos = world1 + j * rayStep_world; //(pose1 * cam1.screenToCam(x, y, depth_adjust)); const float3 camPos = rayStart_2 + j * rayStep_2; //pose2 * worldPos; - const uint2 screen = cam2.camToScreen<uint2>(camPos); + const float2 screen = cam2.camToScreen<float2>(camPos); if (screen.x >= cam2.width || screen.y >= cam2.height) continue; // Generate a depth correspondence value - const float depth2 = d2.tex2D((int)screen.x, (int)screen.y); + const float depth2 = d2.tex2D(int(screen.x+0.5f), int(screen.y+0.5f)); const float dweight = ftl::cuda::weighting(fabs(depth2 - camPos.z), params.spatial_smooth); //const float dweight = ftl::cuda::weighting(fabs(depth_adjust - depth1), 2.0f*params.range); // Generate a colour correspondence value - const uchar4 colour2 = c2.tex2D((int)screen.x, (int)screen.y); + const auto colour2 = c2.tex2D(screen.x, screen.y); const float cweight = ftl::cuda::colourWeighting(colour1, colour2, params.colour_smooth); const float weight = weightFunction<FUNCTION>(params, dweight, cweight); diff --git a/applications/reconstruct/src/ilw/ilw.cpp b/applications/reconstruct/src/ilw/ilw.cpp index dfa43267ed5d3ac09309a466b150c0498fb72967..a686f35c337573a04415b0f13a598ec8a3538057 100644 --- a/applications/reconstruct/src/ilw/ilw.cpp +++ b/applications/reconstruct/src/ilw/ilw.cpp @@ -25,7 +25,7 @@ static Eigen::Affine3d create_rotation_matrix(float ax, float ay, float az) { } ILW::ILW(nlohmann::json &config) : ftl::Configurable(config) { - enabled_ = value("ilw_align", true); + enabled_ = value("ilw_align", false); iterations_ = value("iterations", 4); motion_rate_ = value("motion_rate", 0.8f); motion_window_ = value("motion_window", 3); @@ -42,7 +42,7 @@ ILW::ILW(nlohmann::json &config) : ftl::Configurable(config) { }); on("ilw_align", [this](const ftl::config::Event &e) { - enabled_ = value("ilw_align", true); + enabled_ = value("ilw_align", false); }); on("iterations", [this](const ftl::config::Event &e) { @@ -215,11 +215,11 @@ bool ILW::_phase0(ftl::rgbd::FrameSet &fs, cudaStream_t stream) { f.createTexture<float>(Channel::Depth2, Format<float>(f.get<GpuMat>(Channel::Colour).size())); f.createTexture<float>(Channel::Confidence, Format<float>(f.get<GpuMat>(Channel::Colour).size())); - f.createTexture<int>(Channel::Mask, Format<int>(f.get<GpuMat>(Channel::Colour).size())); + //f.createTexture<int>(Channel::Mask, Format<int>(f.get<GpuMat>(Channel::Colour).size())); f.createTexture<uchar4>(Channel::Colour); f.createTexture<float>(Channel::Depth); - f.get<GpuMat>(Channel::Mask).setTo(cv::Scalar(0)); + //f.get<GpuMat>(Channel::Mask).setTo(cv::Scalar(0)); } //cudaSafeCall(cudaStreamSynchronize(stream_)); @@ -232,7 +232,7 @@ bool ILW::_phase1(ftl::rgbd::FrameSet &fs, int win, cudaStream_t stream) { cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream); // Find discontinuity mask - for (size_t i=0; i<fs.frames.size(); ++i) { + /*for (size_t i=0; i<fs.frames.size(); ++i) { auto &f = fs.frames[i]; auto s = fs.sources[i]; @@ -243,7 +243,7 @@ bool ILW::_phase1(ftl::rgbd::FrameSet &fs, int win, cudaStream_t stream) { discon_mask_, stream ); - } + }*/ // First do any preprocessing if (fill_depth_) { diff --git a/applications/reconstruct/src/main.cpp b/applications/reconstruct/src/main.cpp index bcbe3e52e655e600ea6e654fbeddf6248221e3b4..0603dad4028d69223b0ce5ddc31d36eafc900c14 100644 --- a/applications/reconstruct/src/main.cpp +++ b/applications/reconstruct/src/main.cpp @@ -33,6 +33,12 @@ #include <ftl/operators/smoothing.hpp> #include <ftl/operators/colours.hpp> #include <ftl/operators/normals.hpp> +#include <ftl/operators/filling.hpp> +#include <ftl/operators/segmentation.hpp> +#include <ftl/operators/mask.hpp> +#include <ftl/operators/antialiasing.hpp> +#include <ftl/operators/mvmls.hpp> +#include <ftl/operators/clipping.hpp> #include <ftl/cuda/normals.hpp> #include <ftl/registration.hpp> @@ -62,15 +68,64 @@ using ftl::registration::loadTransformations; using ftl::registration::saveTransformations; static Eigen::Affine3d create_rotation_matrix(float ax, float ay, float az) { - Eigen::Affine3d rx = - Eigen::Affine3d(Eigen::AngleAxisd(ax, Eigen::Vector3d(1, 0, 0))); - Eigen::Affine3d ry = - Eigen::Affine3d(Eigen::AngleAxisd(ay, Eigen::Vector3d(0, 1, 0))); - Eigen::Affine3d rz = - Eigen::Affine3d(Eigen::AngleAxisd(az, Eigen::Vector3d(0, 0, 1))); - return rz * rx * ry; + Eigen::Affine3d rx = + Eigen::Affine3d(Eigen::AngleAxisd(ax, Eigen::Vector3d(1, 0, 0))); + Eigen::Affine3d ry = + Eigen::Affine3d(Eigen::AngleAxisd(ay, Eigen::Vector3d(0, 1, 0))); + Eigen::Affine3d rz = + Eigen::Affine3d(Eigen::AngleAxisd(az, Eigen::Vector3d(0, 0, 1))); + return rz * rx * ry; } +// TODO: * Remove this class (requires more general solution). Also does not +// process disconnections/reconnections/types etc. correctly. +// * Update when new options become available. + +class ConfigProxy { + private: + vector<ftl::UUID> peers_; + vector<std::string> uris_; + ftl::net::Universe *net_; + + public: + ConfigProxy(ftl::net::Universe *net) { + net_ = net; + + auto response = net_->findAll<std::string>("node_details"); + for (auto &r : response) { + auto r_json = json_t::parse(r); + peers_.push_back(ftl::UUID(r_json["id"].get<std::string>())); + uris_.push_back(r_json["title"].get<std::string>()); + } + } + + void add(ftl::Configurable *root, const std::string &uri, const std::string &name) { + auto config = json_t::parse(net_->call<string>(peers_[0], "get_cfg", uris_[0] + "/" + uri)); + auto *proxy = ftl::create<ftl::Configurable>(root, name); + + try { + for (auto &itm : config.get<json::object_t>()) { + auto key = itm.first; + auto value = itm.second; + if (*key.begin() == '$') { continue; } + + proxy->set(key, value); + proxy->on(key, [this, uri, key, value, proxy](const ftl::config::Event&) { + for (size_t i = 0; i < uris_.size(); i++) { + // TODO: check that config exists? + auto peer = peers_[i]; + std::string name = uris_[i] + "/" + uri + "/" + key; + net_->send(peer, "update_cfg", name, proxy->getConfig()[key].dump()); + } + }); + } + } + catch (nlohmann::detail::type_error) { + LOG(ERROR) << "Failed to add config proxy for: " << uri << "/" << name; + } + } +}; + static void run(ftl::Configurable *root) { Universe *net = ftl::create<Universe>(root, "net"); ftl::ctrl::Slave slave(net, root); @@ -80,7 +135,7 @@ static void run(ftl::Configurable *root) { net->start(); net->waitConnections(); - + // Check paths for an FTL file to load... auto paths = (*root->get<nlohmann::json>("paths")); for (auto &x : paths.items()) { @@ -120,6 +175,17 @@ static void run(ftl::Configurable *root) { LOG(ERROR) << "No sources configured!"; return; } + + ConfigProxy *configproxy = nullptr; + if (net->numberOfPeers() > 0) { + configproxy = new ConfigProxy(net); // TODO delete + auto *disparity = ftl::create<ftl::Configurable>(root, "disparity"); + configproxy->add(disparity, "source/disparity/algorithm", "algorithm"); + configproxy->add(disparity, "source/disparity/bilateral_filter", "bilateral_filter"); + configproxy->add(disparity, "source/disparity/optflow_filter", "optflow_filter"); + configproxy->add(disparity, "source/disparity/mls", "mls"); + configproxy->add(disparity, "source/disparity/cross", "cross"); + } // Create scene transform, intended for axis aligning the walls and floor Eigen::Matrix4d transform; @@ -182,8 +248,12 @@ static void run(ftl::Configurable *root) { int o = root->value("origin_pose", 0) % sources.size(); virt->setPose(sources[o]->getPose()); + auto *renderpipe = ftl::config::create<ftl::operators::Graph>(root, "render_pipe"); + renderpipe->append<ftl::operators::ColourChannels>("colour"); // Generate interpolation texture... + renderpipe->append<ftl::operators::FXAA>("antialiasing"); + // Generate virtual camera render when requested by streamer - virt->onRender([splat,virt,&scene_B,align](ftl::rgbd::Frame &out) { + virt->onRender([splat,virt,&scene_B,align,renderpipe](ftl::rgbd::Frame &out) { //virt->setTimestamp(scene_B.timestamp); // Do we need to convert Lab to BGR? if (align->isLabColour()) { @@ -193,6 +263,7 @@ static void run(ftl::Configurable *root) { } } splat->render(virt, out); + renderpipe->apply(out, out, virt, 0); }); stream->add(virt); @@ -229,7 +300,7 @@ static void run(ftl::Configurable *root) { // TODO: Write pose+calibration+config packets auto sources = group->sources(); - for (int i=0; i<sources.size(); ++i) { + for (size_t i=0; i<sources.size(); ++i) { //writeSourceProperties(writer, i, sources[i]); sources[i]->inject(Channel::Calibration, sources[i]->parameters(), Channel::Left, sources[i]->getCapabilities()); sources[i]->inject(sources[i]->getPose()); @@ -251,11 +322,18 @@ static void run(ftl::Configurable *root) { // Create the source depth map pipeline auto *pipeline1 = ftl::config::create<ftl::operators::Graph>(root, "pre_filters"); + pipeline1->append<ftl::operators::ClipScene>("clipping"); pipeline1->append<ftl::operators::ColourChannels>("colour"); // Convert BGR to BGRA - pipeline1->append<ftl::operators::HFSmoother>("hfnoise"); // Remove high-frequency noise + //pipeline1->append<ftl::operators::HFSmoother>("hfnoise"); // Remove high-frequency noise pipeline1->append<ftl::operators::Normals>("normals"); // Estimate surface normals - pipeline1->append<ftl::operators::SmoothChannel>("smoothing"); // Generate a smoothing channel - pipeline1->append<ftl::operators::AdaptiveMLS>("mls"); // Perform MLS (using smoothing channel) + //pipeline1->append<ftl::operators::SmoothChannel>("smoothing"); // Generate a smoothing channel + //pipeline1->append<ftl::operators::ScanFieldFill>("filling"); // Generate a smoothing channel + pipeline1->append<ftl::operators::CrossSupport>("cross"); + pipeline1->append<ftl::operators::DiscontinuityMask>("discontinuity"); + pipeline1->append<ftl::operators::CullDiscontinuity>("remove_discontinuity"); + //pipeline1->append<ftl::operators::AggreMLS>("mls"); // Perform MLS (using smoothing channel) + pipeline1->append<ftl::operators::VisCrossSupport>("viscross")->set("enabled", false); + pipeline1->append<ftl::operators::MultiViewMLS>("mvmls"); // Alignment diff --git a/applications/vision/src/main.cpp b/applications/vision/src/main.cpp index ba07e90424db91c5cc36e18d0aa40a68109bddbd..f99754cbf558bb3d0f53fde37763855fac7af128 100644 --- a/applications/vision/src/main.cpp +++ b/applications/vision/src/main.cpp @@ -48,32 +48,6 @@ using std::chrono::milliseconds; using cv::Mat; using json = nlohmann::json; -namespace ftl { -void disparityToDepth(const cv::Mat &disparity, cv::Mat &depth, const cv::Mat &q) { - cv::Matx44d _Q; - q.convertTo(_Q, CV_64F); - - if (depth.empty()) depth = cv::Mat(disparity.size(), CV_32F); - - for( int y = 0; y < disparity.rows; y++ ) { - const float *sptr = disparity.ptr<float>(y); - float *dptr = depth.ptr<float>(y); - - for( int x = 0; x < disparity.cols; x++ ) { - double d = sptr[x]; - cv::Vec4d homg_pt = _Q*cv::Vec4d(x, y, d, 1.0); - //dptr[x] = Vec3d(homg_pt.val); - //dptr[x] /= homg_pt[3]; - dptr[x] = (homg_pt[2] / homg_pt[3]); // Depth in meters - - if( fabs(d) <= FLT_EPSILON ) - dptr[x] = 1000.0f; - } - } -} -}; - - static void run(ftl::Configurable *root) { Universe *net = ftl::create<Universe>(root, "net"); ftl::ctrl::Slave slave(net, root); @@ -107,6 +81,8 @@ static void run(ftl::Configurable *root) { stream->run(true); //} + ftl::timer::start(true); + LOG(INFO) << "Stopping..."; slave.stop(); stream->stop(); diff --git a/components/codecs/include/ftl/codecs/channels.hpp b/components/codecs/include/ftl/codecs/channels.hpp index 85cd9fffc45ddcda1b6cc13a160c74c30797b804..6673275fe7b4f874b0807c48e690e24b4aaddb51 100644 --- a/components/codecs/include/ftl/codecs/channels.hpp +++ b/components/codecs/include/ftl/codecs/channels.hpp @@ -19,7 +19,7 @@ enum struct Channel : int { Deviation = 4, Screen = 4, Normals = 5, // 32FC4 - Points = 6, // 32FC4 + Points = 6, // 32FC4 (should be deprecated) Confidence = 7, // 32F Contribution = 7, // 32F EnergyVector = 8, // 32FC4 @@ -28,9 +28,10 @@ enum struct Channel : int { Energy = 10, // 32F Mask = 11, // 32U Density = 12, // 32F - LeftGray = 13, // Deprecated - RightGray = 14, // Deprecated - Overlay1 = 15, // Unused + Support1 = 13, // 8UC4 (currently) + Support2 = 14, // 8UC4 (currently) + Segmentation = 15, // 32S? + ColourNormals = 16, // 8UC4 AudioLeft = 32, AudioRight = 33, diff --git a/components/codecs/src/channels.cpp b/components/codecs/src/channels.cpp index 25b06b533b5c95d16bd60365ccbcc4373e09bd45..effaf9eb261dfe460f2d6f5bf6f08849e4f93538 100644 --- a/components/codecs/src/channels.cpp +++ b/components/codecs/src/channels.cpp @@ -21,9 +21,9 @@ static ChannelInfo info[] = { "Energy", CV_32F, "Mask", CV_32S, "Density", CV_32F, - "LeftGray", CV_8U, - "RightGray", CV_8U, - "Overlay1", CV_8UC3, + "Support1", CV_8UC4, + "Support2", CV_8UC4, + "Segmentation", CV_32S, "NoName", 0, "NoName", 0, diff --git a/components/common/cpp/include/ftl/configurable.hpp b/components/common/cpp/include/ftl/configurable.hpp index 244b548285b92902e645436891b04acb182ffd08..219bd171edf4b7997cc35f97fa7787a0c4bb4559 100644 --- a/components/common/cpp/include/ftl/configurable.hpp +++ b/components/common/cpp/include/ftl/configurable.hpp @@ -104,7 +104,7 @@ class Configurable { protected: nlohmann::json *config_; - virtual void inject(const std::string name, nlohmann::json &value) {} + virtual void inject(const std::string &name, nlohmann::json &value) {} private: std::map<std::string, std::list<std::function<void(const config::Event&)>>> observers_; diff --git a/components/common/cpp/include/ftl/cuda_common.hpp b/components/common/cpp/include/ftl/cuda_common.hpp index 116e26ec74ff4b469e8d1214cafa2897264f80ad..1b4f3a47afb000a9d1e05d264a5dd79a9d1c3e4a 100644 --- a/components/common/cpp/include/ftl/cuda_common.hpp +++ b/components/common/cpp/include/ftl/cuda_common.hpp @@ -28,6 +28,22 @@ bool hasCompute(int major, int minor); int deviceCount(); +template <typename T> +struct Float; + +template <> struct Float<float> { typedef float type; }; +template <> struct Float<int> { typedef float type; }; +template <> struct Float<float4> { typedef float4 type; }; +template <> struct Float<uchar4> { typedef float4 type; }; +template <> struct Float<short2> { typedef float2 type; }; + +template <typename T> +struct ScaleValue; + +template <> struct ScaleValue<uchar4> { static constexpr float value = 255.0f; }; +template <> struct ScaleValue<float> { static constexpr float value = 1.0f; }; +template <> struct ScaleValue<float4> { static constexpr float value = 1.0f; }; + /** * Represent a CUDA texture object. Instances of this class can be used on both * host and device. A texture object base cannot be constructed directly, it @@ -89,7 +105,7 @@ class TextureObject : public TextureObjectBase { static_assert((16u % sizeof(T)) == 0, "Channel format must be aligned with 16 bytes"); __host__ __device__ TextureObject() : TextureObjectBase() {}; - explicit TextureObject(const cv::cuda::GpuMat &d); + explicit TextureObject(const cv::cuda::GpuMat &d, bool interpolated=false); explicit TextureObject(const cv::cuda::PtrStepSz<T> &d); TextureObject(T *ptr, int pitch, int width, int height); TextureObject(size_t width, size_t height); @@ -110,7 +126,8 @@ class TextureObject : public TextureObjectBase { #ifdef __CUDACC__ __device__ inline T tex2D(int u, int v) const { return ::tex2D<T>(texobj_, u, v); } - __device__ inline T tex2D(float u, float v) const { return ::tex2D<T>(texobj_, u, v); } + __device__ inline T tex2D(unsigned int u, unsigned int v) const { return ::tex2D<T>(texobj_, (int)u, (int)v); } + __device__ inline typename Float<T>::type tex2D(float u, float v) const { return ::tex2D<typename Float<T>::type>(texobj_, u, v) * ScaleValue<T>::value; } #endif __host__ __device__ inline const T &operator()(int u, int v) const { return reinterpret_cast<T*>(ptr_)[u+v*pitch2_]; } @@ -137,7 +154,7 @@ TextureObject<T> &TextureObject<T>::cast(TextureObjectBase &b) { * Create a 2D array texture from an OpenCV GpuMat object. */ template <typename T> -TextureObject<T>::TextureObject(const cv::cuda::GpuMat &d) { +TextureObject<T>::TextureObject(const cv::cuda::GpuMat &d, bool interpolated) { // GpuMat must have correct data type CHECK(d.type() == ftl::traits::OpenCVType<T>::value); @@ -153,7 +170,8 @@ TextureObject<T>::TextureObject(const cv::cuda::GpuMat &d) { cudaTextureDesc texDesc; // cppcheck-suppress memsetClassFloat memset(&texDesc, 0, sizeof(texDesc)); - texDesc.readMode = cudaReadModeElementType; + texDesc.readMode = (interpolated) ? cudaReadModeNormalizedFloat : cudaReadModeElementType; + if (interpolated) texDesc.filterMode = cudaFilterModeLinear; cudaTextureObject_t tex = 0; cudaSafeCall(cudaCreateTextureObject(&tex, &resDesc, &texDesc, NULL)); @@ -165,7 +183,7 @@ TextureObject<T>::TextureObject(const cv::cuda::GpuMat &d) { height_ = d.rows; needsfree_ = false; cvType_ = ftl::traits::OpenCVType<T>::value; - //needsdestroy_ = true; + needsdestroy_ = true; } #endif // __CUDACC__ @@ -188,6 +206,7 @@ TextureObject<T>::TextureObject(const cv::cuda::PtrStepSz<T> &d) { // cppcheck-suppress memsetClassFloat memset(&texDesc, 0, sizeof(texDesc)); texDesc.readMode = cudaReadModeElementType; + //if (std::is_same<T,uchar4>::value) texDesc.filterMode = cudaFilterModeLinear; cudaTextureObject_t tex = 0; cudaSafeCall(cudaCreateTextureObject(&tex, &resDesc, &texDesc, NULL)); @@ -199,7 +218,7 @@ TextureObject<T>::TextureObject(const cv::cuda::PtrStepSz<T> &d) { height_ = d.rows; needsfree_ = false; cvType_ = ftl::traits::OpenCVType<T>::value; - //needsdestroy_ = true; + needsdestroy_ = true; } /** @@ -221,9 +240,10 @@ TextureObject<T>::TextureObject(T *ptr, int pitch, int width, int height) { // cppcheck-suppress memsetClassFloat memset(&texDesc, 0, sizeof(texDesc)); texDesc.readMode = cudaReadModeElementType; + //if (std::is_same<T,uchar4>::value) texDesc.filterMode = cudaFilterModeLinear; cudaTextureObject_t tex = 0; - cudaCreateTextureObject(&tex, &resDesc, &texDesc, NULL); + cudaSafeCall(cudaCreateTextureObject(&tex, &resDesc, &texDesc, NULL)); texobj_ = tex; pitch_ = pitch; pitch2_ = pitch_ / sizeof(T); @@ -232,7 +252,7 @@ TextureObject<T>::TextureObject(T *ptr, int pitch, int width, int height) { height_ = height; needsfree_ = false; cvType_ = ftl::traits::OpenCVType<T>::value; - //needsdestroy_ = true; + needsdestroy_ = true; } template <typename T> @@ -255,7 +275,8 @@ TextureObject<T>::TextureObject(size_t width, size_t height) { // cppcheck-suppress memsetClassFloat memset(&texDesc, 0, sizeof(texDesc)); texDesc.readMode = cudaReadModeElementType; - cudaCreateTextureObject(&tex, &resDesc, &texDesc, NULL); + //if (std::is_same<T,uchar4>::value) texDesc.filterMode = cudaFilterModeLinear; + cudaSafeCall(cudaCreateTextureObject(&tex, &resDesc, &texDesc, NULL)); //} texobj_ = tex; @@ -264,7 +285,7 @@ TextureObject<T>::TextureObject(size_t width, size_t height) { needsfree_ = true; pitch2_ = pitch_ / sizeof(T); cvType_ = ftl::traits::OpenCVType<T>::value; - //needsdestroy_ = true; + needsdestroy_ = true; } #ifndef __CUDACC__ @@ -291,6 +312,7 @@ TextureObject<T>::TextureObject(const TextureObject<T> &p) { pitch2_ = pitch_ / sizeof(T); cvType_ = ftl::traits::OpenCVType<T>::value; needsfree_ = false; + needsdestroy_ = false; } template <typename T> @@ -302,14 +324,17 @@ TextureObject<T>::TextureObject(TextureObject<T> &&p) { pitch_ = p.pitch_; pitch2_ = pitch_ / sizeof(T); needsfree_ = p.needsfree_; + needsdestroy_ = p.needsdestroy_; p.texobj_ = 0; p.needsfree_ = false; + p.needsdestroy_ = false; p.ptr_ = nullptr; cvType_ = ftl::traits::OpenCVType<T>::value; } template <typename T> TextureObject<T> &TextureObject<T>::operator=(const TextureObject<T> &p) { + free(); texobj_ = p.texobj_; ptr_ = p.ptr_; width_ = p.width_; @@ -318,11 +343,13 @@ TextureObject<T> &TextureObject<T>::operator=(const TextureObject<T> &p) { pitch2_ = pitch_ / sizeof(T); cvType_ = ftl::traits::OpenCVType<T>::value; needsfree_ = false; + needsdestroy_ = false; return *this; } template <typename T> TextureObject<T> &TextureObject<T>::operator=(TextureObject<T> &&p) { + free(); texobj_ = p.texobj_; ptr_ = p.ptr_; width_ = p.width_; @@ -330,8 +357,10 @@ TextureObject<T> &TextureObject<T>::operator=(TextureObject<T> &&p) { pitch_ = p.pitch_; pitch2_ = pitch_ / sizeof(T); needsfree_ = p.needsfree_; + needsdestroy_ = p.needsdestroy_; p.texobj_ = 0; p.needsfree_ = false; + p.needsdestroy_ = false; p.ptr_ = nullptr; cvType_ = ftl::traits::OpenCVType<T>::value; return *this; diff --git a/components/common/cpp/src/configuration.cpp b/components/common/cpp/src/configuration.cpp index bd9d9feec20538cebc210cafba72a98f651b243c..89d539cbec5270b7d7a5bc57a0a113eb57456553 100644 --- a/components/common/cpp/src/configuration.cpp +++ b/components/common/cpp/src/configuration.cpp @@ -290,7 +290,8 @@ json_t &ftl::config::resolve(const std::string &puri, bool eager) { std::string u = uri.getBaseURI(); auto ix = config_index.find(u); if (ix == config_index.end()) { - LOG(FATAL) << "Cannot find resource: " << u; + LOG(WARNING) << "Cannot find resource: " << u; + return null_json; } auto ptr = nlohmann::json::json_pointer("/"+uri.getFragment()); diff --git a/components/common/cpp/src/timer.cpp b/components/common/cpp/src/timer.cpp index 328006ab82041d9613ab9ef7847d74727e3aa7f2..f13255e7359747406feb5c3d7da3b1ba779c617f 100644 --- a/components/common/cpp/src/timer.cpp +++ b/components/common/cpp/src/timer.cpp @@ -169,6 +169,8 @@ namespace ftl { extern bool running; } +//static MUTEX mtx; + void ftl::timer::start(bool block) { if (active) return; active = true; diff --git a/components/net/cpp/include/ftl/net_configurable.hpp b/components/net/cpp/include/ftl/net_configurable.hpp index 3a31ecc27675f2e37b72bc42ee83da0dee5b7e4c..bdd21c4700e7664cff996672585054c30b6e8e6a 100644 --- a/components/net/cpp/include/ftl/net_configurable.hpp +++ b/components/net/cpp/include/ftl/net_configurable.hpp @@ -13,7 +13,7 @@ namespace ftl { ~NetConfigurable(); protected: - void inject(const std::string name, nlohmann::json &value); + void inject(const std::string &name, nlohmann::json &value); private: ftl::UUID peer; diff --git a/components/net/cpp/src/net_configurable.cpp b/components/net/cpp/src/net_configurable.cpp index edcec65b8c4ba452472e30efd29b672b7a6e2c55..be98cf7edbb221c61e9732a9e59d8546f1185644 100644 --- a/components/net/cpp/src/net_configurable.cpp +++ b/components/net/cpp/src/net_configurable.cpp @@ -7,6 +7,6 @@ ftl::NetConfigurable::NetConfigurable(ftl::UUID peer, const std::string &suri, f ftl::NetConfigurable::~NetConfigurable(){} -void ftl::NetConfigurable::inject(const std::string name, nlohmann::json &value) { +void ftl::NetConfigurable::inject(const std::string &name, nlohmann::json &value) { ctrl.set(peer, suri + std::string("/") + name, value); } diff --git a/components/operators/CMakeLists.txt b/components/operators/CMakeLists.txt index c93a87f6693334c30e28039387db45554ebdb0d2..b025a39b357a628ee84d2f6d3d7b82c8aa6fcc3e 100644 --- a/components/operators/CMakeLists.txt +++ b/components/operators/CMakeLists.txt @@ -1,13 +1,41 @@ -add_library(ftloperators - src/smoothing.cpp +set(OPERSRC + src/smoothing.cpp src/smoothing.cu src/mls.cu src/smoothchan.cu src/operator.cpp src/colours.cpp src/normals.cpp + src/filling.cpp + src/filling.cu + src/disparity/disp2depth.cu + src/disparity/disparity_to_depth.cpp + src/disparity/bilateral_filter.cpp + src/segmentation.cu + src/segmentation.cpp + src/mask.cu + src/mask.cpp + src/antialiasing.cpp + src/antialiasing.cu + src/mvmls.cpp + src/correspondence.cu + src/clipping.cpp ) + +if (LIBSGM_FOUND) + list(APPEND OPERSRC src/disparity/fixstars_sgm.cpp) +endif (LIBSGM_FOUND) + +if (HAVE_OPTFLOW) + list(APPEND OPERSRC + src/nvopticalflow.cpp + src/disparity/optflow_smoothing.cu + src/disparity/optflow_smoothing.cpp) +endif() + +add_library(ftloperators ${OPERSRC}) + # These cause errors in CI build and are being removed from PCL in newer versions # target_compile_options(ftlrender PUBLIC ${PCL_DEFINITIONS}) diff --git a/components/operators/include/ftl/operators/antialiasing.hpp b/components/operators/include/ftl/operators/antialiasing.hpp new file mode 100644 index 0000000000000000000000000000000000000000..302631253f0d1aab6e758cf48c0af213bcf3f002 --- /dev/null +++ b/components/operators/include/ftl/operators/antialiasing.hpp @@ -0,0 +1,27 @@ +#ifndef _FTL_OPERATORS_ANTIALIASING_HPP_ +#define _FTL_OPERATORS_ANTIALIASING_HPP_ + +#include <ftl/operators/operator.hpp> +#include <ftl/cuda_common.hpp> + +namespace ftl { +namespace operators { + +/** + * Fast Approximate Anti-Aliasing by NVIDIA (2010) + */ +class FXAA : public ftl::operators::Operator { + public: + explicit FXAA(ftl::Configurable*); + ~FXAA(); + + inline Operator::Type type() const override { return Operator::Type::OneToOne; } + + bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override; + +}; + +} +} + +#endif // _FTL_OPERATORS_ANTIALIASING_HPP_ diff --git a/components/operators/include/ftl/operators/clipping.hpp b/components/operators/include/ftl/operators/clipping.hpp new file mode 100644 index 0000000000000000000000000000000000000000..590e714c9eeca9713e56139aa874f7c0db0a472d --- /dev/null +++ b/components/operators/include/ftl/operators/clipping.hpp @@ -0,0 +1,27 @@ +#ifndef _FTL_OPERATORS_CLIPPING_HPP_ +#define _FTL_OPERATORS_CLIPPING_HPP_ + +#include <ftl/operators/operator.hpp> +#include <ftl/cuda_common.hpp> + +namespace ftl { +namespace operators { + +/** + * Calculate rough normals from local depth gradients. + */ +class ClipScene : public ftl::operators::Operator { + public: + explicit ClipScene(ftl::Configurable*); + ~ClipScene(); + + inline Operator::Type type() const override { return Operator::Type::ManyToMany; } + + bool apply(ftl::rgbd::FrameSet &in, ftl::rgbd::FrameSet &out, cudaStream_t stream) override; + +}; + +} +} + +#endif // _FTL_OPERATORS_CLIPPING_HPP_ diff --git a/components/operators/include/ftl/operators/disparity.hpp b/components/operators/include/ftl/operators/disparity.hpp new file mode 100644 index 0000000000000000000000000000000000000000..0c26c904fbfa8e2dce0d209347b612f6bb719b63 --- /dev/null +++ b/components/operators/include/ftl/operators/disparity.hpp @@ -0,0 +1,105 @@ +#pragma once + +#include <ftl/config.h> +#include <ftl/operators/operator.hpp> + +#ifdef HAVE_OPTFLOW +#include <opencv2/cudaoptflow.hpp> +#endif + +#ifdef HAVE_LIBSGM +#include <libsgm.h> +#endif + +namespace ftl { +namespace operators { + +#ifdef HAVE_LIBSGM +/* + * FixstarsSGM https://github.com/fixstars/libSGM + * + * Requires modified version https://gitlab.utu.fi/joseha/libsgm + */ +class FixstarsSGM : public ftl::operators::Operator { + public: + explicit FixstarsSGM(ftl::Configurable* cfg); + + ~FixstarsSGM(); + inline Operator::Type type() const override { return Operator::Type::OneToOne; } + bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override; + + private: + bool init(); + bool updateParameters(); + + sgm::StereoSGM *ssgm_; + cv::Size size_; + cv::cuda::GpuMat lbw_; + cv::cuda::GpuMat rbw_; + cv::cuda::GpuMat disp_int_; + + int P1_; + int P2_; + int max_disp_; + float uniqueness_; +}; +#endif + +class DisparityBilateralFilter : public::ftl::operators::Operator { + public: + explicit DisparityBilateralFilter(ftl::Configurable*); + + ~DisparityBilateralFilter() {}; + + inline Operator::Type type() const override { return Operator::Type::OneToOne; } + bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override; + + private: + cv::Ptr<cv::cuda::DisparityBilateralFilter> filter_; + cv::cuda::GpuMat disp_int_; + cv::cuda::GpuMat disp_int_result_; + double scale_; + int radius_; + int iter_; + int n_disp_; +}; + +/* + * Calculate depth from disparity + */ +class DisparityToDepth : public ftl::operators::Operator { + public: + explicit DisparityToDepth(ftl::Configurable* cfg) : + ftl::operators::Operator(cfg) {} + + ~DisparityToDepth() {}; + inline Operator::Type type() const override { return Operator::Type::OneToOne; } + bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override; +}; + +/* + * Optical flow smoothing for depth + */ +#ifdef HAVE_OPTFLOW +class OpticalFlowTemporalSmoothing : public ftl::operators::Operator { + public: + explicit OpticalFlowTemporalSmoothing(ftl::Configurable*); + ~OpticalFlowTemporalSmoothing(); + + inline Operator::Type type() const override { return Operator::Type::OneToOne; } + + bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override; + + private: + bool init(); + + const ftl::codecs::Channel channel_ = ftl::codecs::Channel::Disparity; + cv::cuda::GpuMat history_; + cv::Size size_; + int n_max_; + float threshold_; +}; +#endif + +} +} diff --git a/components/operators/include/ftl/operators/filling.hpp b/components/operators/include/ftl/operators/filling.hpp new file mode 100644 index 0000000000000000000000000000000000000000..2825dde5f6840658d7c49dd374792e80952e9391 --- /dev/null +++ b/components/operators/include/ftl/operators/filling.hpp @@ -0,0 +1,38 @@ +#ifndef _FTL_OPERATORS_FILLING_HPP_ +#define _FTL_OPERATORS_FILLING_HPP_ + +#include <ftl/operators/operator.hpp> +#include <ftl/cuda_common.hpp> + +namespace ftl { +namespace operators { + +/** + * Use colour distance field to scanline correct depth edges and fill holes. + */ +class ScanFieldFill : public ftl::operators::Operator { + public: + explicit ScanFieldFill(ftl::Configurable*); + ~ScanFieldFill(); + + inline Operator::Type type() const override { return Operator::Type::OneToOne; } + + bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override; + +}; + +class CrossSupportFill : public ftl::operators::Operator { + public: + explicit CrossSupportFill(ftl::Configurable*); + ~CrossSupportFill(); + + inline Operator::Type type() const override { return Operator::Type::OneToOne; } + + bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override; + +}; + +} +} + +#endif // _FTL_OPERATORS_FILLING_HPP_ diff --git a/components/operators/include/ftl/operators/mask.hpp b/components/operators/include/ftl/operators/mask.hpp new file mode 100644 index 0000000000000000000000000000000000000000..579b1f6fe83daf7402041fe139116abe09ee3234 --- /dev/null +++ b/components/operators/include/ftl/operators/mask.hpp @@ -0,0 +1,43 @@ +#ifndef _FTL_OPERATORS_MASK_HPP_ +#define _FTL_OPERATORS_MASK_HPP_ + +#include <ftl/operators/operator.hpp> +#include <ftl/cuda_common.hpp> + +namespace ftl { +namespace operators { + +/** + * Generate a masking channel that indicates depth discontinuities within a + * specified radius from a pixel. This is useful for culling potentially bad + * depth and colour values when merging and smoothing. + */ +class DiscontinuityMask : public ftl::operators::Operator { + public: + explicit DiscontinuityMask(ftl::Configurable*); + ~DiscontinuityMask(); + + inline Operator::Type type() const override { return Operator::Type::OneToOne; } + + bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override; + +}; + +/** + * Remove depth values marked with the discontinuity mask. + */ +class CullDiscontinuity : public ftl::operators::Operator { + public: + explicit CullDiscontinuity(ftl::Configurable*); + ~CullDiscontinuity(); + + inline Operator::Type type() const override { return Operator::Type::OneToOne; } + + bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override; + +}; + +} +} + +#endif // _FTL_OPERATORS_MASK_HPP_ diff --git a/components/operators/include/ftl/operators/mvmls.hpp b/components/operators/include/ftl/operators/mvmls.hpp new file mode 100644 index 0000000000000000000000000000000000000000..6b8eff1be304693cfe7dee96152f9aa4cd3452b4 --- /dev/null +++ b/components/operators/include/ftl/operators/mvmls.hpp @@ -0,0 +1,28 @@ +#ifndef _FTL_OPERATORS_MVMLS_HPP_ +#define _FTL_OPERATORS_MVMLS_HPP_ + +#include <ftl/operators/operator.hpp> + +namespace ftl { +namespace operators { + +class MultiViewMLS : public ftl::operators::Operator { + public: + explicit MultiViewMLS(ftl::Configurable*); + ~MultiViewMLS(); + + inline Operator::Type type() const override { return Operator::Type::ManyToMany; } + + bool apply(ftl::rgbd::FrameSet &in, ftl::rgbd::FrameSet &out, cudaStream_t stream) override; + + private: + std::vector<ftl::cuda::TextureObject<float4>> centroid_horiz_; + std::vector<ftl::cuda::TextureObject<float4>> centroid_vert_; + std::vector<ftl::cuda::TextureObject<float4>> normals_horiz_; + std::vector<ftl::cuda::TextureObject<float>> contributions_; +}; + +} +} + +#endif diff --git a/components/operators/include/ftl/operators/operator.hpp b/components/operators/include/ftl/operators/operator.hpp index fe9b52f2c6b7d6d3364c1d1722f70bd691602170..bc47afef9a976d21e1cb934a4d491a1575f7022d 100644 --- a/components/operators/include/ftl/operators/operator.hpp +++ b/components/operators/include/ftl/operators/operator.hpp @@ -23,7 +23,7 @@ namespace operators { class Operator { public: explicit Operator(ftl::Configurable *cfg); - virtual ~Operator(); + virtual ~Operator(); enum class Type { OneToOne, // Frame to Frame (Filter or generator) @@ -56,7 +56,7 @@ class Operator { namespace detail { struct ConstructionHelperBase { - ConstructionHelperBase(ftl::Configurable *cfg) : config(cfg) {} + explicit ConstructionHelperBase(ftl::Configurable *cfg) : config(cfg) {} virtual ~ConstructionHelperBase() {} virtual ftl::operators::Operator *make()=0; @@ -65,7 +65,7 @@ struct ConstructionHelperBase { template <typename T> struct ConstructionHelper : public ConstructionHelperBase { - ConstructionHelper(ftl::Configurable *cfg) : ConstructionHelperBase(cfg) {} + explicit ConstructionHelper(ftl::Configurable *cfg) : ConstructionHelperBase(cfg) {} ~ConstructionHelper() {} ftl::operators::Operator *make() override { return new T(config); @@ -87,7 +87,7 @@ struct OperatorNode { class Graph : public ftl::Configurable { public: explicit Graph(nlohmann::json &config); - ~Graph(); + ~Graph(); template <typename T> ftl::Configurable *append(const std::string &name); @@ -99,6 +99,7 @@ class Graph : public ftl::Configurable { private: std::list<ftl::operators::detail::OperatorNode> operators_; std::map<std::string, ftl::Configurable*> configs_; + cudaStream_t stream_; ftl::Configurable *_append(ftl::operators::detail::ConstructionHelperBase*); }; diff --git a/components/operators/include/ftl/operators/opticalflow.hpp b/components/operators/include/ftl/operators/opticalflow.hpp new file mode 100644 index 0000000000000000000000000000000000000000..81afc3914b3a3d31975deca14383f319588209d5 --- /dev/null +++ b/components/operators/include/ftl/operators/opticalflow.hpp @@ -0,0 +1,38 @@ +#pragma once + +#include <ftl/operators/operator.hpp> +#include <opencv2/cudaoptflow.hpp> + +namespace ftl { +namespace operators { + +/* + * Compute Optical flow from Channel::Colour (Left) and save the result in + * Channel::Flow using NVidia Optical Flow 1.0 (via OpenCV wrapper). + */ +class NVOpticalFlow : public ftl::operators::Operator { + public: + explicit NVOpticalFlow(ftl::Configurable*); + ~NVOpticalFlow(); + + inline Operator::Type type() const override { return Operator::Type::OneToOne; } + + bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override; + + protected: + bool init(); + + private: + cv::Size size_; + + // TODO: Colour to Flow always assumed, could also calculate something else? + const ftl::codecs::Channel channel_in_ = ftl::codecs::Channel::Colour; + const ftl::codecs::Channel channel_out_ = ftl::codecs::Channel::Flow; + + cv::Ptr<cv::cuda::NvidiaOpticalFlow_1_0> nvof_; + cv::cuda::GpuMat left_gray_; + cv::cuda::GpuMat left_gray_prev_; +}; + +} +} diff --git a/components/operators/include/ftl/operators/segmentation.hpp b/components/operators/include/ftl/operators/segmentation.hpp new file mode 100644 index 0000000000000000000000000000000000000000..f905ce96966d0f68171ee94d209b6d888581005a --- /dev/null +++ b/components/operators/include/ftl/operators/segmentation.hpp @@ -0,0 +1,40 @@ +#ifndef _FTL_OPERATORS_SEGMENTATION_HPP_ +#define _FTL_OPERATORS_SEGMENTATION_HPP_ + +#include <ftl/operators/operator.hpp> + +namespace ftl { +namespace operators { + +/** + * Generate the cross support regions channel. + */ +class CrossSupport : public ftl::operators::Operator { + public: + explicit CrossSupport(ftl::Configurable*); + ~CrossSupport(); + + inline Operator::Type type() const override { return Operator::Type::OneToOne; } + + bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override; + +}; + +/** + * Visualise the cross support regions channel. + */ +class VisCrossSupport : public ftl::operators::Operator { + public: + explicit VisCrossSupport(ftl::Configurable*); + ~VisCrossSupport(); + + inline Operator::Type type() const override { return Operator::Type::OneToOne; } + + bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override; + +}; + +} +} + +#endif // _FTL_OPERATORS_SEGMENTATION_HPP_ diff --git a/components/operators/include/ftl/operators/smoothing.hpp b/components/operators/include/ftl/operators/smoothing.hpp index b5362ea6f1a6c16de81cdb7c55339058476da09d..6e0c6ff75e382f8a54d99587f545195c70d59e6b 100644 --- a/components/operators/include/ftl/operators/smoothing.hpp +++ b/components/operators/include/ftl/operators/smoothing.hpp @@ -13,17 +13,17 @@ namespace operators { * first derivative. Only the depth channel is used and modified. */ class HFSmoother : public ftl::operators::Operator { - public: - explicit HFSmoother(ftl::Configurable*); - ~HFSmoother(); + public: + explicit HFSmoother(ftl::Configurable*); + ~HFSmoother(); inline Operator::Type type() const override { return Operator::Type::OneToOne; } - bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override; + bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override; - private: - cv::cuda::GpuMat temp_; - ftl::rgbd::Frame frames_[4]; + private: + cv::cuda::GpuMat temp_; + ftl::rgbd::Frame frames_[4]; }; /** @@ -34,13 +34,13 @@ class HFSmoother : public ftl::operators::Operator { * no smoothing. */ class SmoothChannel : public ftl::operators::Operator { - public: - explicit SmoothChannel(ftl::Configurable*); - ~SmoothChannel(); + public: + explicit SmoothChannel(ftl::Configurable*); + ~SmoothChannel(); inline Operator::Type type() const override { return Operator::Type::OneToOne; } - bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override; + bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override; private: ftl::rgbd::Frame temp_[6]; @@ -52,15 +52,15 @@ class SmoothChannel : public ftl::operators::Operator { * Also outputs Depth + Normals. */ class SimpleMLS : public ftl::operators::Operator { - public: - explicit SimpleMLS(ftl::Configurable*); - ~SimpleMLS(); + public: + explicit SimpleMLS(ftl::Configurable*); + ~SimpleMLS(); inline Operator::Type type() const override { return Operator::Type::OneToOne; } - bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override; + bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override; - private: + private: }; /** @@ -68,15 +68,60 @@ class SimpleMLS : public ftl::operators::Operator { * by a simple colour similarity weighting. In practice this is too naive. */ class ColourMLS : public ftl::operators::Operator { - public: - explicit ColourMLS(ftl::Configurable*); - ~ColourMLS(); + public: + explicit ColourMLS(ftl::Configurable*); + ~ColourMLS(); inline Operator::Type type() const override { return Operator::Type::OneToOne; } - bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override; + bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override; - private: + private: +}; + +/** + * Use cross aggregation of neighbors for faster MLS smoothing. Utilises the + * cross support regions and colour weighting. This algorithm is based upon the + * cross aggregation method in the papers below. MLS was adapted into an + * approximate form which is not identical to real MLS as the weights are not + * perfectly correct for each point. The errors are visually minimal and it + * has linear performance wrt radius, rather than quadratic. + * + * Zhang K, Lu J, Lafruit G. Cross-based local stereo matching using orthogonal + * integral images. (2009). + * + * Better explained in: + * X. Mei, X. Sun, M. Zhou et al. On building an accurate stereo matching system + * on graphics hardware. (2011). + * + * The use of colour weighting is done as in: + * C. Kuster et al. Spatio-temporal geometry fusion for multiple hybrid cameras + * using moving least squares surfaces. (2014) + * + * The above paper also indicates the importance of stopping MLS at depth + * boundaries. They use k-means clustering (K=2) of depths, but we are using + * an approach (discontinuity mask) indicated in the following paper combined + * with the cross support regions to define the extent of the MLS neighbourhood. + * + * S. Orts-Escolano et al. Holoportation: Virtual 3D teleportation in real-time. + * (2016). + * + * The use of all these approaches in this combination is novel I believe. + * + */ +class AggreMLS : public ftl::operators::Operator { + public: + explicit AggreMLS(ftl::Configurable*); + ~AggreMLS(); + + inline Operator::Type type() const override { return Operator::Type::OneToOne; } + + bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override; + + private: + ftl::cuda::TextureObject<float4> centroid_horiz_; + ftl::cuda::TextureObject<float4> centroid_vert_; + ftl::cuda::TextureObject<float4> normals_horiz_; }; /** @@ -87,15 +132,15 @@ class ColourMLS : public ftl::operators::Operator { * it can be only a few or even no pixels with a zero smoothing factor. */ class AdaptiveMLS : public ftl::operators::Operator { - public: - explicit AdaptiveMLS(ftl::Configurable*); - ~AdaptiveMLS(); + public: + explicit AdaptiveMLS(ftl::Configurable*); + ~AdaptiveMLS(); inline Operator::Type type() const override { return Operator::Type::OneToOne; } - bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override; + bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override; - private: + private: }; } diff --git a/components/operators/src/antialiasing.cpp b/components/operators/src/antialiasing.cpp new file mode 100644 index 0000000000000000000000000000000000000000..575cc26a5183f38dbaa0c861bac5a0cbe70ace18 --- /dev/null +++ b/components/operators/src/antialiasing.cpp @@ -0,0 +1,29 @@ +#include <ftl/operators/antialiasing.hpp> +#include "antialiasing_cuda.hpp" + +using ftl::operators::FXAA; +using ftl::codecs::Channel; + +FXAA::FXAA(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) { + +} + +FXAA::~FXAA() { + +} + +bool FXAA::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *s, cudaStream_t stream) { + ftl::cuda::fxaa( + in.getTexture<uchar4>(Channel::Colour), + stream + ); + + if (in.hasChannel(Channel::Right)) { + ftl::cuda::fxaa( + in.getTexture<uchar4>(Channel::Right), + stream + ); + } + + return true; +} diff --git a/components/operators/src/antialiasing.cu b/components/operators/src/antialiasing.cu new file mode 100644 index 0000000000000000000000000000000000000000..7ad851a2af3508b6a0728b0081fc00ec879ac088 --- /dev/null +++ b/components/operators/src/antialiasing.cu @@ -0,0 +1,88 @@ +#include "antialiasing_cuda.hpp" + +#define T_PER_BLOCK 8 + +__device__ inline uchar4 toChar(const float4 rgba) { + return make_uchar4(rgba.x*255.0f, rgba.y*255.0f, rgba.z*255.0f, 255); +} + +__global__ void filter_fxaa2(ftl::cuda::TextureObject<uchar4> data) { + + int x = blockIdx.x*blockDim.x + threadIdx.x; + int y = blockIdx.y*blockDim.y + threadIdx.y; + + if(x >= data.width() || y >= data.height()) + { + return; + } + + uchar4 out_color; + cudaTextureObject_t texRef = data.cudaTexture(); + + const float FXAA_SPAN_MAX = 8.0f; + const float FXAA_REDUCE_MUL = 1.0f/8.0f; + const float FXAA_REDUCE_MIN = (1.0f/128.0f); + + float u = x + 0.5f; + float v = y + 0.5f; + + float4 rgbNW = tex2D<float4>( texRef, u-1.0f,v-1.0f); + float4 rgbNE = tex2D<float4>( texRef, u+1.0f,v-1.0f); + float4 rgbSW = tex2D<float4>( texRef, u-1.0f,v+1.0f); + float4 rgbSE = tex2D<float4>( texRef, u+1.0f,v+1.0f); + float4 rgbM = tex2D<float4>( texRef, u,v); + + const float4 luma = make_float4(0.299f, 0.587f, 0.114f,0.0f); + float lumaNW = dot(rgbNW, luma); + float lumaNE = dot(rgbNE, luma); + float lumaSW = dot(rgbSW, luma); + float lumaSE = dot(rgbSE, luma); + float lumaM = dot( rgbM, luma); + + float lumaMin = min(lumaM, min(min(lumaNW, lumaNE), min(lumaSW, lumaSE))); + float lumaMax = max(lumaM, max(max(lumaNW, lumaNE), max(lumaSW, lumaSE))); + + float2 dir; + dir.x = -((lumaNW + lumaNE) - (lumaSW + lumaSE)); + dir.y = ((lumaNW + lumaSW) - (lumaNE + lumaSE)); + + float dirReduce = max((lumaNW + lumaNE + lumaSW + lumaSE) * (0.25f * FXAA_REDUCE_MUL), FXAA_REDUCE_MIN); + + float rcpDirMin = 1.0f/(min(abs(dir.x), abs(dir.y)) + dirReduce); + + + float2 test = dir * rcpDirMin; + dir = clamp(test,-FXAA_SPAN_MAX,FXAA_SPAN_MAX); + + float4 rgbA = (1.0f/2.0f) * ( + tex2D<float4>( texRef,u+ dir.x * (1.0f/3.0f - 0.5f),v+ dir.y * (1.0f/3.0f - 0.5f))+ + tex2D<float4>( texRef,u+ dir.x * (2.0f/3.0f - 0.5f),v+ dir.y * (2.0f/3.0f - 0.5f))); + float4 rgbB = rgbA * (1.0f/2.0f) + (1.0f/4.0f) * ( + tex2D<float4>( texRef,u+ dir.x * (0.0f/3.0f - 0.5f),v+ dir.y * (0.0f/3.0f - 0.5f))+ + tex2D<float4>( texRef,u+ dir.x * (3.0f/3.0f - 0.5f),v+ dir.y * (3.0f/3.0f - 0.5f))); + float lumaB = dot(rgbB, luma); + + if((lumaB < lumaMin) || (lumaB > lumaMax)){ + out_color=toChar(rgbA); + } else { + out_color=toChar(rgbB); + } + + + //surf2Dwrite<uchar4>(out_color, surfaceWrite, x*sizeof(uchar4), y); + + // FIXME: Should not output to same texture object. + data(x,y) = out_color; +} + +void ftl::cuda::fxaa(ftl::cuda::TextureObject<uchar4> &colour, cudaStream_t stream) { + const dim3 gridSize((colour.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (colour.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + filter_fxaa2<<<gridSize, blockSize, 0, stream>>>(colour); + cudaSafeCall( cudaGetLastError() ); + +#ifdef _DEBUG + cudaSafeCall(cudaDeviceSynchronize()); +#endif +} diff --git a/components/operators/src/antialiasing_cuda.hpp b/components/operators/src/antialiasing_cuda.hpp new file mode 100644 index 0000000000000000000000000000000000000000..afe2c5246f43f1027c2cecfe25b368bbb4cf3e20 --- /dev/null +++ b/components/operators/src/antialiasing_cuda.hpp @@ -0,0 +1,14 @@ +#ifndef _FTL_CUDA_ANTIALIASING_HPP_ +#define _FTL_CUDA_ANTIALIASING_HPP_ + +#include <ftl/cuda_common.hpp> + +namespace ftl { +namespace cuda { + +void fxaa(ftl::cuda::TextureObject<uchar4> &colour, cudaStream_t stream); + +} +} + +#endif diff --git a/components/operators/src/clipping.cpp b/components/operators/src/clipping.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b37abe2c33b55b86b697565d1565b523c073d80f --- /dev/null +++ b/components/operators/src/clipping.cpp @@ -0,0 +1,60 @@ +#include <ftl/operators/clipping.hpp> +#include <ftl/cuda/points.hpp> +#include <ftl/utility/matrix_conversion.hpp> + +using ftl::operators::ClipScene; +using ftl::codecs::Channel; +using ftl::rgbd::Format; + +ClipScene::ClipScene(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) { + +} + +ClipScene::~ClipScene() { + +} + +// TODO: Put in common place +static Eigen::Affine3d create_rotation_matrix(float ax, float ay, float az) { + Eigen::Affine3d rx = + Eigen::Affine3d(Eigen::AngleAxisd(ax, Eigen::Vector3d(1, 0, 0))); + Eigen::Affine3d ry = + Eigen::Affine3d(Eigen::AngleAxisd(ay, Eigen::Vector3d(0, 1, 0))); + Eigen::Affine3d rz = + Eigen::Affine3d(Eigen::AngleAxisd(az, Eigen::Vector3d(0, 0, 1))); + return rz * rx * ry; +} + +bool ClipScene::apply(ftl::rgbd::FrameSet &in, ftl::rgbd::FrameSet &out, cudaStream_t stream) { + auto &c = *config(); + float rx = c.value("pitch", 0.0f); + float ry = c.value("yaw", 0.0f); + float rz = c.value("roll", 0.0f); + float x = c.value("x", 0.0f); + float y = c.value("y", 0.0f); + float z = c.value("z", 0.0f); + float width = c.value("width", 1.0f); + float height = c.value("height", 1.0f); + float depth = c.value("depth", 1.0f); + + Eigen::Affine3f r = create_rotation_matrix(rx, ry, rz).cast<float>(); + Eigen::Translation3f trans(Eigen::Vector3f(x,y,z)); + Eigen::Affine3f t(trans); + + ftl::cuda::ClipSpace clip; + clip.origin = MatrixConversion::toCUDA(r.matrix() * t.matrix()); + clip.size = make_float3(width, height, depth); + + for (size_t i=0; i<in.frames.size(); ++i) { + auto &f = in.frames[i]; + auto *s = in.sources[i]; + + auto pose = MatrixConversion::toCUDA(s->getPose().cast<float>()); + + auto sclip = clip; + sclip.origin = sclip.origin * pose; + ftl::cuda::clipping(f.createTexture<float>(Channel::Depth), s->parameters(), sclip, stream); + } + + return true; +} diff --git a/components/operators/src/colours.cpp b/components/operators/src/colours.cpp index 3174c42e41de75e30f852c5bb187266ad26b61aa..9c6fff8b887fc645da71fbc99e018a6dc99b6313 100644 --- a/components/operators/src/colours.cpp +++ b/components/operators/src/colours.cpp @@ -12,6 +12,8 @@ ColourChannels::~ColourChannels() { } bool ColourChannels::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *s, cudaStream_t stream) { + auto cvstream = cv::cuda::StreamAccessor::wrapStream(stream); + // Convert colour from BGR to BGRA if needed if (in.get<cv::cuda::GpuMat>(Channel::Colour).type() == CV_8UC3) { //cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream); @@ -19,8 +21,11 @@ bool ColourChannels::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgb auto &col = in.get<cv::cuda::GpuMat>(Channel::Colour); temp_.create(col.size(), CV_8UC4); cv::cuda::swap(col, temp_); - cv::cuda::cvtColor(temp_,col, cv::COLOR_BGR2BGRA, 0); + cv::cuda::cvtColor(temp_,col, cv::COLOR_BGR2BGRA, 0, cvstream); } + //in.resetTexture(Channel::Colour); + in.createTexture<uchar4>(Channel::Colour, true); + return true; } diff --git a/components/operators/src/correspondence.cu b/components/operators/src/correspondence.cu new file mode 100644 index 0000000000000000000000000000000000000000..36e91d9f483d62164b284f43b82443641eadd664 --- /dev/null +++ b/components/operators/src/correspondence.cu @@ -0,0 +1,548 @@ +#include "mvmls_cuda.hpp" +#include <ftl/cuda/weighting.hpp> +#include <ftl/cuda/mask.hpp> + +using ftl::cuda::TextureObject; +using ftl::rgbd::Camera; +using ftl::cuda::Mask; +using ftl::cuda::MvMLSParams; + +#define T_PER_BLOCK 8 + +template<int FUNCTION> +__device__ float weightFunction(const ftl::cuda::MvMLSParams ¶ms, float dweight, float cweight); + +template <> +__device__ inline float weightFunction<0>(const ftl::cuda::MvMLSParams ¶ms, float dweight, float cweight) { + return (params.cost_ratio * (cweight) + (1.0f - params.cost_ratio) * dweight); +} + +template <> +__device__ inline float weightFunction<1>(const ftl::cuda::MvMLSParams ¶m, float dweight, float cweight) { + return (cweight * cweight * dweight); +} + +template <> +__device__ inline float weightFunction<2>(const ftl::cuda::MvMLSParams ¶m, float dweight, float cweight) { + return (dweight * dweight * cweight); +} + +template <> +__device__ inline float weightFunction<3>(const ftl::cuda::MvMLSParams ¶ms, float dweight, float cweight) { + return (dweight == 0.0f) ? 0.0f : (params.cost_ratio * (cweight) + (1.0f - params.cost_ratio) * dweight); +} + +template <> +__device__ inline float weightFunction<4>(const ftl::cuda::MvMLSParams ¶ms, float dweight, float cweight) { + return cweight; +} + +template <> +__device__ inline float weightFunction<5>(const ftl::cuda::MvMLSParams ¶ms, float dweight, float cweight) { + return (cweight > 0.0f) ? dweight : 0.0f; +} + +template<int COR_STEPS, int FUNCTION> +__global__ void corresponding_point_kernel( + TextureObject<float> d1, + TextureObject<float> d2, + TextureObject<uchar4> c1, + TextureObject<uchar4> c2, + TextureObject<short2> screenOut, + TextureObject<float> conf, + TextureObject<int> mask, + float4x4 pose1, + float4x4 pose1_inv, + float4x4 pose2, // Inverse + Camera cam1, + Camera cam2, ftl::cuda::MvMLSParams params) { + + // Each warp picks point in p1 + //const int tid = (threadIdx.x + threadIdx.y * blockDim.x); + const int x = (blockIdx.x*blockDim.x + threadIdx.x); // / WARP_SIZE; + const int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x >= 0 && y >=0 && x < screenOut.width() && y < screenOut.height()) { + screenOut(x,y) = make_short2(-1,-1); + + //const float3 world1 = make_float3(p1.tex2D(x, y)); + const float depth1 = d1.tex2D(x,y); //(pose1_inv * world1).z; // Initial starting depth + if (depth1 < cam1.minDepth || depth1 > cam1.maxDepth) return; + + // TODO: Temporary hack to ensure depth1 is present + //const float4 temp = vout.tex2D(x,y); + //vout(x,y) = make_float4(depth1, 0.0f, temp.z, temp.w); + + const float3 world1 = pose1 * cam1.screenToCam(x,y,depth1); + + const auto colour1 = c1.tex2D((float)x+0.5f, (float)y+0.5f); + + //float bestdepth = 0.0f; + short2 bestScreen = make_short2(-1,-1); + float bestdepth = 0.0f; + float bestdepth2 = 0.0f; + float bestweight = 0.0f; + float bestcolour = 0.0f; + float bestdweight = 0.0f; + float totalcolour = 0.0f; + int count = 0; + float contrib = 0.0f; + + const float step_interval = params.range / (COR_STEPS / 2); + + const float3 rayStep_world = pose1.getFloat3x3() * cam1.screenToCam(x,y,step_interval); + const float3 rayStart_2 = pose2 * world1; + const float3 rayStep_2 = pose2.getFloat3x3() * rayStep_world; + + // Project to p2 using cam2 + // Each thread takes a possible correspondence and calculates a weighting + //const int lane = tid % WARP_SIZE; + for (int i=0; i<COR_STEPS; ++i) { + const int j = i - (COR_STEPS/2); + const float depth_adjust = (float)j * step_interval; + + // Calculate adjusted depth 3D point in camera 2 space + const float3 worldPos = world1 + j * rayStep_world; //(pose1 * cam1.screenToCam(x, y, depth_adjust)); + const float3 camPos = rayStart_2 + j * rayStep_2; //pose2 * worldPos; + const float2 screen = cam2.camToScreen<float2>(camPos); + + float weight = (screen.x >= cam2.width || screen.y >= cam2.height) ? 0.0f : 1.0f; + + // Generate a colour correspondence value + const auto colour2 = c2.tex2D(screen.x, screen.y); + const float cweight = ftl::cuda::colourWeighting(colour1, colour2, params.colour_smooth); + + // Generate a depth correspondence value + const float depth2 = d2.tex2D(int(screen.x+0.5f), int(screen.y+0.5f)); + + if (FUNCTION == 1) { + weight *= ftl::cuda::weighting(fabs(depth2 - camPos.z), cweight*params.spatial_smooth); + } else { + const float dweight = ftl::cuda::weighting(fabs(depth2 - camPos.z), params.spatial_smooth); + weight *= weightFunction<FUNCTION>(params, dweight, cweight); + } + //const float dweight = ftl::cuda::weighting(fabs(depth_adjust), 10.0f*params.range); + + //weight *= weightFunction<FUNCTION>(params, dweight, cweight); + + ++count; + contrib += weight; + bestcolour = max(cweight, bestcolour); + //bestdweight = max(dweight, bestdweight); + totalcolour += cweight; + bestdepth = (weight > bestweight) ? depth_adjust : bestdepth; + //bestdepth2 = (weight > bestweight) ? camPos.z : bestdepth2; + //bestScreen = (weight > bestweight) ? make_short2(screen.x+0.5f, screen.y+0.5f) : bestScreen; + bestweight = max(bestweight, weight); + //bestweight = weight; + //bestdepth = depth_adjust; + //bestScreen = make_short2(screen.x+0.5f, screen.y+0.5f); + //} + } + + const float avgcolour = totalcolour/(float)count; + const float confidence = bestcolour / totalcolour; //bestcolour - avgcolour; + + //Mask m(mask.tex2D(x,y)); + + //if (bestweight > 0.0f) { + float old = conf.tex2D(x,y); + + if (bestweight * confidence > old) { + d1(x,y) = 0.4f*bestdepth + depth1; + //d2(bestScreen.x, bestScreen.y) = bestdepth2; + //screenOut(x,y) = bestScreen; + conf(x,y) = bestweight * confidence; + } + //} + + // If a good enough match is found, mark dodgy depth as solid + //if ((m.isFilled() || m.isDiscontinuity()) && (bestweight > params.match_threshold)) mask(x,y) = 0; + } +} + +void ftl::cuda::correspondence( + TextureObject<float> &d1, + TextureObject<float> &d2, + TextureObject<uchar4> &c1, + TextureObject<uchar4> &c2, + TextureObject<short2> &screen, + TextureObject<float> &conf, + TextureObject<int> &mask, + float4x4 &pose1, + float4x4 &pose1_inv, + float4x4 &pose2, + const Camera &cam1, + const Camera &cam2, const MvMLSParams ¶ms, int func, + cudaStream_t stream) { + + const dim3 gridSize((d1.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (d1.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + //printf("COR SIZE %d,%d\n", p1.width(), p1.height()); + + switch (func) { + case 0: corresponding_point_kernel<16,0><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose1, pose1_inv, pose2, cam1, cam2, params); break; + case 1: corresponding_point_kernel<16,1><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose1, pose1_inv, pose2, cam1, cam2, params); break; + case 2: corresponding_point_kernel<16,2><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose1, pose1_inv, pose2, cam1, cam2, params); break; + case 3: corresponding_point_kernel<16,3><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose1, pose1_inv, pose2, cam1, cam2, params); break; + case 4: corresponding_point_kernel<16,4><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose1, pose1_inv, pose2, cam1, cam2, params); break; + case 5: corresponding_point_kernel<16,5><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose1, pose1_inv, pose2, cam1, cam2, params); break; + } + + cudaSafeCall( cudaGetLastError() ); +} + +// ==== Remove zero-confidence ================================================= + +__global__ void zero_confidence_kernel( + TextureObject<float> conf, + TextureObject<float> depth) { + + const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; + const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x < depth.width() && y < depth.height()) { + const float c = conf.tex2D((int)x,(int)y); + + if (c == 0.0f) { + depth(x,y) = 1000.0f; + } + } +} + +void ftl::cuda::zero_confidence(TextureObject<float> &conf, TextureObject<float> &depth, cudaStream_t stream) { + const dim3 gridSize((depth.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + zero_confidence_kernel<<<gridSize, blockSize, 0, stream>>>(conf, depth); + cudaSafeCall( cudaGetLastError() ); +} + + +// ==== MultiViewMLS Aggregate ================================================= + +__device__ inline short3 getScreenPos(int x, int y, float d, const Camera &cam1, const Camera &cam2, const float4x4 &transform) { + const float3 campos = transform * cam1.screenToCam(x,y,d); + const int2 screen = cam2.camToScreen<int2>(campos); + return make_short3(screen.x, screen.y, campos.z); +} + +__device__ inline short2 packScreen(int x, int y, int id) { + return make_short2((id << 12) + x, y); +} + +__device__ inline short2 packScreen(const short3 &p, int id) { + return make_short2((id << 12) + p.x, p.y); +} + +__device__ inline int supportSize(uchar4 support) { + return (support.x+support.y) * (support.z+support.w); +} + +__device__ inline short2 choosePoint(uchar4 sup1, uchar4 sup2, float dot1, float dot2, short2 screen1, short2 screen2) { + //return (float(supportSize(sup2))*dot1 > float(supportSize(sup1))*dot2) ? screen2 : screen1; + return (dot1 > dot2) ? screen2 : screen1; +} + +__device__ inline int unpackCameraID(short2 p) { + return p.x >> 12; +} + +/** + * Identify which source has the best support region for a given pixel. + */ +__global__ void best_sources_kernel( + TextureObject<float4> normals1, + TextureObject<float4> normals2, + TextureObject<uchar4> support1, + TextureObject<uchar4> support2, + TextureObject<float> depth1, + TextureObject<float> depth2, + TextureObject<short2> screen, + float4x4 transform, + //float3x3 transformR, + ftl::rgbd::Camera cam1, + ftl::rgbd::Camera cam2, + int id1, + int id2) { + + const int x = (blockIdx.x*blockDim.x + threadIdx.x); + const int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x >= 0 && y >= 0 && x < screen.width() && y < screen.height()) { + const float d1 = depth1.tex2D(x,y); + + const short3 scr2 = getScreenPos(x, y, d1, cam1, cam2, transform); + short2 bestPoint = packScreen(x,y,0); + + if (scr2.x >= 0 && scr2.y >= 0 && scr2.x < cam2.width && scr2.y < cam2.height) { + uchar4 sup1 = support1.tex2D(x,y); + uchar4 sup2 = support2.tex2D(scr2.x,scr2.y); + const float d2 = depth2.tex2D(scr2.x,scr2.y); + float3 n1 = transform.getFloat3x3() * make_float3(normals1.tex2D(x,y)); + float3 n2 = make_float3(normals2.tex2D(scr2.x,scr2.y)); + + float3 camray = cam2.screenToCam(scr2.x,scr2.y,1.0f); + camray /= length(camray); + const float dot1 = dot(camray, n1); + const float dot2 = dot(camray, n2); + + bestPoint = (fabs(scr2.z - d2) < 0.04f) ? choosePoint(sup1, sup2, dot1, dot2, packScreen(x,y,id1), packScreen(scr2,id2)) : packScreen(x,y,6); + //bestPoint = choosePoint(sup1, sup2, dot1, dot2, packScreen(x,y,id1), packScreen(scr2,id2)); + //bestPoint = (d1 < d2) ? packScreen(x,y,id1) : packScreen(x,y,id2); + + bestPoint = (fabs(scr2.z - d2) < 0.04f) ? packScreen(scr2,id2) : packScreen(scr2,id1); + } + + screen(x,y) = bestPoint; + + /*if (s.x >= 0 && s.y >= 0) { + auto norm1 = make_float3(n1.tex2D(x,y)); + const auto norm2 = make_float3(n2.tex2D(s.x,s.y)); + //n2(s.x,s.y) = norm1; + + float3 cent1 = make_float3(c1.tex2D(x,y)); + const auto cent2 = make_float3(c2.tex2D(s.x,s.y)); + + if (cent2.x+cent2.y+cent2.z > 0.0f && norm2.x+norm2.y+norm2.z > 0.0f) { + norm1 += poseInv1.getFloat3x3() * (pose2.getFloat3x3() * norm2); + n1(x,y) = make_float4(norm1, 0.0f); + cent1 += poseInv1 * (pose2 * cent2); // FIXME: Transform between camera spaces + cent1 /= 2.0f; + c1(x,y) = make_float4(cent1, 0.0f); + //c2(s.x,s.y) = cent1; + + //contribs1(x,y) = contribs1.tex2D(x,y) + 1.0f; + } + // contribs2(s.x,s.y) = contribs2.tex2D(s.x,s.y) + 1.0f; + }*/ + } +} + +void ftl::cuda::best_sources( + ftl::cuda::TextureObject<float4> &normals1, + ftl::cuda::TextureObject<float4> &normals2, + ftl::cuda::TextureObject<uchar4> &support1, + ftl::cuda::TextureObject<uchar4> &support2, + ftl::cuda::TextureObject<float> &depth1, + ftl::cuda::TextureObject<float> &depth2, + ftl::cuda::TextureObject<short2> &screen, + const float4x4 &transform, + const ftl::rgbd::Camera &cam1, + const ftl::rgbd::Camera &cam2, + int id1, + int id2, + cudaStream_t stream) { + + const dim3 gridSize((screen.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (screen.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + best_sources_kernel<<<gridSize, blockSize, 0, stream>>>(normals1, normals2, support1, support2, depth1, depth2, screen, transform, cam1, cam2, id1, id2); + cudaSafeCall( cudaGetLastError() ); +} + +/** + * Identify which source has the best support region for a given pixel. + */ + __global__ void aggregate_sources_kernel( + TextureObject<float4> n1, + TextureObject<float4> n2, + TextureObject<float4> c1, + TextureObject<float4> c2, + TextureObject<float> depth1, + //TextureObject<float> depth2, + //TextureObject<short2> screen, + float4x4 transform, + //float3x3 transformR, + ftl::rgbd::Camera cam1, + ftl::rgbd::Camera cam2) { + + const int x = (blockIdx.x*blockDim.x + threadIdx.x); + const int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x >= 0 && y >= 0 && x < n1.width() && y < n1.height()) { + const float d1 = depth1.tex2D(x,y); + + if (d1 > cam1.minDepth && d1 < cam1.maxDepth) { + //const short3 s = getScreenPos(x, y, d1, cam1, cam2, transform); + const float3 camPos = transform * cam1.screenToCam(x, y, d1); + const int2 s = cam2.camToScreen<int2>(camPos); + + if (s.x >= 0 && s.y >= 0 && s.x < n2.width() && s.y < n2.height()) { + auto norm1 = make_float3(n1.tex2D(x,y)); + const auto norm2 = make_float3(n2.tex2D(s.x,s.y)); + //n2(s.x,s.y) = norm1; + + float3 cent1 = make_float3(c1.tex2D(x,y)); + const auto cent2 = transform.getInverse() * make_float3(c2.tex2D(s.x,s.y)); + + //printf("MERGING %f\n", length(cent2-cent1)); + + if (cent2.x+cent2.y+cent2.z > 0.0f && norm2.x+norm2.y+norm2.z > 0.0f && length(cent2-cent1) < 0.04f) { + norm1 += norm2; + norm1 /= 2.0f; + n1(x,y) = make_float4(norm1, 0.0f); + cent1 += cent2; + cent1 /= 2.0f; + c1(x,y) = make_float4(cent1, 0.0f); + //c2(s.x,s.y) = cent1; + + //contribs1(x,y) = contribs1.tex2D(x,y) + 1.0f; + } + // contribs2(s.x,s.y) = contribs2.tex2D(s.x,s.y) + 1.0f; + } + } + } +} + +void ftl::cuda::aggregate_sources( + ftl::cuda::TextureObject<float4> &n1, + ftl::cuda::TextureObject<float4> &n2, + ftl::cuda::TextureObject<float4> &c1, + ftl::cuda::TextureObject<float4> &c2, + ftl::cuda::TextureObject<float> &depth1, + //ftl::cuda::TextureObject<float> &depth2, + //ftl::cuda::TextureObject<short2> &screen, + const float4x4 &transform, + const ftl::rgbd::Camera &cam1, + const ftl::rgbd::Camera &cam2, + cudaStream_t stream) { + + const dim3 gridSize((n1.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (n1.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + aggregate_sources_kernel<<<gridSize, blockSize, 0, stream>>>(n1, n2, c1, c2, depth1, transform, cam1, cam2); + cudaSafeCall( cudaGetLastError() ); +} + +__device__ static uchar4 HSVtoRGB(int H, float S, float V) { + const float C = S * V; + const float X = C * (1 - fabs(fmodf(H / 60.0f, 2) - 1)); + const float m = V - C; + float Rs, Gs, Bs; + + if(H >= 0 && H < 60) { + Rs = C; + Gs = X; + Bs = 0; + } + else if(H >= 60 && H < 120) { + Rs = X; + Gs = C; + Bs = 0; + } + else if(H >= 120 && H < 180) { + Rs = 0; + Gs = C; + Bs = X; + } + else if(H >= 180 && H < 240) { + Rs = 0; + Gs = X; + Bs = C; + } + else if(H >= 240 && H < 300) { + Rs = X; + Gs = 0; + Bs = C; + } + else { + Rs = C; + Gs = 0; + Bs = X; + } + + return make_uchar4((Bs + m) * 255, (Gs + m) * 255, (Rs + m) * 255, 0); +} + +/** + * Render each pixel is a colour corresponding to the source camera with the + * best support window. + */ + __global__ void vis_best_sources_kernel( + TextureObject<short2> screen, + TextureObject<uchar4> colour, + int myid, + int count) { + + const int x = (blockIdx.x*blockDim.x + threadIdx.x); + const int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x >= 0 && y >= 0 && x < colour.width() && y < colour.height()) { + short2 s = screen.tex2D(x,y); + int id = unpackCameraID(s); + + uchar4 c = HSVtoRGB((360 / count) * id, 0.6f, 0.85f); + if (myid != id) colour(x,y) = c; + //colour(x,y) = c; + } +} + +void ftl::cuda::vis_best_sources( + ftl::cuda::TextureObject<short2> &screen, + ftl::cuda::TextureObject<uchar4> &colour, + int myid, + int count, + cudaStream_t stream) { + + const dim3 gridSize((colour.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (colour.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + vis_best_sources_kernel<<<gridSize, blockSize, 0, stream>>>(screen, colour, myid, count); + cudaSafeCall( cudaGetLastError() ); +} + +/*void ftl::cuda::aggregate_sources( + ftl::cuda::TextureObject<float4> &n1, + ftl::cuda::TextureObject<float4> &n2, + ftl::cuda::TextureObject<float4> &c1, + ftl::cuda::TextureObject<float4> &c2, + ftl::cuda::TextureObject<float> &contribs1, + ftl::cuda::TextureObject<float> &contribs2, + ftl::cuda::TextureObject<short2> &screen, + const float4x4 &poseInv1, + const float4x4 &pose2, + cudaStream_t stream) { + + const dim3 gridSize((screen.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (screen.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + aggregate_sources_kernel<<<gridSize, blockSize, 0, stream>>>(n1, n2, c1, c2, contribs1, contribs2, screen, poseInv1, pose2); + cudaSafeCall( cudaGetLastError() ); +}*/ + +// ==== Normalise aggregations ================================================= + +__global__ void normalise_aggregations_kernel( + TextureObject<float4> norms, + TextureObject<float4> cents, + TextureObject<float> contribs) { + + const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; + const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x < norms.width() && y < norms.height()) { + const float contrib = contribs.tex2D((int)x,(int)y); + + const auto a = norms.tex2D((int)x,(int)y); + const auto b = cents.tex2D(x,y); + //const float4 normal = normals.tex2D((int)x,(int)y); + + //out(x,y) = (contrib == 0.0f) ? make<B>(a) : make<B>(a / contrib); + + if (contrib > 0.0f) { + norms(x,y) = a / (contrib+1.0f); + cents(x,y) = b / (contrib+1.0f); + } + } +} + +void ftl::cuda::normalise_aggregations(TextureObject<float4> &norms, TextureObject<float4> ¢s, TextureObject<float> &contribs, cudaStream_t stream) { + const dim3 gridSize((norms.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (norms.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + normalise_aggregations_kernel<<<gridSize, blockSize, 0, stream>>>(norms, cents, contribs); + cudaSafeCall( cudaGetLastError() ); +} + diff --git a/components/operators/src/disparity/bilateral_filter.cpp b/components/operators/src/disparity/bilateral_filter.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8c91fac6f317250f7a8af9b85a0c975e217bdfe7 --- /dev/null +++ b/components/operators/src/disparity/bilateral_filter.cpp @@ -0,0 +1,35 @@ +#include <ftl/operators/disparity.hpp> + +using cv::cuda::GpuMat; +using cv::Size; + +using ftl::codecs::Channel; +using ftl::operators::DisparityBilateralFilter; + +DisparityBilateralFilter::DisparityBilateralFilter(ftl::Configurable* cfg) : + ftl::operators::Operator(cfg) { + + scale_ = 16.0; + n_disp_ = cfg->value("n_disp", 256); + radius_ = cfg->value("radius", 7); + iter_ = cfg->value("iter", 13); + filter_ = cv::cuda::createDisparityBilateralFilter(n_disp_ * scale_, radius_, iter_); +} + +bool DisparityBilateralFilter::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, + ftl::rgbd::Source *src, cudaStream_t stream) { + + if (!in.hasChannel(Channel::Disparity) || !in.hasChannel(Channel::Colour)) { + return false; + } + auto cvstream = cv::cuda::StreamAccessor::wrapStream(stream); + const GpuMat &rgb = in.get<GpuMat>(Channel::Colour); + GpuMat &disp_in = in.get<GpuMat>(Channel::Disparity); + GpuMat &disp_out = out.create<GpuMat>(Channel::Disparity); + disp_out.create(disp_in.size(), disp_in.type()); + + disp_in.convertTo(disp_int_, CV_16SC1, scale_, cvstream); + filter_->apply(disp_int_, rgb, disp_int_result_, cvstream); + disp_int_result_.convertTo(disp_out, disp_in.type(), 1.0/scale_, cvstream); + return true; +} \ No newline at end of file diff --git a/components/operators/src/disparity/cuda.hpp b/components/operators/src/disparity/cuda.hpp new file mode 100644 index 0000000000000000000000000000000000000000..d4cf06aaec772869ee3e8f2e8fafab9157115f57 --- /dev/null +++ b/components/operators/src/disparity/cuda.hpp @@ -0,0 +1,15 @@ +#include <ftl/cuda_common.hpp> + +namespace ftl { +namespace cuda { + +void disparity_to_depth(const cv::cuda::GpuMat &disparity, cv::cuda::GpuMat &depth, + const ftl::rgbd::Camera &c, cudaStream_t &stream); + + +void optflow_filter(cv::cuda::GpuMat &disp, const cv::cuda::GpuMat &optflow, + cv::cuda::GpuMat &history, int n_max, float threshold, + cv::cuda::Stream &stream); + +} +} diff --git a/components/rgbd-sources/src/algorithms/disp2depth.cu b/components/operators/src/disparity/disp2depth.cu similarity index 74% rename from components/rgbd-sources/src/algorithms/disp2depth.cu rename to components/operators/src/disparity/disp2depth.cu index 4b707c4795017cdf38999cd6606d53ff4870ccb5..86a3fc290967fc0a64dd1a61df61badaef0aa833 100644 --- a/components/rgbd-sources/src/algorithms/disp2depth.cu +++ b/components/operators/src/disparity/disp2depth.cu @@ -16,12 +16,12 @@ __global__ void d2d_kernel(cv::cuda::PtrStepSz<float> disp, cv::cuda::PtrStepSz< namespace ftl { namespace cuda { void disparity_to_depth(const cv::cuda::GpuMat &disparity, cv::cuda::GpuMat &depth, - const ftl::rgbd::Camera &c, cv::cuda::Stream &stream) { + const ftl::rgbd::Camera &c, cudaStream_t &stream) { dim3 grid(1,1,1); - dim3 threads(128, 1, 1); - grid.x = cv::cuda::device::divUp(disparity.cols, 128); + dim3 threads(128, 1, 1); + grid.x = cv::cuda::device::divUp(disparity.cols, 128); grid.y = cv::cuda::device::divUp(disparity.rows, 1); - d2d_kernel<<<grid, threads, 0, cv::cuda::StreamAccessor::getStream(stream)>>>( + d2d_kernel<<<grid, threads, 0, stream>>>( disparity, depth, c); cudaSafeCall( cudaGetLastError() ); } diff --git a/components/operators/src/disparity/disparity_to_depth.cpp b/components/operators/src/disparity/disparity_to_depth.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5951802f0adc960d387a20a4f7f77abd264fb6e8 --- /dev/null +++ b/components/operators/src/disparity/disparity_to_depth.cpp @@ -0,0 +1,22 @@ +#include "ftl/operators/disparity.hpp" +#include "disparity/cuda.hpp" + +using ftl::operators::DisparityToDepth; +using ftl::codecs::Channel; + +using cv::cuda::GpuMat; + +bool DisparityToDepth::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, + ftl::rgbd::Source *src, cudaStream_t stream) { + + if (!in.hasChannel(Channel::Disparity)) { return false; } + + const auto params = src->parameters(); + const GpuMat &disp = in.get<GpuMat>(Channel::Disparity); + + GpuMat &depth = out.create<GpuMat>(Channel::Depth); + depth.create(disp.size(), CV_32FC1); + + ftl::cuda::disparity_to_depth(disp, depth, params, stream); + return true; +} \ No newline at end of file diff --git a/components/operators/src/disparity/fixstars_sgm.cpp b/components/operators/src/disparity/fixstars_sgm.cpp new file mode 100644 index 0000000000000000000000000000000000000000..79a18685ee3ac8732f6721358f8f9d7dc5be4da9 --- /dev/null +++ b/components/operators/src/disparity/fixstars_sgm.cpp @@ -0,0 +1,136 @@ +#include <loguru.hpp> + +#include "ftl/operators/disparity.hpp" + +using cv::Size; +using cv::cuda::GpuMat; + +using ftl::rgbd::Format; +using ftl::codecs::Channel; +using ftl::rgbd::Frame; +using ftl::rgbd::Source; +using ftl::operators::FixstarsSGM; + +FixstarsSGM::FixstarsSGM(ftl::Configurable* cfg) : + ftl::operators::Operator(cfg) { + + ssgm_ = nullptr; + size_ = Size(0, 0); + + uniqueness_ = cfg->value("uniqueness", 0.95f); + P1_ = cfg->value("P1", 10); + P2_ = cfg->value("P2", 120); + max_disp_ = cfg->value("max_disp", 256); + + if (uniqueness_ < 0.0 || uniqueness_ > 1.0) { + uniqueness_ = 1.0; + LOG(WARNING) << "Invalid uniqueness, using default (1.0)"; + } + + if (P1_ <= 0 ) { + P1_ = 10; + LOG(WARNING) << "Invalid value for P1, using default (10)"; + } + + if (P2_ < P1_) { + P2_ = P1_; + LOG(WARNING) << "Invalid value for P2, using value of P1 (" << P1_ << ")"; + } + + if (!(max_disp_ == 256 || max_disp_ == 128)) { + max_disp_ = 256; + LOG(WARNING) << "Invalid value for max_disp, using default value (256)"; + } + + cfg->on("P1", [this, cfg](const ftl::config::Event&) { + int P1 = cfg->value("P1", 0); + if (P1 <= 0) { + LOG(WARNING) << "Invalid value for P1 (" << P1 << ")"; + } + else { + P1_ = P1; + updateParameters(); + } + }); + + cfg->on("P2", [this, cfg](const ftl::config::Event&) { + int P2 = cfg->value("P2", 0); + if (P2 < P1_) { + LOG(WARNING) << "Invalid value for P2 (" << P2 << ")"; + } + else { + P2_ = P2; + updateParameters(); + } + }); + + cfg->on("uniqueness", [this, cfg](const ftl::config::Event&) { + double uniqueness = cfg->value("uniqueness", 0.0); + if (uniqueness < 0.0 || uniqueness > 1.0) { + LOG(WARNING) << "Invalid value for uniqueness (" << uniqueness << ")"; + } + else { + uniqueness_ = uniqueness; + updateParameters(); + } + }); +} + +FixstarsSGM::~FixstarsSGM() { + if (ssgm_) { + delete ssgm_; + } +} + +bool FixstarsSGM::init() { + if (size_ == Size(0, 0)) { return false; } + if (ssgm_) { delete ssgm_; } + lbw_.create(size_, CV_8UC1); + rbw_.create(size_, CV_8UC1); + disp_int_.create(size_, CV_16SC1); + + ssgm_ = new sgm::StereoSGM(size_.width, size_.height, max_disp_, 8, 16, + lbw_.step, disp_int_.step / sizeof(short), + sgm::EXECUTE_INOUT_CUDA2CUDA, + sgm::StereoSGM::Parameters(P1_, P2_, uniqueness_, true) + ); + + return true; +} + +bool FixstarsSGM::updateParameters() { + if (ssgm_ == nullptr) { return false; } + return this->ssgm_->updateParameters( + sgm::StereoSGM::Parameters(P1_, P2_, uniqueness_, true)); +} + +bool FixstarsSGM::apply(Frame &in, Frame &out, Source *src, cudaStream_t stream) { + if (!in.hasChannel(Channel::Left) || !in.hasChannel(Channel::Right)) { + return false; + } + + const auto &l = in.get<GpuMat>(Channel::Left); + const auto &r = in.get<GpuMat>(Channel::Right); + + if (l.size() != size_) { + size_ = l.size(); + if (!init()) { return false; } + } + + auto &disp = out.create<GpuMat>(Channel::Disparity, Format<float>(l.size())); + + auto cvstream = cv::cuda::StreamAccessor::wrapStream(stream); + cv::cuda::cvtColor(l, lbw_, cv::COLOR_BGR2GRAY, 0, cvstream); + cv::cuda::cvtColor(r, rbw_, cv::COLOR_BGR2GRAY, 0, cvstream); + + cvstream.waitForCompletion(); + ssgm_->execute(lbw_.data, rbw_.data, disp_int_.data); + + // GpuMat left_pixels(dispt_, cv::Rect(0, 0, max_disp_, dispt_.rows)); + // left_pixels.setTo(0); + + cv::cuda::threshold(disp_int_, disp_int_, 4096.0f, 0.0f, cv::THRESH_TOZERO_INV, cvstream); + + disp_int_.convertTo(disp, CV_32F, 1.0f / 16.0f, cvstream); + return true; +} diff --git a/components/operators/src/disparity/optflow_smoothing.cpp b/components/operators/src/disparity/optflow_smoothing.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5304da7dc7fc4d7c5e0e4531c90851c3f41a37a4 --- /dev/null +++ b/components/operators/src/disparity/optflow_smoothing.cpp @@ -0,0 +1,95 @@ +#include <loguru.hpp> + +#include "ftl/operators/disparity.hpp" +#include "ftl/offilter.hpp" +#include "disparity/cuda.hpp" + +#ifdef HAVE_OPTFLOW + +using ftl::operators::OpticalFlowTemporalSmoothing; + +using ftl::codecs::Channel; +using ftl::rgbd::Frame; +using ftl::rgbd::Source; + +using cv::Mat; +using cv::Size; + +using std::vector; + +template<typename T> static bool inline isValidDisparity(T d) { return (0.0 < d) && (d < 256.0); } // TODO + +OpticalFlowTemporalSmoothing::OpticalFlowTemporalSmoothing(ftl::Configurable* cfg) : + ftl::operators::Operator(cfg) { + + size_ = Size(0, 0); + + n_max_ = cfg->value("history_size", 7); + if (n_max_ < 1) { + LOG(WARNING) << "History length must be larger than 0, using default (0)"; + n_max_ = 7; + } + + if (n_max_ > 32) { + // TODO: cuda kernel uses fixed size buffer + LOG(WARNING) << "History length can't be larger than 32 (TODO)"; + n_max_ = 32; + } + + threshold_ = cfg->value("threshold", 1.0); + + cfg->on("threshold", [this, &cfg](const ftl::config::Event&) { + float threshold = cfg->value("threshold", 1.0); + if (threshold < 0.0) { + LOG(WARNING) << "invalid threshold " << threshold << ", value must be positive"; + } + else { + threshold_ = threshold; + init(); + } + }); + + cfg->on("history_size", [this, &cfg](const ftl::config::Event&) { + int n_max = cfg->value("history_size", 1.0); + + if (n_max < 1) { + LOG(WARNING) << "History length must be larger than 0"; + } + else if (n_max_ > 32) { + // TODO: cuda kernel uses fixed size buffer + LOG(WARNING) << "History length can't be larger than 32 (TODO)"; + } + else { + n_max_ = n_max; + init(); + } + }); +} + +OpticalFlowTemporalSmoothing::~OpticalFlowTemporalSmoothing() {} + +bool OpticalFlowTemporalSmoothing::init() { + if (size_ == Size(0, 0)) { return false; } + history_.create(cv::Size(size_.width * n_max_, size_.height), CV_32FC1); + history_.setTo(0.0); + return true; +} + +bool OpticalFlowTemporalSmoothing::apply(Frame &in, Frame &out, Source *src, cudaStream_t stream) { + if (!out.hasChannel(channel_) || !in.hasChannel(Channel::Flow)) { return false; } + + auto cvstream = cv::cuda::StreamAccessor::wrapStream(stream); + const cv::cuda::GpuMat &optflow = in.get<cv::cuda::GpuMat>(Channel::Flow); + cv::cuda::GpuMat &data = out.get<cv::cuda::GpuMat>(channel_); + + if (data.size() != size_) { + size_ = data.size(); + if (!init()) { return false; } + } + + ftl::cuda::optflow_filter(data, optflow, history_, n_max_, threshold_, cvstream); + + return true; +} + +#endif // HAVE_OPTFLOW diff --git a/components/rgbd-sources/src/algorithms/offilter.cu b/components/operators/src/disparity/optflow_smoothing.cu similarity index 97% rename from components/rgbd-sources/src/algorithms/offilter.cu rename to components/operators/src/disparity/optflow_smoothing.cu index 6feee5ae1daf9fc8b4b165370dbb8646b1d7b8a1..a7633036fae1e9d20c6a52ae5617466f1b234202 100644 --- a/components/rgbd-sources/src/algorithms/offilter.cu +++ b/components/operators/src/disparity/optflow_smoothing.cu @@ -1,7 +1,9 @@ #include <ftl/cuda_common.hpp> #include <ftl/rgbd/camera.hpp> #include <opencv2/core/cuda_stream_accessor.hpp> -#include <qsort.h> + +#include "disparity/qsort.h" +#include "disparity/cuda.hpp" __device__ void quicksort(float A[], size_t n) { diff --git a/components/rgbd-sources/include/qsort.h b/components/operators/src/disparity/qsort.h similarity index 100% rename from components/rgbd-sources/include/qsort.h rename to components/operators/src/disparity/qsort.h diff --git a/components/operators/src/filling.cpp b/components/operators/src/filling.cpp new file mode 100644 index 0000000000000000000000000000000000000000..da7efea6bdf085eb797bc56500f4e783e30f6162 --- /dev/null +++ b/components/operators/src/filling.cpp @@ -0,0 +1,52 @@ +#include <ftl/operators/filling.hpp> + +#include "filling_cuda.hpp" + +using ftl::operators::ScanFieldFill; +using ftl::operators::CrossSupportFill; +using ftl::codecs::Channel; + +ScanFieldFill::ScanFieldFill(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) { + +} + +ScanFieldFill::~ScanFieldFill() { + +} + +bool ScanFieldFill::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *s, cudaStream_t stream) { + float thresh = config()->value("threshold", 0.1f); + + ftl::cuda::scan_field_fill( + in.createTexture<float>(Channel::Depth), + in.createTexture<float>(Channel::Depth), + in.createTexture<float>(Channel::Smoothing), + thresh, + s->parameters(), stream + ); + + return true; +} + + +CrossSupportFill::CrossSupportFill(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) { + +} + +CrossSupportFill::~CrossSupportFill() { + +} + +bool CrossSupportFill::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *s, cudaStream_t stream) { + + /*ftl::cuda::filling_csr( + in.createTexture<uchar4>(Channel::Colour2), + in.createTexture<float4>(Channel::Normals), + in.createTexture<float>(Channel::Depth), + in.createTexture<float>(Channel::Depth), + in.createTexture<uchar4>(Channel::Colour), + s->parameters(), 0 + );*/ + + return true; +} diff --git a/components/operators/src/filling.cu b/components/operators/src/filling.cu new file mode 100644 index 0000000000000000000000000000000000000000..7780530ff69ab178c46803a38f05de85d037f033 --- /dev/null +++ b/components/operators/src/filling.cu @@ -0,0 +1,201 @@ +#include "filling_cuda.hpp" + +#define T_PER_BLOCK 8 +#define WARP_SIZE 32 +#define MAX_EDGES 128 + +using ftl::cuda::TextureObject; + +__device__ inline bool isValid(const ftl::rgbd::Camera &camera, float d) { + return d > camera.minDepth && d < camera.maxDepth; +} + +__global__ void scan_field_fill_kernel( + TextureObject<float> depth_in, // Virtual depth map + TextureObject<float> depth_out, // Accumulated output + TextureObject<float> smoothing, + float thresh, + ftl::rgbd::Camera camera) { + + __shared__ int counter; + __shared__ int offsets[MAX_EDGES]; + + //const int x = blockIdx.x*blockDim.x + threadIdx.x; + const int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (y >= depth_in.height()) return; + if (threadIdx.x == 0) counter = 0; + __syncthreads(); + + for (STRIDE_X(x,depth_in.width()-1)) { + + // Use entire block to find depth edges and make a list of them + float d0 = depth_in.tex2D(x,y); + float d1 = depth_in.tex2D(x+1,y); + + if (isValid(camera, d0) != isValid(camera, d1)) { + int ix = atomicAdd(&counter, 1); + if (ix >= MAX_EDGES) break; + offsets[ix] = x; + } + } + + __syncthreads(); + + // For each identified pixel, a thread takes on a filling job + for (int i=threadIdx.x; i<counter; i += blockDim.x) { + // Get smoothing gradient to decide action + int x = offsets[i]; + + float s0 = smoothing.tex2D(x,y); + float d0 = depth_in.tex2D(x,y); + float d1 = depth_in.tex2D(x+1,y); + + // More than 8 pixels from edge + if (s0 < thresh) continue; + //if (s0 < 2.0f / 100.0f) continue; + + /*bool increase = false; + for (int u=1; u<20; ++u) { + float s1 = smoothing.tex2D(x+u,y); + if (s1 != s0) { + increase = s1 > s0; + break; + } + }*/ + + // TODO: Smoothing channel has discrete steps meaning gradient can't be determined? CHECK + // The above would be why some slices are not filled + + //bool fill_right = (isValid(camera, d0) && !increase); + //bool clear_right = (isValid(camera, d1) && !increase); + //bool fill_left = (isValid(camera, d1) && increase); + //bool clear_left = (isValid(camera, d0) && increase); + //bool clear = clear_left || clear_right; + + bool fill_right = isValid(camera, d0); + + //if (s1 == s0 || max(s1,s0) > 0.1f) continue; + + // Set up fill value and direction + //float value = ((clear_left || clear_right) && s0 < thresh) ? 1000.0f : (isValid(camera, d0) ? d0 : d1); + //int dir = (fill_right || clear_right) ? 1 : -1; + + float value = (isValid(camera, d0)) ? d0 : d1; + int dir = (fill_right) ? 1 : -1; + + // TODO: The minimum needs to be search for and not crossed into, the filling + // must stop before the minimum. Requires lookahead. + + /*int end_x = 0; + for (int u=1; u<1000; ++u) { + float s = smoothing(x+u*dir, y); + if (s < thresh) break; + end_x = u; + } + + float gradient = */ + + // Fill + int end_x = 0; + float end_d; + for (int u=1; u<1000; ++u) { + end_d = depth_in(x+u*dir, y); + float s = smoothing(x+u*dir, y); + if ((s < thresh) || (isValid(camera, end_d))) break; + end_x = u; + } + + float gradient = (end_d - value) / (float)end_x; + + for (int u=1; u<end_x; ++u) { + depth_out(x+u*dir,y) = value; + value += gradient; + } + } +} + +void ftl::cuda::scan_field_fill( + TextureObject<float> &depth_in, // Virtual depth map + TextureObject<float> &depth_out, // Accumulated output + TextureObject<float> &smoothing, + float thresh, + const ftl::rgbd::Camera &camera, + cudaStream_t stream) { + + const dim3 gridSize(1, smoothing.height()); + const dim3 blockSize(128, 1); + + scan_field_fill_kernel<<<gridSize, blockSize, 0, stream>>>(depth_in, depth_out, smoothing, thresh, camera); + cudaSafeCall( cudaGetLastError() ); +} + + +//===== Cross Support Region Filling =========================================== + +__global__ void filling_csr_kernel( + TextureObject<uchar4> region, + TextureObject<float4> normals_in, + TextureObject<float> depth_in, // Virtual depth map + TextureObject<float> depth_out, // Accumulated output + TextureObject<uchar4> colour_in, + ftl::rgbd::Camera camera) { + + const int x = blockIdx.x*blockDim.x + threadIdx.x; + const int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x < 0 || y < 0 || x >= depth_in.width() || y >= depth_in.height()) return; + + float3 aX = make_float3(0.0f,0.0f,0.0f); + float3 nX = make_float3(0.0f,0.0f,0.0f); + float contrib = 0.0f; + + float d0 = depth_in.tex2D(x, y); + //depth_out(x,y) = d0; + //normals_out(x,y) = normals_in(x,y); + if (d0 < camera.minDepth || d0 > camera.maxDepth) return; + float3 X = camera.screenToCam((int)(x),(int)(y),d0); + + uchar4 c0 = colour_in.tex2D(x, y); + + // Neighbourhood + uchar4 base = region.tex2D(x,y); + + for (int v=-base.z; v<=base.w; ++v) { + uchar4 baseY = region.tex2D(x,y+v); + + for (int u=-baseY.x; u<=baseY.y; ++u) { + const float d = depth_in.tex2D(x+u, y+v); + if (d < camera.minDepth || d > camera.maxDepth) continue; + + // Point and normal of neighbour + const float3 Xi = camera.screenToCam((int)(x)+u,(int)(y)+v,d); + const float3 Ni = make_float3(normals_in.tex2D((int)(x)+u, (int)(y)+v)); + + + } + } + + +} + +void ftl::cuda::filling_csr( + ftl::cuda::TextureObject<uchar4> ®ion, + ftl::cuda::TextureObject<float4> &normals_in, + ftl::cuda::TextureObject<float> &depth_in, + ftl::cuda::TextureObject<float> &depth_out, + ftl::cuda::TextureObject<uchar4> &colour_in, + const ftl::rgbd::Camera &camera, + cudaStream_t stream) { + + const dim3 gridSize((depth_out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + filling_csr_kernel<<<gridSize, blockSize, 0, stream>>>(region, normals_in, depth_in, depth_out, colour_in, camera); + cudaSafeCall( cudaGetLastError() ); + + + #ifdef _DEBUG + cudaSafeCall(cudaDeviceSynchronize()); + #endif +} diff --git a/components/operators/src/filling_cuda.hpp b/components/operators/src/filling_cuda.hpp new file mode 100644 index 0000000000000000000000000000000000000000..e47781ac7fdd3656809de496dad21d1bb3a48862 --- /dev/null +++ b/components/operators/src/filling_cuda.hpp @@ -0,0 +1,30 @@ +#ifndef _FTL_CUDA_FILLING_HPP_ +#define _FTL_CUDA_FILLING_HPP_ + +#include <ftl/rgbd/camera.hpp> +#include <ftl/cuda_common.hpp> + +namespace ftl { +namespace cuda { + +void scan_field_fill( + ftl::cuda::TextureObject<float> &depth_in, + ftl::cuda::TextureObject<float> &depth_out, + ftl::cuda::TextureObject<float> &smoothing, + float thresh, + const ftl::rgbd::Camera &camera, + cudaStream_t stream); + +void filling_csr( + ftl::cuda::TextureObject<uchar4> ®ion, + ftl::cuda::TextureObject<float4> &normals_in, + ftl::cuda::TextureObject<float> &depth_in, + ftl::cuda::TextureObject<float> &depth_out, + ftl::cuda::TextureObject<uchar4> &colour_in, + const ftl::rgbd::Camera &camera, + cudaStream_t stream); + +} +} + +#endif \ No newline at end of file diff --git a/components/operators/src/mask.cpp b/components/operators/src/mask.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f923f11d06a39df882eaf83289b37296aba0ada5 --- /dev/null +++ b/components/operators/src/mask.cpp @@ -0,0 +1,49 @@ +#include <ftl/operators/mask.hpp> +#include "mask_cuda.hpp" + +using ftl::operators::DiscontinuityMask; +using ftl::operators::CullDiscontinuity; +using ftl::codecs::Channel; +using ftl::rgbd::Format; + +DiscontinuityMask::DiscontinuityMask(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) { + +} + +DiscontinuityMask::~DiscontinuityMask() { + +} + +bool DiscontinuityMask::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *s, cudaStream_t stream) { + int radius = config()->value("radius", 2); + float threshold = config()->value("threshold", 0.1f); + + ftl::cuda::discontinuity( + out.createTexture<int>(Channel::Mask, ftl::rgbd::Format<int>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), + in.createTexture<uchar4>(Channel::Support1), + in.createTexture<float>(Channel::Depth), + s->parameters(), radius, threshold, stream + ); + + return true; +} + + + +CullDiscontinuity::CullDiscontinuity(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) { + +} + +CullDiscontinuity::~CullDiscontinuity() { + +} + +bool CullDiscontinuity::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *s, cudaStream_t stream) { + ftl::cuda::cull_discontinuity( + in.createTexture<int>(Channel::Mask), + out.createTexture<float>(Channel::Depth), + stream + ); + + return true; +} \ No newline at end of file diff --git a/components/operators/src/mask.cu b/components/operators/src/mask.cu new file mode 100644 index 0000000000000000000000000000000000000000..e385f41b14459802dbf52ef85aef2f891eceff08 --- /dev/null +++ b/components/operators/src/mask.cu @@ -0,0 +1,80 @@ +#include "mask_cuda.hpp" + +#define T_PER_BLOCK 8 + +using ftl::cuda::Mask; + +__global__ void discontinuity_kernel(ftl::cuda::TextureObject<int> mask_out, ftl::cuda::TextureObject<uchar4> support, ftl::cuda::TextureObject<float> depth, ftl::rgbd::Camera params, float threshold, int radius) { + const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; + const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x < params.width && y < params.height) { + Mask mask(0); + + const float d = depth.tex2D((int)x, (int)y); + + if (d >= params.minDepth && d <= params.maxDepth) { + /* Orts-Escolano S. et al. 2016. Holoportation: Virtual 3D teleportation in real-time. */ + + // If colour cross support region terminates within the requested + // radius, and the absolute depth difference on the other side is + // greater than threshold, then is is a discontinuity. + // Repeat for left, right, up and down. + const uchar4 sup = support.tex2D((int)x, (int)y); + if (sup.x <= radius) { + float dS = depth.tex2D((int)x - sup.x - radius, (int)y); + if (fabs(dS - d) > threshold) mask.isDiscontinuity(true); + } + if (sup.y <= radius) { + float dS = depth.tex2D((int)x + sup.y + radius, (int)y); + if (fabs(dS - d) > threshold) mask.isDiscontinuity(true); + } + if (sup.z <= radius) { + float dS = depth.tex2D((int)x, (int)y - sup.z - radius); + if (fabs(dS - d) > threshold) mask.isDiscontinuity(true); + } + if (sup.w <= radius) { + float dS = depth.tex2D((int)x, (int)y + sup.w + radius); + if (fabs(dS - d) > threshold) mask.isDiscontinuity(true); + } + } + + mask_out(x,y) = (int)mask; + } +} + +void ftl::cuda::discontinuity(ftl::cuda::TextureObject<int> &mask_out, ftl::cuda::TextureObject<uchar4> &support, ftl::cuda::TextureObject<float> &depth, const ftl::rgbd::Camera ¶ms, int discon, float thresh, cudaStream_t stream) { + const dim3 gridSize((params.width + T_PER_BLOCK - 1)/T_PER_BLOCK, (params.height + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + discontinuity_kernel<<<gridSize, blockSize, 0, stream>>>(mask_out, support, depth, params, thresh, discon); + cudaSafeCall( cudaGetLastError() ); + +#ifdef _DEBUG + cudaSafeCall(cudaDeviceSynchronize()); +#endif +} + + + +__global__ void cull_discontinuity_kernel(ftl::cuda::TextureObject<int> mask, ftl::cuda::TextureObject<float> depth) { + const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; + const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x < depth.width() && y < depth.height()) { + Mask m(mask.tex2D((int)x,(int)y)); + if (m.isDiscontinuity()) depth(x,y) = 1000.0f; + } +} + +void ftl::cuda::cull_discontinuity(ftl::cuda::TextureObject<int> &mask, ftl::cuda::TextureObject<float> &depth, cudaStream_t stream) { + const dim3 gridSize((depth.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + cull_discontinuity_kernel<<<gridSize, blockSize, 0, stream>>>(mask, depth); + cudaSafeCall( cudaGetLastError() ); + +#ifdef _DEBUG + cudaSafeCall(cudaDeviceSynchronize()); +#endif +} diff --git a/components/operators/src/mask_cuda.hpp b/components/operators/src/mask_cuda.hpp new file mode 100644 index 0000000000000000000000000000000000000000..6a02aafdbbdfbbbd200355bddc3c7ba33a605483 --- /dev/null +++ b/components/operators/src/mask_cuda.hpp @@ -0,0 +1,59 @@ +#ifndef _FTL_CUDA_MASK_HPP_ +#define _FTL_CUDA_MASK_HPP_ + +#include <ftl/cuda_common.hpp> +#include <ftl/rgbd/camera.hpp> + +namespace ftl { +namespace cuda { + +/** + * Wrap an int mask value used to flag individual depth pixels. + */ +class Mask { + public: + __device__ inline Mask() : v_(0) {} + __device__ explicit inline Mask(int v) : v_(v) {} + #ifdef __CUDACC__ + __device__ inline Mask(const ftl::cuda::TextureObject<int> &m, int x, int y) : v_(m.tex2D(x,y)) {} + #endif + __device__ inline operator int() const { return v_; } + + __device__ inline bool is(int m) const { return v_ & m; } + + __device__ inline bool isFilled() const { return v_ & kMask_Filled; } + __device__ inline bool isDiscontinuity() const { return v_ & kMask_Discontinuity; } + __device__ inline bool hasCorrespondence() const { return v_ & kMask_Correspondence; } + __device__ inline bool isBad() const { return v_ & kMask_Bad; } + + __device__ inline void isFilled(bool v) { v_ = (v) ? v_ | kMask_Filled : v_ & (~kMask_Filled); } + __device__ inline void isDiscontinuity(bool v) { v_ = (v) ? v_ | kMask_Discontinuity : v_ & (~kMask_Discontinuity); } + __device__ inline void hasCorrespondence(bool v) { v_ = (v) ? v_ | kMask_Correspondence : v_ & (~kMask_Correspondence); } + __device__ inline void isBad(bool v) { v_ = (v) ? v_ | kMask_Bad : v_ & (~kMask_Bad); } + + static constexpr int kMask_Filled = 0x0001; + static constexpr int kMask_Discontinuity = 0x0002; + static constexpr int kMask_Correspondence = 0x0004; + static constexpr int kMask_Bad = 0x0008; + + private: + int v_; +}; + +void discontinuity( + ftl::cuda::TextureObject<int> &mask, + ftl::cuda::TextureObject<uchar4> &support, + ftl::cuda::TextureObject<float> &depth, + const ftl::rgbd::Camera ¶ms, + int radius, float threshold, + cudaStream_t stream); + +void cull_discontinuity( + ftl::cuda::TextureObject<int> &mask, + ftl::cuda::TextureObject<float> &depth, + cudaStream_t stream); + +} +} + +#endif diff --git a/components/operators/src/mls.cu b/components/operators/src/mls.cu index e030d1967695bd71760c0bc2af26706d21f0f2e3..e07178cdf9606444c307f59a2adbb8818b049ed4 100644 --- a/components/operators/src/mls.cu +++ b/components/operators/src/mls.cu @@ -5,6 +5,7 @@ using ftl::cuda::TextureObject; #define T_PER_BLOCK 8 +#define WARP_SIZE 32 // ===== MLS Smooth ============================================================ @@ -131,10 +132,11 @@ void ftl::cuda::mls_smooth( float d0 = depth_in.tex2D(x, y); depth_out(x,y) = d0; + normals_out(x,y) = normals_in(x,y); if (d0 < camera.minDepth || d0 > camera.maxDepth) return; float3 X = camera.screenToCam((int)(x),(int)(y),d0); - uchar4 c0 = colour_in.tex2D(x, y); + float4 c0 = colour_in.tex2D((float)x+0.5f, (float)y+0.5f); // Neighbourhood for (int v=-SEARCH_RADIUS; v<=SEARCH_RADIUS; ++v) { @@ -146,7 +148,9 @@ void ftl::cuda::mls_smooth( const float3 Xi = camera.screenToCam((int)(x)+u,(int)(y)+v,d); const float3 Ni = make_float3(normals_in.tex2D((int)(x)+u, (int)(y)+v)); - const uchar4 c = colour_in.tex2D(x+u, y+v); + if (Ni.x+Ni.y+Ni.z == 0.0f) continue; + + const float4 c = colour_in.tex2D(float(x+u) + 0.5f, float(y+v) + 0.5f); const float cw = ftl::cuda::colourWeighting(c0,c,colour_smoothing); // Gauss approx weighting function using point distance @@ -157,6 +161,8 @@ void ftl::cuda::mls_smooth( contrib += w; } } + + if (contrib == 0.0f) return; nX /= contrib; // Weighted average normal aX /= contrib; // Weighted average point (centroid) @@ -207,6 +213,436 @@ void ftl::cuda::colour_mls_smooth( } +// ===== Colour MLS using cross support region ================================= + +__device__ inline int segmentID(int u, int v) { + if (u < 0 && v < 0) return 0x001; + if (u > 0 && v < 0) return 0x002; + if (u > 0 && v > 0) return 0x004; + if (u < 0 && v > 0) return 0x008; + return 0; +} + +/* + * Smooth depth map using Moving Least Squares. This version uses colour + * similarity weights to adjust the spatial smoothing factor. It is naive in + * that similar colours may exist on both sides of an edge and colours at the + * level of single pixels can be subject to noise. Colour noise should first + * be removed from the image. + */ + template <bool FILLING, int RADIUS> + __global__ void colour_mls_smooth_csr_kernel( + TextureObject<uchar4> region, + TextureObject<float4> normals_in, + TextureObject<float4> normals_out, + TextureObject<float> depth_in, // Virtual depth map + TextureObject<float> depth_out, // Accumulated output + TextureObject<uchar4> colour_in, + float smoothing, + float colour_smoothing, + ftl::rgbd::Camera camera) { + + const int x = blockIdx.x*blockDim.x + threadIdx.x; + const int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x < 0 || y < 0 || x >= depth_in.width() || y >= depth_in.height()) return; + + float3 aX = make_float3(0.0f,0.0f,0.0f); + float3 nX = make_float3(0.0f,0.0f,0.0f); + float contrib = 0.0f; + + float d0 = depth_in.tex2D(x, y); + depth_out(x,y) = d0; + normals_out(x,y) = normals_in(x,y); + if (d0 < camera.minDepth || d0 > camera.maxDepth) { + if(FILLING) d0 = 0.0f; + else return; + } + float3 X = camera.screenToCam((int)(x),(int)(y),d0); + + float4 c0 = colour_in.tex2D((float)x+0.5f, (float)y+0.5f); + + // Neighbourhood + uchar4 base = region.tex2D(x,y); + int segment_check = 0; + + // TODO: Does using a fixed loop size with range test work better? + // Or with warp per pixel version, this would be less of a problem... + // TODO: Do a separate vote fill step? + for (int v=-RADIUS; v<=RADIUS; ++v) { + uchar4 baseY = region.tex2D(x,y+v); + + #pragma unroll + for (int u=-RADIUS; u<=RADIUS; ++u) { + const float d = depth_in.tex2D(x+u, y+v); + //if (d > camera.minDepth && d < camera.maxDepth) { + + float w = (d <= camera.minDepth || d >= camera.maxDepth || u < -baseY.x || u > baseY.y || v < -base.z || v > base.z) ? 0.0f : 1.0f; + + // Point and normal of neighbour + const float3 Xi = camera.screenToCam((int)(x)+u,(int)(y)+v,d); + const float3 Ni = make_float3(normals_in.tex2D((int)(x)+u, (int)(y)+v)); + + // FIXME: Ensure bad normals are removed by setting depth invalid + //if (Ni.x+Ni.y+Ni.z == 0.0f) continue; + + const float4 c = colour_in.tex2D(float(x+u) + 0.5f, float(y+v) + 0.5f); + w *= ftl::cuda::colourWeighting(c0,c,colour_smoothing); + + // Allow missing point to borrow z value + // TODO: This is a bad choice of Z! Perhaps try histogram vote approach + //if (FILLING && d0 == 0.0f) X = camera.screenToCam((int)(x),(int)(y),Xi.z); + + // Gauss approx weighting function using point distance + w = ftl::cuda::spatialWeighting(X,Xi,smoothing*w); + + aX += Xi*w; + nX += Ni*w; + contrib += w; + //if (FILLING && w > 0.0f && v > -base.z+1 && v < base.w-1 && u > -baseY.x+1 && u < baseY.y-1) segment_check |= segmentID(u,v); + //} + } + } + + if (contrib > 0.0f) { + nX /= contrib; // Weighted average normal + aX /= contrib; // Weighted average point (centroid) + + if (FILLING && d0 == 0.0f) { + if (__popc(segment_check) < 3) return; + X = camera.screenToCam((int)(x),(int)(y),aX.z); + } + + // Signed-Distance Field function + float fX = nX.x * (X.x - aX.x) + nX.y * (X.y - aX.y) + nX.z * (X.z - aX.z); + + // Calculate new point using SDF function to adjust depth (and position) + X = X - nX * fX; + + //uint2 screen = camera.camToScreen<uint2>(X); + + //if (screen.x < depth_out.width() && screen.y < depth_out.height()) { + // depth_out(screen.x,screen.y) = X.z; + //} + depth_out(x,y) = X.z; + normals_out(x,y) = make_float4(nX / length(nX), 0.0f); + } +} + +void ftl::cuda::colour_mls_smooth_csr( + ftl::cuda::TextureObject<uchar4> ®ion, + ftl::cuda::TextureObject<float4> &normals_in, + ftl::cuda::TextureObject<float4> &normals_out, + ftl::cuda::TextureObject<float> &depth_in, + ftl::cuda::TextureObject<float> &depth_out, + ftl::cuda::TextureObject<uchar4> &colour_in, + float smoothing, + float colour_smoothing, + bool filling, + const ftl::rgbd::Camera &camera, + cudaStream_t stream) { + + const dim3 gridSize((depth_out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + if (filling) { + colour_mls_smooth_csr_kernel<true,5><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, depth_in, depth_out, colour_in, smoothing, colour_smoothing, camera); + } else { + colour_mls_smooth_csr_kernel<false,5><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, depth_in, depth_out, colour_in, smoothing, colour_smoothing, camera); + } + + cudaSafeCall( cudaGetLastError() ); + + + #ifdef _DEBUG + cudaSafeCall(cudaDeviceSynchronize()); + #endif +} + + +// ===== Cross Aggregate MLS =================================================== + +/* + * Smooth depth map using Moving Least Squares. This version uses colour + * similarity weights to adjust the spatial smoothing factor. It also uses + * cross support windows to prevent unwanted edge crossing. This function does + * the weighted aggregation of centroids and normals in the horizontal + * direction. + */ + template <int RADIUS> + __global__ void mls_aggr_horiz_kernel( + TextureObject<uchar4> region, + TextureObject<float4> normals_in, + TextureObject<float4> normals_out, + TextureObject<float> depth_in, // Virtual depth map + TextureObject<float4> centroid_out, // Accumulated output + TextureObject<uchar4> colour_in, + float smoothing, + float colour_smoothing, + ftl::rgbd::Camera camera) { + + const int x = blockIdx.x*blockDim.x + threadIdx.x; + const int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x < 0 || y < 0 || x >= depth_in.width() || y >= depth_in.height()) return; + + float3 aX = make_float3(0.0f,0.0f,0.0f); + float3 nX = make_float3(0.0f,0.0f,0.0f); + float contrib = 0.0f; + + float d0 = depth_in.tex2D(x, y); + + // Note: x and y flipped as output is rotated. + centroid_out(y,x) = make_float4(0.0f); + normals_out(y,x) = normals_in(x,y); + + if (d0 <= camera.minDepth || d0 >= camera.maxDepth) return; + + float3 X = camera.screenToCam((int)(x),(int)(y),d0); + float4 c0 = colour_in.tex2D((float)x+0.5f, (float)y+0.5f); + + // Cross-Support Neighbourhood + uchar4 base = region.tex2D(x,y); + + #pragma unroll + for (int u=-RADIUS; u<=RADIUS; ++u) { + const float d = depth_in.tex2D(x+u, y); + + // If outside of cross support range, set weight to 0 to ignore + float w = (d <= camera.minDepth || d >= camera.maxDepth || u < -base.x || u > base.y) ? 0.0f : 1.0f; + + // Point and normal of neighbour + const float3 Xi = camera.screenToCam((int)(x)+u,(int)(y),d); + const float3 Ni = make_float3(normals_in.tex2D((int)(x)+u, (int)(y))); + + // Bad or missing normals should be ignored + if (Ni.x+Ni.y+Ni.z == 0.0f) w = 0.0f; + + // Gauss approx colour weighting. + const float4 c = colour_in.tex2D(float(x+u) + 0.5f, float(y) + 0.5f); + w *= ftl::cuda::colourWeighting(c0,c,colour_smoothing); + + // Gauss approx weighting function using point distance + w = ftl::cuda::spatialWeighting(X,Xi,smoothing*w); + + aX += Xi*w; + nX += Ni*w; + contrib += w; + } + + if (contrib > 0.0f) { + nX /= contrib; // Weighted average normal + aX /= contrib; // Weighted average point (centroid) + + // Note: x and y flipped since output is rotated 90 degrees. + centroid_out(y,x) = make_float4(aX, 0.0f); + normals_out(y,x) = make_float4(nX / length(nX), 0.0f); + } +} + +/** + * This function does a vertical weighted aggregation of the centroids and + * normals generated by the horizontal aggregation. + */ +template <int RADIUS> + __global__ void mls_aggr_vert_kernel( + TextureObject<uchar4> region, + TextureObject<float4> normals_in, + TextureObject<float4> normals_out, + TextureObject<float4> centroid_in, // Virtual depth map + TextureObject<float4> centroid_out, // Accumulated output + TextureObject<uchar4> colour_in, + TextureObject<float> depth_in, + float smoothing, + float colour_smoothing, + ftl::rgbd::Camera camera) { + + const int x = blockIdx.x*blockDim.x + threadIdx.x; + const int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x < 0 || y < 0 || x >= depth_in.width() || y >= depth_in.height()) return; + + float3 aX = make_float3(0.0f,0.0f,0.0f); + float3 nX = make_float3(0.0f,0.0f,0.0f); + float contrib = 0.0f; + + float d0 = depth_in.tex2D(x, y); + if (d0 <= camera.minDepth || d0 >= camera.maxDepth) return; + + float3 X = camera.screenToCam((int)(x),(int)(y),d0); + + centroid_out(x,y) = make_float4(0.0f); + normals_out(x,y) = make_float4(0.0f); + + float4 c0 = colour_in.tex2D((float)x+0.5f, (float)y+0.5f); + + // Cross-Support Neighbourhood + uchar4 base = region.tex2D(x,y); + + #pragma unroll + for (int v=-RADIUS; v<=RADIUS; ++v) { + const float d = depth_in.tex2D(x, y+v); + const float3 Xi = camera.screenToCam(x,y+v,d); + + // Note: x and y flipped, input image is rotated. + float3 Ai = make_float3(centroid_in.tex2D(y+v, x)); + + // If outside the cross support range, set weight to 0 to ignore + float w = (Ai.z <= camera.minDepth || Ai.z >= camera.maxDepth || v < -base.z || v > base.w) ? 0.0f : 1.0f; + + // Note: x and y flipped, input image is rotated. + const float3 Ni = make_float3(normals_in.tex2D(y+v, x)); + + // Bad normals should be ignored. + if (Ni.x+Ni.y+Ni.z == 0.0f) w = 0.0f; + + // Gauss approx colour weighting. + const float4 c = colour_in.tex2D(float(x) + 0.5f, float(y+v) + 0.5f); + w *= ftl::cuda::colourWeighting(c0,c,colour_smoothing); + + // Gauss approx weighting function using point distance + w = ftl::cuda::spatialWeighting(X,Xi,smoothing*w); + + aX += Ai*w; + nX += Ni*w; + contrib += w; + } + + // Normalise the summed points and normals + if (contrib > 0.0f) { + nX /= contrib; // Weighted average normal + aX /= contrib; // Weighted average point (centroid) + centroid_out(x,y) = make_float4(aX, 0.0f); + normals_out(x,y) = make_float4(nX / length(nX), 0.0f); + } +} + +/** + * Using the aggregated centroids and normals, calculate the signed-distance- + * field and move the depth value accordingly using the calculated normal. + */ +__global__ void mls_adjust_depth_kernel( + TextureObject<float4> normals_in, + TextureObject<float4> centroid_in, // Virtual depth map + TextureObject<float> depth_in, + TextureObject<float> depth_out, + ftl::rgbd::Camera camera) { + + const int x = blockIdx.x*blockDim.x + threadIdx.x; + const int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x < 0 || y < 0 || x >= depth_out.width() || y >= depth_out.height()) return; + + float3 aX = make_float3(centroid_in(x,y)); + float3 nX = make_float3(normals_in(x,y)); + + float d0 = depth_in.tex2D(x, y); + depth_out(x,y) = d0; + + if (d0 > camera.minDepth && d0 < camera.maxDepth && aX.z > camera.minDepth && aX.z < camera.maxDepth) { + float3 X = camera.screenToCam((int)(x),(int)(y),d0); + + // Signed-Distance Field function + float fX = nX.x * (X.x - aX.x) + nX.y * (X.y - aX.y) + nX.z * (X.z - aX.z); + + // Calculate new point using SDF function to adjust depth (and position) + X = X - nX * fX; + + depth_out(x,y) = X.z; + } +} + + +void ftl::cuda::mls_aggr_horiz( + ftl::cuda::TextureObject<uchar4> ®ion, + ftl::cuda::TextureObject<float4> &normals_in, + ftl::cuda::TextureObject<float4> &normals_out, + ftl::cuda::TextureObject<float> &depth_in, + ftl::cuda::TextureObject<float4> ¢roid_out, + ftl::cuda::TextureObject<uchar4> &colour_in, + float smoothing, + float colour_smoothing, + int radius, + const ftl::rgbd::Camera &camera, + cudaStream_t stream) { + + const dim3 gridSize((normals_in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (normals_in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + switch(radius) { + case 1: mls_aggr_horiz_kernel<1><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, depth_in, centroid_out, colour_in, smoothing, colour_smoothing, camera); break; + case 2: mls_aggr_horiz_kernel<2><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, depth_in, centroid_out, colour_in, smoothing, colour_smoothing, camera); break; + case 3: mls_aggr_horiz_kernel<3><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, depth_in, centroid_out, colour_in, smoothing, colour_smoothing, camera); break; + case 5: mls_aggr_horiz_kernel<5><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, depth_in, centroid_out, colour_in, smoothing, colour_smoothing, camera); break; + case 10: mls_aggr_horiz_kernel<10><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, depth_in, centroid_out, colour_in, smoothing, colour_smoothing, camera); break; + case 15: mls_aggr_horiz_kernel<15><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, depth_in, centroid_out, colour_in, smoothing, colour_smoothing, camera); break; + case 20: mls_aggr_horiz_kernel<20><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, depth_in, centroid_out, colour_in, smoothing, colour_smoothing, camera); break; + default: return; + } + cudaSafeCall( cudaGetLastError() ); + + + #ifdef _DEBUG + cudaSafeCall(cudaDeviceSynchronize()); + #endif +} + +void ftl::cuda::mls_aggr_vert( + ftl::cuda::TextureObject<uchar4> ®ion, + ftl::cuda::TextureObject<float4> &normals_in, + ftl::cuda::TextureObject<float4> &normals_out, + ftl::cuda::TextureObject<float4> ¢roid_in, + ftl::cuda::TextureObject<float4> ¢roid_out, + ftl::cuda::TextureObject<uchar4> &colour_in, + ftl::cuda::TextureObject<float> &depth_in, + float smoothing, + float colour_smoothing, + int radius, + const ftl::rgbd::Camera &camera, + cudaStream_t stream) { + + const dim3 gridSize((normals_out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (normals_out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + switch(radius) { + case 1: mls_aggr_vert_kernel<1><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, centroid_in, centroid_out, colour_in, depth_in, smoothing, colour_smoothing, camera); break; + case 2: mls_aggr_vert_kernel<2><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, centroid_in, centroid_out, colour_in, depth_in, smoothing, colour_smoothing, camera); break; + case 3: mls_aggr_vert_kernel<3><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, centroid_in, centroid_out, colour_in, depth_in, smoothing, colour_smoothing, camera); break; + case 5: mls_aggr_vert_kernel<5><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, centroid_in, centroid_out, colour_in, depth_in, smoothing, colour_smoothing, camera); break; + case 10: mls_aggr_vert_kernel<10><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, centroid_in, centroid_out, colour_in, depth_in, smoothing, colour_smoothing, camera); break; + case 15: mls_aggr_vert_kernel<15><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, centroid_in, centroid_out, colour_in, depth_in, smoothing, colour_smoothing, camera); break; + case 20: mls_aggr_vert_kernel<20><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, centroid_in, centroid_out, colour_in, depth_in, smoothing, colour_smoothing, camera); break; + default: return; + } + cudaSafeCall( cudaGetLastError() ); + + + #ifdef _DEBUG + cudaSafeCall(cudaDeviceSynchronize()); + #endif +} + +void ftl::cuda::mls_adjust_depth( + ftl::cuda::TextureObject<float4> &normals_in, + ftl::cuda::TextureObject<float4> ¢roid_in, + ftl::cuda::TextureObject<float> &depth_in, + ftl::cuda::TextureObject<float> &depth_out, + const ftl::rgbd::Camera &camera, + cudaStream_t stream) { + + const dim3 gridSize((depth_out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + mls_adjust_depth_kernel<<<gridSize, blockSize, 0, stream>>>(normals_in, centroid_in, depth_in, depth_out, camera); + cudaSafeCall( cudaGetLastError() ); + + + #ifdef _DEBUG + cudaSafeCall(cudaDeviceSynchronize()); + #endif +} + + // ===== Adaptive MLS Smooth ===================================================== /* diff --git a/components/operators/src/mvmls.cpp b/components/operators/src/mvmls.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4c02a7607f1d6a506c1f60cbd1f25eabd2cb5732 --- /dev/null +++ b/components/operators/src/mvmls.cpp @@ -0,0 +1,340 @@ +#include <ftl/operators/mvmls.hpp> +#include "smoothing_cuda.hpp" +#include <ftl/utility/matrix_conversion.hpp> +#include "mvmls_cuda.hpp" + +using ftl::operators::MultiViewMLS; +using ftl::codecs::Channel; +using cv::cuda::GpuMat; +using ftl::rgbd::Format; + +MultiViewMLS::MultiViewMLS(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) { + +} + +MultiViewMLS::~MultiViewMLS() { + +} + +bool MultiViewMLS::apply(ftl::rgbd::FrameSet &in, ftl::rgbd::FrameSet &out, cudaStream_t stream) { + cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream); + + float thresh = config()->value("mls_threshold", 0.4f); + float col_smooth = config()->value("mls_colour_smoothing", 30.0f); + int iters = config()->value("mls_iterations", 3); + int radius = config()->value("mls_radius",5); + //bool aggre = config()->value("aggregation", true); + int win = config()->value("cost_function",1); + bool do_corr = config()->value("merge_corresponding", true); + bool do_aggr = config()->value("merge_mls", false); + bool cull_zero = config()->value("cull_no_confidence", false); + //bool show_best_source = config()->value("show_pixel_source", false); + + ftl::cuda::MvMLSParams params; + params.range = config()->value("search_range", 0.05f); + params.fill_match = config()->value("fill_match", 50.0f); + params.fill_threshold = config()->value("fill_threshold", 0.0f); + params.match_threshold = config()->value("match_threshold", 0.3f); + params.colour_smooth = config()->value("colour_smooth", 150.0f); + params.spatial_smooth = config()->value("spatial_smooth", 0.04f); + params.cost_ratio = config()->value("cost_ratio", 0.2f); + params.cost_threshold = config()->value("cost_threshold", 1.0f); + + // Make sure we have enough buffers + if (normals_horiz_.size() < in.frames.size()) { + normals_horiz_.resize(in.frames.size()); + centroid_horiz_.resize(in.frames.size()); + centroid_vert_.resize(in.frames.size()); + contributions_.resize(in.frames.size()); + } + + // Make sure all buffers are at correct resolution and are allocated + for (size_t i=0; i<in.frames.size(); ++i) { + auto &f = in.frames[i]; + auto size = f.get<GpuMat>(Channel::Depth).size(); + centroid_horiz_[i].create(size.height, size.width); + normals_horiz_[i].create(size.height, size.width); + centroid_vert_[i].create(size.width, size.height); + contributions_[i].create(size.width, size.height); + + if (!f.hasChannel(Channel::Normals)) { + LOG(ERROR) << "Required normals channel missing for MLS"; + return false; + } + if (!f.hasChannel(Channel::Support1)) { + LOG(ERROR) << "Required cross support channel missing for MLS"; + return false; + } + + // Create required channels + f.create<GpuMat>(Channel::Confidence, Format<float>(size)); + f.createTexture<float>(Channel::Confidence); + f.create<GpuMat>(Channel::Screen, Format<short2>(size)); + f.createTexture<short2>(Channel::Screen); + } + + for (int iter=0; iter<iters; ++iter) { + // Step 1: + // Calculate correspondences and adjust depth values + // Step 2: + // Find corresponding points and perform aggregation of any correspondences + // For each camera combination + if (do_corr) { + for (size_t i=0; i<in.frames.size(); ++i) { + auto &f1 = in.frames[i]; + //f1.get<GpuMat>(Channel::Depth2).setTo(cv::Scalar(0.0f), cvstream); + f1.get<GpuMat>(Channel::Confidence).setTo(cv::Scalar(0.0f), cvstream); + + Eigen::Vector4d d1(0.0, 0.0, 1.0, 0.0); + d1 = in.sources[i]->getPose() * d1; + + for (size_t j=0; j<in.frames.size(); ++j) { + if (i == j) continue; + + //LOG(INFO) << "Running phase1"; + + auto &f2 = in.frames[j]; + auto s1 = in.sources[i]; + auto s2 = in.sources[j]; + + // Are cameras facing similar enough direction? + Eigen::Vector4d d2(0.0, 0.0, 1.0, 0.0); + d2 = in.sources[j]->getPose() * d2; + // No, so skip this combination + if (d1.dot(d2) <= 0.0) continue; + + auto pose1 = MatrixConversion::toCUDA(s1->getPose().cast<float>()); + auto pose1_inv = MatrixConversion::toCUDA(s1->getPose().cast<float>().inverse()); + auto pose2 = MatrixConversion::toCUDA(s2->getPose().cast<float>().inverse()); + auto pose2_inv = MatrixConversion::toCUDA(s2->getPose().cast<float>()); + + auto transform = pose2 * pose1; + + //Calculate screen positions of estimated corresponding points + ftl::cuda::correspondence( + f1.getTexture<float>(Channel::Depth), + f2.getTexture<float>(Channel::Depth), + f1.getTexture<uchar4>(Channel::Colour), + f2.getTexture<uchar4>(Channel::Colour), + // TODO: Add normals and other things... + f1.getTexture<short2>(Channel::Screen), + f1.getTexture<float>(Channel::Confidence), + f1.getTexture<int>(Channel::Mask), + pose1, + pose1_inv, + pose2, + s1->parameters(), + s2->parameters(), + params, + win, + stream + ); + + // Also calculate best source for each point + /*ftl::cuda::best_sources( + f1.getTexture<uchar4>(Channel::Support1), + f2.getTexture<uchar4>(Channel::Support1), + f1.getTexture<float>(Channel::Depth), + f2.getTexture<float>(Channel::Depth), + f1.getTexture<short2>(Channel::Screen), + transform, + s1->parameters(), + s2->parameters(), + i, j, stream + );*/ + } + } + } + + // Find best source for every pixel + /*for (size_t i=0; i<in.frames.size(); ++i) { + auto &f1 = in.frames[i]; + //f1.get<GpuMat>(Channel::Depth2).setTo(cv::Scalar(0.0f), cvstream); + //f1.get<GpuMat>(Channel::Confidence).setTo(cv::Scalar(0.0f), cvstream); + + Eigen::Vector4d d1(0.0, 0.0, 1.0, 0.0); + d1 = in.sources[i]->getPose() * d1; + + for (size_t j=0; j<in.frames.size(); ++j) { + if (i == j) continue; + + //LOG(INFO) << "Running phase1"; + + auto &f2 = in.frames[j]; + auto s1 = in.sources[i]; + auto s2 = in.sources[j]; + + // Are cameras facing similar enough direction? + Eigen::Vector4d d2(0.0, 0.0, 1.0, 0.0); + d2 = in.sources[j]->getPose() * d2; + // No, so skip this combination + if (d1.dot(d2) <= 0.0) continue; + + auto pose1 = MatrixConversion::toCUDA(s1->getPose().cast<float>()); + auto pose1_inv = MatrixConversion::toCUDA(s1->getPose().cast<float>().inverse()); + auto pose2 = MatrixConversion::toCUDA(s2->getPose().cast<float>().inverse()); + auto pose2_inv = MatrixConversion::toCUDA(s2->getPose().cast<float>()); + + auto transform = pose2 * pose1; + + // Also calculate best source for each point + ftl::cuda::best_sources( + f1.getTexture<float4>(Channel::Normals), + f2.getTexture<float4>(Channel::Normals), + f1.getTexture<uchar4>(Channel::Support1), + f2.getTexture<uchar4>(Channel::Support1), + f1.getTexture<float>(Channel::Depth), + f2.getTexture<float>(Channel::Depth), + f1.getTexture<short2>(Channel::Screen), + transform, + s1->parameters(), + s2->parameters(), + i, j, stream + ); + } + }*/ + + // Step 2: + // Do the horizontal and vertical MLS aggregations for each source + // But don't do the final move step. + for (size_t i=0; i<in.frames.size(); ++i) { + auto &f = in.frames[i]; + auto *s = in.sources[i]; + + // Clear data + cv::cuda::GpuMat data(contributions_[i].height(), contributions_[i].width(), CV_32F, contributions_[i].pixelPitch()); + data.setTo(cv::Scalar(0.0f), cvstream); + + if (cull_zero && iter == iters-1) { + ftl::cuda::zero_confidence( + f.getTexture<float>(Channel::Confidence), + f.getTexture<float>(Channel::Depth), + stream + ); + } + + ftl::cuda::mls_aggr_horiz( + f.createTexture<uchar4>(Channel::Support1), + f.createTexture<float4>(Channel::Normals), + normals_horiz_[i], + f.createTexture<float>(Channel::Depth), + centroid_horiz_[i], + f.createTexture<uchar4>(Channel::Colour), + thresh, + col_smooth, + radius, + s->parameters(), + stream + ); + + ftl::cuda::mls_aggr_vert( + f.getTexture<uchar4>(Channel::Support1), + normals_horiz_[i], + f.getTexture<float4>(Channel::Normals), + centroid_horiz_[i], + centroid_vert_[i], + f.getTexture<uchar4>(Channel::Colour), + f.getTexture<float>(Channel::Depth), + thresh, + col_smooth, + radius, + s->parameters(), + stream + ); + } + + // Step 3: + // Find corresponding points and perform aggregation of any correspondences + // For each camera combination + if (do_aggr) { + for (size_t i=0; i<in.frames.size(); ++i) { + auto &f1 = in.frames[i]; + //f1.get<GpuMat>(Channel::Depth2).setTo(cv::Scalar(0.0f), cvstream); + f1.get<GpuMat>(Channel::Confidence).setTo(cv::Scalar(0.0f), cvstream); + + Eigen::Vector4d d1(0.0, 0.0, 1.0, 0.0); + d1 = in.sources[i]->getPose() * d1; + + for (size_t j=0; j<in.frames.size(); ++j) { + if (i == j) continue; + + //LOG(INFO) << "Running phase1"; + + auto &f2 = in.frames[j]; + auto s1 = in.sources[i]; + auto s2 = in.sources[j]; + + // Are cameras facing similar enough direction? + Eigen::Vector4d d2(0.0, 0.0, 1.0, 0.0); + d2 = in.sources[j]->getPose() * d2; + // No, so skip this combination + if (d1.dot(d2) <= 0.0) continue; + + auto pose1 = MatrixConversion::toCUDA(s1->getPose().cast<float>()); + auto pose1_inv = MatrixConversion::toCUDA(s1->getPose().cast<float>().inverse()); + auto pose2 = MatrixConversion::toCUDA(s2->getPose().cast<float>().inverse()); + auto pose2_inv = MatrixConversion::toCUDA(s2->getPose().cast<float>()); + + auto transform = pose2 * pose1; + + // For the corresponding points, combine normals and centroids + ftl::cuda::aggregate_sources( + f1.getTexture<float4>(Channel::Normals), + f2.getTexture<float4>(Channel::Normals), + centroid_vert_[i], + centroid_vert_[j], + f1.getTexture<float>(Channel::Depth), + //contributions_[i], + //contributions_[j], + //f1.getTexture<short2>(Channel::Screen), + transform, + s1->parameters(), + s2->parameters(), + stream + ); + + //LOG(INFO) << "Correspondences done... " << i; + } + } + } + + // Step 3: + // Normalise aggregations and move the points + for (size_t i=0; i<in.frames.size(); ++i) { + auto &f = in.frames[i]; + auto *s = in.sources[i]; + auto size = f.get<GpuMat>(Channel::Depth).size(); + + /*if (do_corr) { + ftl::cuda::normalise_aggregations( + f.getTexture<float4>(Channel::Normals), + centroid_vert_[i], + contributions_[i], + stream + ); + }*/ + + ftl::cuda::mls_adjust_depth( + f.getTexture<float4>(Channel::Normals), + centroid_vert_[i], + f.getTexture<float>(Channel::Depth), + f.createTexture<float>(Channel::Depth2, ftl::rgbd::Format<float>(size)), + s->parameters(), + stream + ); + + f.swapChannels(Channel::Depth, Channel::Depth2); + + /*if (show_best_source) { + ftl::cuda::vis_best_sources( + f.getTexture<short2>(Channel::Screen), + f.getTexture<uchar4>(Channel::Colour), + i, + 7, stream + ); + }*/ + } + } + + return true; +} diff --git a/components/operators/src/mvmls_cuda.hpp b/components/operators/src/mvmls_cuda.hpp new file mode 100644 index 0000000000000000000000000000000000000000..93b1e8d882848490aec96a291d58805c2d2dbf03 --- /dev/null +++ b/components/operators/src/mvmls_cuda.hpp @@ -0,0 +1,101 @@ +#ifndef _FTL_CUDA_MVMLS_HPP_ +#define _FTL_CUDA_MVMLS_HPP_ + +#include <ftl/rgbd/camera.hpp> +#include <ftl/cuda_common.hpp> +#include <ftl/cuda_matrix_util.hpp> + +namespace ftl { +namespace cuda { + +struct MvMLSParams { + float spatial_smooth; + float colour_smooth; + float fill_match; + float fill_threshold; + float match_threshold; + float cost_ratio; + float cost_threshold; + float range; + uint flags; +}; + +void correspondence( + ftl::cuda::TextureObject<float> &d1, + ftl::cuda::TextureObject<float> &d2, + ftl::cuda::TextureObject<uchar4> &c1, + ftl::cuda::TextureObject<uchar4> &c2, + ftl::cuda::TextureObject<short2> &screen, + ftl::cuda::TextureObject<float> &conf, + ftl::cuda::TextureObject<int> &mask, + float4x4 &pose1, + float4x4 &pose1_inv, + float4x4 &pose2, + const ftl::rgbd::Camera &cam1, + const ftl::rgbd::Camera &cam2, const ftl::cuda::MvMLSParams ¶ms, int func, + cudaStream_t stream); + +void zero_confidence( + ftl::cuda::TextureObject<float> &conf, + ftl::cuda::TextureObject<float> &depth, + cudaStream_t stream); + +/*void aggregate_sources( + ftl::cuda::TextureObject<float4> &n1, + ftl::cuda::TextureObject<float4> &n2, + ftl::cuda::TextureObject<float4> &c1, + ftl::cuda::TextureObject<float4> &c2, + ftl::cuda::TextureObject<float> &contribs1, + ftl::cuda::TextureObject<float> &contribs2, + ftl::cuda::TextureObject<short2> &screen, + //const float4x4 &pose1, + //const float4x4 &poseInv2, + const float4x4 &poseInv1, + const float4x4 &pose2, + cudaStream_t stream);*/ + +void aggregate_sources( + ftl::cuda::TextureObject<float4> &n1, + ftl::cuda::TextureObject<float4> &n2, + ftl::cuda::TextureObject<float4> &c1, + ftl::cuda::TextureObject<float4> &c2, + ftl::cuda::TextureObject<float> &depth1, + //ftl::cuda::TextureObject<float> &depth2, + //ftl::cuda::TextureObject<short2> &screen, + const float4x4 &transform, + const ftl::rgbd::Camera &cam1, + const ftl::rgbd::Camera &cam2, + cudaStream_t stream); + +void best_sources( + ftl::cuda::TextureObject<float4> &normals1, + ftl::cuda::TextureObject<float4> &normals2, + ftl::cuda::TextureObject<uchar4> &support1, + ftl::cuda::TextureObject<uchar4> &suppor2, + ftl::cuda::TextureObject<float> &depth1, + ftl::cuda::TextureObject<float> &depth2, + ftl::cuda::TextureObject<short2> &screen, + const float4x4 &transform, + const ftl::rgbd::Camera &cam1, + const ftl::rgbd::Camera &cam2, + int id1, + int id2, + cudaStream_t stream); + +void vis_best_sources( + ftl::cuda::TextureObject<short2> &screen, + ftl::cuda::TextureObject<uchar4> &colour, + int myid, + int count, + cudaStream_t stream); + +void normalise_aggregations( + ftl::cuda::TextureObject<float4> &norms, + ftl::cuda::TextureObject<float4> ¢s, + ftl::cuda::TextureObject<float> &contribs, + cudaStream_t stream); + +} +} + +#endif diff --git a/components/operators/src/normals.cpp b/components/operators/src/normals.cpp index 5f8554c2986f27de3fe5bd0ab2fb03fbf2226c35..57903aa1db741c0e99f51aedf5c162b694f011cb 100644 --- a/components/operators/src/normals.cpp +++ b/components/operators/src/normals.cpp @@ -28,7 +28,7 @@ bool Normals::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Sour ftl::cuda::normals( out.createTexture<float4>(Channel::Normals, ftl::rgbd::Format<float4>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), in.createTexture<float>(Channel::Depth), - s->parameters(), 0 + s->parameters(), stream ); return true; diff --git a/components/operators/src/nvopticalflow.cpp b/components/operators/src/nvopticalflow.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a8b6d406bc7421d48d0cfb15de40d0eb48cafebe --- /dev/null +++ b/components/operators/src/nvopticalflow.cpp @@ -0,0 +1,48 @@ +#include <ftl/operators/opticalflow.hpp> + +using ftl::rgbd::Frame; +using ftl::rgbd::Source; +using ftl::codecs::Channel; + +using ftl::operators::NVOpticalFlow; + +using cv::Size; +using cv::cuda::GpuMat; + +NVOpticalFlow::NVOpticalFlow(ftl::Configurable* cfg) : + ftl::operators::Operator(cfg) { + size_ = Size(0, 0); +} + +NVOpticalFlow::~NVOpticalFlow() { +} + +bool NVOpticalFlow::init() { + nvof_ = cv::cuda::NvidiaOpticalFlow_1_0::create( + size_.width, size_.height, + cv::cuda::NvidiaOpticalFlow_1_0::NV_OF_PERF_LEVEL_SLOW, + true, false, false, 0); + + left_gray_.create(size_, CV_8UC1); + left_gray_prev_.create(size_, CV_8UC1); + return true; +} + +bool NVOpticalFlow::apply(Frame &in, Frame &out, Source *src, cudaStream_t stream) { + if (!in.hasChannel(channel_in_)) { return false; } + + if (in.get<GpuMat>(channel_in_).size() != size_) { + size_ = in.get<GpuMat>(channel_in_).size(); + init(); + } + + auto cvstream = cv::cuda::StreamAccessor::wrapStream(stream); + auto &flow = out.create<GpuMat>(channel_out_); + + cv::cuda::cvtColor(in.get<GpuMat>(channel_in_), left_gray_, cv::COLOR_BGR2GRAY, 0, cvstream); + + nvof_->calc(left_gray_, left_gray_prev_, flow, cvstream); + std::swap(left_gray_, left_gray_prev_); + + return true; +} \ No newline at end of file diff --git a/components/operators/src/operator.cpp b/components/operators/src/operator.cpp index 91dada28b6adddfcdb3e71861aca07591c57fa9c..dd821ee6fdd2b967e5ac46f3a4f77c57d59aed6b 100644 --- a/components/operators/src/operator.cpp +++ b/components/operators/src/operator.cpp @@ -31,33 +31,56 @@ bool Operator::apply(FrameSet &in, Frame &out, Source *os, cudaStream_t stream) Graph::Graph(nlohmann::json &config) : ftl::Configurable(config) { - + cudaSafeCall( cudaStreamCreate(&stream_) ); } Graph::~Graph() { - + cudaStreamDestroy(stream_); } bool Graph::apply(FrameSet &in, FrameSet &out, cudaStream_t stream) { if (!value("enabled", true)) return false; + auto stream_actual = (stream == 0) ? stream_ : stream; + if (in.frames.size() != out.frames.size()) return false; for (auto &i : operators_) { - // Make sure there are enough instances - while (i.instances.size() < in.frames.size()) { + if (i.instances.size() < 1) { i.instances.push_back(i.maker->make()); } - for (int j=0; j<in.frames.size(); ++j) { - auto *instance = i.instances[j]; + if (i.instances[0]->type() == Operator::Type::OneToOne) { + // Make sure there are enough instances + while (i.instances.size() < in.frames.size()) { + i.instances.push_back(i.maker->make()); + } + + for (int j=0; j<in.frames.size(); ++j) { + auto *instance = i.instances[j]; + + if (instance->enabled()) { + instance->apply(in.frames[j], out.frames[j], in.sources[j], stream_actual); + } + } + } else if (i.instances[0]->type() == Operator::Type::ManyToMany) { + auto *instance = i.instances[0]; if (instance->enabled()) { - instance->apply(in.frames[j], out.frames[j], in.sources[j], stream); + try { + instance->apply(in, out, stream_actual); + } catch (const std::exception &e) { + LOG(ERROR) << "Operator exception: " << e.what(); + } } } } + if (stream == 0) { + cudaStreamSynchronize(stream_actual); + cudaSafeCall( cudaGetLastError() ); + } + return true; } diff --git a/components/operators/src/segmentation.cpp b/components/operators/src/segmentation.cpp new file mode 100644 index 0000000000000000000000000000000000000000..fb015b6b16e1aa7e1dcac9735103c8c6af41567e --- /dev/null +++ b/components/operators/src/segmentation.cpp @@ -0,0 +1,98 @@ +#include <ftl/operators/segmentation.hpp> +#include "segmentation_cuda.hpp" + +using ftl::operators::CrossSupport; +using ftl::operators::VisCrossSupport; +using ftl::codecs::Channel; + +CrossSupport::CrossSupport(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) { + +} + +CrossSupport::~CrossSupport() { + +} + +bool CrossSupport::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *s, cudaStream_t stream) { + bool use_depth = config()->value("depth_region", false); + + if (use_depth) { + ftl::cuda::support_region( + in.createTexture<float>(Channel::Depth), + out.createTexture<uchar4>(Channel::Support2, ftl::rgbd::Format<uchar4>(in.get<cv::cuda::GpuMat>(Channel::Colour).size())), + config()->value("depth_tau", 0.04f), + config()->value("v_max", 5), + config()->value("h_max", 5), + config()->value("symmetric", true), stream + ); + } //else { + ftl::cuda::support_region( + in.createTexture<uchar4>(Channel::Colour), + out.createTexture<uchar4>(Channel::Support1, ftl::rgbd::Format<uchar4>(in.get<cv::cuda::GpuMat>(Channel::Colour).size())), + config()->value("tau", 5.0f), + config()->value("v_max", 5), + config()->value("h_max", 5), + config()->value("symmetric", true), stream + ); + //} + + return true; +} + + + + +VisCrossSupport::VisCrossSupport(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) { + +} + +VisCrossSupport::~VisCrossSupport() { + +} + +bool VisCrossSupport::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *s, cudaStream_t stream) { + bool show_depth = false; + if (in.hasChannel(Channel::Support2) && config()->value("show_depth_support", false)) { + show_depth = true; + } + + bool show_bad = config()->value("show_bad", false) && in.hasChannel(Channel::Support2); + + if (show_bad) { + ftl::cuda::vis_bad_region( + in.createTexture<uchar4>(Channel::Colour), + in.createTexture<float>(Channel::Depth), + in.createTexture<uchar4>(Channel::Support1), + in.createTexture<uchar4>(Channel::Support2), + stream + ); + } else { + ftl::cuda::vis_support_region( + in.createTexture<uchar4>(Channel::Colour), + in.createTexture<uchar4>(Channel::Support1), + make_uchar4(0,0,255,0), + make_uchar4(255,0,0,0), + config()->value("offset_x", 0), + config()->value("offset_y", 0), + config()->value("spacing_x", 50), + config()->value("spacing_y", 50), + stream + ); + + if (show_depth) { + ftl::cuda::vis_support_region( + in.createTexture<uchar4>(Channel::Colour), + in.createTexture<uchar4>(Channel::Support2), + make_uchar4(0,0,255,0), + make_uchar4(0,255,0,0), + config()->value("offset_x", 0), + config()->value("offset_y", 0), + config()->value("spacing_x", 50), + config()->value("spacing_y", 50), + stream + ); + } + } + + return true; +} \ No newline at end of file diff --git a/components/operators/src/segmentation.cu b/components/operators/src/segmentation.cu new file mode 100644 index 0000000000000000000000000000000000000000..c16e647931f7d4c4d92551f44f6242fc415bd91d --- /dev/null +++ b/components/operators/src/segmentation.cu @@ -0,0 +1,253 @@ +#include "segmentation_cuda.hpp" + +#define T_PER_BLOCK 8 + +using ftl::cuda::TextureObject; + +template <typename T> +__device__ inline float cross(T p1, T p2); + +template <> +__device__ inline float cross<uchar4>(uchar4 p1, uchar4 p2) { + return max(max(__sad(p1.x,p2.x,0),__sad(p1.y,p2.y,0)), __sad(p1.z,p2.z,0)); +} + +template <> +__device__ inline float cross<float4>(float4 p1, float4 p2) { + return max(max(fabsf(p1.x - p2.x),fabsf(p1.y - p2.y)), fabsf(p1.z - p2.z)); +} + +template <> +__device__ inline float cross<float>(float p1, float p2) { + return fabs(p1-p2); +} + +template <typename T, bool SYM> +__device__ uchar4 calculate_support_region(const TextureObject<T> &img, int x, int y, float tau, int v_max, int h_max) { + int x_min = max(0, x - h_max); + int x_max = min(img.width()-1, x + h_max); + int y_min = max(0, y - v_max); + int y_max = min(img.height()-1, y + v_max); + + uchar4 result = make_uchar4(0, 0, 0, 0); + + auto colour = img.tex2D((float)x+0.5f,(float)y+0.5f); + auto prev_colour = colour; + + int u; + for (u=x-1; u >= x_min; --u) { + auto next_colour = img.tex2D((float)u+0.5f,(float)y+0.5f); + if (cross(prev_colour, next_colour) > tau) { + result.x = x - u - 1; + break; + } + prev_colour = next_colour; + } + if (u < x_min) result.x = x - x_min; + + prev_colour = colour; + for (u=x+1; u <= x_max; ++u) { + auto next_colour = img.tex2D((float)u+0.5f,(float)y+0.5f); + if (cross(prev_colour, next_colour) > tau) { + result.y = u - x - 1; + break; + } + prev_colour = next_colour; + } + if (u > x_max) result.y = x_max - x; + + int v; + prev_colour = colour; + for (v=y-1; v >= y_min; --v) { + auto next_colour = img.tex2D((float)x+0.5f,(float)v+0.5f); + if (cross(prev_colour, next_colour) > tau) { + result.z = y - v - 1; + break; + } + prev_colour = next_colour; + } + if (v < y_min) result.z = y - y_min; + + prev_colour = colour; + for (v=y+1; v <= y_max; ++v) { + auto next_colour = img.tex2D((float)x+0.5f,(float)v+0.5f); + if (cross(prev_colour, next_colour) > tau) { + result.w = v - y - 1; + break; + } + prev_colour = next_colour; + } + if (v > y_max) result.w = y_max - y; + + // Make symetric left/right and up/down + if (SYM) { + result.x = min(result.x, result.y); + result.y = result.x; + result.z = min(result.z, result.w); + result.w = result.z; + } + return result; +} + +template <typename T, bool SYM> +__global__ void support_region_kernel(TextureObject<T> img, TextureObject<uchar4> region, float tau, int v_max, int h_max) { + const int x = blockIdx.x*blockDim.x + threadIdx.x; + const int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x < 0 || y < 0 || x >= img.width() || y >= img.height()) return; + + region(x,y) = calculate_support_region<T,SYM>(img, x, y, tau, v_max, h_max); +} + +void ftl::cuda::support_region( + ftl::cuda::TextureObject<uchar4> &colour, + ftl::cuda::TextureObject<uchar4> ®ion, + float tau, + int v_max, + int h_max, + bool sym, + cudaStream_t stream) { + + const dim3 gridSize((region.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (region.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + if (sym) support_region_kernel<uchar4, true><<<gridSize, blockSize, 0, stream>>>(colour, region, tau, v_max, h_max); + else support_region_kernel<uchar4, false><<<gridSize, blockSize, 0, stream>>>(colour, region, tau, v_max, h_max); + cudaSafeCall( cudaGetLastError() ); + + + #ifdef _DEBUG + cudaSafeCall(cudaDeviceSynchronize()); + #endif +} + +void ftl::cuda::support_region( + ftl::cuda::TextureObject<float> &depth, + ftl::cuda::TextureObject<uchar4> ®ion, + float tau, + int v_max, + int h_max, + bool sym, + cudaStream_t stream) { + + const dim3 gridSize((region.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (region.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + support_region_kernel<float, true><<<gridSize, blockSize, 0, stream>>>(depth, region, tau, v_max, h_max); + cudaSafeCall( cudaGetLastError() ); + + + #ifdef _DEBUG + cudaSafeCall(cudaDeviceSynchronize()); + #endif +} + +__global__ void vis_support_region_kernel(TextureObject<uchar4> colour, TextureObject<uchar4> region, uchar4 bcolour, uchar4 acolour, + int ox, int oy, int dx, int dy) { + const int x = blockIdx.x*blockDim.x + threadIdx.x; + const int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x < 0 || y < 0 || x >= colour.width() || y >= colour.height()) return; + + // Grid pattern + if (x % dx != ox || y % dy != oy) return; + + uchar4 base = region.tex2D(x,y); + + // Edge pattern + //if (base.x != 1) return; + + for (int v=-base.z; v<=base.w; ++v) { + uchar4 baseY = region.tex2D(x,y+v); + + for (int u=-baseY.x; u<=baseY.y; ++u) { + if (x+u < 0 || y+v < 0 || x+u >= colour.width() || y+v >= colour.height()) continue; + auto col = colour.tex2D(float(x+u)+0.5f, float(y+v)+0.5f); + colour(x+u, y+v) = (u==0 || v == 0) ? + make_uchar4(max(bcolour.x, (unsigned char)col.x), max(bcolour.y, (unsigned char)col.y), max(bcolour.z, (unsigned char)col.z), 0) : + make_uchar4(max(acolour.x, (unsigned char)col.x), max(acolour.y, (unsigned char)col.y), max(acolour.z, (unsigned char)col.z), 0); + } + } +} + +void ftl::cuda::vis_support_region( + ftl::cuda::TextureObject<uchar4> &colour, + ftl::cuda::TextureObject<uchar4> ®ion, + uchar4 bar_colour, + uchar4 area_colour, + int ox, int oy, int dx, int dy, + cudaStream_t stream) { + + const dim3 gridSize((region.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (region.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + vis_support_region_kernel<<<gridSize, blockSize, 0, stream>>>( + colour, + region, + bar_colour, + area_colour, + ox,oy,dx,dy + ); + cudaSafeCall( cudaGetLastError() ); + + + #ifdef _DEBUG + cudaSafeCall(cudaDeviceSynchronize()); + #endif +} + +// ===== Vis bad edges ========================================================= + +__global__ void vis_bad_region_kernel( + TextureObject<uchar4> colour, + TextureObject<float> depth, + TextureObject<uchar4> region, + TextureObject<uchar4> dregion) { + const int x = blockIdx.x*blockDim.x + threadIdx.x; + const int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x < 0 || y < 0 || x >= colour.width() || y >= colour.height()) return; + + // Grid pattern + //if (x % 50 != 0 || y % 50 != 0) return; + + uchar4 base = region.tex2D(x,y); + uchar4 baseD = dregion.tex2D(x,y); + auto col = colour.tex2D((float)x+0.5f,(float)y+0.5f); + float d = depth.tex2D(x,y); + + if (baseD.x > base.x && baseD.y < base.y) { + uchar4 baseR = region.tex2D(x+baseD.y+1, y); + float dR = depth.tex2D(x+baseD.y+1, y); + //if (x+baseD.y+1-baseR.x <= x) { + if (d > 0.0f && d < 30.0f && (dR <= 0.0f || dR >= 30.0f)) { + colour(x,y) = make_uchar4(col.x, col.y, 255, 0); + depth(x,y) = 0.0f; + } + //} + } +} + +void ftl::cuda::vis_bad_region( + ftl::cuda::TextureObject<uchar4> &colour, + ftl::cuda::TextureObject<float> &depth, + ftl::cuda::TextureObject<uchar4> ®ion, + ftl::cuda::TextureObject<uchar4> &dregion, + cudaStream_t stream) { + + const dim3 gridSize((region.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (region.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + vis_bad_region_kernel<<<gridSize, blockSize, 0, stream>>>( + colour, + depth, + region, + dregion + ); + cudaSafeCall( cudaGetLastError() ); + + + #ifdef _DEBUG + cudaSafeCall(cudaDeviceSynchronize()); + #endif +} diff --git a/components/operators/src/segmentation_cuda.hpp b/components/operators/src/segmentation_cuda.hpp new file mode 100644 index 0000000000000000000000000000000000000000..c2cb390d9c0ee62a33127eec1720cf0d6fee8cae --- /dev/null +++ b/components/operators/src/segmentation_cuda.hpp @@ -0,0 +1,40 @@ +#ifndef _FTL_CUDA_SEGMENTATION_HPP_ +#define _FTL_CUDA_SEGMENTATION_HPP_ + +#include <ftl/cuda_common.hpp> + +namespace ftl { +namespace cuda { + +void support_region( + ftl::cuda::TextureObject<uchar4> &colour, + ftl::cuda::TextureObject<uchar4> ®ion, + float tau, int v_max, int h_max, bool sym, + cudaStream_t stream); + +void support_region( + ftl::cuda::TextureObject<float> &depth, + ftl::cuda::TextureObject<uchar4> ®ion, + float tau, int v_max, int h_max, bool sym, + cudaStream_t stream); + +void vis_support_region( + ftl::cuda::TextureObject<uchar4> &colour, + ftl::cuda::TextureObject<uchar4> ®ion, + uchar4 bar_colour, + uchar4 area_colour, + int ox, int oy, int dx, int dy, + cudaStream_t stream); + +void vis_bad_region( + ftl::cuda::TextureObject<uchar4> &colour, + ftl::cuda::TextureObject<float> &depth, + ftl::cuda::TextureObject<uchar4> ®ion, + ftl::cuda::TextureObject<uchar4> &dregion, + cudaStream_t stream); + + +} +} + +#endif diff --git a/components/operators/src/smoothchan.cu b/components/operators/src/smoothchan.cu index af9ec35243ed6304f859ab16c2177aa761c8eb14..ea40854fccd7579fc3973a6acb41543ea7bc75d5 100644 --- a/components/operators/src/smoothchan.cu +++ b/components/operators/src/smoothchan.cu @@ -32,6 +32,7 @@ __global__ void smooth_chan_kernel( //const float3 pos = camera.screenToCam(ix, iy, d0); + //int v=0; for (int v=-RADIUS; v<=RADIUS; ++v) { #pragma unroll for (int u=-RADIUS; u<=RADIUS; ++u) { diff --git a/components/operators/src/smoothing.cpp b/components/operators/src/smoothing.cpp index 8e4f458bf9f996a6d38bef816ab7bcec75b69b11..2eafe7977a827cd0a1e90017f749fd1245ce1b68 100644 --- a/components/operators/src/smoothing.cpp +++ b/components/operators/src/smoothing.cpp @@ -6,6 +6,7 @@ using ftl::operators::HFSmoother; using ftl::operators::SimpleMLS; using ftl::operators::ColourMLS; +using ftl::operators::AggreMLS; using ftl::operators::AdaptiveMLS; using ftl::operators::SmoothChannel; using ftl::codecs::Channel; @@ -33,7 +34,7 @@ bool HFSmoother::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::S in.createTexture<float>(Channel::Energy, ftl::rgbd::Format<float>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), in.createTexture<float>(Channel::Smoothing, ftl::rgbd::Format<float>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), var_thresh, - s->parameters(), 0 + s->parameters(), stream ); } @@ -158,7 +159,7 @@ bool SimpleMLS::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::So thresh, radius, s->parameters(), - 0 + stream ); in.swapChannels(Channel::Depth, Channel::Depth2); @@ -179,10 +180,12 @@ ColourMLS::~ColourMLS() { } bool ColourMLS::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *s, cudaStream_t stream) { - float thresh = config()->value("mls_threshold", 0.04f); + float thresh = config()->value("mls_threshold", 0.4f); float col_smooth = config()->value("mls_colour_smoothing", 30.0f); - int iters = config()->value("mls_iterations", 1); - int radius = config()->value("mls_radius",2); + int iters = config()->value("mls_iterations", 3); + int radius = config()->value("mls_radius",3); + bool crosssup = config()->value("cross_support", true); + bool filling = config()->value("filling", false); if (!in.hasChannel(Channel::Normals)) { LOG(ERROR) << "Required normals channel missing for MLS"; @@ -191,18 +194,34 @@ bool ColourMLS::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::So // FIXME: Assume in and out are the same frame. for (int i=0; i<iters; ++i) { - ftl::cuda::colour_mls_smooth( - in.createTexture<float4>(Channel::Normals), - in.createTexture<float4>(Channel::Points, ftl::rgbd::Format<float4>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), - in.createTexture<float>(Channel::Depth), - in.createTexture<float>(Channel::Depth2, ftl::rgbd::Format<float>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), - in.createTexture<uchar4>(Channel::Colour), - thresh, - col_smooth, - radius, - s->parameters(), - 0 - ); + if (!crosssup) { + ftl::cuda::colour_mls_smooth( + in.createTexture<float4>(Channel::Normals), + in.createTexture<float4>(Channel::Points, ftl::rgbd::Format<float4>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), + in.createTexture<float>(Channel::Depth), + in.createTexture<float>(Channel::Depth2, ftl::rgbd::Format<float>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), + in.createTexture<uchar4>(Channel::Colour), + thresh, + col_smooth, + radius, + s->parameters(), + stream + ); + } else { + ftl::cuda::colour_mls_smooth_csr( + in.createTexture<uchar4>(Channel::Support1), + in.createTexture<float4>(Channel::Normals), + in.createTexture<float4>(Channel::Points, ftl::rgbd::Format<float4>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), + in.createTexture<float>(Channel::Depth), + in.createTexture<float>(Channel::Depth2, ftl::rgbd::Format<float>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), + in.createTexture<uchar4>(Channel::Colour), + thresh, + col_smooth, + filling, + s->parameters(), + stream + ); + } in.swapChannels(Channel::Depth, Channel::Depth2); in.swapChannels(Channel::Normals, Channel::Points); @@ -212,6 +231,107 @@ bool ColourMLS::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::So } +// ====== Aggregating MLS ====================================================== + +AggreMLS::AggreMLS(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) { + +} + +AggreMLS::~AggreMLS() { + +} + +bool AggreMLS::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *s, cudaStream_t stream) { + float thresh = config()->value("mls_threshold", 0.4f); + float col_smooth = config()->value("mls_colour_smoothing", 30.0f); + int iters = config()->value("mls_iterations", 3); + int radius = config()->value("mls_radius",5); + bool aggre = config()->value("aggregation", true); + + if (!in.hasChannel(Channel::Normals)) { + LOG(ERROR) << "Required normals channel missing for MLS"; + return false; + } + + if (!in.hasChannel(Channel::Support1)) { + LOG(ERROR) << "Required support channel missing for MLS"; + return false; + } + + auto size = in.get<GpuMat>(Channel::Depth).size(); + centroid_horiz_.create(size.height, size.width); + normals_horiz_.create(size.height, size.width); + centroid_vert_.create(size.width, size.height); + + // FIXME: Assume in and out are the same frame. + for (int i=0; i<iters; ++i) { + + if (aggre) { + ftl::cuda::mls_aggr_horiz( + in.createTexture<uchar4>(Channel::Support1), + in.createTexture<float4>(Channel::Normals), + normals_horiz_, + in.createTexture<float>(Channel::Depth), + centroid_horiz_, + in.createTexture<uchar4>(Channel::Colour), + thresh, + col_smooth, + radius, + s->parameters(), + stream + ); + + ftl::cuda::mls_aggr_vert( + in.createTexture<uchar4>(Channel::Support1), + normals_horiz_, + in.createTexture<float4>(Channel::Normals), + centroid_horiz_, + centroid_vert_, + in.createTexture<uchar4>(Channel::Colour), + in.createTexture<float>(Channel::Depth), + thresh, + col_smooth, + radius, + s->parameters(), + stream + ); + + ftl::cuda::mls_adjust_depth( + in.createTexture<float4>(Channel::Normals), + centroid_vert_, + in.createTexture<float>(Channel::Depth), + in.createTexture<float>(Channel::Depth2, ftl::rgbd::Format<float>(size)), + s->parameters(), + stream + ); + + in.swapChannels(Channel::Depth, Channel::Depth2); + //in.swapChannels(Channel::Normals, Channel::Points); + + } else { + ftl::cuda::colour_mls_smooth_csr( + in.createTexture<uchar4>(Channel::Support1), + in.createTexture<float4>(Channel::Normals), + in.createTexture<float4>(Channel::Points, ftl::rgbd::Format<float4>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), + in.createTexture<float>(Channel::Depth), + in.createTexture<float>(Channel::Depth2, ftl::rgbd::Format<float>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), + in.createTexture<uchar4>(Channel::Colour), + thresh, + col_smooth, + false, + s->parameters(), + stream + ); + + in.swapChannels(Channel::Depth, Channel::Depth2); + in.swapChannels(Channel::Normals, Channel::Points); + } + } + + return true; +} + + // ====== Adaptive MLS ========================================================= AdaptiveMLS::AdaptiveMLS(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) { @@ -241,7 +361,7 @@ bool AdaptiveMLS::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd:: in.createTexture<float>(Channel::Smoothing), radius, s->parameters(), - 0 + stream ); in.swapChannels(Channel::Depth, Channel::Depth2); diff --git a/components/operators/src/smoothing_cuda.hpp b/components/operators/src/smoothing_cuda.hpp index 53e8cf898c4e99875dc4a9881df58700ee31c127..6681c1a2a0383fdf69c7a62ab0b78346fede5f3a 100644 --- a/components/operators/src/smoothing_cuda.hpp +++ b/components/operators/src/smoothing_cuda.hpp @@ -29,6 +29,54 @@ void colour_mls_smooth( const ftl::rgbd::Camera &camera, cudaStream_t stream); +void colour_mls_smooth_csr( + ftl::cuda::TextureObject<uchar4> ®ion, + ftl::cuda::TextureObject<float4> &normals_in, + ftl::cuda::TextureObject<float4> &normals_out, + ftl::cuda::TextureObject<float> &depth_in, + ftl::cuda::TextureObject<float> &depth_out, + ftl::cuda::TextureObject<uchar4> &colour_in, + float smoothing, + float colour_smoothing, + bool filling, + const ftl::rgbd::Camera &camera, + cudaStream_t stream); + +void mls_adjust_depth( + ftl::cuda::TextureObject<float4> &normals_in, + ftl::cuda::TextureObject<float4> ¢roid_in, + ftl::cuda::TextureObject<float> &depth_in, + ftl::cuda::TextureObject<float> &depth_out, + const ftl::rgbd::Camera &camera, + cudaStream_t stream); + +void mls_aggr_horiz( + ftl::cuda::TextureObject<uchar4> ®ion, + ftl::cuda::TextureObject<float4> &normals_in, + ftl::cuda::TextureObject<float4> &normals_out, + ftl::cuda::TextureObject<float> &depth_in, + ftl::cuda::TextureObject<float4> ¢roid_out, + ftl::cuda::TextureObject<uchar4> &colour_in, + float smoothing, + float colour_smoothing, + int radius, + const ftl::rgbd::Camera &camera, + cudaStream_t stream); + +void mls_aggr_vert( + ftl::cuda::TextureObject<uchar4> ®ion, + ftl::cuda::TextureObject<float4> &normals_in, + ftl::cuda::TextureObject<float4> &normals_out, + ftl::cuda::TextureObject<float4> ¢roid_in, + ftl::cuda::TextureObject<float4> ¢roid_out, + ftl::cuda::TextureObject<uchar4> &colour_in, + ftl::cuda::TextureObject<float> &depth_in, + float smoothing, + float colour_smoothing, + int radius, + const ftl::rgbd::Camera &camera, + cudaStream_t stream); + void adaptive_mls_smooth( ftl::cuda::TextureObject<float4> &normals_in, ftl::cuda::TextureObject<float4> &normals_out, diff --git a/components/renderers/cpp/include/ftl/cuda/weighting.hpp b/components/renderers/cpp/include/ftl/cuda/weighting.hpp index bffff673d2009ac698c3c604ec565a2aa1a89901..b0c3f58b9c5a959c58fb9b0cba43bf27823aac4f 100644 --- a/components/renderers/cpp/include/ftl/cuda/weighting.hpp +++ b/components/renderers/cpp/include/ftl/cuda/weighting.hpp @@ -44,6 +44,20 @@ __device__ inline float colourDistance(uchar4 a, uchar4 b) { return ch*ch*ch*ch; } +/* + * Colour weighting as suggested in: + * C. Kuster et al. Spatio-Temporal Geometry Fusion for Multiple Hybrid Cameras using Moving Least Squares Surfaces. 2014. + * c = colour distance + */ + __device__ inline float colourWeighting(const float4 &a, const float4 &b, float h) { + const float3 delta = make_float3(a.x - b.x, a.y - b.y, a.z - b.z); + const float c = length(delta); + if (c >= h) return 0.0f; + float ch = c / h; + ch = 1.0f - ch*ch; + return ch*ch*ch*ch; +} + } } diff --git a/components/renderers/cpp/include/ftl/render/tri_render.hpp b/components/renderers/cpp/include/ftl/render/tri_render.hpp index 60d3dcbf6a68576981c1d72194dae731e8592fec..3d9183643e9f2fe183499cf8bbcc97699328ef60 100644 --- a/components/renderers/cpp/include/ftl/render/tri_render.hpp +++ b/components/renderers/cpp/include/ftl/render/tri_render.hpp @@ -43,6 +43,9 @@ class Triangular : public ftl::render::Renderer { cudaStream_t stream_; float3 light_pos_; + cv::cuda::GpuMat env_image_; + ftl::cuda::TextureObject<uchar4> env_tex_; + //ftl::Filters *filters_; template <typename T> diff --git a/components/renderers/cpp/src/reprojection.cu b/components/renderers/cpp/src/reprojection.cu index e74b4bb60963ab7d7d1798a96b22c90916a7ffc3..e6490c47eaf9033c2077cdd06457b3ff4f59103e 100644 --- a/components/renderers/cpp/src/reprojection.cu +++ b/components/renderers/cpp/src/reprojection.cu @@ -66,7 +66,7 @@ __global__ void reprojection_kernel( TextureObject<B> out, // Accumulated output TextureObject<float> contrib, SplatParams params, - Camera camera, float4x4 poseInv) { + Camera camera, float4x4 transform, float3x3 transformR) { const int x = (blockIdx.x*blockDim.x + threadIdx.x); const int y = blockIdx.y*blockDim.y + threadIdx.y; @@ -74,25 +74,25 @@ __global__ void reprojection_kernel( const float d = depth_in.tex2D((int)x, (int)y); if (d < params.camera.minDepth || d > params.camera.maxDepth) return; - const float3 worldPos = params.m_viewMatrixInverse * params.camera.screenToCam(x, y, d); + //const float3 worldPos = params.m_viewMatrixInverse * params.camera.screenToCam(x, y, d); //if (worldPos.x == MINF || (!(params.m_flags & ftl::render::kShowDisconMask) && worldPos.w < 0.0f)) return; - const float3 camPos = poseInv * worldPos; + const float3 camPos = transform * params.camera.screenToCam(x, y, d); if (camPos.z < camera.minDepth) return; if (camPos.z > camera.maxDepth) return; - const uint2 screenPos = camera.camToScreen<uint2>(camPos); + const float2 screenPos = camera.camToScreen<float2>(camPos); // Not on screen so stop now... if (screenPos.x >= depth_src.width() || screenPos.y >= depth_src.height()) return; // Calculate the dot product of surface normal and camera ray - const float3 n = poseInv.getFloat3x3() * make_float3(normals.tex2D((int)x, (int)y)); + const float3 n = transformR * make_float3(normals.tex2D((int)x, (int)y)); float3 ray = camera.screenToCam(screenPos.x, screenPos.y, 1.0f); ray = ray / length(ray); const float dotproduct = max(dot(ray,n),0.0f); - const float d2 = depth_src.tex2D((int)screenPos.x, (int)screenPos.y); - const A input = in.tex2D((int)screenPos.x, (int)screenPos.y); //generateInput(in.tex2D((int)screenPos.x, (int)screenPos.y), params, worldPos); + const float d2 = depth_src.tex2D(int(screenPos.x+0.5f), int(screenPos.y+0.5f)); + const auto input = in.tex2D(screenPos.x, screenPos.y); //generateInput(in.tex2D((int)screenPos.x, (int)screenPos.y), params, worldPos); // TODO: Z checks need to interpolate between neighbors if large triangles are used float weight = ftl::cuda::weighting(fabs(camPos.z - d2), 0.02f); @@ -123,7 +123,7 @@ void ftl::cuda::reproject( TextureObject<B> &out, // Accumulated output TextureObject<float> &contrib, const SplatParams ¶ms, - const Camera &camera, const float4x4 &poseInv, cudaStream_t stream) { + const Camera &camera, const float4x4 &transform, const float3x3 &transformR, cudaStream_t stream) { const dim3 gridSize((out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); @@ -136,7 +136,8 @@ void ftl::cuda::reproject( contrib, params, camera, - poseInv + transform, + transformR ); cudaSafeCall( cudaGetLastError() ); } @@ -150,7 +151,7 @@ template void ftl::cuda::reproject( ftl::cuda::TextureObject<float> &contrib, const ftl::render::SplatParams ¶ms, const ftl::rgbd::Camera &camera, - const float4x4 &poseInv, cudaStream_t stream); + const float4x4 &transform, const float3x3 &transformR, cudaStream_t stream); template void ftl::cuda::reproject( ftl::cuda::TextureObject<float> &in, // Original colour image @@ -161,7 +162,7 @@ template void ftl::cuda::reproject( ftl::cuda::TextureObject<float> &contrib, const ftl::render::SplatParams ¶ms, const ftl::rgbd::Camera &camera, - const float4x4 &poseInv, cudaStream_t stream); + const float4x4 &transform, const float3x3 &transformR, cudaStream_t stream); template void ftl::cuda::reproject( ftl::cuda::TextureObject<float4> &in, // Original colour image @@ -172,7 +173,7 @@ template void ftl::cuda::reproject( ftl::cuda::TextureObject<float> &contrib, const ftl::render::SplatParams ¶ms, const ftl::rgbd::Camera &camera, - const float4x4 &poseInv, cudaStream_t stream); + const float4x4 &transform, const float3x3 &transformR, cudaStream_t stream); //============================================================================== // Without normals @@ -197,19 +198,19 @@ __global__ void reprojection_kernel( const float d = depth_in.tex2D((int)x, (int)y); if (d < params.camera.minDepth || d > params.camera.maxDepth) return; - const float3 worldPos = params.m_viewMatrixInverse * params.camera.screenToCam(x, y, d); + //const float3 worldPos = params.m_viewMatrixInverse * params.camera.screenToCam(x, y, d); //if (worldPos.x == MINF || (!(params.m_flags & ftl::render::kShowDisconMask) && worldPos.w < 0.0f)) return; - const float3 camPos = poseInv * worldPos; + const float3 camPos = poseInv * params.camera.screenToCam(x, y, d); if (camPos.z < camera.minDepth) return; if (camPos.z > camera.maxDepth) return; - const uint2 screenPos = camera.camToScreen<uint2>(camPos); + const float2 screenPos = camera.camToScreen<float2>(camPos); // Not on screen so stop now... if (screenPos.x >= depth_src.width() || screenPos.y >= depth_src.height()) return; - const float d2 = depth_src.tex2D((int)screenPos.x, (int)screenPos.y); - const A input = in.tex2D((int)screenPos.x, (int)screenPos.y); //generateInput(in.tex2D((int)screenPos.x, (int)screenPos.y), params, worldPos); + const float d2 = depth_src.tex2D((int)(screenPos.x+0.5f), (int)(screenPos.y+0.5f)); + const auto input = in.tex2D(screenPos.x, screenPos.y); //generateInput(in.tex2D((int)screenPos.x, (int)screenPos.y), params, worldPos); float weight = ftl::cuda::weighting(fabs(camPos.z - d2), 0.02f); const B weighted = make<B>(input) * weight; @@ -274,3 +275,51 @@ template void ftl::cuda::reproject( const ftl::render::SplatParams ¶ms, const ftl::rgbd::Camera &camera, const float4x4 &poseInv, cudaStream_t stream); + + +// ===== Equirectangular Reprojection ========================================== + +__device__ inline float2 equirect_reprojection(int x_img, int y_img, double f, const float3x3 &rot, int w1, int h1, const ftl::rgbd::Camera &cam) { + float3 ray3d = cam.screenToCam(x_img, y_img, 1.0f); + ray3d /= length(ray3d); + ray3d = rot * ray3d; + + //inverse formula for spherical projection, reference Szeliski book "Computer Vision: Algorithms and Applications" p439. + float theta = atan2(ray3d.y,sqrt(ray3d.x*ray3d.x+ray3d.z*ray3d.z)); + float phi = atan2(ray3d.x, ray3d.z); + + const float pi = 3.14f; + + //get 2D point on equirectangular map + float x_sphere = (((phi*w1)/pi+w1)/2); + float y_sphere = (theta+ pi/2)*h1/pi; + + return make_float2(x_sphere,y_sphere); +}; + +__global__ void equirectangular_kernel( + TextureObject<uchar4> image_in, + TextureObject<uchar4> image_out, + Camera camera, float3x3 pose) { + + const int x = (blockIdx.x*blockDim.x + threadIdx.x); + const int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x >= 0 && y >= 0 && x < image_out.width() && y < image_out.height()) { + const float2 p = equirect_reprojection(x,y, camera.fx, pose, image_in.width(), image_in.height(), camera); + const float4 colour = image_in.tex2D(p.x, p.y); + image_out(x,y) = make_uchar4(colour.x, colour.y, colour.z, 0); + } +} + +void ftl::cuda::equirectangular_reproject( + ftl::cuda::TextureObject<uchar4> &image_in, + ftl::cuda::TextureObject<uchar4> &image_out, + const ftl::rgbd::Camera &camera, const float3x3 &pose, cudaStream_t stream) { + + const dim3 gridSize((image_out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (image_out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + equirectangular_kernel<<<gridSize, blockSize, 0, stream>>>(image_in, image_out, camera, pose); + cudaSafeCall( cudaGetLastError() ); +} diff --git a/components/renderers/cpp/src/splatter.cu b/components/renderers/cpp/src/splatter.cu index 6346daa883019453bef1d2f80be36ba5afa0aaf2..2986234bb3bbc24f762ff5ba0103ba173f4a0093 100644 --- a/components/renderers/cpp/src/splatter.cu +++ b/components/renderers/cpp/src/splatter.cu @@ -103,6 +103,40 @@ using ftl::cuda::warpSum; } } +/* + * Pass 1: Directly render each camera into virtual view but with no upsampling + * for sparse points. + */ + __global__ void dibr_merge_kernel(TextureObject<float> depth, + TextureObject<int> depth_out, + float4x4 transform, + ftl::rgbd::Camera cam, + SplatParams params) { + const int x = blockIdx.x*blockDim.x + threadIdx.x; + const int y = blockIdx.y*blockDim.y + threadIdx.y; + + const float d0 = depth.tex2D(x, y); + if (d0 <= cam.minDepth || d0 >= cam.maxDepth) return; + + const float3 camPos = transform * cam.screenToCam(x,y,d0); + //if (worldPos.x == MINF || (!(params.m_flags & ftl::render::kShowDisconMask) && worldPos.w < 0.0f)) return; + + // Find the virtual screen position of current point + //const float3 camPos = params.m_viewMatrix * make_float3(worldPos); + //if (camPos.z < params.camera.minDepth) return; + //if (camPos.z > params.camera.maxDepth) return; + + const float d = camPos.z; + + const uint2 screenPos = params.camera.camToScreen<uint2>(camPos); + const unsigned int cx = screenPos.x; + const unsigned int cy = screenPos.y; + if (d > params.camera.minDepth && d < params.camera.maxDepth && cx < depth.width() && cy < depth.height()) { + // Transform estimated point to virtual cam space and output z + atomicMin(&depth_out(cx,cy), d * 100000.0f); + } +} + void ftl::cuda::dibr_merge(TextureObject<float4> &points, TextureObject<float4> &normals, TextureObject<int> &depth, SplatParams params, bool culling, cudaStream_t stream) { const dim3 gridSize((depth.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); @@ -120,6 +154,14 @@ void ftl::cuda::dibr_merge(TextureObject<float4> &points, TextureObject<int> &de cudaSafeCall( cudaGetLastError() ); } +void ftl::cuda::dibr_merge(TextureObject<float> &depth, TextureObject<int> &depth_out, const float4x4 &transform, const ftl::rgbd::Camera &cam, SplatParams params, cudaStream_t stream) { + const dim3 gridSize((depth.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + dibr_merge_kernel<<<gridSize, blockSize, 0, stream>>>(depth, depth_out, transform, cam, params); + cudaSafeCall( cudaGetLastError() ); +} + //============================================================================== @@ -441,9 +483,11 @@ __global__ void dibr_normalise_kernel( const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; if (x < in.width() && y < in.height()) { + const float contrib = contribs.tex2D((int)x,(int)y); const A a = in.tex2D((int)x,(int)y); //const float4 normal = normals.tex2D((int)x,(int)y); - const float contrib = contribs.tex2D((int)x,(int)y); + + //out(x,y) = (contrib == 0.0f) ? make<B>(a) : make<B>(a / contrib); if (contrib > 0.0f) { out(x,y) = make<B>(a / contrib); @@ -464,3 +508,39 @@ void ftl::cuda::dibr_normalise(TextureObject<A> &in, TextureObject<B> &out, Text template void ftl::cuda::dibr_normalise<float4,uchar4>(TextureObject<float4> &in, TextureObject<uchar4> &out, TextureObject<float> &contribs, cudaStream_t stream); template void ftl::cuda::dibr_normalise<float,float>(TextureObject<float> &in, TextureObject<float> &out, TextureObject<float> &contribs, cudaStream_t stream); template void ftl::cuda::dibr_normalise<float4,float4>(TextureObject<float4> &in, TextureObject<float4> &out, TextureObject<float> &contribs, cudaStream_t stream); + + +// ===== Show bad colour normalise ============================================= + +__global__ void show_missing_colour_kernel( + TextureObject<float> depth, + TextureObject<uchar4> out, + TextureObject<float> contribs, + uchar4 bad_colour, + ftl::rgbd::Camera cam) { + const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; + const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x < out.width() && y < out.height()) { + const float contrib = contribs.tex2D((int)x,(int)y); + const float d = depth.tex2D(x,y); + + if (contrib < 0.0000001f && d > cam.minDepth && d < cam.maxDepth) { + out(x,y) = bad_colour; + } + } +} + +void ftl::cuda::show_missing_colour( + TextureObject<float> &depth, + TextureObject<uchar4> &out, + TextureObject<float> &contribs, + uchar4 bad_colour, + const ftl::rgbd::Camera &cam, + cudaStream_t stream) { + const dim3 gridSize((out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + show_missing_colour_kernel<<<gridSize, blockSize, 0, stream>>>(depth, out, contribs, bad_colour, cam); + cudaSafeCall( cudaGetLastError() ); +} diff --git a/components/renderers/cpp/src/splatter_cuda.hpp b/components/renderers/cpp/src/splatter_cuda.hpp index 838eda409761c7420a35944a45900ace0912f124..c81977cf38a4380df4143d2cd8d0609c69de8897 100644 --- a/components/renderers/cpp/src/splatter_cuda.hpp +++ b/components/renderers/cpp/src/splatter_cuda.hpp @@ -43,6 +43,14 @@ namespace cuda { ftl::render::SplatParams params, cudaStream_t stream); + void dibr_merge( + ftl::cuda::TextureObject<float> &depth, + ftl::cuda::TextureObject<int> &depth_out, + const float4x4 &transform, + const ftl::rgbd::Camera &cam, + ftl::render::SplatParams params, + cudaStream_t stream); + template <typename T> void splat( ftl::cuda::TextureObject<float4> &normals, @@ -72,7 +80,7 @@ namespace cuda { ftl::cuda::TextureObject<float> &contrib, const ftl::render::SplatParams ¶ms, const ftl::rgbd::Camera &camera, - const float4x4 &poseInv, cudaStream_t stream); + const float4x4 &transform, const float3x3 &transformR, cudaStream_t stream); template <typename A, typename B> void reproject( @@ -85,6 +93,11 @@ namespace cuda { const ftl::rgbd::Camera &camera, const float4x4 &poseInv, cudaStream_t stream); + void equirectangular_reproject( + ftl::cuda::TextureObject<uchar4> &image_in, + ftl::cuda::TextureObject<uchar4> &image_out, + const ftl::rgbd::Camera &camera, const float3x3 &pose, cudaStream_t stream); + template <typename A, typename B> void dibr_normalise( ftl::cuda::TextureObject<A> &in, @@ -92,6 +105,14 @@ namespace cuda { ftl::cuda::TextureObject<float> &contribs, cudaStream_t stream); + void show_missing_colour( + ftl::cuda::TextureObject<float> &depth, + ftl::cuda::TextureObject<uchar4> &out, + ftl::cuda::TextureObject<float> &contribs, + uchar4 bad_colour, + const ftl::rgbd::Camera &cam, + cudaStream_t stream); + void show_mask( ftl::cuda::TextureObject<uchar4> &colour, ftl::cuda::TextureObject<int> &mask, diff --git a/components/renderers/cpp/src/tri_render.cpp b/components/renderers/cpp/src/tri_render.cpp index 6d60a5f6685ac7d292d4a67b0d23df8b231f3b21..5912b6480b109db2c3cce960ae5bca6abe6531f8 100644 --- a/components/renderers/cpp/src/tri_render.cpp +++ b/components/renderers/cpp/src/tri_render.cpp @@ -130,6 +130,19 @@ Triangular::Triangular(nlohmann::json &config, ftl::rgbd::FrameSet *fs) : ftl::r on("light_y", [this](const ftl::config::Event &e) { light_pos_.y = value("light_y", 0.3f); }); on("light_z", [this](const ftl::config::Event &e) { light_pos_.z = value("light_z", 0.3f); }); + // Load any environment texture + std::string envimage = value("environment", std::string("")); + if (envimage.size() > 0) { + cv::Mat envim = cv::imread(envimage); + if (!envim.empty()) { + if (envim.type() == CV_8UC3) { + cv::cvtColor(envim,envim, cv::COLOR_BGR2BGRA); + } + env_image_.upload(envim); + env_tex_ = std::move(ftl::cuda::TextureObject<uchar4>(env_image_, true)); + } + } + cudaSafeCall(cudaStreamCreate(&stream_)); //filters_ = ftl::create<ftl::Filters>(this, "filters"); @@ -215,7 +228,8 @@ void Triangular::__reprojectChannel(ftl::rgbd::Frame &output, ftl::codecs::Chann cv::cuda::cvtColor(tmp,col, cv::COLOR_BGR2BGRA); } - auto poseInv = MatrixConversion::toCUDA(s->getPose().cast<float>().inverse()); + auto transform = MatrixConversion::toCUDA(s->getPose().cast<float>().inverse()) * params_.m_viewMatrixInverse; + auto transformR = MatrixConversion::toCUDA(s->getPose().cast<float>().inverse()).getFloat3x3(); if (mesh_) { ftl::cuda::reproject( @@ -227,7 +241,7 @@ void Triangular::__reprojectChannel(ftl::rgbd::Frame &output, ftl::codecs::Chann temp_.getTexture<float>(Channel::Contribution), params_, s->parameters(), - poseInv, stream + transform, transformR, stream ); } else { // Can't use normals with point cloud version @@ -239,7 +253,7 @@ void Triangular::__reprojectChannel(ftl::rgbd::Frame &output, ftl::codecs::Chann temp_.getTexture<float>(Channel::Contribution), params_, s->parameters(), - poseInv, stream + transform, stream ); } } @@ -287,9 +301,13 @@ void Triangular::_dibr(ftl::rgbd::Frame &out, cudaStream_t stream) { continue; } + auto transform = params_.m_viewMatrix * MatrixConversion::toCUDA(s->getPose().cast<float>()); + ftl::cuda::dibr_merge( - f.createTexture<float4>(Channel::Points), + f.createTexture<float>(Channel::Depth), temp_.createTexture<int>(Channel::Depth2), + transform, + s->parameters(), params_, stream ); } @@ -398,6 +416,53 @@ void Triangular::_renderChannel( _reprojectChannel(out, channel_in, channel_out, stream); } +/* + * H(Hue): 0 - 360 degree (integer) + * S(Saturation): 0 - 1.00 (double) + * V(Value): 0 - 1.00 (double) + * + * output[3]: Output, array size 3, int + */ +static cv::Scalar HSVtoRGB(int H, double S, double V) { + double C = S * V; + double X = C * (1 - abs(fmod(H / 60.0, 2) - 1)); + double m = V - C; + double Rs, Gs, Bs; + + if(H >= 0 && H < 60) { + Rs = C; + Gs = X; + Bs = 0; + } + else if(H >= 60 && H < 120) { + Rs = X; + Gs = C; + Bs = 0; + } + else if(H >= 120 && H < 180) { + Rs = 0; + Gs = C; + Bs = X; + } + else if(H >= 180 && H < 240) { + Rs = 0; + Gs = X; + Bs = C; + } + else if(H >= 240 && H < 300) { + Rs = X; + Gs = 0; + Bs = C; + } + else { + Rs = C; + Gs = 0; + Bs = X; + } + + return cv::Scalar((Bs + m) * 255, (Gs + m) * 255, (Rs + m) * 255, 0); +} + bool Triangular::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out) { SHARED_LOCK(scene_->mtx, lk); if (!src->isReady()) return false; @@ -411,6 +476,7 @@ bool Triangular::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out) { out.create<GpuMat>(Channel::Depth, Format<float>(camera.width, camera.height)); out.create<GpuMat>(Channel::Colour, Format<uchar4>(camera.width, camera.height)); + out.createTexture<uchar4>(Channel::Colour, true); // Force interpolated colour if (scene_->frames.size() == 0) return false; @@ -437,21 +503,36 @@ bool Triangular::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out) { // Clear all channels to 0 or max depth out.get<GpuMat>(Channel::Depth).setTo(cv::Scalar(1000.0f), cvstream); - out.get<GpuMat>(Channel::Colour).setTo(background_, cvstream); + + if (env_image_.empty() || !value("environment_enabled", false)) { + out.get<GpuMat>(Channel::Colour).setTo(background_, cvstream); + } else { + auto pose = params.m_viewMatrixInverse.getFloat3x3(); + ftl::cuda::equirectangular_reproject( + env_tex_, + out.createTexture<uchar4>(Channel::Colour, true), + camera, pose, stream_); + } //LOG(INFO) << "Render ready: " << camera.width << "," << camera.height; bool show_discon = value("show_discontinuity_mask", false); bool show_fill = value("show_filled", false); + bool colour_sources = value("colour_sources", false); temp_.createTexture<int>(Channel::Depth); //temp_.get<GpuMat>(Channel::Normals).setTo(cv::Scalar(0.0f,0.0f,0.0f,0.0f), cvstream); - // Display mask values + // Display mask values or otherwise alter colour image for (int i=0; i<scene_->frames.size(); ++i) { auto &f = scene_->frames[i]; auto s = scene_->sources[i]; + if (colour_sources) { + auto colour = HSVtoRGB(360 / scene_->frames.size() * i, 0.6, 0.85); //(i == 0) ? cv::Scalar(255,0,0,0) : cv::Scalar(0,255,0,0); + f.get<GpuMat>(Channel::Colour).setTo(colour, cvstream); + } + if (f.hasChannel(Channel::Mask)) { if (show_discon) { ftl::cuda::show_mask(f.getTexture<uchar4>(Channel::Colour), f.getTexture<int>(Channel::Mask), Mask::kMask_Discontinuity, make_uchar4(0,0,255,255), stream_); @@ -500,19 +581,19 @@ bool Triangular::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out) { if (aligned_source >= 0 && aligned_source < scene_->frames.size()) { // FIXME: Output may not be same resolution as source! cudaSafeCall(cudaStreamSynchronize(stream_)); - scene_->frames[aligned_source].copyTo(Channel::Depth + Channel::Colour + Channel::Smoothing, out); + scene_->frames[aligned_source].copyTo(Channel::Depth + Channel::Colour + Channel::Smoothing + Channel::Confidence, out); - if (chan == Channel::Normals) { + if (chan == Channel::ColourNormals) { // Convert normal to single float value - temp_.create<GpuMat>(Channel::Colour, Format<uchar4>(camera.width, camera.height)).setTo(cv::Scalar(0,0,0,0), cvstream); - ftl::cuda::normal_visualise(scene_->frames[aligned_source].getTexture<float4>(Channel::Normals), temp_.createTexture<uchar4>(Channel::Colour), + out.create<GpuMat>(Channel::ColourNormals, Format<uchar4>(out.get<GpuMat>(Channel::Colour).size())).setTo(cv::Scalar(0,0,0,0), cvstream); + ftl::cuda::normal_visualise(scene_->frames[aligned_source].getTexture<float4>(Channel::Normals), out.createTexture<uchar4>(Channel::ColourNormals), light_pos_, light_diffuse_, light_ambient_, stream_); // Put in output as single float - cv::cuda::swap(temp_.get<GpuMat>(Channel::Colour), out.create<GpuMat>(Channel::Normals)); - out.resetTexture(Channel::Normals); + //cv::cuda::swap(temp_.get<GpuMat>(Channel::Colour), out.create<GpuMat>(Channel::Normals)); + //out.resetTexture(Channel::Normals); } return true; @@ -527,20 +608,35 @@ bool Triangular::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out) { // Reprojection of colours onto surface _renderChannel(out, Channel::Colour, Channel::Colour, stream_); + + if (value("show_bad_colour", false)) { + ftl::cuda::show_missing_colour( + out.getTexture<float>(Channel::Depth), + out.getTexture<uchar4>(Channel::Colour), + temp_.getTexture<float>(Channel::Contribution), + make_uchar4(255,0,0,0), + camera, + stream_ + ); + } if (chan == Channel::Depth) { // Just convert int depth to float depth //temp_.get<GpuMat>(Channel::Depth2).convertTo(out.get<GpuMat>(Channel::Depth), CV_32F, 1.0f / 100000.0f, cvstream); - } else if (chan == Channel::Normals) { + } else if (chan == Channel::ColourNormals) { // Visualise normals to RGBA - accum_.create<GpuMat>(Channel::Normals, Format<uchar4>(camera.width, camera.height)).setTo(cv::Scalar(0,0,0,0), cvstream); - ftl::cuda::normal_visualise(out.getTexture<float4>(Channel::Normals), accum_.createTexture<uchar4>(Channel::Normals), + out.create<GpuMat>(Channel::ColourNormals, Format<uchar4>(camera.width, camera.height)).setTo(cv::Scalar(0,0,0,0), cvstream); + + ftl::cuda::normal_visualise(out.getTexture<float4>(Channel::Normals), out.createTexture<uchar4>(Channel::ColourNormals), light_pos_, light_diffuse_, light_ambient_, stream_); - accum_.swapTo(Channels(Channel::Normals), out); + //accum_.swapTo(Channels(Channel::Normals), out); + //cv::cuda::swap(accum_.get<GpuMat>(Channel::Normals), out.get<GpuMat>(Channel::Normals)); + //out.resetTexture(Channel::Normals); + //accum_.resetTexture(Channel::Normals); } //else if (chan == Channel::Contribution) //{ @@ -561,7 +657,7 @@ bool Triangular::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out) { Eigen::Matrix4f transform = Eigen::Matrix4f::Identity(); transform(0, 3) = baseline; - Eigen::Matrix4f matrix = transform.inverse() * src->getPose().cast<float>(); + Eigen::Matrix4f matrix = src->getPose().cast<float>() * transform.inverse(); params.m_viewMatrix = MatrixConversion::toCUDA(matrix.inverse()); params.m_viewMatrixInverse = MatrixConversion::toCUDA(matrix); @@ -570,6 +666,7 @@ bool Triangular::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out) { out.create<GpuMat>(Channel::Right, Format<uchar4>(camera.width, camera.height)); out.get<GpuMat>(Channel::Right).setTo(background_, cvstream); + out.createTexture<uchar4>(Channel::Right, true); // Need to re-dibr due to pose change if (mesh_) { diff --git a/components/rgbd-sources/CMakeLists.txt b/components/rgbd-sources/CMakeLists.txt index 0dbd5ccf8cca5aecea9b270c6fc28d4b53b3b4ac..6027ab17a7d64110a72dd51e6cfde098c0b0abc7 100644 --- a/components/rgbd-sources/CMakeLists.txt +++ b/components/rgbd-sources/CMakeLists.txt @@ -1,12 +1,11 @@ set(RGBDSRC src/sources/stereovideo/calibrate.cpp src/sources/stereovideo/local.cpp - src/disparity.cpp src/source.cpp src/frame.cpp src/frameset.cpp src/sources/stereovideo/stereovideo.cpp - src/sources/middlebury/middlebury_source.cpp +# src/sources/middlebury/middlebury_source.cpp src/sources/net/net.cpp src/streamer.cpp src/colour.cpp @@ -17,7 +16,6 @@ set(RGBDSRC # src/algorithms/opencv_bm.cpp src/cb_segmentation.cpp src/abr.cpp - src/offilter.cpp src/sources/virtual/virtual.cpp src/sources/ftlfile/file_source.cpp src/sources/ftlfile/player.cpp @@ -34,14 +32,12 @@ if (LibArchive_FOUND) ) endif (LibArchive_FOUND) -if (LIBSGM_FOUND) - list(APPEND RGBDSRC "src/algorithms/fixstars_sgm.cpp") -endif (LIBSGM_FOUND) +#if (LIBSGM_FOUND) +# list(APPEND RGBDSRC "src/algorithms/fixstars_sgm.cpp") +#endif (LIBSGM_FOUND) if (CUDA_FOUND) list(APPEND RGBDSRC - src/algorithms/disp2depth.cu - src/algorithms/offilter.cu # "src/algorithms/opencv_cuda_bm.cpp" # "src/algorithms/opencv_cuda_bp.cpp" # "src/algorithms/rtcensus.cu" @@ -69,7 +65,7 @@ set_property(TARGET ftlrgbd PROPERTY CUDA_SEPARABLE_COMPILATION OFF) endif() #target_include_directories(cv-node PUBLIC ${PROJECT_SOURCE_DIR}/include) -target_link_libraries(ftlrgbd ftlcommon ${OpenCV_LIBS} ${LIBSGM_LIBRARIES} ${CUDA_LIBRARIES} Eigen3::Eigen ${REALSENSE_LIBRARY} ftlnet ${LibArchive_LIBRARIES} ftlcodecs) +target_link_libraries(ftlrgbd ftlcommon ${OpenCV_LIBS} ${LIBSGM_LIBRARIES} ${CUDA_LIBRARIES} Eigen3::Eigen ${REALSENSE_LIBRARY} ftlnet ${LibArchive_LIBRARIES} ftlcodecs ftloperators) add_subdirectory(test) diff --git a/components/rgbd-sources/include/ftl/offilter.hpp b/components/rgbd-sources/include/ftl/offilter.hpp index 6aece39aab241dfc7605dfb208d7d30ba6135509..0e273d80d0382dd1a805bb50f658e8180ded567e 100644 --- a/components/rgbd-sources/include/ftl/offilter.hpp +++ b/components/rgbd-sources/include/ftl/offilter.hpp @@ -17,6 +17,7 @@ public: OFDisparityFilter() : n_max_(0), threshold_(0.0) {} OFDisparityFilter(cv::Size size, int n_frames, float threshold); void filter(ftl::rgbd::Frame &frame, cv::cuda::Stream &stream); + void filter(cv::cuda::GpuMat &disp, cv::cuda::GpuMat &optflow, cv::cuda::Stream &stream); private: int n_max_; diff --git a/components/rgbd-sources/include/ftl/rgbd/frame.hpp b/components/rgbd-sources/include/ftl/rgbd/frame.hpp index 52bbe9022987a85c3bf4398e9e3287011943ecac..e7a949600e6ba097aeda54460e83a1529851371e 100644 --- a/components/rgbd-sources/include/ftl/rgbd/frame.hpp +++ b/components/rgbd-sources/include/ftl/rgbd/frame.hpp @@ -81,14 +81,14 @@ public: * argument to also create (or recreate) the associated GpuMat. */ template <typename T> - ftl::cuda::TextureObject<T> &createTexture(ftl::codecs::Channel c, const ftl::rgbd::Format<T> &f); + ftl::cuda::TextureObject<T> &createTexture(ftl::codecs::Channel c, const ftl::rgbd::Format<T> &f, bool interpolated=false); /** * Create a CUDA texture object for a channel. With this version the GpuMat * must already exist and be of the correct type. */ template <typename T> - ftl::cuda::TextureObject<T> &createTexture(ftl::codecs::Channel c); + ftl::cuda::TextureObject<T> &createTexture(ftl::codecs::Channel c, bool interpolated=false); void resetTexture(ftl::codecs::Channel c); @@ -200,7 +200,7 @@ ftl::cuda::TextureObject<T> &Frame::getTexture(ftl::codecs::Channel c) { } template <typename T> -ftl::cuda::TextureObject<T> &Frame::createTexture(ftl::codecs::Channel c, const ftl::rgbd::Format<T> &f) { +ftl::cuda::TextureObject<T> &Frame::createTexture(ftl::codecs::Channel c, const ftl::rgbd::Format<T> &f, bool interpolated) { if (!channels_.has(c)) channels_ += c; if (!gpu_.has(c)) gpu_ += c; @@ -221,19 +221,19 @@ ftl::cuda::TextureObject<T> &Frame::createTexture(ftl::codecs::Channel c, const if (m.tex.devicePtr() == nullptr) { //LOG(INFO) << "Creating texture object"; - m.tex = ftl::cuda::TextureObject<T>(m.gpu); + m.tex = ftl::cuda::TextureObject<T>(m.gpu, interpolated); } else if (m.tex.cvType() != ftl::traits::OpenCVType<T>::value || m.tex.width() != m.gpu.cols || m.tex.height() != m.gpu.rows) { LOG(INFO) << "Recreating texture object for '" << ftl::codecs::name(c) << "'"; m.tex.free(); - m.tex = ftl::cuda::TextureObject<T>(m.gpu); + m.tex = ftl::cuda::TextureObject<T>(m.gpu, interpolated); } return ftl::cuda::TextureObject<T>::cast(m.tex); } template <typename T> -ftl::cuda::TextureObject<T> &Frame::createTexture(ftl::codecs::Channel c) { - if (!channels_.has(c)) throw ftl::exception("createTexture needs a format if the channel does not exist"); +ftl::cuda::TextureObject<T> &Frame::createTexture(ftl::codecs::Channel c, bool interpolated) { + if (!channels_.has(c)) throw ftl::exception(ftl::Formatter() << "createTexture needs a format if the channel does not exist: " << (int)c); auto &m = _get(c); @@ -254,11 +254,11 @@ ftl::cuda::TextureObject<T> &Frame::createTexture(ftl::codecs::Channel c) { if (m.tex.devicePtr() == nullptr) { //LOG(INFO) << "Creating texture object"; - m.tex = ftl::cuda::TextureObject<T>(m.gpu); + m.tex = ftl::cuda::TextureObject<T>(m.gpu, interpolated); } else if (m.tex.cvType() != ftl::traits::OpenCVType<T>::value || m.tex.width() != m.gpu.cols || m.tex.height() != m.gpu.rows || m.tex.devicePtr() != m.gpu.data) { LOG(INFO) << "Recreating texture object for '" << ftl::codecs::name(c) << "'."; m.tex.free(); - m.tex = ftl::cuda::TextureObject<T>(m.gpu); + m.tex = ftl::cuda::TextureObject<T>(m.gpu, interpolated); } return ftl::cuda::TextureObject<T>::cast(m.tex); diff --git a/components/rgbd-sources/src/algorithms/fixstars_sgm.cpp b/components/rgbd-sources/src/algorithms/fixstars_sgm.cpp index 732c4ffb56a65a2ac8f53841b5dcb45e52ed5280..bbcad8c63f923bd92cbda56cb009e88d2ec50eb0 100644 --- a/components/rgbd-sources/src/algorithms/fixstars_sgm.cpp +++ b/components/rgbd-sources/src/algorithms/fixstars_sgm.cpp @@ -27,28 +27,68 @@ FixstarsSGM::FixstarsSGM(nlohmann::json &config) : Disparity(config) { CHECK(P1_ >= 0); CHECK(P2_ > P1_); - use_filter_ = value("use_filter", false); - if (use_filter_) { - int radius = value("filter_radius", 25); - int iter = value("filter_iter", 1); - CHECK(radius > 0) << "filter_radius must be greater than 0"; - CHECK(iter > 0) << "filter_iter must be greater than 0"; + updateBilateralFilter(); - filter_ = cv::cuda::createDisparityBilateralFilter(max_disp_ << 4, radius, iter); - } + on("use_filter", [this](const ftl::config::Event&) { + updateBilateralFilter(); + }); + + on("filter_radius", [this](const ftl::config::Event&) { + updateBilateralFilter(); + }); + + on("filter_iter", [this](const ftl::config::Event&) { + updateBilateralFilter(); + }); #ifdef HAVE_OPTFLOW - use_off_ = value("use_off", false); +/* + updateOFDisparityFilter(); - if (use_off_) - { - int off_size = value("off_size", 9); - double off_threshold = value("off_threshold", 0.9); - off_ = ftl::rgbd::OFDisparityFilter(size_, off_size, off_threshold); - } + on("use_off", [this](const ftl::config::Event&) { + updateOFDisparityFilter(); + }); + + on("use_off", [this](const ftl::config::Event&) { + updateOFDisparityFilter(); + }); +*/ #endif init(size_); + + on("uniqueness", [this](const ftl::config::Event&) { + float uniqueness = value("uniqueness", uniqueness_); + if ((uniqueness >= 0.0) && (uniqueness <= 1.0)) { + uniqueness_ = uniqueness; + updateParameters(); + } + else { + LOG(WARNING) << "invalid uniquness: " << uniqueness; + } + }); + + on("P1", [this](const ftl::config::Event&) { + int P1 = value("P1", P1_); + if (P1 <= P2_) { + P1_ = P1; + updateParameters(); + } + else { + LOG(WARNING) << "invalid P1: " << P1 << ", (P1 <= P2), P2 is " << P2_; + } + }); + + on("P2", [this](const ftl::config::Event&) { + int P2 = value("P2", P2_); + if (P2 >= P1_) { + P2_ = P2; + updateParameters(); + } + else { + LOG(WARNING) << "invalid P2: " << P2 << ", (P1 <= P2), P1 is " << P1_; + } + }); } void FixstarsSGM::init(const cv::Size size) { @@ -64,22 +104,73 @@ void FixstarsSGM::init(const cv::Size size) { ); } -void FixstarsSGM::compute(ftl::rgbd::Frame &frame, cv::cuda::Stream &stream) -{ - /*if (!frame.hasChannel(ftl::rgbd::kChanLeftGray)) - { - auto &rgb = frame.getChannel<GpuMat>(ftl::rgbd::kChanLeft, stream); - auto &gray = frame.setChannel<GpuMat>(ftl::rgbd::kChanLeftGray); - cv::cuda::cvtColor(rgb, gray, cv::COLOR_BGR2GRAY, 0, stream); +bool FixstarsSGM::updateParameters() { + if (ssgm_ == nullptr) { return false; } + return this->ssgm_->updateParameters( + sgm::StereoSGM::Parameters(P1_, P2_, uniqueness_, true)); +} + +bool FixstarsSGM::updateBilateralFilter() { + bool enable = value("use_filter", false); + int radius = value("filter_radius", 25); + int iter = value("filter_iter", 1); + + if (enable) { + if (radius <= 0) { + LOG(WARNING) << "filter_radius must be greater than 0"; + enable = false; + } + if (iter <= 0) { + LOG(WARNING) << "filter_iter must be greater than 0"; + enable = false; + } + } + + if (enable) { + filter_ = cv::cuda::createDisparityBilateralFilter(max_disp_ << 4, radius, iter); + use_filter_ = true; + } + else { + use_filter_ = false; } - if (!frame.hasChannel(ftl::rgbd::kChanRightGray)) - { - auto &rgb = frame.getChannel<GpuMat>(ftl::rgbd::kChanRight, stream); - auto &gray = frame.setChannel<GpuMat>(ftl::rgbd::kChanRightGray); - cv::cuda::cvtColor(rgb, gray, cv::COLOR_BGR2GRAY, 0, stream); - }*/ + return use_filter_; +} + +#ifdef HAVE_OPTFLOW +/* +bool FixstarsSGM::updateOFDisparityFilter() { + bool enable = value("use_off", false); + int off_size = value("off_size", 9); + double off_threshold = value("off_threshold", 0.9); + + if (enable) { + if (off_size <= 0) { + LOG(WARNING) << "bad off_size: " << off_size; + enable = false; + } + + if (off_threshold <= 0.0) { + LOG(WARNING) << "bad off_threshold: " << off_threshold; + enable = false; + } + } + + if (enable) { + LOG(INFO) << "Optical flow filter, size: " << off_size << ", threshold: " << off_threshold; + off_ = ftl::rgbd::OFDisparityFilter(size_, off_size, off_threshold); + use_off_ = true; + } + else { + use_off_ = false; + } + + return use_off_; +}*/ +#endif +void FixstarsSGM::compute(ftl::rgbd::Frame &frame, cv::cuda::Stream &stream) +{ const auto &l = frame.get<GpuMat>(Channel::Left); const auto &r = frame.get<GpuMat>(Channel::Right); auto &disp = frame.create<GpuMat>(Channel::Disparity, Format<float>(l.size())); @@ -100,21 +191,10 @@ void FixstarsSGM::compute(ftl::rgbd::Frame &frame, cv::cuda::Stream &stream) stream.waitForCompletion(); ssgm_->execute(lbw_.data, rbw_.data, dispt_.data); - GpuMat left_pixels(dispt_, cv::Rect(0, 0, max_disp_, dispt_.rows)); - left_pixels.setTo(0); + // GpuMat left_pixels(dispt_, cv::Rect(0, 0, max_disp_, dispt_.rows)); + // left_pixels.setTo(0); cv::cuda::threshold(dispt_, dispt_, 4096.0f, 0.0f, cv::THRESH_TOZERO_INV, stream); - // TODO: filter could be applied after upscaling (to the upscaled disparity image) - if (use_filter_) - { - filter_->apply( - dispt_, - (l.size() == size_) ? l : l_scaled, - dispt_, - stream - ); - } - GpuMat dispt_scaled; if (l.size() != size_) { @@ -125,10 +205,28 @@ void FixstarsSGM::compute(ftl::rgbd::Frame &frame, cv::cuda::Stream &stream) dispt_scaled = dispt_; } + // TODO: filter could be applied after upscaling (to the upscaled disparity image) + if (use_filter_) + { + filter_->apply( + dispt_, + l, + dispt_, + stream + ); + } + dispt_scaled.convertTo(disp, CV_32F, 1.0f / 16.0f, stream); #ifdef HAVE_OPTFLOW - if (use_off_) { off_.filter(frame, stream); } +/* + // TODO: Optical flow filter expects CV_32F + if (use_off_) { + frame.upload(Channel::Flow, stream); + stream.waitForCompletion(); + off_.filter(disp, frame.get<GpuMat>(Channel::Flow), stream); + } +*/ #endif } diff --git a/components/rgbd-sources/src/algorithms/fixstars_sgm.hpp b/components/rgbd-sources/src/algorithms/fixstars_sgm.hpp index d2c0c1d2a8fd459695bb02c9f66f17e423dc9fa5..2de1c41ef377570915397ea2758d265fa875b179 100644 --- a/components/rgbd-sources/src/algorithms/fixstars_sgm.hpp +++ b/components/rgbd-sources/src/algorithms/fixstars_sgm.hpp @@ -41,6 +41,11 @@ namespace ftl { private: void init(const cv::Size size); + bool updateParameters(); + bool updateBilateralFilter(); + #ifdef HAVE_OPTFLOW + bool updateOFDisparityFilter(); + #endif float uniqueness_; int P1_; @@ -54,7 +59,7 @@ namespace ftl { cv::cuda::GpuMat dispt_; #ifdef HAVE_OPTFLOW - ftl::rgbd::OFDisparityFilter off_; + //ftl::rgbd::OFDisparityFilter off_; #endif }; }; diff --git a/components/rgbd-sources/src/cuda_algorithms.hpp b/components/rgbd-sources/src/cuda_algorithms.hpp index 439c16cfc21fef08086bb69b7e84b1c8b49fec74..43fff52d0c1d00aa2ff7960456db1fb827ad5cc6 100644 --- a/components/rgbd-sources/src/cuda_algorithms.hpp +++ b/components/rgbd-sources/src/cuda_algorithms.hpp @@ -38,13 +38,6 @@ namespace cuda { void texture_map(const TextureObject<uchar4> &t, TextureObject<float> &f); - void disparity_to_depth(const cv::cuda::GpuMat &disparity, cv::cuda::GpuMat &depth, - const ftl::rgbd::Camera &c, cv::cuda::Stream &stream); - - void optflow_filter(cv::cuda::GpuMat &disp, const cv::cuda::GpuMat &optflow, - cv::cuda::GpuMat &history, int n_max, float threshold, - cv::cuda::Stream &stream); - } } diff --git a/components/rgbd-sources/src/offilter.cpp b/components/rgbd-sources/src/offilter.cpp deleted file mode 100644 index 34c01c1feb73b8171b0890f8d94405ee22a0433b..0000000000000000000000000000000000000000 --- a/components/rgbd-sources/src/offilter.cpp +++ /dev/null @@ -1,42 +0,0 @@ -#include "ftl/offilter.hpp" -#include "cuda_algorithms.hpp" - -#ifdef HAVE_OPTFLOW - -#include <loguru.hpp> - -using namespace ftl::rgbd; -using namespace ftl::codecs; - -using cv::Mat; -using cv::Size; - -using std::vector; - -template<typename T> static bool inline isValidDisparity(T d) { return (0.0 < d) && (d < 256.0); } // TODO - -OFDisparityFilter::OFDisparityFilter(Size size, int n_frames, float threshold) : - n_max_(n_frames + 1), threshold_(threshold) -{ - CHECK((n_max_ > 1) && (n_max_ <= 32)) << "History length must be between 0 and 31!"; - disp_old_ = cv::cuda::GpuMat(cv::Size(size.width * n_max_, size.height), CV_32FC1); - - /*nvof_ = cv::cuda::NvidiaOpticalFlow_1_0::create(size.width, size.height, - cv::cuda::NvidiaOpticalFlow_1_0::NV_OF_PERF_LEVEL_SLOW, - true, false, false, 0);*/ - -} - -void OFDisparityFilter::filter(ftl::rgbd::Frame &frame, cv::cuda::Stream &stream) -{ - frame.upload(Channel::Flow, stream); - const cv::cuda::GpuMat &optflow = frame.get<cv::cuda::GpuMat>(Channel::Flow); - //frame.get<cv::cuda::GpuMat>(Channel::Disparity); - stream.waitForCompletion(); - if (optflow.empty()) { return; } - - cv::cuda::GpuMat &disp = frame.create<cv::cuda::GpuMat>(Channel::Disparity); - ftl::cuda::optflow_filter(disp, optflow, disp_old_, n_max_, threshold_, stream); -} - -#endif // HAVE_OPTFLOW diff --git a/components/rgbd-sources/src/source.cpp b/components/rgbd-sources/src/source.cpp index b0ee77a449e33b82641c18aa0c7a039ed42e8626..13cdd5487edf0b7cbc99f7cd9dd7032b43d31185 100644 --- a/components/rgbd-sources/src/source.cpp +++ b/components/rgbd-sources/src/source.cpp @@ -104,8 +104,8 @@ ftl::rgbd::detail::Source *Source::_createFileImpl(const ftl::URI &uri) { if (ftl::is_directory(path)) { if (ftl::is_file(path + "/video.mp4")) { return new StereoVideoSource(this, path); - } else if (ftl::is_file(path + "/im0.png")) { - return new MiddleburySource(this, path); +// } else if (ftl::is_file(path + "/im0.png")) { +// return new MiddleburySource(this, path); } else { LOG(ERROR) << "Directory is not a valid RGBD source: " << path; } diff --git a/components/rgbd-sources/src/sources/ftlfile/file_source.cpp b/components/rgbd-sources/src/sources/ftlfile/file_source.cpp index 9597bde796f80ac67f38a5a569190001e13d7065..0962c1886dc199e50530343c0d01edf4e74e37f0 100644 --- a/components/rgbd-sources/src/sources/ftlfile/file_source.cpp +++ b/components/rgbd-sources/src/sources/ftlfile/file_source.cpp @@ -30,6 +30,13 @@ FileSource::FileSource(ftl::rgbd::Source *s, ftl::rgbd::Player *r, int sid) : ft realtime_ = host_->value("realtime", true); timestamp_ = r->getStartTime(); sourceid_ = sid; + freeze_ = host_->value("freeze", false); + have_frozen_ = false; + + host_->on("freeze", [this](const ftl::config::Event &e) { + have_frozen_ = false; + freeze_ = host_->value("freeze", false); + }); r->onPacket(sid, [this](const ftl::codecs::StreamPacket &spkt, ftl::codecs::Packet &pkt) { host_->notifyRaw(spkt, pkt); @@ -133,18 +140,28 @@ bool FileSource::capture(int64_t ts) { } bool FileSource::retrieve() { - if (!reader_->read(timestamp_)) { + if (!have_frozen_ && !reader_->read(timestamp_)) { cache_write_ = -1; } return true; } void FileSource::swap() { + if (have_frozen_) return; cache_read_ = cache_write_; cache_write_ = (cache_write_ == 0) ? 1 : 0; } bool FileSource::compute(int n, int b) { + // Freeze frame requires a copy to be made each time... + if (have_frozen_) { + cv::cuda::GpuMat t1, t2; + if (!rgb_.empty()) rgb_.copyTo(t1); + if (!depth_.empty()) depth_.copyTo(t2); + host_->notify(timestamp_, t1, t2); + return true; + } + if (cache_read_ < 0) return false; if (cache_[cache_read_].size() == 0) return false; @@ -191,7 +208,11 @@ bool FileSource::compute(int n, int b) { if (rgb_.empty() || depth_.empty()) return false; // Inform about a decoded frame pair - host_->notify(timestamp_, rgb_, depth_); + if (freeze_ && !have_frozen_) { + have_frozen_ = true; + } else { + host_->notify(timestamp_, rgb_, depth_); + } return true; } diff --git a/components/rgbd-sources/src/sources/ftlfile/file_source.hpp b/components/rgbd-sources/src/sources/ftlfile/file_source.hpp index 2d59b68cb1e07d10aade07b33780af2497736a12..f8a26da37802e0aa2aae4681b420e6061c230fd2 100644 --- a/components/rgbd-sources/src/sources/ftlfile/file_source.hpp +++ b/components/rgbd-sources/src/sources/ftlfile/file_source.hpp @@ -43,6 +43,8 @@ class FileSource : public detail::Source { ftl::codecs::Decoder *decoders_[2]; bool realtime_; + bool freeze_; + bool have_frozen_; void _processCalibration(ftl::codecs::Packet &pkt); void _processPose(ftl::codecs::Packet &pkt); diff --git a/components/rgbd-sources/src/sources/middlebury/middlebury_source.cpp b/components/rgbd-sources/src/sources/middlebury/middlebury_source.cpp index d152d25e5a7d8830e8b555851f94d97b70463e29..229895049061f2308f1a982eab46005a26940548 100644 --- a/components/rgbd-sources/src/sources/middlebury/middlebury_source.cpp +++ b/components/rgbd-sources/src/sources/middlebury/middlebury_source.cpp @@ -149,7 +149,7 @@ void MiddleburySource::_performDisparity() { //calib_->rectifyStereo(left_, right_, stream_); disp_->compute(rgb_, right_, disp_tmp_, stream_); //disparityToDepth(disp_tmp_, depth_tmp_, params_, stream_); - ftl::cuda::disparity_to_depth(disp_tmp_, depth_, params_, stream_); + //ftl::cuda::disparity_to_depth(disp_tmp_, depth_, params_, stream_); //left_.download(rgb_, stream_); //rgb_ = lsrc_->cachedLeft(); //depth_tmp_.download(depth_, stream_); diff --git a/components/rgbd-sources/src/sources/stereovideo/stereovideo.cpp b/components/rgbd-sources/src/sources/stereovideo/stereovideo.cpp index f8e08a9f6d1f7670adeb8be9310129e8353daf1b..84d1e574b8e7aff91c774a1035c75280cd8ac03c 100644 --- a/components/rgbd-sources/src/sources/stereovideo/stereovideo.cpp +++ b/components/rgbd-sources/src/sources/stereovideo/stereovideo.cpp @@ -1,13 +1,26 @@ #include <loguru.hpp> + #include "stereovideo.hpp" -#include <ftl/configuration.hpp> -#include <ftl/threads.hpp> + +#include "ftl/configuration.hpp" + +#ifdef HAVE_OPTFLOW +#include "ftl/operators/opticalflow.hpp" +#endif + + +#include "ftl/operators/smoothing.hpp" +#include "ftl/operators/colours.hpp" +#include "ftl/operators/normals.hpp" +#include "ftl/operators/filling.hpp" +#include "ftl/operators/segmentation.hpp" +#include "ftl/operators/disparity.hpp" +#include "ftl/operators/mask.hpp" + +#include "ftl/threads.hpp" #include "calibrate.hpp" #include "local.hpp" #include "disparity.hpp" -#include "cuda_algorithms.hpp" - -#include "cuda_algorithms.hpp" using ftl::rgbd::detail::Calibrate; using ftl::rgbd::detail::LocalSource; @@ -27,13 +40,11 @@ StereoVideoSource::StereoVideoSource(ftl::rgbd::Source *host, const string &file } StereoVideoSource::~StereoVideoSource() { - delete disp_; delete calib_; delete lsrc_; } -void StereoVideoSource::init(const string &file) -{ +void StereoVideoSource::init(const string &file) { capabilities_ = kCapVideo | kCapStereo; if (ftl::is_video(file)) { @@ -61,18 +72,7 @@ void StereoVideoSource::init(const string &file) cv::Size size = cv::Size(lsrc_->width(), lsrc_->height()); frames_ = std::vector<Frame>(2); -#ifdef HAVE_OPTFLOW - use_optflow_ = host_->value("use_optflow", false); - LOG(INFO) << "Using optical flow: " << (use_optflow_ ? "true" : "false"); - - nvof_ = cv::cuda::NvidiaOpticalFlow_1_0::create(size.width, size.height, - cv::cuda::NvidiaOpticalFlow_1_0::NV_OF_PERF_LEVEL_SLOW, - true, false, false, 0); - -#endif - calib_ = ftl::create<Calibrate>(host_, "calibration", size, stream_); - if (!calib_->isCalibrated()) LOG(WARNING) << "Cameras are not calibrated!"; // Generate camera parameters from camera matrix @@ -117,20 +117,33 @@ void StereoVideoSource::init(const string &file) }); // left and right masks (areas outside rectified images) - // only left mask used + // only left mask used (not used) cv::cuda::GpuMat mask_r_gpu(lsrc_->height(), lsrc_->width(), CV_8U, 255); cv::cuda::GpuMat mask_l_gpu(lsrc_->height(), lsrc_->width(), CV_8U, 255); - calib_->rectifyStereo(mask_l_gpu, mask_r_gpu, stream_); stream_.waitForCompletion(); - cv::Mat mask_l; mask_l_gpu.download(mask_l); mask_l_ = (mask_l == 0); - disp_ = Disparity::create(host_, "disparity"); - if (!disp_) LOG(FATAL) << "Unknown disparity algorithm : " << *host_->get<ftl::config::json_t>("disparity"); - disp_->setMask(mask_l_); + pipeline_input_ = ftl::config::create<ftl::operators::Graph>(host_, "input"); + #ifdef HAVE_OPTFLOW + pipeline_input_->append<ftl::operators::NVOpticalFlow>("optflow"); + #endif + + pipeline_depth_ = ftl::config::create<ftl::operators::Graph>(host_, "disparity"); + pipeline_depth_->append<ftl::operators::FixstarsSGM>("algorithm"); + + #ifdef HAVE_OPTFLOW + pipeline_depth_->append<ftl::operators::OpticalFlowTemporalSmoothing>("optflow_filter"); + #endif + pipeline_depth_->append<ftl::operators::DisparityBilateralFilter>("bilateral_filter"); + pipeline_depth_->append<ftl::operators::DisparityToDepth>("calculate_depth"); + pipeline_depth_->append<ftl::operators::ColourChannels>("colour"); // Convert BGR to BGRA + pipeline_depth_->append<ftl::operators::Normals>("normals"); // Estimate surface normals + pipeline_depth_->append<ftl::operators::CrossSupport>("cross"); + pipeline_depth_->append<ftl::operators::DiscontinuityMask>("discontinuity_mask"); + pipeline_depth_->append<ftl::operators::AggreMLS>("mls"); // Perform MLS (using smoothing channel) LOG(INFO) << "StereoVideo source ready..."; ready_ = true; @@ -158,15 +171,6 @@ ftl::rgbd::Camera StereoVideoSource::parameters(Channel chan) { } } -static void disparityToDepth(const cv::cuda::GpuMat &disparity, cv::cuda::GpuMat &depth, - const cv::Mat &Q, cv::cuda::Stream &stream) { - // Q(3, 2) = -1/Tx - // Q(2, 3) = f - - double val = (1.0f / Q.at<double>(3, 2)) * Q.at<double>(2, 3); - cv::cuda::divide(val, disparity, depth, 1.0f / 1000.0f, -1, stream); -} - bool StereoVideoSource::capture(int64_t ts) { timestamp_ = ts; lsrc_->grab(); @@ -180,31 +184,9 @@ bool StereoVideoSource::retrieve() { auto &right = frame.create<cv::cuda::GpuMat>(Channel::Right); lsrc_->get(left, right, calib_, stream2_); -#ifdef HAVE_OPTFLOW - // see comments in https://gitlab.utu.fi/nicolas.pope/ftl/issues/155 - - if (use_optflow_) - { - auto &left_gray = frame.create<cv::cuda::GpuMat>(Channel::LeftGray); - auto &right_gray = frame.create<cv::cuda::GpuMat>(Channel::RightGray); - - cv::cuda::cvtColor(left, left_gray, cv::COLOR_BGR2GRAY, 0, stream2_); - cv::cuda::cvtColor(right, right_gray, cv::COLOR_BGR2GRAY, 0, stream2_); - - if (frames_[1].hasChannel(Channel::LeftGray)) - { - //frames_[1].download(Channel::LeftGray); - auto &left_gray_prev = frames_[1].get<cv::cuda::GpuMat>(Channel::LeftGray); - auto &optflow = frame.create<cv::cuda::GpuMat>(Channel::Flow); - nvof_->calc(left_gray, left_gray_prev, optflow, stream2_); - // nvof_->upSampler() isn't implemented with CUDA - // cv::cuda::resize() does not work wiht 2-channel input - // cv::cuda::resize(optflow_, optflow, left.size(), 0.0, 0.0, cv::INTER_NEAREST, stream2_); - } - } -#endif - + pipeline_input_->apply(frame, frame, host_, cv::cuda::StreamAccessor::getStream(stream2_)); stream2_.waitForCompletion(); + return true; } @@ -216,37 +198,32 @@ void StereoVideoSource::swap() { bool StereoVideoSource::compute(int n, int b) { auto &frame = frames_[1]; - auto &left = frame.get<cv::cuda::GpuMat>(Channel::Left); - auto &right = frame.get<cv::cuda::GpuMat>(Channel::Right); - + const ftl::codecs::Channel chan = host_->getChannel(); - if (left.empty() || right.empty()) return false; + if (!frame.hasChannel(Channel::Left) || !frame.hasChannel(Channel::Right)) { + return false; + } if (chan == Channel::Depth) { - disp_->compute(frame, stream_); - - auto &disp = frame.get<cv::cuda::GpuMat>(Channel::Disparity); - auto &depth = frame.create<cv::cuda::GpuMat>(Channel::Depth); - if (depth.empty()) depth = cv::cuda::GpuMat(left.size(), CV_32FC1); - - ftl::cuda::disparity_to_depth(disp, depth, params_, stream_); - - //left.download(rgb_, stream_); - //depth.download(depth_, stream_); - //frame.download(Channel::Left + Channel::Depth); + pipeline_depth_->apply(frame, frame, host_, cv::cuda::StreamAccessor::getStream(stream_)); stream_.waitForCompletion(); - host_->notify(timestamp_, left, depth); + host_->notify(timestamp_, + frame.get<cv::cuda::GpuMat>(Channel::Left), + frame.get<cv::cuda::GpuMat>(Channel::Depth)); + } else if (chan == Channel::Right) { - //left.download(rgb_, stream_); - //right.download(depth_, stream_); - stream_.waitForCompletion(); // TODO:(Nick) Move to getFrames - host_->notify(timestamp_, left, right); + stream_.waitForCompletion(); // TODO:(Nick) Move to getFrames + host_->notify(timestamp_, + frame.get<cv::cuda::GpuMat>(Channel::Left), + frame.get<cv::cuda::GpuMat>(Channel::Right)); + } else { - //left.download(rgb_, stream_); - stream_.waitForCompletion(); // TODO:(Nick) Move to getFrames - //LOG(INFO) << "NO SECOND CHANNEL: " << (bool)depth_.empty(); + stream_.waitForCompletion(); // TODO:(Nick) Move to getFrames + auto &left = frame.get<cv::cuda::GpuMat>(Channel::Left); depth_.create(left.size(), left.type()); - host_->notify(timestamp_, left, depth_); + host_->notify(timestamp_, + frame.get<cv::cuda::GpuMat>(Channel::Left), + depth_); } return true; diff --git a/components/rgbd-sources/src/sources/stereovideo/stereovideo.hpp b/components/rgbd-sources/src/sources/stereovideo/stereovideo.hpp index dfea7937f99777c23f753a7ab2b5bad8c68bb4af..78fcdbcf809eeae0d5de6da30b99ce3dc07f9214 100644 --- a/components/rgbd-sources/src/sources/stereovideo/stereovideo.hpp +++ b/components/rgbd-sources/src/sources/stereovideo/stereovideo.hpp @@ -3,6 +3,7 @@ #define _FTL_RGBD_STEREOVIDEO_HPP_ #include <ftl/rgbd/source.hpp> +#include <ftl/operators/operator.hpp> #include <string> namespace ftl { @@ -33,15 +34,14 @@ class StereoVideoSource : public detail::Source { bool isReady(); Camera parameters(ftl::codecs::Channel chan); - //const cv::Mat &getRight() const { return right_; } - private: LocalSource *lsrc_; Calibrate *calib_; - Disparity *disp_; - + + ftl::operators::Graph *pipeline_input_; + ftl::operators::Graph *pipeline_depth_; + bool ready_; - bool use_optflow_; cv::cuda::Stream stream_; cv::cuda::Stream stream2_; @@ -50,12 +50,6 @@ class StereoVideoSource : public detail::Source { cv::Mat mask_l_; -#ifdef HAVE_OPTFLOW - // see comments in https://gitlab.utu.fi/nicolas.pope/ftl/issues/155 - cv::Ptr<cv::cuda::NvidiaOpticalFlow_1_0> nvof_; - cv::cuda::GpuMat optflow_; -#endif - void init(const std::string &); }; diff --git a/config/config_vision.jsonc b/config/config_vision.jsonc index b73446cefe06aaaf3663b8fe2823822878b5a1a8..cea950e0c21bde19d369ad0a4f53c255657e3724 100644 --- a/config/config_vision.jsonc +++ b/config/config_vision.jsonc @@ -9,53 +9,6 @@ } }, - "disparity": { - "libsgm": { - "algorithm": "libsgm", - "width": 1280, - "height": 720, - "use_cuda": true, - "minimum": 0, - "maximum": 256, - "tau": 0.0, - "gamma": 0.0, - "window_size": 5, - "sigma": 1.5, - "lambda": 8000.0, - "uniqueness": 0.65, - "use_filter": true, - "P1": 8, - "P2": 130, - "filter_radius": 11, - "filter_iter": 2, - "use_off": true, - "off_size": 24, - "off_threshold": 0.75, - "T": 60, - "T_add": 0, - "T_del": 25, - "T_x" : 3.0, - "alpha" : 0.6, - "beta" : 1.7, - "epsilon" : 15.0 - }, - - "rtcensus": { - "algorithm": "rtcensus", - "use_cuda": true, - "minimum": 0, - "maximum": 256, - "tau": 0.0, - "gamma": 0.0, - "window_size": 5, - "sigma": 1.5, - "lambda": 8000.0, - "use_filter": true, - "filter_radius": 3, - "filter_iter": 4 - } - }, - "sources": { "stereocam": { "uri": "device:video", @@ -71,7 +24,23 @@ }, "use_optflow" : true, "calibration": { "$ref": "#calibrations/default" }, - "disparity": { "$ref": "#disparity/libsgm" } + "disparity": { + "algorithm" : { + "P1" : 10, + "P2" : 120, + "enabled" : true + }, + "bilateral_filter" : { + "radius" : 17, + "iter" : 21, + "enabled" : true + }, + "optflow_filter" : { + "threshold" : 0.8, + "history_size" : 10, + "enabled": true + } + } }, "stereovid": {}, "localhost": {}