diff --git a/CMakeLists.txt b/CMakeLists.txt index 89f70a3cce686c59306ad1ff39d79b3191cd5d7e..87114cb8b4834724d4930f70557781bebc3f3847 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -453,6 +453,7 @@ endif() # ============================================================================= +add_subdirectory(lib/cudatl) add_subdirectory(components/common/cpp) add_subdirectory(components/codecs) add_subdirectory(components/structures) @@ -463,6 +464,7 @@ add_subdirectory(components/operators) add_subdirectory(components/streams) add_subdirectory(components/audio) add_subdirectory(components/calibration) +add_subdirectory(components/disparity) #add_subdirectory(applications/groupview) #add_subdirectory(applications/player) #add_subdirectory(applications/recorder) diff --git a/applications/gui2/CMakeLists.txt b/applications/gui2/CMakeLists.txt index 9c147be1a8dafc77b9b665f52394024b84539ef0..783aba56a22ff8c8a76118550365cceb0f56c84f 100644 --- a/applications/gui2/CMakeLists.txt +++ b/applications/gui2/CMakeLists.txt @@ -36,6 +36,12 @@ add_gui_module("camera3d") add_gui_module("thumbnails") add_gui_module("addsource") +list(APPEND GUI2SRC + src/modules/dev/developer.cpp + src/modules/dev/disparity.cpp + src/views/dev/disparityview.cpp + ) + if (WITH_CERES) list(APPEND GUI2SRC src/modules/calibration/calibration.cpp @@ -72,6 +78,6 @@ target_include_directories(ftl-gui2 PUBLIC #endif() #target_include_directories(cv-node PUBLIC ${PROJECT_SOURCE_DIR}/include) -target_link_libraries(ftl-gui2 ftlcommon ftldata ftlctrl ftlrgbd ftlstreams ftlrender Threads::Threads ${OpenCV_LIBS} openvr ftlnet nanogui ${NANOGUI_EXTRA_LIBS} ceres nvidia-ml) +target_link_libraries(ftl-gui2 ftlcommon ftldata ftlctrl ftlrgbd ftlstreams ftlrender ftldisparity Threads::Threads ${OpenCV_LIBS} openvr ftlnet nanogui ${NANOGUI_EXTRA_LIBS} ceres nvidia-ml) target_precompile_headers(ftl-gui2 REUSE_FROM ftldata) diff --git a/applications/gui2/src/main.cpp b/applications/gui2/src/main.cpp index ee07557d2848599e4c2af85034bfb0fc887f4bd3..27b5d1714027d741a3aa985d4fe5831edb899284 100644 --- a/applications/gui2/src/main.cpp +++ b/applications/gui2/src/main.cpp @@ -78,8 +78,11 @@ FTLGui::FTLGui(int argc, char **argv) { #endif auto *adder = loadModule<AddCtrl>("adder"); + if (root_->value("dev",false)) loadModule<Developer>("developer"); + for (int c = 1; c < argc; c++) { std::string path(argv[c]); + if (path.size() > 0 && path[0] == '-') continue; try { io_->feed()->add(path); LOG(INFO) << "Add: " << path; diff --git a/applications/gui2/src/modules.hpp b/applications/gui2/src/modules.hpp index 83cac08c1f99aa0289d294751a52c5ff64c49f00..9c17cf769e939cc1b48b4c6588857013814d0e8e 100644 --- a/applications/gui2/src/modules.hpp +++ b/applications/gui2/src/modules.hpp @@ -9,3 +9,4 @@ #include "modules/calibration/calibration.hpp" #endif #include "modules/addsource.hpp" +#include "modules/dev/developer.hpp" diff --git a/applications/gui2/src/modules/dev/developer.cpp b/applications/gui2/src/modules/dev/developer.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d2632f404bb0685b966be91c2310e2afb36e3750 --- /dev/null +++ b/applications/gui2/src/modules/dev/developer.cpp @@ -0,0 +1,64 @@ +#include "developer.hpp" +#include <nanogui/entypo.h> +#include <nanogui/layout.h> +#include "../../widgets/popupbutton.hpp" +#include "../../screen.hpp" + +using ftl::gui2::Developer; + +void Developer::init() { + //screen->addModule<ExtrinsicCalibration>("calib_extrinsic", this, screen, io); + //screen->addModule<StereoCalibration>("calib_stereo", this, screen, io); + + // NOTE: If more GUI code is added, consider moving the GUI cude to a new + // file in ../views/ + + // Should implement PopupMenu widget which would abstract building steps + // and provide common feel&look. (TODO) + + auto button = screen->addButton<ftl::gui2::PopupButton>("", ENTYPO_ICON_TOOLS); + button->setChevronIcon(0); + button->setTooltip("Developer Tools"); + + auto* popup = button->popup(); + popup->setLayout(new nanogui::BoxLayout + (nanogui::Orientation::Vertical, nanogui::Alignment::Fill, 10, 6)); + + auto* button_disp = new nanogui::Button(popup, "Disparity Tools"); + button_disp->setCallback([this, button, button_disp, popup](){ + button->setPushed(false); + button_disp->setPushed(false); + button_disp->setFocused(false); + auto* disp = screen->getModuleNoExcept<DisparityDev>(); + //auto* view = new ftl::gui2::IntrinsicCalibrationStart(screen, calib); + //screen->setView(view); + + if (!disp) screen->addModule<DisparityDev>("disparity_dev", this, screen, io); + }); + + /*auto* button_extrinsic = new nanogui::Button(popup, "Extrinsic Calibration"); + button_extrinsic->setCallback([this, button, button_extrinsic, popup](){ + button->setPushed(false); + button_extrinsic->setPushed(false); + button_extrinsic->setFocused(false); + auto* calib = screen->getModule<ExtrinsicCalibration>(); + auto* view = new ftl::gui2::ExtrinsicCalibrationStart(screen, calib); + screen->setView(view); + }); + + auto* button_stereo = new nanogui::Button(popup, "Stereo Calibration"); + button_stereo->setCallback([this, button, button_extrinsic, popup](){ + button->setPushed(false); + button_extrinsic->setPushed(false); + button_extrinsic->setFocused(false); + auto* calib = screen->getModule<StereoCalibration>(); + auto* view = new ftl::gui2::StereoCalibrationStart(screen, calib); + screen->setView(view); + });*/ + + button->setVisible(true); +} + +Developer::~Developer() { + // remove button +} \ No newline at end of file diff --git a/applications/gui2/src/modules/dev/developer.hpp b/applications/gui2/src/modules/dev/developer.hpp new file mode 100644 index 0000000000000000000000000000000000000000..bdb52a9d139864e59d573d6208179d90ab27cffe --- /dev/null +++ b/applications/gui2/src/modules/dev/developer.hpp @@ -0,0 +1,102 @@ +#pragma once + +#include "../../module.hpp" +#include "../camera.hpp" +#include <ftl/threads.hpp> +#include <ftl/data/new_frame.hpp> +#include <ftl/disparity/features.hpp> + +namespace ftl { +namespace gui2 { + +class DisparityView; + +class Developer : public Module { + public: + using Module::Module; + void init() override; + + virtual ~Developer(); +}; + +class DisparityDev : public Module { + public: + using Module::Module; + void init() override; + + virtual ~DisparityDev(); + + virtual void activate(ftl::data::FrameID id); + + /** Gets current active frame to display. Always 4 channel uchar4. Reference + * will stay valid until getFrame() is called again. Always returns a + * reference to internal buffer. */ + ftl::cuda::TextureObject<uchar4>& getFrame(); + ftl::cuda::TextureObject<uchar4>& getFrame(ftl::codecs::Channel channel); + bool getFrame(ftl::cuda::TextureObject<uchar4>&); + bool getFrame(ftl::cuda::TextureObject<uchar4>&, ftl::codecs::Channel channel); + + /** Check if new frame is available */ + bool hasFrame(); + + inline bool isLive() const { return live_; } + inline bool isTouchable() const { return touch_; } + inline bool isMovable() const { return movable_; } + inline bool isVR() const { return vr_; } + + void setFocalPoint(int x, int y); + //void setMode(ftl::disparity::Mode m); + const cv::cuda::GpuMat& getFeatureImageLeft(ftl::disparity::ColourFeatures::Feature f); + const cv::cuda::GpuMat& getFeatureImageRight(ftl::disparity::ColourFeatures::Feature f); + //ftl::cuda::TextureObject<uchar4>& getHistogramImage(ftl::disparity::Histogram h); + //ftl::cuda::TextureObject<uchar4>& getDisparityImage(); + //ftl::cuda::TextureObject<uchar4>& getErrorImage(); + //ftl::cuda::TextureObject<uchar4>& getConfidenceImage(); + + void generate(); + + double getLastRuntime(); + + private: + int frame_idx = -1; + ftl::data::FrameID frame_id_; + ftl::stream::Feed::Filter *filter_ = nullptr; + std::atomic_bool paused_ = false; // TODO: implement in InputOutput + bool has_seen_frame_ = false; + bool live_=false; + bool touch_=false; + bool movable_=false; + bool vr_=false; + float last_=0.0f; + std::atomic_int16_t nframes_=0; + std::atomic_int64_t latency_=0; + int update_fps_freq_=30; // fps counter update frequency (frames) + + ftl::data::FrameSetPtr current_fs_; + ftl::data::FrameSetPtr latest_; + ftl::cuda::TextureObject<uchar4> current_frame_; + ftl::cuda::TextureObject<uchar4> current_frame_colour_; + + std::unique_ptr<ftl::render::Colouriser> colouriser_; + std::unique_ptr<ftl::overlay::Overlay> overlay_; + + cv::cuda::GpuMat left_; + cv::cuda::GpuMat right_; + + ftl::disparity::ColourFeatures col_feat_left_; + ftl::disparity::ColourFeatures col_feat_right_; + + + + std::map<ftl::data::Message,std::string> messages_; + + DisparityView* view = nullptr; + + MUTEX mtx_; + + void initiate_(ftl::data::Frame &frame); + void _updateCapabilities(ftl::data::Frame &frame); +}; + +} +} \ No newline at end of file diff --git a/applications/gui2/src/modules/dev/disparity.cpp b/applications/gui2/src/modules/dev/disparity.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c4218615c4e28f420a056e868ccf669ca8a3843d --- /dev/null +++ b/applications/gui2/src/modules/dev/disparity.cpp @@ -0,0 +1,217 @@ +#include "developer.hpp" +#include <loguru.hpp> + +#include "../../views/dev/disparityview.hpp" +#include <ftl/codecs/channels.hpp> + +using ftl::gui2::DisparityDev; +using ftl::codecs::Channel; +using ftl::rgbd::Capability; + +void DisparityDev::init() { + colouriser_ = std::unique_ptr<ftl::render::Colouriser>( + ftl::create<ftl::render::Colouriser>(this, "colouriser")); +} + +void DisparityDev::_updateCapabilities(ftl::data::Frame &frame) { + if (frame.has(Channel::Capabilities)) { + live_ = false; + touch_ = false; + movable_ = false; + vr_ = false; + + const auto &cap = frame.get<std::unordered_set<Capability>>(Channel::Capabilities); + + for (auto c : cap) { + switch (c) { + case Capability::LIVE : live_ = true; break; + case Capability::TOUCH : touch_ = true; break; + case Capability::MOVABLE : movable_ = true; break; + case Capability::VR : vr_ = true; break; + default: break; + } + } + } +} + +void DisparityDev::initiate_(ftl::data::Frame &frame) { + if (frame.has(Channel::Capabilities)) { + const auto &rgbdf = frame.cast<ftl::rgbd::Frame>(); + const auto &cap = rgbdf.capabilities(); + for (auto c : cap) { + LOG(INFO) << " -- " << ftl::rgbd::capabilityName(c); + + switch (c) { + case Capability::LIVE : live_ = true; break; + case Capability::TOUCH : touch_ = true; break; + case Capability::MOVABLE : movable_ = true; break; + case Capability::VR : vr_ = true; break; + default: break; + } + } + + if (live_ && cap.count(Capability::VIRTUAL)) { + //view = new ftl::gui2::CameraView3D(screen, this); + } else { + //view = new ftl::gui2::CameraView(screen, this); + } + } else { + ///view = new ftl::gui2::CameraView(screen, this); + } + + has_seen_frame_ = true; + view = new ftl::gui2::DisparityView(screen, this); + + if (frame.has(Channel::MetaData)) { + const auto &meta = frame.metadata(); + LOG(INFO) << "Camera Frame Meta Data:"; + for (auto m : meta) { + LOG(INFO) << " -- " << m.first << " = " << m.second; + } + } + + if (!view) return; + + view->onClose([this](){ + filter_->remove(); + filter_ = nullptr; + nframes_ = -1; + + /*auto *mod = this->screen->getModule<ftl::gui2::Statistics>(); + + mod->getJSON(StatisticsPanel::PERFORMANCE_INFO).clear(); + mod->getJSON(StatisticsPanel::MEDIA_STATUS).clear(); + mod->getJSON(StatisticsPanel::MEDIA_META).clear(); + mod->getJSON(StatisticsPanel::CAMERA_DETAILS).clear();*/ + }); + + screen->setView(view); + view->refresh(); +} + +void DisparityDev::activate(ftl::data::FrameID id) { + LOG(INFO) << "DISP DEV ACTIVATE"; + frame_idx = id.source(); + frame_id_ = id; + last_ = glfwGetTime(); + nframes_ = 0; + // Clear the members to defaults + has_seen_frame_ = false; + live_ = false; + touch_ = false; + movable_ = false; + vr_ = false; + + filter_ = io->feed()->filter(std::unordered_set<unsigned int>{id.frameset()}, {Channel::Left, Channel::Right}); + filter_->on( + [this, feed = io->feed(), speaker = io->speaker()](ftl::data::FrameSetPtr fs){ + std::atomic_store(¤t_fs_, fs); + std::atomic_store(&latest_, fs); + + // Need to notify GUI thread when first data comes + if (!has_seen_frame_) { + //std::unique_lock<std::mutex> lk(m); + has_seen_frame_ = true; + //cv.notify_one(); + } + + // Extract and record any frame messages + auto &frame = fs->frames[frame_idx]; + if (frame.hasMessages()) { + const auto &msgs = frame.messages(); + //auto &jmsgs = mod->getJSON(StatisticsPanel::LOGGING); + + UNIQUE_LOCK(mtx_, lk); + messages_.insert(msgs.begin(), msgs.end()); + } + + // Some capabilities can change over time + if (frame.changed(Channel::Capabilities)) { + _updateCapabilities(frame); + } + + if (!view) return true; + + screen->redraw(); + nframes_++; + latency_ += ftl::timer::get_time() - fs->localTimestamp; + return true; + } + ); + + auto sets = filter_->getLatestFrameSets(); + if (sets.size() > 0) { + std::atomic_store(¤t_fs_, sets.front()); + std::atomic_store(&latest_, sets.front()); + initiate_(sets.front()->frames[frame_idx]); + } else { + throw FTL_Error("Cannot activate disparity devtools, no data"); + } +} + +DisparityDev::~DisparityDev() { + +} + +ftl::cuda::TextureObject<uchar4>& DisparityDev::getFrame() { + return getFrame(Channel::Right); +} + +ftl::cuda::TextureObject<uchar4>& DisparityDev::getFrame(ftl::codecs::Channel channel) { + if (std::atomic_load(¤t_fs_)) { + auto& frame = current_fs_->frames[frame_idx].cast<ftl::rgbd::Frame>(); + + if (frame.hasChannel(Channel::Left)) current_frame_colour_ = frame.getTexture<uchar4>(Channel::Left); + + if (frame.hasChannel(channel)) { + current_frame_ = colouriser_->colourise(frame, channel, 0); + } else { + throw FTL_Error("Channel missing for frame " << frame.timestamp() << ": '" << ftl::codecs::name(channel) << "'"); + } + std::atomic_store(¤t_fs_, {}); + } + if (channel == Channel::Left) { return current_frame_colour_; } + else { return current_frame_; } +} + +bool DisparityDev::getFrame(ftl::cuda::TextureObject<uchar4>& frame, ftl::codecs::Channel channel) { + if (std::atomic_load(¤t_fs_).get() != nullptr) { + frame = getFrame(); + return true; + } + return false; +} + +bool DisparityDev::getFrame(ftl::cuda::TextureObject<uchar4>& frame) { + return getFrame(frame, Channel::Right); +} + +bool DisparityDev::hasFrame() { + auto ptr = std::atomic_load(¤t_fs_); + if (ptr && ptr->frames.size() > (unsigned int)(frame_idx)) { + return ptr->frames[frame_idx].hasChannel(Channel::Left); + } + return false; +} + +void DisparityDev::generate() { + auto ptr = std::atomic_load(¤t_fs_); + if (ptr && ptr->frames.size() > (unsigned int)(frame_idx)) { + if (ptr->frames[frame_idx].hasChannel(Channel::Left)) { + col_feat_left_.generate(ptr->frames[frame_idx].get<cv::cuda::GpuMat>(Channel::Left), nullptr); + } + if (ptr->frames[frame_idx].hasChannel(Channel::Right)) { + col_feat_right_.generate(ptr->frames[frame_idx].get<cv::cuda::GpuMat>(Channel::Right), nullptr); + } + } +} + +const cv::cuda::GpuMat& DisparityDev::getFeatureImageLeft(ftl::disparity::ColourFeatures::Feature f) { + col_feat_left_.visualise(f, 0, left_, nullptr); + return left_; +} + +const cv::cuda::GpuMat& DisparityDev::getFeatureImageRight(ftl::disparity::ColourFeatures::Feature f) { + col_feat_right_.visualise(f, 0, right_, nullptr); + return right_; +} diff --git a/applications/gui2/src/modules/thumbnails.cpp b/applications/gui2/src/modules/thumbnails.cpp index f23dd97ee99abb01d5e0004befc5d21a40b28374..f94b446cb8980057bff78e20ba5bb8dc2654aefe 100644 --- a/applications/gui2/src/modules/thumbnails.cpp +++ b/applications/gui2/src/modules/thumbnails.cpp @@ -1,4 +1,5 @@ #include "thumbnails.hpp" +#include "dev/developer.hpp" #include "../views/thumbnails.hpp" #include "camera.hpp" @@ -71,6 +72,11 @@ std::vector<ftl::data::FrameSetPtr> ThumbnailsController::getFrameSets() { } void ThumbnailsController::show_camera(ftl::data::FrameID id) { - auto* camera = screen->getModule<ftl::gui2::Camera>(); - camera->activate(id); + auto *dispdev = screen->getModuleNoExcept<ftl::gui2::DisparityDev>(); + if (dispdev) { + dispdev->activate(id); + } else { + auto* camera = screen->getModule<ftl::gui2::Camera>(); + camera->activate(id); + } } diff --git a/applications/gui2/src/screen.hpp b/applications/gui2/src/screen.hpp index a5e47d6a3fab057a8e8633a012f53511f7dc6c25..a5adee68fc58f6017aaeababb5ec75b0f536fef2 100644 --- a/applications/gui2/src/screen.hpp +++ b/applications/gui2/src/screen.hpp @@ -58,6 +58,9 @@ public: template<typename T> T* getModule(); + template<typename T> + T* getModuleNoExcept(); + // prever above template (explicit who manages delete) // template<typename T> // T* addModule(T* ptr) { return addModule_(ptr); } @@ -149,6 +152,20 @@ T* Screen::getModule() { throw ftl::exception("module not found"); } +template<typename T> +T* Screen::getModuleNoExcept() { + static_assert(std::is_base_of<Module, T>::value); + + for (auto& [name, ptr] : modules_) { + std::ignore = name; + if (typeid(*ptr) == typeid(T)) { + return dynamic_cast<T*>(ptr); + } + } + + return nullptr; +} + template<typename T, typename ... Args> T* Screen::addButton(Args ... args) { static_assert(std::is_base_of<nanogui::Button, T>::value); diff --git a/applications/gui2/src/views/dev/disparityview.cpp b/applications/gui2/src/views/dev/disparityview.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d031963b2341702f5cc861558762d98c75b3d523 --- /dev/null +++ b/applications/gui2/src/views/dev/disparityview.cpp @@ -0,0 +1,224 @@ +#include <nanogui/screen.h> +#include <nanogui/layout.h> +#include <nanogui/button.h> +#include <nanogui/vscrollpanel.h> +#include <ftl/utility/string.hpp> + +#include "disparityview.hpp" + +#include "../../modules/dev/developer.hpp" +#include "../../modules/config.hpp" + +#include "../../widgets/popupbutton.hpp" + +#include <loguru.hpp> + +using ftl::gui2::DisparityDev; +using ftl::gui2::FixedWindow; +using ftl::gui2::DisparityView; +using ftl::gui2::PopupButton; +using ftl::gui2::Tools; +using ftl::gui2::ToolGroup; + +using ftl::codecs::Channel; + +// ==== CameraView ============================================================= + +DisparityView::DisparityView(ftl::gui2::Screen* parent, ftl::gui2::DisparityDev* ctrl) : + View(parent), ctrl_(ctrl), + stereoim_(nullptr) { + + //imview_ = new ftl::gui2::FTLImageView(this); + //panel_ = new ftl::gui2::MediaPanel(screen(), ctrl, this); + //tools_ = new ftl::gui2::ToolPanel(screen(), ctrl, this); + + stereoim_ = new StereoImageView(this); + imview_ = stereoim_->right(); + + //imview_->setFlipped(ctrl->isVR()); + + /*auto *mod = ctrl_->screen->getModule<ftl::gui2::Statistics>(); + if (ctrl_->isMovable()) { + imview_->setCursor(nanogui::Cursor::Hand); + mod->setCursor(nanogui::Cursor::Hand); + } else { + imview_->setCursor(nanogui::Cursor::Crosshair); + mod->setCursor(nanogui::Cursor::Crosshair); + }*/ + + auto theme = dynamic_cast<ftl::gui2::Screen*>(screen())->getTheme("toolbutton"); + //this->setTheme(theme); + + context_menu_ = new nanogui::Window(parent, ""); + context_menu_->setVisible(false); + context_menu_->setLayout(new nanogui::BoxLayout(nanogui::Orientation::Vertical)); + context_menu_->setTheme(theme); + + screen()->performLayout(); + + /*auto *button = new nanogui::Button(context_menu_, "Capture Image"); + button->setCallback([this]() { + char timestamp[18]; + std::time_t t=std::time(NULL); + std::strftime(timestamp, sizeof(timestamp), "%F-%H%M%S", std::localtime(&t)); + context_menu_->setVisible(false); + ctrl_->snapshot(std::string(timestamp)+std::string(".png")); + }); + + button = new nanogui::Button(context_menu_, "Settings"); + button->setCallback([this, button]() { + context_menu_->setVisible(false); + ctrl_->screen->getModule<ftl::gui2::ConfigCtrl>()->show(ctrl_->getID()); + });*/ + + /*tools_->setAvailable({ + Tools::SELECT_POINT, + Tools::OVERLAY, + Tools::PAN, + Tools::ZOOM_FIT, + Tools::ZOOM_IN, + Tools::ZOOM_OUT, + Tools::CENTRE_VIEW, + Tools::INSPECT_POINT + }); + + tools_->addCallback([this](ftl::gui2::Tools tool) { + switch (tool) { + case Tools::OVERLAY : ctrl_->toggleOverlay(); return true; + case Tools::ZOOM_FIT : imview_->fit(); return true; + case Tools::CENTRE_VIEW : imview_->center(); return true; + //case CameraTools::ZOOM_OUT : imview_->zoom(-1, imview_->sizeF() / 2); return true; + //case CameraTools::ZOOM_IN : imview_->zoom(1, imview_->sizeF() / 2); return true; + default: return false; + } + });*/ +} + +DisparityView::~DisparityView() { + if (parent()->getRefCount() > 0) { + // segfault without this check; nanogui already deleted windows? + // should be fixed in nanogui + //panel_->dispose(); + //tools_->dispose(); + } + + if (context_menu_->parent()->getRefCount() > 0) { + context_menu_->setVisible(false); + context_menu_->dispose(); + } +} + +void DisparityView::refresh() { + bool was_valid = imview_->texture().isValid(); + + if (ctrl_->hasFrame()) { + ctrl_->generate(); + //imview_->copyFrom(ctrl_->getFrame()); + //stereoim_->left()->copyFrom(ctrl_->getFrame(Channel::Left)); + imview_->copyFrom(ctrl_->getFeatureImageRight(ftl::disparity::ColourFeatures::Feature::ALL)); + stereoim_->left()->copyFrom(ctrl_->getFeatureImageLeft(ftl::disparity::ColourFeatures::Feature::ALL)); + } + if (!was_valid && imview_->texture().isValid()) { + screen()->performLayout(); + } +} + +bool DisparityView::mouseMotionEvent(const Eigen::Vector2i &p, const Eigen::Vector2i &rel, int button, int modifiers) { + //if (button == 1) { + + /*if (tools_->isActive(Tools::SELECT_POINT)) { + auto pos = imview_->imageCoordinateAt((p - mPos + rel).cast<float>()); + if (pos.x() >= 0.0f && pos.y() >= 0.0f) { + ctrl_->touch(0, ftl::codecs::TouchType::MOUSE_LEFT, pos.x(), pos.y(), 0.0f, (button > 0) ? 255 : 0); + + //LOG(INFO) << "Depth at " << pos.x() << "," << pos.y() << " = " << ctrl_->depthAt(pos.x(), pos.y()); + } + }*/ + return true; + //} + return false; +} + +bool DisparityView::mouseButtonEvent(const Eigen::Vector2i &p, int button, bool down, int modifiers) { + //LOG(INFO) << "mouseButtonEvent: " << p << " - " << button; + if (button == 0) { + if (down) { + auto pos = imview_->imageCoordinateAt((p - mPos).cast<float>()); + LOG(INFO) << "Use focal point at " << pos.x() << "," << pos.y(); + + } + + context_menu_->setVisible(false); + return true; + } else if (button == 1) { + if (!down) { + context_menu_->setPosition(p - mPos); + context_menu_->setVisible(true); + return true; + } + } else { + context_menu_->setVisible(false); + } + return false; +} + +void DisparityView::draw(NVGcontext*ctx) { + using namespace nanogui; + + if (ctrl_->hasFrame()) { + ctrl_->generate(); + + try { + // TODO: Select shader to flip if VR capability found... + imview_->copyFrom(ctrl_->getFeatureImageRight(ftl::disparity::ColourFeatures::Feature::ALL)); + if (stereoim_) { + stereoim_->left()->copyFrom(ctrl_->getFeatureImageLeft(ftl::disparity::ColourFeatures::Feature::ALL)); + } + } + catch (std::exception& e) { + gui()->showError("Exception", e.what()); + } + + + /*try { + // TODO: Select shader to flip if VR capability found... + imview_->copyFrom(ctrl_->getFrame()); + if (stereoim_) { + stereoim_->left()->copyFrom(ctrl_->getFrame(Channel::Left)); + } + } + catch (std::exception& e) { + gui()->showError("Exception", e.what()); + }*/ + } + View::draw(ctx); + + auto osize = imview_->scaledImageSizeF(); + //ctrl_->drawOverlay(ctx, screen()->size().cast<float>(), osize, imview_->offset()); + + /*if (tools_->isActive(Tools::INSPECT_POINT)) { + auto mouse = screen()->mousePos(); + auto pos = imview_->imageCoordinateAt((mouse - mPos).cast<float>()); + float d = ctrl_->depthAt(pos.x(), pos.y()); + + if (d > 0.0f) { + nvgText(ctx, mouse.x()+25.0f, mouse.y()+20.0f, (to_string_with_precision(d,2) + std::string("m")).c_str(), nullptr); + } + }*/ +} + +void DisparityView::performLayout(NVGcontext* ctx) { + if (stereoim_) { + stereoim_->setFixedSize(size()); + //if (!(enable_zoom_ && enable_pan_)) { + stereoim_->fit(); + //} + } + else { + imview_->setSize(size()); + //if (!(enable_zoom_ && enable_pan_)) { + imview_->fit(); + //} + } + View::performLayout(ctx); +} diff --git a/applications/gui2/src/views/dev/disparityview.hpp b/applications/gui2/src/views/dev/disparityview.hpp new file mode 100644 index 0000000000000000000000000000000000000000..a26021972bf15458fdc8df6a8f43f34e923eff25 --- /dev/null +++ b/applications/gui2/src/views/dev/disparityview.hpp @@ -0,0 +1,46 @@ +#pragma once + +#include "../../view.hpp" + +#include <ftl/utility/gltexture.hpp> + +#include "../../widgets/window.hpp" +#include "../../widgets/imageview.hpp" +#include "../../widgets/popupbutton.hpp" +#include "../../modules/camera_tools.hpp" + +#include "../camera.hpp" + +namespace ftl { +namespace gui2 { + +class DisparityDev; + +class DisparityView : public View { +public: + DisparityView(Screen* parent, DisparityDev* ctrl); + virtual ~DisparityView(); + + virtual void draw(NVGcontext* ctx) override; + virtual void performLayout(NVGcontext* ctx) override; + virtual bool mouseButtonEvent(const Eigen::Vector2i &p, int button, bool down, int modifiers) override; + virtual bool mouseMotionEvent(const Eigen::Vector2i &p, const Eigen::Vector2i &rel, int button, int modifiers) override; + + void refresh(); + +protected: + DisparityDev* ctrl_; + //MediaPanel* panel_; + ToolPanel* tools_; + FTLImageView* imview_; + nanogui::Window *context_menu_; + +private: + StereoImageView* stereoim_; + +public: + EIGEN_MAKE_ALIGNED_OPERATOR_NEW +}; + +} +} diff --git a/components/common/cpp/include/ftl/cuda_common.hpp b/components/common/cpp/include/ftl/cuda_common.hpp index 59053e4b2196779bfa4bb16e84431b00fd36594e..10f03281b4cecd689435b00866423fe8dd6839d1 100644 --- a/components/common/cpp/include/ftl/cuda_common.hpp +++ b/components/common/cpp/include/ftl/cuda_common.hpp @@ -17,6 +17,21 @@ #include <exception> #endif +#define printLastCudaError(msg) __printLastCudaError(msg, __FILE__, __LINE__) + +inline void __printLastCudaError(const char *errorMessage, const char *file, + const int line) { + cudaError_t err = cudaGetLastError(); + + if (cudaSuccess != err) { + fprintf(stderr, + "%s(%i) : getLastCudaError() CUDA error :" + " %s : (%d) %s.\n", + file, line, errorMessage, static_cast<int>(err), + cudaGetErrorString(err)); + } +} + /* Grid stride loop macros */ #define STRIDE_Y(I,N) int I = blockIdx.y * blockDim.y + threadIdx.y; I < N; I += blockDim.y * gridDim.y #define STRIDE_X(I,N) int I = blockIdx.x * blockDim.x + threadIdx.x; I < N; I += blockDim.x * gridDim.x diff --git a/components/disparity/CMakeLists.txt b/components/disparity/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..6c3bb715250b7f73da774e23992edb7c10b3e580 --- /dev/null +++ b/components/disparity/CMakeLists.txt @@ -0,0 +1,20 @@ +set(DISPSRC + src/features.cu +) + +add_library(ftldisparity ${DISPSRC}) + +target_include_directories(ftldisparity PUBLIC + $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include> + $<INSTALL_INTERFACE:include> + PRIVATE src) + +target_link_libraries(ftldisparity ftlcommon cudatl Eigen3::Eigen Threads::Threads ${OpenCV_LIBS}) + +target_precompile_headers(ftldisparity REUSE_FROM ftlcommon) + +set_property(TARGET ftldisparity PROPERTY CUDA_ARCHITECTURES OFF) + +#if (BUILD_TESTS) +# add_subdirectory(test) +#endif() diff --git a/components/disparity/include/ftl/disparity/features.hpp b/components/disparity/include/ftl/disparity/features.hpp new file mode 100644 index 0000000000000000000000000000000000000000..08883ab3deb49f17974176ac53d45afca157d68b --- /dev/null +++ b/components/disparity/include/ftl/disparity/features.hpp @@ -0,0 +1,47 @@ +#ifndef _FTL_DISPARITY_FEATURES_HPP_ +#define _FTL_DISPARITY_FEATURES_HPP_ + +#include <cuda_runtime.h> +#include <opencv2/core/cuda.hpp> + +namespace ftl { +namespace disparity { + +class ColourFeatures { + public: + ColourFeatures(); + ~ColourFeatures(); + + enum class Feature { + ALL = 0, + RED = 0x01, + GREEN = 0x02, + BLUE = 0x04, + RED_GREEN = 0x03, + RED_BLUE = 0x05, + BLUE_GREEN = 0x06, + WHITE = 0x07 + }; + + void generate( + const cv::cuda::GpuMat &image, + cudaStream_t + ); + + void visualise( + Feature f, + int threshold, + cv::cuda::GpuMat &out, + cudaStream_t + ); + + private: + cv::cuda::GpuMat hls_; + cv::cuda::GpuMat sig_; + cv::cuda::GpuMat category_; +}; + +} +} + +#endif \ No newline at end of file diff --git a/components/disparity/src/features.cu b/components/disparity/src/features.cu new file mode 100644 index 0000000000000000000000000000000000000000..17c3ff625a769550c5511561fa480b73dde63623 --- /dev/null +++ b/components/disparity/src/features.cu @@ -0,0 +1,263 @@ +#include <ftl/disparity/features.hpp> +#include <ftl/cuda_common.hpp> +#include <cudatl/colours.hpp> +#include <opencv2/cudaimgproc.hpp> + +using ftl::disparity::ColourFeatures; + +ColourFeatures::ColourFeatures() { + +} + +ColourFeatures::~ColourFeatures() { + +} + +inline __device__ float absmax(float a, float b, float c) { + const float aa = fabsf(a); + const float ab = fabsf(b); + const float ac = fabsf(c); + if (aa >= ab && aa >= ac) return a; + if (ab >= aa && ab >= ac) return b; + if (ac >= aa && ac >= ab) return c; + return 0.0f; +} + +template <int RADIUS> +__global__ void colour_features_kernel( + const uchar4* __restrict__ image, + int image_pitch, + int width, int height, + uchar* __restrict__ sig, + uchar* __restrict__ category, + int pitch +) { + const int x = blockIdx.x*blockDim.x + threadIdx.x; + const int y = blockIdx.y*blockDim.y + threadIdx.y; + + static constexpr float PIXEL_COUNT_I = ((2*RADIUS+1)*(2*RADIUS+1)); + static constexpr float PIXEL_COUNT = float(PIXEL_COUNT_I); + + if (x >= RADIUS && y >= RADIUS && x < width-RADIUS && y < height-RADIUS) { + uchar4 c = image[x+y*image_pitch]; + int maxR = 0; + int maxG = 0; + int maxB = 0; + + // First, find greatest difference of immediate surroundings. + for (int v=-1; v<=1; ++v) { + #pragma unroll + for (int u=-1; u<=1; ++u) { + uchar4 cN = image[x+u+(y+v)*image_pitch]; + maxR = max(maxR, abs(int(cN.z) - int(c.z))); + maxG = max(maxG, abs(int(cN.y) - int(c.y))); + maxB = max(maxB, abs(int(cN.x) - int(c.x))); + } + } + + int match_count_r = 0; + int match_count_g = 0; + int match_count_b = 0; + float match_r_val = 0.0f; + float nonmatch_r_val = 0.0f; + float match_g_val = 0.0f; + float nonmatch_g_val = 0.0f; + float match_b_val = 0.0f; + float nonmatch_b_val = 0.0f; + + for (int v=-RADIUS; v<=RADIUS; ++v) { + for (int u=-RADIUS; u<=RADIUS; ++u) { + uchar4 cN = image[x+u+(y+v)*image_pitch]; + if (abs(int(cN.z) - int(c.z)) < maxR) { + ++match_count_r; + match_r_val += cN.z; + } else { + nonmatch_r_val += cN.z; + } + if (abs(int(cN.y) - int(c.y)) < maxG) { + ++match_count_g; + match_g_val += cN.y; + } else { + nonmatch_g_val += cN.y; + } + if (abs(int(cN.x) - int(c.x)) < maxB) { + ++match_count_b; + match_b_val += cN.x; + } else { + nonmatch_b_val += cN.x; + } + } + } + + match_r_val /= match_count_r; + nonmatch_r_val /= PIXEL_COUNT_I - match_count_r; + match_g_val /= match_count_g; + nonmatch_g_val /= PIXEL_COUNT_I - match_count_g; + match_b_val /= match_count_b; + nonmatch_b_val /= PIXEL_COUNT_I - match_count_b; + + float sim_r = (fabsf(float(c.z) - match_r_val) / 255.0f); + float diff_r = fabsf(match_r_val - nonmatch_r_val) / 255.0f; + float sig_r = fabsf(float(match_count_r) / PIXEL_COUNT - 0.5f)*2.0f; + sig_r = 1.0f - sig_r; + sig_r *= 1.0f - sim_r; + //sig_r *= diff_r; + //sig_r = (1.0f - sim_r)*diff_r; + //sig_r *= min(1.0f, (float(maxR) / 60.0f)); + + float sim_g = (fabsf(float(c.y) - match_g_val) / 255.0f); + float diff_g = fabsf(match_g_val - nonmatch_g_val) / 255.0f; + float sig_g = fabsf(float(match_count_g) / PIXEL_COUNT - 0.5f)*2.0f; + sig_g = 1.0f - sig_g; + sig_g *= 1.0f - sim_g; + //sig_g *= diff_g; + //sig_g = (1.0f - sim_g)*diff_g; + //sig_g *= min(1.0f, (float(maxG) / 60.0f)); + + float sim_b = (fabsf(float(c.x) - match_b_val) / 255.0f); + float diff_b = fabsf(match_b_val - nonmatch_b_val) / 255.0f; + float sig_b = fabsf(float(match_count_b) / PIXEL_COUNT - 0.5f)*2.0f; + sig_b = 1.0f - sig_b; + sig_b *= 1.0f - sim_b; + //sig_b *= diff_b; + //sig_b = (1.0f - sim_r)*diff_b; + //sig_b *= min(1.0f, (float(maxB) / 60.0f)); + + uchar3 hsv = cudatl::rgb2hsv(match_r_val, match_g_val, match_b_val); + category[x+y*pitch] = hsv.x; //0.2126f * match_r_val + 0.7152f * match_g_val + 0.0722f * match_b_val; + + if (match_r_val < nonmatch_r_val) sig_r = -sig_r; + if (match_g_val < nonmatch_g_val) sig_g = -sig_g; + if (match_b_val < nonmatch_b_val) sig_b = -sig_b; + const float msig = absmax(sig_r, sig_g, sig_b); + sig[x+y*pitch] = char(msig * 127.0f); + } +} + +__global__ void thin_features_kernel( + uchar* __restrict__ sig, + int pitch, + int width, int height +) { + const int x = blockIdx.x*blockDim.x + threadIdx.x; + const int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x >= 1 && y >= 1 && x < width-1 && y < height-1) { + const char nP = sig[x-1+y*pitch]; + const char n = sig[x+y*pitch]; + const char nN = sig[x+1+y*pitch]; + + uchar v = 0; + if ((nP < 0 && n > 0) || (nP > 0 && n < 0)) v = max(n, nN); + else if ((nN > 0 && n < 0) || (nN < 0 && n > 0)) v = max(n, nP); + + sig[x+y*pitch] = v; + } +} + +void ColourFeatures::generate( + const cv::cuda::GpuMat &image, + cudaStream_t stream +) { + cv::cuda::cvtColor(image, hls_, cv::COLOR_BGR2Lab, 4); + sig_.create(image.size(), CV_8UC1); + category_.create(image.size(), CV_8UC1); + + static constexpr int THREADS_X = 16; + static constexpr int THREADS_Y = 8; + + const dim3 gridSize((image.cols + THREADS_X - 1)/THREADS_X, (image.rows + THREADS_Y - 1)/THREADS_Y); + const dim3 blockSize(THREADS_X, THREADS_Y); + + colour_features_kernel<3><<<gridSize, blockSize, 0, stream>>>( + hls_.ptr<uchar4>(), + image.step1() / 4, + image.cols, image.rows, + sig_.ptr<uchar>(), + category_.ptr<uchar>(), + sig_.step1() + ); + + printLastCudaError("Generating features error"); + + thin_features_kernel<<<gridSize, blockSize, 0, stream>>>( + sig_.ptr<uchar>(), + sig_.step1(), + sig_.cols, sig_.rows + ); + + printLastCudaError("Thin features error"); +} + +__global__ void vis_colour_features( + const uchar* __restrict__ sig, + const uchar* __restrict__ category, + int pitch, + uchar4* __restrict__ out, + int out_pitch, + int width, int height, + ColourFeatures::Feature feature, + int threshold +) { + const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; + const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x < width && y < height) { + int s = char(sig[x+y*pitch]); + const uchar c = category[x+y*pitch]; + + /*s = -s; + + out[x+y*out_pitch] = (s >= 0) ? make_uchar4( + (c & 0x04) ? s*2 : 0, + (c & 0x02) ? s*2 : 0, + (c & 0x01) ? s*2 : 0, + 255 + ) : make_uchar4(0,0,0,0);*/ + + //s = min(255, s*4); + + out[x+y*out_pitch] = make_uchar4(0,0,0,0); + //if (abs(s) >= 2) { + //uchar3 rgb = cudatl::hsv2rgb(c, uchar(255), uchar(abs(s*2))); + //out[x+y*out_pitch] = make_uchar4(rgb.z, rgb.y, rgb.x, 255); + + out[x+y*out_pitch] = (s >= 0) ? make_uchar4( + s*2, 0, 0, 255 + ) : make_uchar4(0,0,-s*2,255); + //} + /*if (abs(s) > 1) { + out[x+y*out_pitch] = (s > 0) ? make_uchar4( + 0, c, 0, 255 + ) : make_uchar4(0,0,c,255); + }*/ + } +} + +void ColourFeatures::visualise( + ColourFeatures::Feature f, + int threshold, + cv::cuda::GpuMat &out, + cudaStream_t stream +) { + out.create(sig_.size(), CV_8UC4); + + static constexpr int THREADS_X = 16; + static constexpr int THREADS_Y = 8; + + const dim3 gridSize((out.cols + THREADS_X - 1)/THREADS_X, (out.rows + THREADS_Y - 1)/THREADS_Y); + const dim3 blockSize(THREADS_X, THREADS_Y); + + vis_colour_features<<<gridSize, blockSize, 0, stream>>>( + sig_.ptr<uchar>(), + category_.ptr<uchar>(), + sig_.step1(), + out.ptr<uchar4>(), + out.step1()/4, + out.cols, out.rows, + f, + threshold + ); + + printLastCudaError("Visualising features error"); +} diff --git a/lib/cudatl/CMakeLists.txt b/lib/cudatl/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..1ea12947a402e666df6603a79bb2250d3415c2b6 --- /dev/null +++ b/lib/cudatl/CMakeLists.txt @@ -0,0 +1,14 @@ + +add_library(cudatl INTERFACE) + +target_include_directories(cudatl INTERFACE + ${CMAKE_CURRENT_SOURCE_DIR}/include) + +#target_link_libraries(ftldata ftlcommon Eigen3::Eigen ftlcodecs) + +#set_property(TARGET ftldata PROPERTY CUDA_ARCHITECTURES OFF) + +#if (BUILD_TESTS) +#add_subdirectory(test) +#endif() + diff --git a/lib/cudatl/include/cudatl/colours.hpp b/lib/cudatl/include/cudatl/colours.hpp new file mode 100644 index 0000000000000000000000000000000000000000..d706be4963e6ac0d389c79bc6fcddd9a70da4050 --- /dev/null +++ b/lib/cudatl/include/cudatl/colours.hpp @@ -0,0 +1,93 @@ +#ifndef _CUDATL_COLOURS_HPP_ +#define _CUDATL_COLOURS_HPP_ + +#include <cuda_runtime.h> + +namespace cudatl { + +/* From NVIDIA Npp */ +template <typename T> +__device__ uchar3 rgb2hsv(T r, T g, T b) { + const float nNormalizedR = float(r) * 0.003921569F; // / 255.0F + const float nNormalizedG = float(g) * 0.003921569F; + const float nNormalizedB = float(b) * 0.003921569F; + float nS; + float nH; + // Value + float nV = fmaxf(nNormalizedR, nNormalizedG); + nV = fmaxf(nV, nNormalizedB); + // Saturation + float nTemp = fminf(nNormalizedR, nNormalizedG); + nTemp = fminf(nTemp, nNormalizedB); + float nDivisor = nV - nTemp; + if (nV == 0.0F) // achromatics case + { + nS = 0.0F; + nH = 0.0F; + } + else // chromatics case + nS = nDivisor / nV; + // Hue: + const float nCr = (nV - nNormalizedR) / nDivisor; + const float nCg = (nV - nNormalizedG) / nDivisor; + const float nCb = (nV - nNormalizedB) / nDivisor; + if (nNormalizedR == nV) + nH = nCb - nCg; + else if (nNormalizedG == nV) + nH = 2.0F + nCr - nCb; + else if (nNormalizedB == nV) + nH = 4.0F + nCg - nCr; + nH = nH * 0.166667F; // / 6.0F + if (nH < 0.0F) + nH = nH + 1.0F; + + return make_uchar3(nH * 255.0f, nS * 255.0f, nV* 255.0f); +} + +template <typename T> +__device__ inline uchar3 bgr2hsv(T bgr) { + return rgb2hsv(bgr.z, bgr.y, bgr.x); +} + +template <typename T> +__device__ inline uchar3 hsv2rgb(T h, T s, T v) { + float nNormalizedH = float(h) * 0.003921569F; // / 255.0F + const float nNormalizedS = float(s) * 0.003921569F; + const float nNormalizedV = float(v) * 0.003921569F; + float nR; + float nG; + float nB; + if (nNormalizedS == 0.0F) + { + nR = nG = nB = nNormalizedV; + } + else + { + if (nNormalizedH == 1.0F) + nNormalizedH = 0.0F; + else + nNormalizedH = nNormalizedH * 6.0F; // / 0.1667F + } + const float nI = floorf(nNormalizedH); + const float nF = nNormalizedH - nI; + const float nM = nNormalizedV * (1.0F - nNormalizedS); + const float nN = nNormalizedV * (1.0F - nNormalizedS * nF); + const float nK = nNormalizedV * (1.0F - nNormalizedS * (1.0F - nF)); + if (nI == 0.0F) + { nR = nNormalizedV; nG = nK; nB = nM; } + else if (nI == 1.0F) + { nR = nN; nG = nNormalizedV; nB = nM; } + else if (nI == 2.0F) + { nR = nM; nG = nNormalizedV; nB = nK; } + else if (nI == 3.0F) + { nR = nM; nG = nN; nB = nNormalizedV; } + else if (nI == 4.0F) + { nR = nK; nG = nM; nB = nNormalizedV; } + else if (nI == 5.0F) + { nR = nNormalizedV; nG = nM; nB = nN; } + return make_uchar3(nR * 255.0f, nG * 255.0f, nB * 255.0f); +} + +} + +#endif \ No newline at end of file diff --git a/lib/cudatl/include/cudatl/halfwarp.hpp b/lib/cudatl/include/cudatl/halfwarp.hpp new file mode 100644 index 0000000000000000000000000000000000000000..3085381ae4d6b8ca33f2e690232e7204828f6832 --- /dev/null +++ b/lib/cudatl/include/cudatl/halfwarp.hpp @@ -0,0 +1,41 @@ +#ifndef _CUDATL_HALFWARP_HPP_ +#define _CUDATL_HALFWARP_HPP_ + +#include <cuda_runtime.h> + +namespace cudatl { + +static constexpr int HALF_WARP_SIZE = 16; +static constexpr unsigned int HALF_MASK1 = 0xFFFF0000; +static constexpr unsigned int HALF_MASK2 = 0x0000FFFF; + +template <typename T> +__device__ inline T halfWarpMin(T e) { + for (int i = WARP_SIZE/4; i > 0; i /= 2) { + const T other = __shfl_xor_sync(FULL_MASK, e, i, WARP_SIZE); + e = min(e, other); + } + return e; +} + +template <typename T> +__device__ inline T halfWarpMax(T e) { + for (int i = WARP_SIZE/4; i > 0; i /= 2) { + const T other = __shfl_xor_sync(FULL_MASK, e, i, WARP_SIZE); + e = max(e, other); + } + return e; +} + +template <typename T> +__device__ inline T halfWarpSum(T e) { + for (int i = WARP_SIZE/4; i > 0; i /= 2) { + const T other = __shfl_xor_sync(FULL_MASK, e, i, WARP_SIZE); + e += other; + } + return e; +} + +} + +#endif diff --git a/lib/cudatl/include/cudatl/host_utility.hpp b/lib/cudatl/include/cudatl/host_utility.hpp new file mode 100644 index 0000000000000000000000000000000000000000..39f066c3573f10db41a0d34b0bcbc0a93d93a971 --- /dev/null +++ b/lib/cudatl/include/cudatl/host_utility.hpp @@ -0,0 +1,15 @@ +#ifndef _CUDATL_HOST_UTILITY_HPP_ +#define _CUDATL_HOST_UTILITY_HPP_ + +#include <cuda_runtime.hpp> +#include <string> + +namespace cudatl { + +inline safeCall(cudaError_t e) { + if (e != cudaSuccess) throw new std::exception(std::string("Cuda Error "+std::to_string(int(e)))); +} + +} + +#endif \ No newline at end of file diff --git a/lib/cudatl/include/cudatl/memory.hpp b/lib/cudatl/include/cudatl/memory.hpp new file mode 100644 index 0000000000000000000000000000000000000000..3b249dc043e1ad09365a8d8662b3280dc352710e --- /dev/null +++ b/lib/cudatl/include/cudatl/memory.hpp @@ -0,0 +1,40 @@ +#ifndef _CUDATL_MEMORY_HPP_ +#define _CUDATL_MEMORY_HPP_ + +#include <cudatl/host_utility.hpp> + +namespace cudatl { + +template <typename T> +T *allocate(size_t size) { +#ifdef USE_GPU + T *ptr; + cudatl::safeCall(cudaMalloc(&ptr, size*sizeof(T))); + return ptr; +#else + return new T[size]; +#endif +} + +template <typename T> +T *allocate(size_t width, size_t height, uint &pitch) { + if (width == 1 || height == 1) { + pitch = width; + return allocateMemory<T>((width > height) ? width : height); + } else { + T *ptr; + size_t ptmp; + cudatl::safeCall(cudaMallocPitch(&ptr, &ptmp, width*sizeof(T), height)); + pitch = ptmp/sizeof(T); + return ptr; + } +} + +template <typename T> +void free(T *ptr) { + cudatl::safeCall(cudaFree(ptr)); +} + +} + +#endif diff --git a/lib/cudatl/include/cudatl/warp.hpp b/lib/cudatl/include/cudatl/warp.hpp new file mode 100644 index 0000000000000000000000000000000000000000..1a41181e6ec5ab2478dcfa7abcfedc8079d0b0b9 --- /dev/null +++ b/lib/cudatl/include/cudatl/warp.hpp @@ -0,0 +1,61 @@ +#ifndef _CUDATL_WARP_HPP_ +#define _CUDATL_WARP_HPP_ + +#include <cuda_runtime.h> + +#define __cuda__ __host__ __device__ + +namespace cudatl { + +static constexpr int WARP_SIZE = 32; +static constexpr unsigned int FULL_MASK = 0xFFFFFFFF; + +template <typename T> +__device__ inline T warpMin(T e) { + for (int i = WARP_SIZE/2; i > 0; i /= 2) { + const T other = __shfl_xor_sync(FULL_MASK, e, i, WARP_SIZE); + e = min(e, other); + } + return e; +} + +template <typename T> +__device__ inline T warpMax(T e) { + for (int i = WARP_SIZE/2; i > 0; i /= 2) { + const T other = __shfl_xor_sync(FULL_MASK, e, i, WARP_SIZE); + e = max(e, other); + } + return e; +} + +template <typename T> +__device__ inline T warpSum(T e) { + for (int i = WARP_SIZE/2; i > 0; i /= 2) { + const T other = __shfl_xor_sync(FULL_MASK, e, i, WARP_SIZE); + e += other; + } + return e; +} + +/** + * Find first histogram bucket that cumulatively exceeds a threshold, summing + * all previous buckets. Note: s_Data must be 32 items. + * TODO: This could be more efficient, perhaps with _shfl_XXX + */ +template <typename T> +inline __device__ int warpScan(volatile T *s_Data, int tix, T threshold) { + const int thread = tix%32; + for (uint offset = 1; offset < WARP_SIZE; offset <<= 1) { + __syncwarp(); + const uint t = (thread >= offset) ? s_Data[thread] + s_Data[thread - offset] : s_Data[thread]; + __syncwarp(); + s_Data[thread] = t; + } + + const uint t = __ballot_sync(FULL_MASK, s_Data[thread] > threshold); + return __ffs(t); +} + +} + +#endif