diff --git a/CMakeLists.txt b/CMakeLists.txt index 305168889f4427fbfa26f5a33a7926f5f7c2dd98..3fe11fcd5f1aab55cd4e23da15782d75ae90156c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -29,12 +29,23 @@ set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/") include(Findglog) +MACRO( VERSION_STR_TO_INTS major minor patch version ) + + STRING( REGEX REPLACE "([0-9]+).[0-9]+.[0-9]+" "\\1" ${major} ${version} ) + STRING( REGEX REPLACE "[0-9]+.([0-9]+).[0-9]+" "\\1" ${minor} ${version} ) + STRING( REGEX REPLACE "[0-9]+.[0-9]+.([0-9]+)" "\\1" ${patch} ${version} ) + +ENDMACRO( VERSION_STR_TO_INTS ) + find_package( OpenCV REQUIRED COMPONENTS core imgproc highgui cudaimgproc calib3d imgcodecs videoio aruco cudaarithm cudastereo cudaoptflow face tracking quality) find_package( Threads REQUIRED ) find_package( URIParser REQUIRED ) find_package( MsgPack REQUIRED ) find_package( Eigen3 REQUIRED ) +VERSION_STR_TO_INTS(OPENCV_MAJOR OPENCV_MINOR OPENCV_PATCH ${OpenCV_VERSION}) +math(EXPR OPENCV_NUMBER "(${OPENCV_MAJOR} * 10000) + (${OPENCV_MINOR} * 100) + ${OPENCV_PATCH}") + find_program(CCACHE_PROGRAM ccache) if(CCACHE_PROGRAM) set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${CCACHE_PROGRAM}") diff --git a/applications/gui/src/camera.cpp b/applications/gui/src/camera.cpp index 2332831501d36c3578973a341ea924e8a48cd06b..00cb8554f5b4dceabe029aaf9c4ea78f499fe885 100644 --- a/applications/gui/src/camera.cpp +++ b/applications/gui/src/camera.cpp @@ -642,6 +642,21 @@ static void visualizeEnergy( const cv::Mat &depth, cv::Mat &out, cv::cvtColor(out,out, cv::COLOR_BGR2BGRA); } +static void visualizeWeights(const cv::Mat &weights, cv::Mat &out) +{ + weights.convertTo(out, CV_8U, 255.0f / 32767.0f); + //out = 255 - out; + //cv::Mat mask = (depth >= 39.0f); // TODO (mask for invalid pixels) + +#if (OPENCV_VERSION >= 40102) + applyColorMap(out, out, cv::COLORMAP_TURBO); +#else + applyColorMap(out, out, cv::COLORMAP_JET); +#endif + //out.setTo(cv::Scalar(255, 255, 255), mask); + cv::cvtColor(out,out, cv::COLOR_BGR2BGRA); +} + static void drawEdges( const cv::Mat &in, cv::Mat &out, const int ksize = 3, double weight = -1.0, const int threshold = 32, const int threshold_type = cv::THRESH_TOZERO) @@ -789,6 +804,12 @@ const GLTexture &ftl::gui::Camera::captureFrame() { visualizeEnergy(im2_, tmp, screen_->root()->value("float_image_max", 1.0f)); texture2_.update(tmp); break; + + case Channel::Weights: + if (im2_.rows == 0) { break; } + visualizeWeights(im2_, tmp); + texture2_.update(tmp); + break; case Channel::Density: case Channel::Energy: diff --git a/applications/gui/src/src_window.cpp b/applications/gui/src/src_window.cpp index a63861b24705a06866804e3bddd472dcd79ef317..f86a36d58c0b4acd1a10180561760a832c382b7b 100644 --- a/applications/gui/src/src_window.cpp +++ b/applications/gui/src/src_window.cpp @@ -29,6 +29,7 @@ #include <ftl/operators/smoothing.hpp> #include <ftl/operators/disparity.hpp> #include <ftl/operators/detectandtrack.hpp> +#include <ftl/operators/weighting.hpp> #include <nlohmann/json.hpp> @@ -192,12 +193,6 @@ bool SourceWindow::_processFrameset(ftl::rgbd::FrameSet &fs, bool fromstream) { fs.swapTo(*framesets_[fs.id]); } - /*if (fs.frames[0].hasChannel(Channel::Data)) { - int data = 0; - fs.frames[0].get(Channel::Data, data); - LOG(INFO) << "GOT DATA : " << data; - }*/ - const auto *cstream = interceptor_; _createDefaultCameras(*framesets_[fs.id], true); // cstream->available(fs.id).has(Channel::Depth) @@ -209,8 +204,10 @@ bool SourceWindow::_processFrameset(ftl::rgbd::FrameSet &fs, bool fromstream) { if (screen_->activeCamera() == cam.second.camera || (screen_->activeCamera() == nullptr && cycle_ % cameras_.size() == i++)) cam.second.camera->update(framesets_); - if (fromstream) cam.second.camera->update(fs.id, cstream->available(fs.id)); - else if (fs.frames.size() > 0) cam.second.camera->update(fs.id, fs.frames[0].getChannels()); + ftl::codecs::Channels<0> channels; + if (fromstream) channels = cstream->available(fs.id); + if ((*framesets_[fs.id]).frames.size() > 0) channels += (*framesets_[fs.id]).frames[0].getChannels(); + cam.second.camera->update(fs.id, channels); } ++cycle_; @@ -225,8 +222,10 @@ void SourceWindow::_checkFrameSets(int id) { p->append<ftl::operators::ArUco>("aruco")->value("enabled", false); //p->append<ftl::operators::HFSmoother>("hfnoise"); p->append<ftl::operators::CrossSupport>("cross"); - p->append<ftl::operators::DiscontinuityMask>("discontinuity"); + p->append<ftl::operators::PixelWeights>("weights"); + p->append<ftl::operators::CullWeight>("remove_weights")->value("enabled", false); p->append<ftl::operators::CullDiscontinuity>("remove_discontinuity"); + p->append<ftl::operators::DegradeWeight>("degrade"); p->append<ftl::operators::VisCrossSupport>("viscross")->set("enabled", false); pre_pipelines_.push_back(p); diff --git a/components/codecs/include/ftl/codecs/channels.hpp b/components/codecs/include/ftl/codecs/channels.hpp index 22c98d572b88201c4ee3b23b6c7205f5ae10f06d..370908195cf1590fcf5caa61400238dd6f2a42d4 100644 --- a/components/codecs/include/ftl/codecs/channels.hpp +++ b/components/codecs/include/ftl/codecs/channels.hpp @@ -18,7 +18,7 @@ enum struct Channel : int { Deviation = 4, Screen = 4, Normals = 5, // 32FC4 - Points = 6, // 32FC4 (should be deprecated) + Weights = 6, // short Confidence = 7, // 32F Contribution = 7, // 32F EnergyVector = 8, // 32FC4 @@ -90,6 +90,7 @@ class Channels { inline Channels &operator|=(Channel c) { mask |= (c == Channel::None || static_cast<unsigned int>(c) - BASE >= 32) ? 0 : (0x1 << (static_cast<unsigned int>(c) - BASE)); return *this; } inline Channels &operator+=(Channel c) { mask |= (c == Channel::None || static_cast<unsigned int>(c) - BASE >= 32) ? 0 : (0x1 << (static_cast<unsigned int>(c) - BASE)); return *this; } inline Channels &operator-=(Channel c) { mask &= ~((c == Channel::None || static_cast<unsigned int>(c) - BASE >= 32) ? 0 : (0x1 << (static_cast<unsigned int>(c) - BASE))); return *this; } + inline Channels &operator+=(Channels<BASE> cs) { mask |= cs.mask; return *this; } inline Channels &operator+=(unsigned int c) { mask |= (0x1 << (c - BASE)); return *this; } inline Channels &operator-=(unsigned int c) { mask &= ~(0x1 << (c - BASE)); return *this; } inline Channels &operator&=(const Channels<BASE> &c) { mask &= c.mask; return *this; } diff --git a/components/codecs/src/channels.cpp b/components/codecs/src/channels.cpp index b5b52fecf048902212670b0278d800bb2a32a12f..55426cb26513b28289a32c028d3067efa5ac54f5 100644 --- a/components/codecs/src/channels.cpp +++ b/components/codecs/src/channels.cpp @@ -14,7 +14,7 @@ static ChannelInfo info[] = { "DepthRight", CV_32F, // 3 "Deviation", CV_32F, // 4 "Normals", CV_32FC4, // 5 - "Points", CV_32FC4, // 6 + "Weights", CV_16SC1, // 6 "Confidence", CV_32F, // 7 "EnergyVector", CV_32FC4, // 8 "Flow", CV_32F, // 9 diff --git a/components/common/cpp/include/ftl/config.h.in b/components/common/cpp/include/ftl/config.h.in index 3de97a9c00c73afa572bbd9f9aad51cd1f87b784..5b54fea8e957b5fb3f7e9d403e6f45aec41c7c51 100644 --- a/components/common/cpp/include/ftl/config.h.in +++ b/components/common/cpp/include/ftl/config.h.in @@ -29,6 +29,12 @@ #cmakedefine ENABLE_PROFILER +#define OPENCV_MAJOR @OPENCV_MAJOR@ +#define OPENCV_MINOR @OPENCV_MINOR@ +#define OPENCV_PATCH @OPENCV_PATCH@ + +#define OPENCV_VERSION @OPENCV_NUMBER@ + extern const char *FTL_BRANCH; extern const char *FTL_VERSION_LONG; extern const char *FTL_VERSION; diff --git a/components/common/cpp/include/ftl/cuda_common.hpp b/components/common/cpp/include/ftl/cuda_common.hpp index 8210fdb92a4bcd3cc820e92679ec3141aa861551..c74c4faa0868c3e352a41e77369daa2ea24bd78e 100644 --- a/components/common/cpp/include/ftl/cuda_common.hpp +++ b/components/common/cpp/include/ftl/cuda_common.hpp @@ -42,6 +42,7 @@ template <> struct Float<float4> { typedef float4 type; }; template <> struct Float<uchar4> { typedef float4 type; }; template <> struct Float<uint8_t> { typedef float type; }; template <> struct Float<short2> { typedef float2 type; }; +template <> struct Float<short> { typedef float type; }; template <typename T> struct ScaleValue; @@ -50,6 +51,7 @@ template <> struct ScaleValue<uchar4> { static constexpr float value = 255.0f; } template <> struct ScaleValue<uint8_t> { static constexpr float value = 255.0f; }; template <> struct ScaleValue<float> { static constexpr float value = 1.0f; }; template <> struct ScaleValue<float4> { static constexpr float value = 1.0f; }; +template <> struct ScaleValue<short> { static constexpr float value = 32000.0f; }; /** * Represent a CUDA texture object. Instances of this class can be used on both diff --git a/components/operators/CMakeLists.txt b/components/operators/CMakeLists.txt index 6d6e796b5decb3272f549e02721ea87262938980..a5e9ea7790c40538f55d3911e4fd1a67193736b6 100644 --- a/components/operators/CMakeLists.txt +++ b/components/operators/CMakeLists.txt @@ -25,6 +25,8 @@ set(OPERSRC src/depth.cpp src/detectandtrack.cpp src/aruco.cpp + src/weighting.cpp + src/weighting.cu ) if (LIBSGM_FOUND) diff --git a/components/operators/include/ftl/operators/mask_cuda.hpp b/components/operators/include/ftl/operators/mask_cuda.hpp index 818724b16da48f44d0177749b41fee0a5634e68f..53747d61fd82c96ec1eafdb4f51d3ad322db1222 100644 --- a/components/operators/include/ftl/operators/mask_cuda.hpp +++ b/components/operators/include/ftl/operators/mask_cuda.hpp @@ -13,14 +13,16 @@ namespace cuda { */ class Mask { public: + typedef uint8_t type; + __device__ inline Mask() : v_(0u) {} - __device__ explicit inline Mask(uint8_t v) : v_(v) {} + __device__ explicit inline Mask(type v) : v_(v) {} #ifdef __CUDACC__ - __device__ inline Mask(const ftl::cuda::TextureObject<uint8_t> &m, int x, int y) : v_(m.tex2D(x,y)) {} + __device__ inline Mask(const ftl::cuda::TextureObject<type> &m, int x, int y) : v_(m.tex2D(x,y)) {} #endif __device__ inline operator int() const { return v_; } - __device__ inline bool is(uint8_t m) const { return v_ & m; } + __device__ inline bool is(type m) const { return v_ & m; } __device__ inline bool isFilled() const { return v_ & kMask_Filled; } __device__ inline bool isDiscontinuity() const { return v_ & kMask_Discontinuity; } @@ -34,18 +36,18 @@ class Mask { __device__ inline void isBad(bool v) { v_ = (v) ? v_ | kMask_Bad : v_ & (~kMask_Bad); } __device__ inline void isNoise(bool v) { v_ = (v) ? v_ | kMask_Noise : v_ & (~kMask_Noise); } - static constexpr uint8_t kMask_Filled = 0x01; - static constexpr uint8_t kMask_Discontinuity = 0x02; - static constexpr uint8_t kMask_Correspondence = 0x04; - static constexpr uint8_t kMask_Bad = 0x08; - static constexpr uint8_t kMask_Noise = 0x10; + static constexpr type kMask_Filled = 0x01; + static constexpr type kMask_Discontinuity = 0x02; + static constexpr type kMask_Correspondence = 0x04; + static constexpr type kMask_Bad = 0x08; + static constexpr type kMask_Noise = 0x10; private: - uint8_t v_; + type v_; }; void discontinuity( - ftl::cuda::TextureObject<uint8_t> &mask, + ftl::cuda::TextureObject<ftl::cuda::Mask::type> &mask, ftl::cuda::TextureObject<uchar4> &support, ftl::cuda::TextureObject<float> &depth, const cv::Size size, @@ -55,7 +57,7 @@ void discontinuity( cudaStream_t stream); void discontinuity( - ftl::cuda::TextureObject<uint8_t> &mask, + ftl::cuda::TextureObject<ftl::cuda::Mask::type> &mask, ftl::cuda::TextureObject<uchar4> &support, ftl::cuda::TextureObject<float> &depth, const cv::Size size, @@ -68,9 +70,9 @@ void discontinuity( cudaStream_t stream); void cull_mask( - ftl::cuda::TextureObject<uint8_t> &mask, + ftl::cuda::TextureObject<ftl::cuda::Mask::type> &mask, ftl::cuda::TextureObject<float> &depth, - uint8_t id, + ftl::cuda::Mask::type id, unsigned int radius, cudaStream_t stream); diff --git a/components/operators/include/ftl/operators/smoothing.hpp b/components/operators/include/ftl/operators/smoothing.hpp index 2714af64fa498e6c22bdf4eb7cd242c0d5773e2d..bb99865954f4553fab1b7e6c862bd4828dcd5b35 100644 --- a/components/operators/include/ftl/operators/smoothing.hpp +++ b/components/operators/include/ftl/operators/smoothing.hpp @@ -61,6 +61,7 @@ class SimpleMLS : public ftl::operators::Operator { bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t stream) override; private: + ftl::rgbd::Frame temp_; }; /** @@ -77,6 +78,7 @@ class ColourMLS : public ftl::operators::Operator { bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t stream) override; private: + ftl::rgbd::Frame temp_; }; /** @@ -122,6 +124,8 @@ class AggreMLS : public ftl::operators::Operator { ftl::cuda::TextureObject<float4> centroid_horiz_; ftl::cuda::TextureObject<float4> centroid_vert_; ftl::cuda::TextureObject<float4> normals_horiz_; + + ftl::rgbd::Frame temp_; }; /** @@ -141,6 +145,7 @@ class AdaptiveMLS : public ftl::operators::Operator { bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t stream) override; private: + ftl::rgbd::Frame temp_; }; } diff --git a/components/operators/include/ftl/operators/weighting.hpp b/components/operators/include/ftl/operators/weighting.hpp new file mode 100644 index 0000000000000000000000000000000000000000..545b4c3b8380311aaccc29190a83626da28b8aa6 --- /dev/null +++ b/components/operators/include/ftl/operators/weighting.hpp @@ -0,0 +1,59 @@ +#ifndef _FTL_OPERATORS_WEIGHTING_HPP_ +#define _FTL_OPERATORS_WEIGHTING_HPP_ + +#include <ftl/operators/operator.hpp> +#include <ftl/cuda_common.hpp> + +namespace ftl { +namespace operators { + +/** + * Generate weight values for each pixel in the depth map. At the same time + * output a mask for the pixel and optionally output normals since they are + * implicitely calculated anyway. + * + * Weights are based upon: + * 1) Depth resolution, or distance from the camera + * 2) Normal direction relative to camera direction + * 3) Noise or discontinuity assessment. + * + * These weights are then used, for example, by the rendering process when + * generating depth or colour values from multiple sources. + */ +class PixelWeights : public ftl::operators::Operator { + public: + explicit PixelWeights(ftl::Configurable*); + ~PixelWeights(); + + inline Operator::Type type() const override { return Operator::Type::OneToOne; } + + bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t stream) override; + +}; + +class CullWeight : public ftl::operators::Operator { + public: + explicit CullWeight(ftl::Configurable*); + ~CullWeight(); + + inline Operator::Type type() const override { return Operator::Type::OneToOne; } + + bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t stream) override; + +}; + +class DegradeWeight : public ftl::operators::Operator { + public: + explicit DegradeWeight(ftl::Configurable*); + ~DegradeWeight(); + + inline Operator::Type type() const override { return Operator::Type::OneToOne; } + + bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t stream) override; + +}; + +} +} + +#endif // _FTL_OPERATORS_WEIGHTING_HPP_ diff --git a/components/operators/src/mask.cu b/components/operators/src/mask.cu index 9f25b50ffc73c9e7881a1ce623a7e33d67b7d7f2..4975bcd069f204d3ec1015d8af871e1ad861569f 100644 --- a/components/operators/src/mask.cu +++ b/components/operators/src/mask.cu @@ -4,57 +4,6 @@ using ftl::cuda::Mask; -/* OLD VERSION */ -__global__ void discontinuity_kernel(ftl::cuda::TextureObject<uint8_t> mask_out, - ftl::cuda::TextureObject<uchar4> support, - ftl::cuda::TextureObject<float> depth, - const cv::Size size, const double minDepth, const double maxDepth, - float depthCoef, int radius) { - - const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; - const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; - - if (x < size.width && y < size.height) { - Mask mask(0); - - const float d = depth.tex2D((int)x, (int)y); - // Multiples of pixel size at given depth - //const float threshold = (depthCoef / ((depthCoef / d) - (radius+disconDisparities-1))) - d; - const float threshold = depthCoef * d; // Where depthCoef = 1 / focal * N, N = number of pixel distances equal to a discon. - - if (d > minDepth && d < maxDepth) { - /* Orts-Escolano S. et al. 2016. Holoportation: Virtual 3D teleportation in real-time. */ - - // If colour cross support region terminates within the requested - // radius, and the absolute depth difference on the other side is - // greater than threshold, then is is a discontinuity. - // Repeat for left, right, up and down. - const uchar4 sup = support.tex2D((int)x, (int)y); - if (sup.x <= radius) { - float dS = depth.tex2D((int)x - sup.x - radius, (int)y); - if (fabs(dS - d) > threshold) mask.isDiscontinuity(true); - } - if (sup.y <= radius) { - float dS = depth.tex2D((int)x + sup.y + radius, (int)y); - if (fabs(dS - d) > threshold) mask.isDiscontinuity(true); - } - if (sup.z <= radius) { - float dS = depth.tex2D((int)x, (int)y - sup.z - radius); - if (fabs(dS - d) > threshold) mask.isDiscontinuity(true); - } - if (sup.w <= radius) { - float dS = depth.tex2D((int)x, (int)y + sup.w + radius); - if (fabs(dS - d) > threshold) mask.isDiscontinuity(true); - } - - // FIXME: The above results in a cross formation, need to test all 8 directions - } - - mask_out(x,y) = (int)mask; - } -} - -/* New / Current version */ __global__ void discontinuity_kernel(ftl::cuda::TextureObject<uint8_t> mask_out, ftl::cuda::TextureObject<uchar4> support, ftl::cuda::TextureObject<float> depth, @@ -101,22 +50,6 @@ __global__ void discontinuity_kernel(ftl::cuda::TextureObject<uint8_t> mask_out, } } -void ftl::cuda::discontinuity( ftl::cuda::TextureObject<uint8_t> &mask_out, ftl::cuda::TextureObject<uchar4> &support, - ftl::cuda::TextureObject<float> &depth, - const cv::Size size, const double minDepth, const double maxDepth, - int discon, float depthCoef, cudaStream_t stream) { - - const dim3 gridSize((size.width + T_PER_BLOCK - 1)/T_PER_BLOCK, (size.height + T_PER_BLOCK - 1)/T_PER_BLOCK); - const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); - - discontinuity_kernel<<<gridSize, blockSize, 0, stream>>>(mask_out, support, depth, size, minDepth, maxDepth, depthCoef, discon); - cudaSafeCall( cudaGetLastError() ); - -#ifdef _DEBUG - cudaSafeCall(cudaDeviceSynchronize()); -#endif -} - void ftl::cuda::discontinuity( ftl::cuda::TextureObject<uint8_t> &mask_out, ftl::cuda::TextureObject<uchar4> &support, ftl::cuda::TextureObject<float> &depth, const cv::Size size, const double minDepth, const double maxDepth, diff --git a/components/operators/src/smoothing.cpp b/components/operators/src/smoothing.cpp index 7f9cb50d3d682d90f899e2069a77ed1efb62af2c..c558637085972bdb12f84697131e9617f0e20a9c 100644 --- a/components/operators/src/smoothing.cpp +++ b/components/operators/src/smoothing.cpp @@ -160,17 +160,18 @@ bool SimpleMLS::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t for (int i=0; i<iters; ++i) { ftl::cuda::mls_smooth( in.createTexture<float4>(Channel::Normals), - in.createTexture<float4>(Channel::Points, ftl::rgbd::Format<float4>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), + temp_.createTexture<float4>(Channel::Normals, ftl::rgbd::Format<float4>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), in.createTexture<float>(Channel::Depth), - in.createTexture<float>(Channel::Depth2, ftl::rgbd::Format<float>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), + temp_.createTexture<float>(Channel::Depth, ftl::rgbd::Format<float>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), thresh, radius, in.getLeftCamera(), stream ); - in.swapChannels(Channel::Depth, Channel::Depth2); - in.swapChannels(Channel::Normals, Channel::Points); + //in.swapChannels(Channel::Depth, Channel::Depth2); + //in.swapChannels(Channel::Normals, Channel::Points); + temp_.swapChannels(Channel::Normals + Channel::Depth, in); } return true; @@ -204,9 +205,9 @@ bool ColourMLS::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t if (!crosssup) { ftl::cuda::colour_mls_smooth( in.createTexture<float4>(Channel::Normals), - in.createTexture<float4>(Channel::Points, ftl::rgbd::Format<float4>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), + temp_.createTexture<float4>(Channel::Normals, ftl::rgbd::Format<float4>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), in.createTexture<float>(Channel::Depth), - in.createTexture<float>(Channel::Depth2, ftl::rgbd::Format<float>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), + temp_.createTexture<float>(Channel::Depth, ftl::rgbd::Format<float>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), in.createTexture<uchar4>(Channel::Colour), thresh, col_smooth, @@ -218,9 +219,9 @@ bool ColourMLS::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t ftl::cuda::colour_mls_smooth_csr( in.createTexture<uchar4>(Channel::Support1), in.createTexture<float4>(Channel::Normals), - in.createTexture<float4>(Channel::Points, ftl::rgbd::Format<float4>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), + temp_.createTexture<float4>(Channel::Normals, ftl::rgbd::Format<float4>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), in.createTexture<float>(Channel::Depth), - in.createTexture<float>(Channel::Depth2, ftl::rgbd::Format<float>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), + temp_.createTexture<float>(Channel::Depth, ftl::rgbd::Format<float>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), in.createTexture<uchar4>(Channel::Colour), thresh, col_smooth, @@ -230,8 +231,9 @@ bool ColourMLS::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t ); } - in.swapChannels(Channel::Depth, Channel::Depth2); - in.swapChannels(Channel::Normals, Channel::Points); + //in.swapChannels(Channel::Depth, Channel::Depth2); + //in.swapChannels(Channel::Normals, Channel::Points); + temp_.swapChannels(Channel::Normals + Channel::Depth, in); } return true; @@ -307,21 +309,22 @@ bool AggreMLS::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t s in.createTexture<float4>(Channel::Normals), centroid_vert_, in.createTexture<float>(Channel::Depth), - in.createTexture<float>(Channel::Depth2, ftl::rgbd::Format<float>(size)), + temp_.createTexture<float>(Channel::Depth, ftl::rgbd::Format<float>(size)), in.getLeftCamera(), stream ); - in.swapChannels(Channel::Depth, Channel::Depth2); + //in.swapChannels(Channel::Depth, Channel::Depth2); //in.swapChannels(Channel::Normals, Channel::Points); + temp_.swapChannels(ftl::codecs::Channels<0>(Channel::Depth), in); } else { ftl::cuda::colour_mls_smooth_csr( in.createTexture<uchar4>(Channel::Support1), in.createTexture<float4>(Channel::Normals), - in.createTexture<float4>(Channel::Points, ftl::rgbd::Format<float4>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), + temp_.createTexture<float4>(Channel::Normals, ftl::rgbd::Format<float4>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), in.createTexture<float>(Channel::Depth), - in.createTexture<float>(Channel::Depth2, ftl::rgbd::Format<float>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), + temp_.createTexture<float>(Channel::Depth, ftl::rgbd::Format<float>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), in.createTexture<uchar4>(Channel::Colour), thresh, col_smooth, @@ -330,8 +333,7 @@ bool AggreMLS::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t s stream ); - in.swapChannels(Channel::Depth, Channel::Depth2); - in.swapChannels(Channel::Normals, Channel::Points); + temp_.swapChannels(Channel::Normals + Channel::Depth, in); } } @@ -362,17 +364,17 @@ bool AdaptiveMLS::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_ for (int i=0; i<iters; ++i) { ftl::cuda::adaptive_mls_smooth( in.createTexture<float4>(Channel::Normals), - in.createTexture<float4>(Channel::Points, ftl::rgbd::Format<float4>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), + temp_.createTexture<float4>(Channel::Normals, ftl::rgbd::Format<float4>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), in.createTexture<float>(Channel::Depth), - in.createTexture<float>(Channel::Depth2, ftl::rgbd::Format<float>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), + temp_.createTexture<float>(Channel::Depth, ftl::rgbd::Format<float>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), in.createTexture<float>(Channel::Smoothing), radius, in.getLeftCamera(), stream ); - in.swapChannels(Channel::Depth, Channel::Depth2); - in.swapChannels(Channel::Normals, Channel::Points); + temp_.swapChannels(Channel::Normals + Channel::Depth, in); + } return true; diff --git a/components/operators/src/weighting.cpp b/components/operators/src/weighting.cpp new file mode 100644 index 0000000000000000000000000000000000000000..520f136caa9d21e6821f8596f1b7e6d57ce3c9f0 --- /dev/null +++ b/components/operators/src/weighting.cpp @@ -0,0 +1,101 @@ +#include <ftl/operators/weighting.hpp> +#include "weighting_cuda.hpp" + +#define LOGURU_REPLACE_GLOG 1 +#include <loguru.hpp> + +using ftl::operators::PixelWeights; +using ftl::operators::CullWeight; +using ftl::operators::DegradeWeight; +using ftl::codecs::Channel; + +PixelWeights::PixelWeights(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) { + +} + +PixelWeights::~PixelWeights() { + +} + +bool PixelWeights::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t stream) { + if (in.hasChannel(Channel::Mask)) return true; + + ftl::cuda::PixelWeightingParameters params; + //int radius = config()->value("radius", 2); + float disconPixels = config()->value("discon_pixels", 15.0f); + params.depthCoef = (1.0f / in.getLeft().fx) * disconPixels; + params.disconThresh = config()->value("discon_thresh", 0.8f); + params.noiseThresh = config()->value("noise_thresh", 0.8f); + params.areaMax = config()->value("area_max", 126.0f); // Cross support radius squared + 1 + params.depth = config()->value("use_depth", true); + params.colour = config()->value("use_colour", false); + params.noise = config()->value("use_noise", true); + params.normals = config()->value("use_normals", true); + + if (!in.hasChannel(Channel::Depth) || !in.hasChannel(Channel::Support1)) return false; + + ftl::cuda::pixel_weighting( + out.createTexture<short>(Channel::Weights, ftl::rgbd::Format<short>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), + out.createTexture<uint8_t>(Channel::Mask, ftl::rgbd::Format<uint8_t>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), + in.createTexture<uchar4>(Channel::Support1), + in.createTexture<float>(Channel::Depth), + in.getLeftCamera(), + in.get<cv::cuda::GpuMat>(Channel::Depth).size(), + params, stream + ); + + return true; +} + +CullWeight::CullWeight(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) { + +} + +CullWeight::~CullWeight() { + +} + +bool CullWeight::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t stream) { + if (!in.hasChannel(Channel::Depth) || !in.hasChannel(Channel::Weights)) return false; + + float weight = config()->value("weight", 0.1f); + + out.clearPackets(Channel::Depth); // Force reset + ftl::cuda::cull_weight( + in.createTexture<short>(Channel::Weights), + out.createTexture<float>(Channel::Depth), + weight, + stream + ); + + return true; +} + + + +DegradeWeight::DegradeWeight(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) { + +} + +DegradeWeight::~DegradeWeight() { + +} + +bool DegradeWeight::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t stream) { + if (!in.hasChannel(Channel::Mask) || !in.hasChannel(Channel::Weights)) return false; + + uint8_t maskid = config()->value("mask_id", 2); + int radius = config()->value("radius", 2); + bool invert = config()->value("invert", false); + + ftl::cuda::degrade_mask( + in.createTexture<uint8_t>(Channel::Mask), + out.createTexture<short>(Channel::Weights), + maskid, + radius, + invert, + stream + ); + + return true; +} diff --git a/components/operators/src/weighting.cu b/components/operators/src/weighting.cu new file mode 100644 index 0000000000000000000000000000000000000000..286207f391a6f90259b813150e4a26286f245ec8 --- /dev/null +++ b/components/operators/src/weighting.cu @@ -0,0 +1,209 @@ +#include "weighting_cuda.hpp" +#include <ftl/cuda/weighting.hpp> + +#define T_PER_BLOCK 8 + +using ftl::cuda::Mask; + +__device__ inline bool isValid(const ftl::rgbd::Camera &camera, const float3 &d) { + return d.z >= camera.minDepth && d.z <= camera.maxDepth; +} + +__device__ inline float square(float v) { return v*v; } + +__global__ void pixel_weight_kernel( + ftl::cuda::TextureObject<short> weight_out, + ftl::cuda::TextureObject<uint8_t> mask_out, + ftl::cuda::TextureObject<uchar4> support, + ftl::cuda::TextureObject<float> depth, + ftl::rgbd::Camera camera, + const cv::Size size, ftl::cuda::PixelWeightingParameters params) { + + const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; + const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x < size.width && y < size.height) { + Mask mask(0); + + const float d = depth.tex2D((int)x, (int)y); + // Multiples of pixel size at given depth + //const float threshold = (depthCoef / ((depthCoef / d) - (radius+disconDisparities-1))) - d; + const float threshold = params.depthCoef * d; // Where depthCoef = 1 / focal * N, N = number of pixel distances equal to a discon. + + float weight = 0.0f; + + if (d > camera.minDepth && d < camera.maxDepth) { + if (params.depth) { + weight = 1.0f - ((d - camera.minDepth) / (camera.maxDepth - camera.minDepth)); + weight *= weight; + } else { + weight = 1.0f; + } + /* Orts-Escolano S. et al. 2016. Holoportation: Virtual 3D teleportation in real-time. + * This paper just says to remove values around discontinuities. */ + + const float depths[4] = { + depth.tex2D((int)x+0, (int)y+1), + depth.tex2D((int)x+1, (int)y+0), + depth.tex2D((int)x+0, (int)y-1), + depth.tex2D((int)x-1, (int)y+0) + }; + + if (params.normals) { + const float3 CC = camera.screenToCam(x+0, y+0, d); + const float3 PC = camera.screenToCam(x+0, y+1, depths[0]); + const float3 CP = camera.screenToCam(x+1, y+0, depths[1]); + const float3 MC = camera.screenToCam(x+0, y-1, depths[2]); + const float3 CM = camera.screenToCam(x-1, y+0, depths[3]); + + //if(CC.z < && PC.x != MINF && CP.x != MINF && MC.x != MINF && CM.x != MINF) { + if (isValid(camera,CC) && isValid(camera,PC) && isValid(camera,CP) && isValid(camera,MC) && isValid(camera,CM)) { + float3 n = cross(PC-MC, CP-CM); + const float l = length(n); + + if(l > 0.0f) { + n = n / -l; + float3 ray = camera.screenToCam(x, y, d); + ray = ray / length(ray); + weight *= min(1.0f, 1.4*dot(ray,n)); + } + } + } + + // Find max change in depth gradient in each direction + const float g1 = fabsf((depths[3] - d) - (d - depths[1])); + const float g2 = fabsf((depths[2] - d) - (d - depths[0])); + const float g3 = fabsf((depth.tex2D(x-1, y-1) - d) - (d - depth.tex2D(x+1,y+1))); + const float g4 = fabsf((depth.tex2D(x+1, y-1) - d) - (d - depth.tex2D(x-1,y+1))); + const float g = max(g1,max(g2,(max(g3,g4)))); + + // Calculate support window area + //const uchar4 sup = support.tex2D((int)x, (int)y); + const uchar4 sup = getScaledTex2D(x, y, support, depth); + const float supx = min(sup.x,sup.y); + const float supy = min(sup.z,sup.w); + const float area = supx * supx * supy; + + float grad_weight = min(1.0f, g / threshold); + float area_weight = min(1.0f, area / params.areaMax); + + if (grad_weight * (1.0f - area_weight) > params.disconThresh) mask.isDiscontinuity(true); + if (grad_weight * (area_weight) > params.noiseThresh) mask.isNoise(true); + + // High change of gradient and large area means noise + if (params.noise) weight *= (1.0f - square(grad_weight * area_weight)); + + // Really high area means planar so unlikely to be reliable. + //if (params.colour) weight *= (min(1.2f, (1.8f - area_weight))); + if (params.colour) weight *= (min(1.0f, (1.0f - area_weight))); + } + + mask_out(x,y) = (int)mask; + weight_out(x,y) = short(min(1.0f, weight) * 32767.0f); + } +} + +void ftl::cuda::pixel_weighting( + ftl::cuda::TextureObject<short> &weight_out, + ftl::cuda::TextureObject<ftl::cuda::Mask::type> &mask_out, + ftl::cuda::TextureObject<uchar4> &support, + ftl::cuda::TextureObject<float> &depth, + const ftl::rgbd::Camera &camera, + const cv::Size size, const ftl::cuda::PixelWeightingParameters ¶ms, + cudaStream_t stream) { + + const dim3 gridSize((size.width + T_PER_BLOCK - 1)/T_PER_BLOCK, (size.height + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + pixel_weight_kernel<<<gridSize, blockSize, 0, stream>>>(weight_out, mask_out, support, depth, camera, size, params); + cudaSafeCall( cudaGetLastError() ); + + #ifdef _DEBUG + cudaSafeCall(cudaDeviceSynchronize()); + #endif +} + +// ============================================================================= + +__global__ void cull_weight_kernel(ftl::cuda::TextureObject<short> weights, ftl::cuda::TextureObject<float> depth, float thresh) { + const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; + const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x < depth.width() && y < depth.height()) { + if (float(weights.tex2D(x,y)) / 32767.0f < thresh) { + depth(x,y) = 0.0f; + } + } +} + +void ftl::cuda::cull_weight(ftl::cuda::TextureObject<short> &weights, ftl::cuda::TextureObject<float> &depth, float thresh, cudaStream_t stream) { + const dim3 gridSize((depth.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + cull_weight_kernel<<<gridSize, blockSize, 0, stream>>>(weights, depth, thresh); + cudaSafeCall( cudaGetLastError() ); + +#ifdef _DEBUG + cudaSafeCall(cudaDeviceSynchronize()); +#endif +} + +// ============================================================================= + +template <int RADIUS, bool INVERT> +__global__ void degrade_mask_kernel(ftl::cuda::TextureObject<uint8_t> mask, + ftl::cuda::TextureObject<short> weights, uint8_t id) { + const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; + const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x < weights.width()-RADIUS && y < weights.height()-RADIUS) { + float distance = 100.0f; + + #pragma unroll + for (int v=-RADIUS; v<=RADIUS; ++v) { + #pragma unroll + for (int u=-RADIUS; u<=RADIUS; ++u) { + Mask m(mask.tex2D((int)x+u,(int)y+v)); + if (m.is(id)) distance = min(distance, sqrt(float(v*v + u*u))); + } + } + + if (distance < 99.0f) { + const float w = float(weights(x,y)); + if (INVERT) weights(x,y) = short(w + (32767.0f-w) * (ftl::cuda::weighting(distance, float(RADIUS+1)))); + else weights(x,y) = short(w * (1.0f - ftl::cuda::weighting(distance, float(RADIUS+1)))); + } + } +} + +void ftl::cuda::degrade_mask(ftl::cuda::TextureObject<uint8_t> &mask, ftl::cuda::TextureObject<short> &weights, uint8_t id, unsigned int radius, bool invert, cudaStream_t stream) { + const dim3 gridSize((weights.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (weights.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + if (invert) { + switch (radius) { + case 0 : degrade_mask_kernel<0,true><<<gridSize, blockSize, 0, stream>>>(mask, weights, id); break; + case 1 : degrade_mask_kernel<1,true><<<gridSize, blockSize, 0, stream>>>(mask, weights, id); break; + case 2 : degrade_mask_kernel<2,true><<<gridSize, blockSize, 0, stream>>>(mask, weights, id); break; + case 3 : degrade_mask_kernel<3,true><<<gridSize, blockSize, 0, stream>>>(mask, weights, id); break; + case 4 : degrade_mask_kernel<4,true><<<gridSize, blockSize, 0, stream>>>(mask, weights, id); break; + case 5 : degrade_mask_kernel<5,true><<<gridSize, blockSize, 0, stream>>>(mask, weights, id); break; + default: break; + } + } else { + switch (radius) { + case 0 : degrade_mask_kernel<0,false><<<gridSize, blockSize, 0, stream>>>(mask, weights, id); break; + case 1 : degrade_mask_kernel<1,false><<<gridSize, blockSize, 0, stream>>>(mask, weights, id); break; + case 2 : degrade_mask_kernel<2,false><<<gridSize, blockSize, 0, stream>>>(mask, weights, id); break; + case 3 : degrade_mask_kernel<3,false><<<gridSize, blockSize, 0, stream>>>(mask, weights, id); break; + case 4 : degrade_mask_kernel<4,false><<<gridSize, blockSize, 0, stream>>>(mask, weights, id); break; + case 5 : degrade_mask_kernel<5,false><<<gridSize, blockSize, 0, stream>>>(mask, weights, id); break; + default: break; + } + } + cudaSafeCall( cudaGetLastError() ); + +#ifdef _DEBUG + cudaSafeCall(cudaDeviceSynchronize()); +#endif +} diff --git a/components/operators/src/weighting_cuda.hpp b/components/operators/src/weighting_cuda.hpp new file mode 100644 index 0000000000000000000000000000000000000000..7cd544012db66c59c578c808e3abc3351d9c54b1 --- /dev/null +++ b/components/operators/src/weighting_cuda.hpp @@ -0,0 +1,45 @@ +#ifndef _FTL_OPERATORS_WEIGHTING_CUDA_HPP_ +#define _FTL_OPERATORS_WEIGHTING_CUDA_HPP_ + +#include <ftl/operators/mask_cuda.hpp> + +namespace ftl { +namespace cuda { + +struct PixelWeightingParameters { + float depthCoef; + float disconThresh; + float noiseThresh; + float areaMax; + bool normals; + bool depth; + bool colour; + bool noise; +}; + +void pixel_weighting( + ftl::cuda::TextureObject<short> &weight_out, + ftl::cuda::TextureObject<ftl::cuda::Mask::type> &mask_out, + ftl::cuda::TextureObject<uchar4> &support, + ftl::cuda::TextureObject<float> &depth, + const ftl::rgbd::Camera &camera, + const cv::Size size, const ftl::cuda::PixelWeightingParameters ¶ms, + cudaStream_t stream); + +void cull_weight( + ftl::cuda::TextureObject<short> &weights, + ftl::cuda::TextureObject<float> &depth, + float thresh, cudaStream_t stream); + +void degrade_mask( + ftl::cuda::TextureObject<uint8_t> &mask, + ftl::cuda::TextureObject<short> &weights, + uint8_t id, + unsigned int radius, + bool invert, + cudaStream_t stream); + +} +} + +#endif diff --git a/components/renderers/cpp/include/ftl/cuda/weighting.hpp b/components/renderers/cpp/include/ftl/cuda/weighting.hpp index b0c3f58b9c5a959c58fb9b0cba43bf27823aac4f..61db736d316494b9c20262eff080aacec710ec26 100644 --- a/components/renderers/cpp/include/ftl/cuda/weighting.hpp +++ b/components/renderers/cpp/include/ftl/cuda/weighting.hpp @@ -58,6 +58,19 @@ __device__ inline float colourDistance(uchar4 a, uchar4 b) { return ch*ch*ch*ch; } +/* + * Colour weighting as suggested in: + * C. Kuster et al. Spatio-Temporal Geometry Fusion for Multiple Hybrid Cameras using Moving Least Squares Surfaces. 2014. + * c = colour distance + */ + __device__ inline float colourWeighting(float a, float b, float h) { + const float c = fabsf(a-b); + if (c >= h) return 0.0f; + float ch = c / h; + ch = 1.0f - ch*ch; + return ch*ch*ch*ch; +} + } } diff --git a/components/renderers/cpp/include/ftl/render/render_params.hpp b/components/renderers/cpp/include/ftl/render/render_params.hpp index 8f9ae61f53ddadbb4e29cb451a31e863aaa1a60a..c109d7c1e035f12c961f025787e377746b28e8e1 100644 --- a/components/renderers/cpp/include/ftl/render/render_params.hpp +++ b/components/renderers/cpp/include/ftl/render/render_params.hpp @@ -10,6 +10,7 @@ namespace render { static const uint kShowDisconMask = 0x00000001; static const uint kNormalWeightColours = 0x00000002; +static const uint kUseWeightsChannel = 0x00000004; struct ViewPort { short x; @@ -48,6 +49,14 @@ enum class ViewPortMode : uint8_t { Warping = 2 // Stretch viewport region over entire frame }; +enum class AccumulationFunction : uint8_t { + Simple = 0, + BestWeight = 1, + CloseWeights = 2, + ColourDiscard = 3, + ColourDiscardSmooth = 4 +}; + struct Parameters { uint m_flags; float disconDisparities; @@ -57,6 +66,8 @@ struct Parameters { ftl::rgbd::Camera camera; // Virtual camera intrinsics ftl::render::ViewPort viewport; ftl::render::ViewPortMode viewPortMode; + + ftl::render::AccumulationFunction accumulationMode; }; } diff --git a/components/renderers/cpp/src/CUDARender.cpp b/components/renderers/cpp/src/CUDARender.cpp index d407db476a0c4fc04b015043ee5de794e5809206..38e244d34c2f1c7428c03d2aaae637dfbc603e3e 100644 --- a/components/renderers/cpp/src/CUDARender.cpp +++ b/components/renderers/cpp/src/CUDARender.cpp @@ -192,15 +192,16 @@ void CUDARender::__reprojectChannel(ftl::rgbd::Frame &output, ftl::codecs::Chann auto transform = MatrixConversion::toCUDA(f.getPose().cast<float>().inverse() * t.cast<float>().inverse()) * poseInverse_; auto transformR = MatrixConversion::toCUDA(f.getPose().cast<float>().inverse()).getFloat3x3(); - if (mesh_) { + //if (mesh_) { if (f.hasChannel(Channel::Depth)) { ftl::cuda::reproject( f.createTexture<T>(in), f.createTexture<float>(Channel::Depth), output.getTexture<float>(Channel::Depth), - output.getTexture<float4>(Channel::Normals), + f.createTexture<short>(Channel::Weights), + (output.hasChannel(Channel::Normals)) ? &output.createTexture<float4>(Channel::Normals) : nullptr, temp_.createTexture<typename AccumSelector<T>::type>(AccumSelector<T>::channel), - temp_.getTexture<float>(Channel::Contribution), + temp_.getTexture<int>(Channel::Contribution), params_, f.getLeftCamera(), transform, transformR, stream @@ -211,13 +212,13 @@ void CUDARender::__reprojectChannel(ftl::rgbd::Frame &output, ftl::codecs::Chann f.createTexture<T>(in), output.getTexture<float>(Channel::Depth), temp_.createTexture<typename AccumSelector<T>::type>(AccumSelector<T>::channel), - temp_.getTexture<float>(Channel::Contribution), + temp_.getTexture<int>(Channel::Contribution), params_, f.getLeftCamera(), transform, stream ); } - } else { + /*} else { // Can't use normals with point cloud version if (f.hasChannel(Channel::Depth)) { ftl::cuda::reproject( @@ -225,7 +226,7 @@ void CUDARender::__reprojectChannel(ftl::rgbd::Frame &output, ftl::codecs::Chann f.createTexture<float>(Channel::Depth), output.getTexture<float>(Channel::Depth), temp_.createTexture<typename AccumSelector<T>::type>(AccumSelector<T>::channel), - temp_.getTexture<float>(Channel::Contribution), + temp_.getTexture<int>(Channel::Contribution), params_, f.getLeftCamera(), transform, stream @@ -235,13 +236,13 @@ void CUDARender::__reprojectChannel(ftl::rgbd::Frame &output, ftl::codecs::Chann f.createTexture<T>(in), output.getTexture<float>(Channel::Depth), temp_.createTexture<typename AccumSelector<T>::type>(AccumSelector<T>::channel), - temp_.getTexture<float>(Channel::Contribution), + temp_.getTexture<int>(Channel::Contribution), params_, f.getLeftCamera(), transform, stream ); } - } + }*/ } } @@ -300,11 +301,13 @@ void CUDARender::_adjustDepthThresholds(const ftl::rgbd::Camera &fcam) { void CUDARender::_mesh(ftl::rgbd::Frame &out, const Eigen::Matrix4d &t, cudaStream_t stream) { cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream); - bool do_blend = value("mesh_blend", true); + bool do_blend = value("mesh_blend", false); float blend_alpha = value("blend_alpha", 0.02f); if (do_blend) { temp_.get<GpuMat>(Channel::Depth).setTo(cv::Scalar(0x7FFFFFFF), cvstream); - temp_.get<GpuMat>(Channel::Depth2).setTo(cv::Scalar(0x7FFFFFFF), cvstream); + temp_.get<GpuMat>(Channel::Weights).setTo(cv::Scalar(0.0f), cvstream); + // FIXME: Doesnt work with multiple compositing + out.get<GpuMat>(Channel::Depth).setTo(cv::Scalar(1000.0f), cvstream); } else { temp_.get<GpuMat>(Channel::Depth2).setTo(cv::Scalar(0x7FFFFFFF), cvstream); } @@ -354,12 +357,20 @@ void CUDARender::_mesh(ftl::rgbd::Frame &out, const Eigen::Matrix4d &t, cudaStre params_, stream ); + // TODO: Reproject here + // And merge based upon weight adjusted distances + if (do_blend) { // Blend this sources mesh with previous meshes ftl::cuda::mesh_blender( temp_.getTexture<int>(Channel::Depth), - temp_.createTexture<int>(Channel::Depth2), - params_.camera, + //temp_.createTexture<int>(Channel::Depth2), + out.createTexture<float>(Channel::Depth), + f.createTexture<short>(Channel::Weights), + temp_.createTexture<float>(Channel::Weights), + params_, + f.getLeftCamera(), + transform.getInverse(), blend_alpha, stream ); @@ -368,7 +379,17 @@ void CUDARender::_mesh(ftl::rgbd::Frame &out, const Eigen::Matrix4d &t, cudaStre // Convert from int depth to float depth //temp_.get<GpuMat>(Channel::Depth2).convertTo(out.get<GpuMat>(Channel::Depth), CV_32F, 1.0f / 100000.0f, cvstream); - ftl::cuda::merge_convert_depth(temp_.getTexture<int>(Channel::Depth2), out.createTexture<float>(Channel::Depth), 1.0f / 100000.0f, stream_); + + if (do_blend) { + ftl::cuda::dibr_normalise( + out.getTexture<float>(Channel::Depth), + out.getTexture<float>(Channel::Depth), + temp_.getTexture<float>(Channel::Weights), + stream_ + ); + } else { + ftl::cuda::merge_convert_depth(temp_.getTexture<int>(Channel::Depth2), out.createTexture<float>(Channel::Depth), 1.0f / 100000.0f, stream_); + } //filters_->filter(out, src, stream); @@ -393,7 +414,7 @@ void CUDARender::_renderChannel( temp_.createTexture<float4>(Channel::Colour); - temp_.createTexture<float>(Channel::Contribution); + temp_.createTexture<int>(Channel::Contribution); // FIXME: Using colour 2 in this way seems broken since it is already used if (is_4chan) { @@ -471,10 +492,11 @@ void CUDARender::_allocateChannels(ftl::rgbd::Frame &out) { } temp_.create<GpuMat>(Channel::Colour, Format<float4>(camera.width, camera.height)); - temp_.create<GpuMat>(Channel::Contribution, Format<float>(camera.width, camera.height)); + temp_.create<GpuMat>(Channel::Contribution, Format<int>(camera.width, camera.height)); temp_.create<GpuMat>(Channel::Depth, Format<int>(camera.width, camera.height)); temp_.create<GpuMat>(Channel::Depth2, Format<int>(camera.width, camera.height)); temp_.create<GpuMat>(Channel::Normals, Format<float4>(camera.width, camera.height)); + temp_.create<GpuMat>(Channel::Weights, Format<float>(camera.width, camera.height)); temp_.createTexture<int>(Channel::Depth); } @@ -486,8 +508,10 @@ void CUDARender::_updateParameters(ftl::rgbd::Frame &out) { //params_.depthThreshold = value("depth_threshold", 0.004f); //params_.depthThreshScale = value("depth_thresh_scale", 0.166f); // baseline*focal / disp.... params_.disconDisparities = value("discon_disparities", 2.0f); + params_.accumulationMode = static_cast<ftl::render::AccumulationFunction>(value("accumulation_func", 0)); params_.m_flags = 0; if (value("normal_weight_colours", true)) params_.m_flags |= ftl::render::kNormalWeightColours; + if (value("channel_weights", false)) params_.m_flags |= ftl::render::kUseWeightsChannel; params_.camera = camera; poseInverse_ = MatrixConversion::toCUDA(out.getPose().cast<float>()); @@ -541,11 +565,18 @@ void CUDARender::_postprocessColours(ftl::rgbd::Frame &out) { ); } - if (value("show_bad_colour", false)) { + if (value("show_colour_weights", false)) { + ftl::cuda::show_colour_weights( + out.getTexture<uchar4>(Channel::Colour), + temp_.getTexture<int>(Channel::Contribution), + make_uchar4(0,0,255,0), + stream_ + ); + } else if (value("show_bad_colour", false)) { ftl::cuda::show_missing_colour( out.getTexture<float>(Channel::Depth), out.getTexture<uchar4>(Channel::Colour), - temp_.getTexture<float>(Channel::Contribution), + temp_.getTexture<int>(Channel::Contribution), make_uchar4(255,0,0,0), params_.camera, stream_ @@ -554,7 +585,7 @@ void CUDARender::_postprocessColours(ftl::rgbd::Frame &out) { ftl::cuda::fix_bad_colour( out.getTexture<float>(Channel::Depth), out.getTexture<uchar4>(Channel::Colour), - temp_.getTexture<float>(Channel::Contribution), + temp_.getTexture<int>(Channel::Contribution), make_uchar4(255,0,0,0), params_.camera, stream_ @@ -685,9 +716,9 @@ void CUDARender::begin(ftl::rgbd::Frame &out) { AccumSelector<uchar4>::channel, Format<typename AccumSelector<uchar4>::type>(params_.camera.width, params_.camera.height) ).setTo(cv::Scalar(0.0f), cvstream); - temp_.get<GpuMat>(Channel::Contribution).setTo(cv::Scalar(0.0f), cvstream); + temp_.get<GpuMat>(Channel::Contribution).setTo(cv::Scalar(0), cvstream); - temp_.createTexture<float>(Channel::Contribution); + temp_.createTexture<int>(Channel::Contribution); sets_.clear(); } @@ -714,7 +745,7 @@ void CUDARender::end() { ftl::cuda::dibr_normalise( temp_.getTexture<typename AccumSelector<uchar4>::type>(AccumSelector<uchar4>::channel), out_->createTexture<uchar4>(Channel::Colour), - temp_.getTexture<float>(Channel::Contribution), + temp_.getTexture<int>(Channel::Contribution), stream_ ); diff --git a/components/renderers/cpp/src/dibr.cu b/components/renderers/cpp/src/dibr.cu index 90e37ff4fcae5b94274aedb43dfb306bff03e957..7530f0a4717d99a9a9ec62944c814ac37e7a9bff 100644 --- a/components/renderers/cpp/src/dibr.cu +++ b/components/renderers/cpp/src/dibr.cu @@ -85,12 +85,12 @@ template <typename A, typename B> __global__ void dibr_normalise_kernel( TextureObject<A> in, TextureObject<B> out, - TextureObject<float> contribs) { + TextureObject<int> contribs) { const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; if (x < in.width() && y < in.height()) { - const float contrib = contribs.tex2D((int)x,(int)y); + const float contrib = float(contribs.tex2D((int)x,(int)y) & 0xFFFFFF) / float(0xFFFF); const A a = in.tex2D((int)x,(int)y); //const float4 normal = normals.tex2D((int)x,(int)y); @@ -104,7 +104,7 @@ __global__ void dibr_normalise_kernel( } template <typename A, typename B> -void ftl::cuda::dibr_normalise(TextureObject<A> &in, TextureObject<B> &out, TextureObject<float> &contribs, cudaStream_t stream) { +void ftl::cuda::dibr_normalise(TextureObject<A> &in, TextureObject<B> &out, TextureObject<int> &contribs, cudaStream_t stream) { const dim3 gridSize((in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); @@ -112,6 +112,39 @@ void ftl::cuda::dibr_normalise(TextureObject<A> &in, TextureObject<B> &out, Text cudaSafeCall( cudaGetLastError() ); } -template void ftl::cuda::dibr_normalise<float4,uchar4>(TextureObject<float4> &in, TextureObject<uchar4> &out, TextureObject<float> &contribs, cudaStream_t stream); -template void ftl::cuda::dibr_normalise<float,float>(TextureObject<float> &in, TextureObject<float> &out, TextureObject<float> &contribs, cudaStream_t stream); -template void ftl::cuda::dibr_normalise<float4,float4>(TextureObject<float4> &in, TextureObject<float4> &out, TextureObject<float> &contribs, cudaStream_t stream); +template void ftl::cuda::dibr_normalise<float4,uchar4>(TextureObject<float4> &in, TextureObject<uchar4> &out, TextureObject<int> &contribs, cudaStream_t stream); +template void ftl::cuda::dibr_normalise<float,float>(TextureObject<float> &in, TextureObject<float> &out, TextureObject<int> &contribs, cudaStream_t stream); +template void ftl::cuda::dibr_normalise<float4,float4>(TextureObject<float4> &in, TextureObject<float4> &out, TextureObject<int> &contribs, cudaStream_t stream); + +// Float version + +template <typename A, typename B> +__global__ void dibr_normalise_kernel( + TextureObject<A> in, + TextureObject<B> out, + TextureObject<float> weights) { + const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; + const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x < in.width() && y < in.height()) { + const float contrib = weights.tex2D((int)x,(int)y); + const A a = in.tex2D((int)x,(int)y); + + if (contrib > 0.0f) { + out(x,y) = make<B>(a / contrib); + } + } +} + +template <typename A, typename B> +void ftl::cuda::dibr_normalise(TextureObject<A> &in, TextureObject<B> &out, TextureObject<float> &weights, cudaStream_t stream) { + const dim3 gridSize((in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + dibr_normalise_kernel<<<gridSize, blockSize, 0, stream>>>(in, out, weights); + cudaSafeCall( cudaGetLastError() ); +} + +template void ftl::cuda::dibr_normalise<float4,uchar4>(TextureObject<float4> &in, TextureObject<uchar4> &out, TextureObject<float> &weights, cudaStream_t stream); +template void ftl::cuda::dibr_normalise<float,float>(TextureObject<float> &in, TextureObject<float> &out, TextureObject<float> &weights, cudaStream_t stream); +template void ftl::cuda::dibr_normalise<float4,float4>(TextureObject<float4> &in, TextureObject<float4> &out, TextureObject<float> &weights, cudaStream_t stream); diff --git a/components/renderers/cpp/src/reprojection.cu b/components/renderers/cpp/src/reprojection.cu index 703fd4becc7ebca7841dc07c956783579ca3a8f9..2012642a80729dec2c7dd88de9003e409dd6560c 100644 --- a/components/renderers/cpp/src/reprojection.cu +++ b/components/renderers/cpp/src/reprojection.cu @@ -13,6 +13,7 @@ using ftl::cuda::TextureObject; using ftl::render::Parameters; using ftl::rgbd::Camera; using ftl::render::ViewPortMode; +using ftl::render::AccumulationFunction; /*template <typename T> __device__ inline T generateInput(const T &in, const SplatParams ¶ms, const float4 &worldPos) { @@ -41,18 +42,75 @@ __device__ inline float4 weightInput(const uchar4 &in, float weight) { } template <typename T> -__device__ inline void accumulateOutput(TextureObject<T> &out, TextureObject<float> &contrib, const uint2 &pos, const T &in, float w) { - atomicAdd(&out(pos.x, pos.y), in); - atomicAdd(&contrib(pos.x, pos.y), w); -} +__device__ inline float colourDifference(const T &a, const T &b); template <> -__device__ inline void accumulateOutput(TextureObject<float4> &out, TextureObject<float> &contrib, const uint2 &pos, const float4 &in, float w) { - atomicAdd((float*)&out(pos.x, pos.y), in.x); - atomicAdd(((float*)&out(pos.x, pos.y))+1, in.y); - atomicAdd(((float*)&out(pos.x, pos.y))+2, in.z); - atomicAdd(((float*)&out(pos.x, pos.y))+3, in.w); - atomicAdd(&contrib(pos.x, pos.y), w); +__device__ inline float colourDifference<float4>(const float4 &a, const float4 &b) { + return max(fabsf(a.x-b.x), max(fabsf(a.y-b.y), fabsf(a.z-b.z))); +} + +template <> +__device__ inline float colourDifference<float>(const float &a, const float &b) { + return fabs(a-b); +} + +template <AccumulationFunction F, typename T> +__device__ inline void accumulateOutput(TextureObject<T> &out, TextureObject<int> &contrib, const uint2 &pos, const T &in, float w) { + // Just weighted average everything + if (F == AccumulationFunction::Simple) { + const T old = out.tex2D(pos.x, pos.y); + const int c = contrib.tex2D(pos.x,pos.y); + out(pos.x, pos.y) = old + in*w; + contrib(pos.x, pos.y) = int(w * float(0xFFFF)) + (c & 0xFFFFFF); + } else { + int c = contrib.tex2D(pos.x,pos.y); + float weight_sum = float(c & 0xFFFFFF) / float(0xFFFF); + float count = c >> 24; + const T old = out.tex2D(pos.x, pos.y); + + // Really close weights are weighted averaged together + // but substantially stronger weights do a straight replacement + if (F == AccumulationFunction::CloseWeights) { + if (count == 0 || w*0.95f > weight_sum/count) { + out(pos.x, pos.y) = in*w; + contrib(pos.x, pos.y) = (1 << 24) + int(w * float(0xFFFF)); + } else { + out(pos.x, pos.y) = old + in*w; + contrib(pos.x, pos.y) = (int(count+1.0f) << 24) + int(w * float(0xFFFF)) + (c & 0xFFFFFF); + } + // The winner takes all in determining colour + } else if (F == AccumulationFunction::BestWeight) { + if (count == 0 || w > weight_sum/count) { + out(pos.x, pos.y) = in*w; + contrib(pos.x, pos.y) = (1 << 24) + int(w * float(0xFFFF)); + } + // If colours are close then weighted average, otherwise discard the + // lowest weighted colours + } else if (F == AccumulationFunction::ColourDiscard) { + if (colourDifference(old / weight_sum, in) > 10.0f) { + if (count == 0 || w > weight_sum/count) { + out(pos.x, pos.y) = in*w; + contrib(pos.x, pos.y) = (1 << 24) + int(w * float(0xFFFF)); + } else { + //out(pos.x, pos.y) = old + in*w; + //contrib(pos.x, pos.y) = (int(count+1.0f) << 24) + int(w * float(0xFFFF)) + (c & 0xFFFFFF); + } + } else { + out(pos.x, pos.y) = old + in*w; + contrib(pos.x, pos.y) = (int(count+1.0f) << 24) + int(w * float(0xFFFF)) + (c & 0xFFFFFF); + } + } else if (F == AccumulationFunction::ColourDiscardSmooth) { + //const float cdiff = 1.0f - min(1.0f, colourDifference(old / weight_sum, in) / 50.0f); + // TODO: Determine colour smoothing param from magnitude of weighting difference + // Or at least consider the magnitude of weight difference some how. + const float cdiff = ftl::cuda::weighting(colourDifference(old / weight_sum, in), 255.0f); + const float alpha = (w > weight_sum/count) ? cdiff : 1.0f; + const float beta = (count == 0 || w > weight_sum/count) ? 1.0f : cdiff; + + out(pos.x, pos.y) = old*alpha + in*w*beta; + contrib(pos.x, pos.y) = (int(count+1.0f) << 24) + int(w*beta * float(0xFFFF)) + int(weight_sum*alpha * float(0xFFFF)); + } + } } template <ViewPortMode VPMODE> @@ -92,14 +150,15 @@ __device__ float depthMatching(const Parameters ¶ms, float d1, float d2) { /* * Full reprojection with normals and depth */ - template <typename A, typename B, ViewPortMode VPMODE> + template <typename A, typename B, ViewPortMode VPMODE, AccumulationFunction ACCUM> __global__ void reprojection_kernel( TextureObject<A> in, // Attribute input TextureObject<float> depth_src, TextureObject<float> depth_in, // Virtual depth map + TextureObject<short> weights, TextureObject<float4> normals, TextureObject<B> out, // Accumulated output - TextureObject<float> contrib, + TextureObject<int> contrib, Parameters params, Camera camera, float4x4 transform, float3x3 transformR) { @@ -122,6 +181,9 @@ __global__ void reprojection_kernel( // Boolean match (0 or 1 weight). 1.0 if depths are sufficiently close float weight = depthMatching(params, camPos.z, d2); + if (params.m_flags & ftl::render::kUseWeightsChannel) + weight *= float(weights.tex2D(int(screenPos.x+0.5f), int(screenPos.y+0.5f))) / 32767.0f; + // TODO: Weight by distance to discontinuity? Perhaps as an alternative to // removing a discontinuity. This would also gradually blend colours from // multiple cameras that otherwise might jump when one cameras input is lost @@ -136,107 +198,53 @@ __global__ void reprojection_kernel( if (params.m_flags & ftl::render::kNormalWeightColours) weight *= weightByNormal(normals, x, y, transformR, screenPos, camera); - const B weighted = make<B>(input) * weight; //weightInput(input, weight); + const B output = make<B>(input); // * weight; //weightInput(input, weight); if (weight > 0.0f) { - accumulateOutput(out, contrib, make_uint2(x,y), weighted, weight); + accumulateOutput<ACCUM,B>(out, contrib, make_uint2(x,y), output, weight); } } } } } - -template <typename A, typename B> -void ftl::cuda::reproject( - TextureObject<A> &in, - TextureObject<float> &depth_src, // Original 3D points - TextureObject<float> &depth_in, // Virtual depth map - TextureObject<float4> &normals, - TextureObject<B> &out, // Accumulated output - TextureObject<float> &contrib, - const Parameters ¶ms, - const Camera &camera, const float4x4 &transform, const float3x3 &transformR, - cudaStream_t stream) { - const dim3 gridSize((out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); - const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); - - switch (params.viewPortMode) { - case ViewPortMode::Disabled: reprojection_kernel<A,B,ViewPortMode::Disabled><<<gridSize, blockSize, 0, stream>>>(in, depth_src, depth_in, normals, out, contrib, params, camera, transform, transformR); break; - case ViewPortMode::Clipping: reprojection_kernel<A,B,ViewPortMode::Clipping><<<gridSize, blockSize, 0, stream>>>(in, depth_src, depth_in, normals, out, contrib, params, camera, transform, transformR); break; - case ViewPortMode::Warping: reprojection_kernel<A,B,ViewPortMode::Warping><<<gridSize, blockSize, 0, stream>>>(in, depth_src, depth_in, normals, out, contrib, params, camera, transform, transformR); break; - } - cudaSafeCall( cudaGetLastError() ); -} - -template void ftl::cuda::reproject( - ftl::cuda::TextureObject<uchar4> &in, // Original colour image - ftl::cuda::TextureObject<float> &depth_src, // Original 3D points - ftl::cuda::TextureObject<float> &depth_in, // Virtual depth map - ftl::cuda::TextureObject<float4> &normals, - ftl::cuda::TextureObject<float4> &out, // Accumulated output - ftl::cuda::TextureObject<float> &contrib, - const ftl::render::Parameters ¶ms, - const ftl::rgbd::Camera &camera, - const float4x4 &transform, const float3x3 &transformR, - cudaStream_t stream); - -template void ftl::cuda::reproject( - ftl::cuda::TextureObject<float> &in, // Original colour image - ftl::cuda::TextureObject<float> &depth_src, // Original 3D points - ftl::cuda::TextureObject<float> &depth_in, // Virtual depth map - ftl::cuda::TextureObject<float4> &normals, - ftl::cuda::TextureObject<float> &out, // Accumulated output - ftl::cuda::TextureObject<float> &contrib, - const ftl::render::Parameters ¶ms, - const ftl::rgbd::Camera &camera, - const float4x4 &transform, const float3x3 &transformR, - cudaStream_t stream); - -template void ftl::cuda::reproject( - ftl::cuda::TextureObject<float4> &in, // Original colour image - ftl::cuda::TextureObject<float> &depth_src, // Original 3D points - ftl::cuda::TextureObject<float> &depth_in, // Virtual depth map - ftl::cuda::TextureObject<float4> &normals, - ftl::cuda::TextureObject<float4> &out, // Accumulated output - ftl::cuda::TextureObject<float> &contrib, - const ftl::render::Parameters ¶ms, - const ftl::rgbd::Camera &camera, - const float4x4 &transform, const float3x3 &transformR, - cudaStream_t stream); - -//============================================================================== -// Without normals -//============================================================================== - - - template <typename A, typename B> +/* + * Full reprojection without normals + */ + template <typename A, typename B, ViewPortMode VPMODE, AccumulationFunction ACCUM> __global__ void reprojection_kernel( TextureObject<A> in, // Attribute input TextureObject<float> depth_src, TextureObject<float> depth_in, // Virtual depth map + TextureObject<short> weights, TextureObject<B> out, // Accumulated output - TextureObject<float> contrib, + TextureObject<int> contrib, Parameters params, - Camera camera, float4x4 poseInv) { + Camera camera, float4x4 transform, float3x3 transformR) { const int x = (blockIdx.x*blockDim.x + threadIdx.x); const int y = blockIdx.y*blockDim.y + threadIdx.y; const float d = depth_in.tex2D((int)x, (int)y); if (d > params.camera.minDepth && d < params.camera.maxDepth) { - const float3 camPos = poseInv * params.camera.screenToCam(x, y, d); - if (camPos.z > camera.minDepth && camPos.z > camera.maxDepth) { + const uint2 rpt = convertScreen<VPMODE>(params, x, y); + const float3 camPos = transform * params.camera.screenToCam(rpt.x, rpt.y, d); + if (camPos.z > camera.minDepth && camPos.z < camera.maxDepth) { const float2 screenPos = camera.camToScreen<float2>(camPos); + // Not on screen so stop now... if (screenPos.x < depth_src.width() && screenPos.y < depth_src.height()) { - const float d2 = depth_src.tex2D((int)(screenPos.x+0.5f), (int)(screenPos.y+0.5f)); - const auto input = getInput(in, screenPos, depth_src.width(), depth_src.height()); + const float d2 = depth_src.tex2D(int(screenPos.x+0.5f), int(screenPos.y+0.5f)); + const auto input = getInput(in, screenPos, depth_src.width(), depth_src.height()); + + // Boolean match (0 or 1 weight). 1.0 if depths are sufficiently close float weight = depthMatching(params, camPos.z, d2); - const B weighted = make<B>(input) * weight; + weight *= float(weights.tex2D(int(screenPos.x+0.5f), int(screenPos.y+0.5f))) / 32767.0f; + + const B output = make<B>(input); // * weight; //weightInput(input, weight); if (weight > 0.0f) { - accumulateOutput(out, contrib, make_uint2(x,y), weighted, weight); + accumulateOutput<ACCUM,B>(out, contrib, make_uint2(x,y), output, weight); } } } @@ -249,55 +257,123 @@ void ftl::cuda::reproject( TextureObject<A> &in, TextureObject<float> &depth_src, // Original 3D points TextureObject<float> &depth_in, // Virtual depth map + TextureObject<short> &weights, + TextureObject<float4> *normals, TextureObject<B> &out, // Accumulated output - TextureObject<float> &contrib, + TextureObject<int> &contrib, const Parameters ¶ms, - const Camera &camera, const float4x4 &poseInv, cudaStream_t stream) { + const Camera &camera, const float4x4 &transform, const float3x3 &transformR, + cudaStream_t stream) { const dim3 gridSize((out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); - reprojection_kernel<<<gridSize, blockSize, 0, stream>>>( - in, - depth_src, - depth_in, - out, - contrib, - params, - camera, - poseInv - ); - cudaSafeCall( cudaGetLastError() ); + if (normals) { + if (params.accumulationMode == AccumulationFunction::CloseWeights) { + switch (params.viewPortMode) { + case ViewPortMode::Disabled: reprojection_kernel<A,B,ViewPortMode::Disabled,AccumulationFunction::CloseWeights><<<gridSize, blockSize, 0, stream>>>(in, depth_src, depth_in, weights, *normals, out, contrib, params, camera, transform, transformR); break; + case ViewPortMode::Clipping: reprojection_kernel<A,B,ViewPortMode::Clipping,AccumulationFunction::CloseWeights><<<gridSize, blockSize, 0, stream>>>(in, depth_src, depth_in, weights, *normals, out, contrib, params, camera, transform, transformR); break; + case ViewPortMode::Warping: reprojection_kernel<A,B,ViewPortMode::Warping,AccumulationFunction::CloseWeights><<<gridSize, blockSize, 0, stream>>>(in, depth_src, depth_in, weights, *normals, out, contrib, params, camera, transform, transformR); break; + } + } else if (params.accumulationMode == AccumulationFunction::BestWeight) { + switch (params.viewPortMode) { + case ViewPortMode::Disabled: reprojection_kernel<A,B,ViewPortMode::Disabled,AccumulationFunction::BestWeight><<<gridSize, blockSize, 0, stream>>>(in, depth_src, depth_in, weights, *normals, out, contrib, params, camera, transform, transformR); break; + case ViewPortMode::Clipping: reprojection_kernel<A,B,ViewPortMode::Clipping,AccumulationFunction::BestWeight><<<gridSize, blockSize, 0, stream>>>(in, depth_src, depth_in, weights, *normals, out, contrib, params, camera, transform, transformR); break; + case ViewPortMode::Warping: reprojection_kernel<A,B,ViewPortMode::Warping,AccumulationFunction::BestWeight><<<gridSize, blockSize, 0, stream>>>(in, depth_src, depth_in, weights, *normals, out, contrib, params, camera, transform, transformR); break; + } + } else if (params.accumulationMode == AccumulationFunction::Simple) { + switch (params.viewPortMode) { + case ViewPortMode::Disabled: reprojection_kernel<A,B,ViewPortMode::Disabled,AccumulationFunction::Simple><<<gridSize, blockSize, 0, stream>>>(in, depth_src, depth_in, weights, *normals, out, contrib, params, camera, transform, transformR); break; + case ViewPortMode::Clipping: reprojection_kernel<A,B,ViewPortMode::Clipping,AccumulationFunction::Simple><<<gridSize, blockSize, 0, stream>>>(in, depth_src, depth_in, weights, *normals, out, contrib, params, camera, transform, transformR); break; + case ViewPortMode::Warping: reprojection_kernel<A,B,ViewPortMode::Warping,AccumulationFunction::Simple><<<gridSize, blockSize, 0, stream>>>(in, depth_src, depth_in, weights, *normals, out, contrib, params, camera, transform, transformR); break; + } + } else if (params.accumulationMode == AccumulationFunction::ColourDiscard) { + switch (params.viewPortMode) { + case ViewPortMode::Disabled: reprojection_kernel<A,B,ViewPortMode::Disabled,AccumulationFunction::ColourDiscard><<<gridSize, blockSize, 0, stream>>>(in, depth_src, depth_in, weights, *normals, out, contrib, params, camera, transform, transformR); break; + case ViewPortMode::Clipping: reprojection_kernel<A,B,ViewPortMode::Clipping,AccumulationFunction::ColourDiscard><<<gridSize, blockSize, 0, stream>>>(in, depth_src, depth_in, weights, *normals, out, contrib, params, camera, transform, transformR); break; + case ViewPortMode::Warping: reprojection_kernel<A,B,ViewPortMode::Warping,AccumulationFunction::ColourDiscard><<<gridSize, blockSize, 0, stream>>>(in, depth_src, depth_in, weights, *normals, out, contrib, params, camera, transform, transformR); break; + } + } else if (params.accumulationMode == AccumulationFunction::ColourDiscardSmooth) { + switch (params.viewPortMode) { + case ViewPortMode::Disabled: reprojection_kernel<A,B,ViewPortMode::Disabled,AccumulationFunction::ColourDiscardSmooth><<<gridSize, blockSize, 0, stream>>>(in, depth_src, depth_in, weights, *normals, out, contrib, params, camera, transform, transformR); break; + case ViewPortMode::Clipping: reprojection_kernel<A,B,ViewPortMode::Clipping,AccumulationFunction::ColourDiscardSmooth><<<gridSize, blockSize, 0, stream>>>(in, depth_src, depth_in, weights, *normals, out, contrib, params, camera, transform, transformR); break; + case ViewPortMode::Warping: reprojection_kernel<A,B,ViewPortMode::Warping,AccumulationFunction::ColourDiscardSmooth><<<gridSize, blockSize, 0, stream>>>(in, depth_src, depth_in, weights, *normals, out, contrib, params, camera, transform, transformR); break; + } + } + } else { + if (params.accumulationMode == AccumulationFunction::CloseWeights) { + switch (params.viewPortMode) { + case ViewPortMode::Disabled: reprojection_kernel<A,B,ViewPortMode::Disabled,AccumulationFunction::CloseWeights><<<gridSize, blockSize, 0, stream>>>(in, depth_src, depth_in, weights, out, contrib, params, camera, transform, transformR); break; + case ViewPortMode::Clipping: reprojection_kernel<A,B,ViewPortMode::Clipping,AccumulationFunction::CloseWeights><<<gridSize, blockSize, 0, stream>>>(in, depth_src, depth_in, weights, out, contrib, params, camera, transform, transformR); break; + case ViewPortMode::Warping: reprojection_kernel<A,B,ViewPortMode::Warping,AccumulationFunction::CloseWeights><<<gridSize, blockSize, 0, stream>>>(in, depth_src, depth_in, weights, out, contrib, params, camera, transform, transformR); break; + } + } else if (params.accumulationMode == AccumulationFunction::BestWeight) { + switch (params.viewPortMode) { + case ViewPortMode::Disabled: reprojection_kernel<A,B,ViewPortMode::Disabled,AccumulationFunction::BestWeight><<<gridSize, blockSize, 0, stream>>>(in, depth_src, depth_in, weights, out, contrib, params, camera, transform, transformR); break; + case ViewPortMode::Clipping: reprojection_kernel<A,B,ViewPortMode::Clipping,AccumulationFunction::BestWeight><<<gridSize, blockSize, 0, stream>>>(in, depth_src, depth_in, weights, out, contrib, params, camera, transform, transformR); break; + case ViewPortMode::Warping: reprojection_kernel<A,B,ViewPortMode::Warping,AccumulationFunction::BestWeight><<<gridSize, blockSize, 0, stream>>>(in, depth_src, depth_in, weights, out, contrib, params, camera, transform, transformR); break; + } + } else if (params.accumulationMode == AccumulationFunction::Simple) { + switch (params.viewPortMode) { + case ViewPortMode::Disabled: reprojection_kernel<A,B,ViewPortMode::Disabled,AccumulationFunction::Simple><<<gridSize, blockSize, 0, stream>>>(in, depth_src, depth_in, weights, out, contrib, params, camera, transform, transformR); break; + case ViewPortMode::Clipping: reprojection_kernel<A,B,ViewPortMode::Clipping,AccumulationFunction::Simple><<<gridSize, blockSize, 0, stream>>>(in, depth_src, depth_in, weights, out, contrib, params, camera, transform, transformR); break; + case ViewPortMode::Warping: reprojection_kernel<A,B,ViewPortMode::Warping,AccumulationFunction::Simple><<<gridSize, blockSize, 0, stream>>>(in, depth_src, depth_in, weights, out, contrib, params, camera, transform, transformR); break; + } + } else if (params.accumulationMode == AccumulationFunction::ColourDiscard) { + switch (params.viewPortMode) { + case ViewPortMode::Disabled: reprojection_kernel<A,B,ViewPortMode::Disabled,AccumulationFunction::ColourDiscard><<<gridSize, blockSize, 0, stream>>>(in, depth_src, depth_in, weights, out, contrib, params, camera, transform, transformR); break; + case ViewPortMode::Clipping: reprojection_kernel<A,B,ViewPortMode::Clipping,AccumulationFunction::ColourDiscard><<<gridSize, blockSize, 0, stream>>>(in, depth_src, depth_in, weights, out, contrib, params, camera, transform, transformR); break; + case ViewPortMode::Warping: reprojection_kernel<A,B,ViewPortMode::Warping,AccumulationFunction::ColourDiscard><<<gridSize, blockSize, 0, stream>>>(in, depth_src, depth_in, weights, out, contrib, params, camera, transform, transformR); break; + } + } else if (params.accumulationMode == AccumulationFunction::ColourDiscardSmooth) { + switch (params.viewPortMode) { + case ViewPortMode::Disabled: reprojection_kernel<A,B,ViewPortMode::Disabled,AccumulationFunction::ColourDiscardSmooth><<<gridSize, blockSize, 0, stream>>>(in, depth_src, depth_in, weights, out, contrib, params, camera, transform, transformR); break; + case ViewPortMode::Clipping: reprojection_kernel<A,B,ViewPortMode::Clipping,AccumulationFunction::ColourDiscardSmooth><<<gridSize, blockSize, 0, stream>>>(in, depth_src, depth_in, weights, out, contrib, params, camera, transform, transformR); break; + case ViewPortMode::Warping: reprojection_kernel<A,B,ViewPortMode::Warping,AccumulationFunction::ColourDiscardSmooth><<<gridSize, blockSize, 0, stream>>>(in, depth_src, depth_in, weights, out, contrib, params, camera, transform, transformR); break; + } + } + } + cudaSafeCall( cudaGetLastError() ); } template void ftl::cuda::reproject( ftl::cuda::TextureObject<uchar4> &in, // Original colour image ftl::cuda::TextureObject<float> &depth_src, // Original 3D points ftl::cuda::TextureObject<float> &depth_in, // Virtual depth map + ftl::cuda::TextureObject<short> &weights, + ftl::cuda::TextureObject<float4> *normals, ftl::cuda::TextureObject<float4> &out, // Accumulated output - ftl::cuda::TextureObject<float> &contrib, + ftl::cuda::TextureObject<int> &contrib, const ftl::render::Parameters ¶ms, const ftl::rgbd::Camera &camera, - const float4x4 &poseInv, cudaStream_t stream); + const float4x4 &transform, const float3x3 &transformR, + cudaStream_t stream); template void ftl::cuda::reproject( ftl::cuda::TextureObject<float> &in, // Original colour image ftl::cuda::TextureObject<float> &depth_src, // Original 3D points ftl::cuda::TextureObject<float> &depth_in, // Virtual depth map + ftl::cuda::TextureObject<short> &weights, + ftl::cuda::TextureObject<float4> *normals, ftl::cuda::TextureObject<float> &out, // Accumulated output - ftl::cuda::TextureObject<float> &contrib, + ftl::cuda::TextureObject<int> &contrib, const ftl::render::Parameters ¶ms, const ftl::rgbd::Camera &camera, - const float4x4 &poseInv, cudaStream_t stream); + const float4x4 &transform, const float3x3 &transformR, + cudaStream_t stream); template void ftl::cuda::reproject( ftl::cuda::TextureObject<float4> &in, // Original colour image ftl::cuda::TextureObject<float> &depth_src, // Original 3D points ftl::cuda::TextureObject<float> &depth_in, // Virtual depth map + ftl::cuda::TextureObject<short> &weights, + ftl::cuda::TextureObject<float4> *normals, ftl::cuda::TextureObject<float4> &out, // Accumulated output - ftl::cuda::TextureObject<float> &contrib, + ftl::cuda::TextureObject<int> &contrib, const ftl::render::Parameters ¶ms, const ftl::rgbd::Camera &camera, - const float4x4 &poseInv, cudaStream_t stream); + const float4x4 &transform, const float3x3 &transformR, + cudaStream_t stream); + //============================================================================== // Without normals or depth @@ -311,7 +387,7 @@ __global__ void reprojection_kernel( TextureObject<A> in, // Attribute input TextureObject<float> depth_in, // Virtual depth map TextureObject<B> out, // Accumulated output - TextureObject<float> contrib, + TextureObject<int> contrib, Parameters params, Camera camera, float4x4 poseInv) { @@ -329,7 +405,7 @@ __global__ void reprojection_kernel( const B weighted = make<B>(input) * weight; if (weight > 0.0f) { - accumulateOutput(out, contrib, make_uint2(x,y), weighted, weight); + accumulateOutput<AccumulationFunction::Simple>(out, contrib, make_uint2(x,y), weighted, weight); } } } @@ -341,7 +417,7 @@ void ftl::cuda::reproject( TextureObject<A> &in, TextureObject<float> &depth_in, // Virtual depth map TextureObject<B> &out, // Accumulated output - TextureObject<float> &contrib, + TextureObject<int> &contrib, const Parameters ¶ms, const Camera &camera, const float4x4 &poseInv, cudaStream_t stream) { const dim3 gridSize((out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); @@ -363,7 +439,7 @@ template void ftl::cuda::reproject( ftl::cuda::TextureObject<uchar4> &in, // Original colour image ftl::cuda::TextureObject<float> &depth_in, // Virtual depth map ftl::cuda::TextureObject<float4> &out, // Accumulated output - ftl::cuda::TextureObject<float> &contrib, + ftl::cuda::TextureObject<int> &contrib, const ftl::render::Parameters ¶ms, const ftl::rgbd::Camera &camera, const float4x4 &poseInv, cudaStream_t stream); @@ -372,7 +448,7 @@ template void ftl::cuda::reproject( ftl::cuda::TextureObject<float> &in, // Original colour image ftl::cuda::TextureObject<float> &depth_in, // Virtual depth map ftl::cuda::TextureObject<float> &out, // Accumulated output - ftl::cuda::TextureObject<float> &contrib, + ftl::cuda::TextureObject<int> &contrib, const ftl::render::Parameters ¶ms, const ftl::rgbd::Camera &camera, const float4x4 &poseInv, cudaStream_t stream); @@ -381,7 +457,7 @@ template void ftl::cuda::reproject( ftl::cuda::TextureObject<float4> &in, // Original colour image ftl::cuda::TextureObject<float> &depth_in, // Virtual depth map ftl::cuda::TextureObject<float4> &out, // Accumulated output - ftl::cuda::TextureObject<float> &contrib, + ftl::cuda::TextureObject<int> &contrib, const ftl::render::Parameters ¶ms, const ftl::rgbd::Camera &camera, const float4x4 &poseInv, cudaStream_t stream); @@ -444,7 +520,7 @@ template <int RADIUS> __global__ void fix_colour_kernel( TextureObject<float> depth, TextureObject<uchar4> out, - TextureObject<float> contribs, + TextureObject<int> contribs, uchar4 bad_colour, ftl::rgbd::Camera cam) { const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; @@ -454,15 +530,15 @@ __global__ void fix_colour_kernel( const float contrib = contribs.tex2D((int)x,(int)y); const float d = depth.tex2D(x,y); - if (contrib < 0.0000001f && d > cam.minDepth && d < cam.maxDepth) { + if (contrib == 0 && d > cam.minDepth && d < cam.maxDepth) { float4 sumcol = make_float4(0.0f); float count = 0.0f; for (int v=-RADIUS; v<=RADIUS; ++v) { for (int u=-RADIUS; u<=RADIUS; ++u) { - const float contrib = contribs.tex2D((int)x+u,(int)y+v); + const int contrib = contribs.tex2D((int)x+u,(int)y+v); const float4 c = make_float4(out(int(x)+u,int(y)+v)); - if (contrib > 0.0000001f) { + if (contrib > 0) { sumcol += c; count += 1.0f; } @@ -477,7 +553,7 @@ __global__ void fix_colour_kernel( void ftl::cuda::fix_bad_colour( TextureObject<float> &depth, TextureObject<uchar4> &out, - TextureObject<float> &contribs, + TextureObject<int> &contribs, uchar4 bad_colour, const ftl::rgbd::Camera &cam, cudaStream_t stream) { @@ -493,17 +569,17 @@ void ftl::cuda::fix_bad_colour( __global__ void show_missing_colour_kernel( TextureObject<float> depth, TextureObject<uchar4> out, - TextureObject<float> contribs, + TextureObject<int> contribs, uchar4 bad_colour, ftl::rgbd::Camera cam) { const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; if (x < out.width() && y < out.height()) { - const float contrib = contribs.tex2D((int)x,(int)y); + const int contrib = contribs.tex2D((int)x,(int)y); const float d = depth.tex2D(x,y); - if (contrib < 0.0000001f && d > cam.minDepth && d < cam.maxDepth) { + if (contrib == 0 && d > cam.minDepth && d < cam.maxDepth) { out(x,y) = bad_colour; } } @@ -512,7 +588,7 @@ __global__ void show_missing_colour_kernel( void ftl::cuda::show_missing_colour( TextureObject<float> &depth, TextureObject<uchar4> &out, - TextureObject<float> &contribs, + TextureObject<int> &contribs, uchar4 bad_colour, const ftl::rgbd::Camera &cam, cudaStream_t stream) { @@ -522,3 +598,34 @@ void ftl::cuda::show_missing_colour( show_missing_colour_kernel<<<gridSize, blockSize, 0, stream>>>(depth, out, contribs, bad_colour, cam); cudaSafeCall( cudaGetLastError() ); } + +// ===== Show colour weights =================================================== + +__global__ void show_colour_weights_kernel( + TextureObject<uchar4> out, + TextureObject<int> contribs, + uchar4 bad_colour) { + const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; + const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x < out.width() && y < out.height()) { + const int contrib = contribs.tex2D((int)x,(int)y); + + if (contrib > 0) { + float w = float(contrib & 0xFFFFFF) / float(0xFFFF) / float(contrib >> 24); + out(x,y) = make_uchar4(float(bad_colour.x) * w, float(bad_colour.y) * w, float(bad_colour.z) * w, 0.0f); + } + } +} + +void ftl::cuda::show_colour_weights( + TextureObject<uchar4> &out, + TextureObject<int> &contribs, + uchar4 bad_colour, + cudaStream_t stream) { + const dim3 gridSize((out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + show_colour_weights_kernel<<<gridSize, blockSize, 0, stream>>>(out, contribs, bad_colour); + cudaSafeCall( cudaGetLastError() ); +} diff --git a/components/renderers/cpp/src/splatter_cuda.hpp b/components/renderers/cpp/src/splatter_cuda.hpp index 95450ea36a19c1e96bd3cedcb05ffbda3f6c9d44..285f2e651394913d8b10dba603a04b74090fe52a 100644 --- a/components/renderers/cpp/src/splatter_cuda.hpp +++ b/components/renderers/cpp/src/splatter_cuda.hpp @@ -36,6 +36,17 @@ namespace cuda { const ftl::rgbd::Camera &camera, float alpha, cudaStream_t stream); + + void mesh_blender( + ftl::cuda::TextureObject<int> &depth_in, + ftl::cuda::TextureObject<float> &depth_out, + ftl::cuda::TextureObject<short> &weights_in, + ftl::cuda::TextureObject<float> &weights_out, + const ftl::render::Parameters ¶ms, + const ftl::rgbd::Camera &camera, + const float4x4 &transform, + float alpha, + cudaStream_t stream); void dibr_merge( ftl::cuda::TextureObject<float4> &points, @@ -90,31 +101,32 @@ namespace cuda { ftl::cuda::TextureObject<A> &in, // Original colour image ftl::cuda::TextureObject<float> &depth_src, // Original 3D points ftl::cuda::TextureObject<float> &depth_in, // Virtual depth map - ftl::cuda::TextureObject<float4> &normals, + ftl::cuda::TextureObject<short> &weights, + ftl::cuda::TextureObject<float4> *normals, ftl::cuda::TextureObject<B> &out, // Accumulated output - ftl::cuda::TextureObject<float> &contrib, + ftl::cuda::TextureObject<int> &contrib, const ftl::render::Parameters ¶ms, const ftl::rgbd::Camera &camera, const float4x4 &transform, const float3x3 &transformR, cudaStream_t stream); - template <typename A, typename B> + /*template <typename A, typename B> void reproject( ftl::cuda::TextureObject<A> &in, // Original colour image ftl::cuda::TextureObject<float> &depth_src, // Original 3D points ftl::cuda::TextureObject<float> &depth_in, // Virtual depth map ftl::cuda::TextureObject<B> &out, // Accumulated output - ftl::cuda::TextureObject<float> &contrib, + ftl::cuda::TextureObject<int> &contrib, const ftl::render::Parameters ¶ms, const ftl::rgbd::Camera &camera, - const float4x4 &poseInv, cudaStream_t stream); + const float4x4 &poseInv, cudaStream_t stream);*/ template <typename A, typename B> void reproject( ftl::cuda::TextureObject<A> &in, // Original colour image ftl::cuda::TextureObject<float> &depth_in, // Virtual depth map ftl::cuda::TextureObject<B> &out, // Accumulated output - ftl::cuda::TextureObject<float> &contrib, + ftl::cuda::TextureObject<int> &contrib, const ftl::render::Parameters ¶ms, const ftl::rgbd::Camera &camera, const float4x4 &poseInv, cudaStream_t stream); @@ -128,17 +140,30 @@ namespace cuda { void dibr_normalise( ftl::cuda::TextureObject<A> &in, ftl::cuda::TextureObject<B> &out, - ftl::cuda::TextureObject<float> &contribs, + ftl::cuda::TextureObject<int> &contribs, + cudaStream_t stream); + + template <typename A, typename B> + void dibr_normalise( + ftl::cuda::TextureObject<A> &in, + ftl::cuda::TextureObject<B> &out, + ftl::cuda::TextureObject<float> &weights, cudaStream_t stream); void show_missing_colour( ftl::cuda::TextureObject<float> &depth, ftl::cuda::TextureObject<uchar4> &out, - ftl::cuda::TextureObject<float> &contribs, + ftl::cuda::TextureObject<int> &contribs, uchar4 bad_colour, const ftl::rgbd::Camera &cam, cudaStream_t stream); + void show_colour_weights( + ftl::cuda::TextureObject<uchar4> &out, + ftl::cuda::TextureObject<int> &contribs, + uchar4 bad_colour, + cudaStream_t stream); + void show_mask( ftl::cuda::TextureObject<uchar4> &colour, ftl::cuda::TextureObject<uint8_t> &mask, @@ -152,7 +177,7 @@ namespace cuda { void fix_bad_colour( ftl::cuda::TextureObject<float> &depth, ftl::cuda::TextureObject<uchar4> &out, - ftl::cuda::TextureObject<float> &contribs, + ftl::cuda::TextureObject<int> &contribs, uchar4 bad_colour, const ftl::rgbd::Camera &cam, cudaStream_t stream); diff --git a/components/renderers/cpp/src/triangle_render.cu b/components/renderers/cpp/src/triangle_render.cu index 02bc694dca8c128fb23770d3defa760d606d584a..dccdd276ad1ab8a6520a19a5b908c51db6b54823 100644 --- a/components/renderers/cpp/src/triangle_render.cu +++ b/components/renderers/cpp/src/triangle_render.cu @@ -220,7 +220,7 @@ void ftl::cuda::merge_convert_depth(TextureObject<int> &depth_in, TextureObject< /* * Merge two depth maps together */ - __global__ void mesh_blender_kernel( + __global__ void mesh_blender_simple_kernel( TextureObject<int> depth_in, TextureObject<int> depth_out, ftl::rgbd::Camera camera, @@ -243,10 +243,59 @@ void ftl::cuda::merge_convert_depth(TextureObject<int> &depth_in, TextureObject< depth_out(x,y) = (int)(depth * 100000.0f); } +__global__ void mesh_blender_kernel( + TextureObject<int> depth_in, + TextureObject<float> depth_out, + TextureObject<short> weights_in, + TextureObject<float> weights_out, + ftl::render::Parameters params, + ftl::rgbd::Camera camera, + float4x4 transform, + float alpha) { + + const int x = blockIdx.x*blockDim.x + threadIdx.x; + const int y = blockIdx.y*blockDim.y + threadIdx.y; + + const float d1 = float(depth_in.tex2D((int)x, (int)y)) / 100000.0f; + const float d2 = depth_out.tex2D((int)x, (int)y); + const float wout = weights_out.tex2D((int)x, (int)y); + + if (d1 > params.camera.minDepth && d1 < params.camera.maxDepth) { + //const uint2 rpt = convertScreen<VPMODE>(params, x, y); + const float3 camPos = transform * params.camera.screenToCam(x, y, d1); + if (camPos.z > camera.minDepth && camPos.z < camera.maxDepth) { + const float2 screenPos = camera.camToScreen<float2>(camPos); + + // Not on screen so stop now... + if (screenPos.x < weights_in.width() && screenPos.y < weights_in.height()) { + const float win = float(weights_in.tex2D(int(screenPos.x+0.5f), int(screenPos.y+0.5f))); + + if (d1 < d2/wout - alpha || (fabsf(d2/wout - d1) < alpha && win > wout)) { + depth_out(x,y) = d1 * win; + weights_out(x,y) = win; + } //else if (fabsf(d2/wout - d1) < alpha) { + //depth_out(x,y) = d2 + d1 * win; + //weights_out(x,y) = wout + win; + //depth_out(x,y) = (win > wout) ? d1*win : d2; + //weights_out(x,y) = (win > wout) ? win : wout; + //} + } + } + } +} + void ftl::cuda::mesh_blender(TextureObject<int> &depth_in, TextureObject<int> &depth_out, const ftl::rgbd::Camera &camera, float alpha, cudaStream_t stream) { const dim3 gridSize((depth_in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); - mesh_blender_kernel<<<gridSize, blockSize, 0, stream>>>(depth_in, depth_out, camera, alpha); + mesh_blender_simple_kernel<<<gridSize, blockSize, 0, stream>>>(depth_in, depth_out, camera, alpha); + cudaSafeCall( cudaGetLastError() ); +} + +void ftl::cuda::mesh_blender(TextureObject<int> &depth_in, TextureObject<float> &depth_out, TextureObject<short> &weights_in, TextureObject<float> &weights_out, const ftl::render::Parameters ¶ms, const ftl::rgbd::Camera &camera, const float4x4 &transform, float alpha, cudaStream_t stream) { + const dim3 gridSize((depth_in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + mesh_blender_kernel<<<gridSize, blockSize, 0, stream>>>(depth_in, depth_out, weights_in, weights_out, params, camera, transform, alpha); cudaSafeCall( cudaGetLastError() ); } diff --git a/components/rgbd-sources/include/ftl/rgbd/frame.hpp b/components/rgbd-sources/include/ftl/rgbd/frame.hpp index 652bd244151343fd2172dbe04e77c9dfae7133f6..688e5fed2ddd9d1ab53a92d5a5ddd8751c8be26a 100644 --- a/components/rgbd-sources/include/ftl/rgbd/frame.hpp +++ b/components/rgbd-sources/include/ftl/rgbd/frame.hpp @@ -222,7 +222,7 @@ ftl::cuda::TextureObject<T> &Frame::createTexture(ftl::codecs::Channel c, const //if (!hasChannel(c)) channels_ += c; //using ftl::data::Frame<0,32,ftl::rgbd::FrameState,VideoData>::create; - ftl::data::Frame<0,32,ftl::rgbd::FrameState,VideoData>::create<cv::cuda::GpuMat>(c); + create<cv::cuda::GpuMat>(c); auto &m = getData(c); if (f.empty()) { diff --git a/components/structures/include/ftl/data/frame.hpp b/components/structures/include/ftl/data/frame.hpp index 3a33dc086114ccf8fa288761170d5943bc9bb5fa..182e8e9cee975258170a9d197a63ee193b057da4 100644 --- a/components/structures/include/ftl/data/frame.hpp +++ b/components/structures/include/ftl/data/frame.hpp @@ -63,11 +63,19 @@ public: * Perform a buffer swap of the selected channels. This is intended to be * a copy from `this` to the passed frame object but by buffer swap * instead of memory copy, meaning `this` may become invalid afterwards. + * It is a complete frame swap. */ void swapTo(ftl::codecs::Channels<BASE>, Frame &); void swapTo(Frame &); + /** + * Swap only selected channels to another frame, without resetting or swapping + * any other aspect of the frame. Unlike swapTo, this isn't intended to + * be a complete frame swap. + */ + void swapChannels(ftl::codecs::Channels<BASE> channels, Frame<BASE,N,STATE,DATA> &); + void swapChannels(ftl::codecs::Channel, ftl::codecs::Channel); /** @@ -330,6 +338,20 @@ void ftl::data::Frame<BASE,N,STATE,DATA>::swapChannels(ftl::codecs::Channel a, f m1 = std::move(temp); } +template <int BASE, int N, typename STATE, typename DATA> +void ftl::data::Frame<BASE,N,STATE,DATA>::swapChannels(ftl::codecs::Channels<BASE> channels, Frame<BASE,N,STATE,DATA> &f) { + // For all channels in this frame object + for (auto c : channels_) { + // Should we swap this channel? + if (channels.has(c)) { + f.channels_ += c; + // TODO: Make sure this does a move not copy + std::swap(f.getData(c),getData(c)); + channels_ -= c; + } + } +} + template <int BASE, int N, typename STATE, typename DATA> void ftl::data::Frame<BASE,N,STATE,DATA>::copyTo(ftl::codecs::Channels<BASE> channels, Frame<BASE,N,STATE,DATA> &f) { f.reset();