From ec7267db063fffa691ef4c27f47511f51cd45132 Mon Sep 17 00:00:00 2001 From: Nicolas Pope <nicolas.pope@utu.fi> Date: Tue, 12 Nov 2019 14:07:54 +0200 Subject: [PATCH] Implements #236 Fast Approximate Anti-Aliasing --- applications/gui/src/camera.cpp | 3 +- applications/gui/src/media_panel.cpp | 2 +- .../reconstruct/src/ilw/correspondence.cu | 8 +- applications/reconstruct/src/main.cpp | 8 +- .../codecs/include/ftl/codecs/channels.hpp | 1 + .../common/cpp/include/ftl/cuda_common.hpp | 33 +++++-- components/operators/CMakeLists.txt | 2 + .../include/ftl/operators/antialiasing.hpp | 27 ++++++ components/operators/src/antialiasing.cpp | 22 +++++ components/operators/src/antialiasing.cu | 87 +++++++++++++++++++ .../operators/src/antialiasing_cuda.hpp | 14 +++ components/operators/src/colours.cpp | 3 + components/operators/src/mls.cu | 8 +- components/operators/src/segmentation.cu | 25 +++--- .../cpp/include/ftl/cuda/weighting.hpp | 14 +++ components/renderers/cpp/src/reprojection.cu | 6 +- components/renderers/cpp/src/tri_render.cpp | 15 ++-- .../rgbd-sources/include/ftl/rgbd/frame.hpp | 16 ++-- 18 files changed, 251 insertions(+), 43 deletions(-) create mode 100644 components/operators/include/ftl/operators/antialiasing.hpp create mode 100644 components/operators/src/antialiasing.cpp create mode 100644 components/operators/src/antialiasing.cu create mode 100644 components/operators/src/antialiasing_cuda.hpp diff --git a/applications/gui/src/camera.cpp b/applications/gui/src/camera.cpp index debb8e73d..ae0d5627a 100644 --- a/applications/gui/src/camera.cpp +++ b/applications/gui/src/camera.cpp @@ -293,6 +293,7 @@ void ftl::gui::Camera::setChannel(Channel c) { case Channel::Flow: case Channel::Confidence: case Channel::Normals: + case Channel::ColourNormals: case Channel::Right: src_->setChannel(c); break; @@ -464,7 +465,7 @@ const GLTexture &ftl::gui::Camera::captureFrame() { break; //case Channel::Flow: - case Channel::Normals: + case Channel::ColourNormals: case Channel::Right: if (im2_.rows == 0 || im2_.type() != CV_8UC3) { break; } texture2_.update(im2_); diff --git a/applications/gui/src/media_panel.cpp b/applications/gui/src/media_panel.cpp index b3ea1a6af..800940199 100644 --- a/applications/gui/src/media_panel.cpp +++ b/applications/gui/src/media_panel.cpp @@ -192,7 +192,7 @@ MediaPanel::MediaPanel(ftl::gui::Screen *screen) : nanogui::Window(screen, ""), button->setCallback([this]() { ftl::gui::Camera *cam = screen_->activeCamera(); if (cam) { - cam->setChannel(Channel::Normals); + cam->setChannel(Channel::ColourNormals); } }); diff --git a/applications/reconstruct/src/ilw/correspondence.cu b/applications/reconstruct/src/ilw/correspondence.cu index 8fb9d94b7..9fb440ae4 100644 --- a/applications/reconstruct/src/ilw/correspondence.cu +++ b/applications/reconstruct/src/ilw/correspondence.cu @@ -65,7 +65,7 @@ __global__ void correspondence_energy_vector_kernel( const float3 world1 = pose1 * cam1.screenToCam(x,y,depth1); - const uchar4 colour1 = c1.tex2D(x, y); + const auto colour1 = c1.tex2D((float)x+0.5f, (float)y+0.5f); float bestdepth = 0.0f; float bestweight = 0.0f; @@ -91,17 +91,17 @@ __global__ void correspondence_energy_vector_kernel( // Calculate adjusted depth 3D point in camera 2 space const float3 worldPos = world1 + j * rayStep_world; //(pose1 * cam1.screenToCam(x, y, depth_adjust)); const float3 camPos = rayStart_2 + j * rayStep_2; //pose2 * worldPos; - const uint2 screen = cam2.camToScreen<uint2>(camPos); + const float2 screen = cam2.camToScreen<float2>(camPos); if (screen.x >= cam2.width || screen.y >= cam2.height) continue; // Generate a depth correspondence value - const float depth2 = d2.tex2D((int)screen.x, (int)screen.y); + const float depth2 = d2.tex2D(int(screen.x+0.5f), int(screen.y+0.5f)); const float dweight = ftl::cuda::weighting(fabs(depth2 - camPos.z), params.spatial_smooth); //const float dweight = ftl::cuda::weighting(fabs(depth_adjust - depth1), 2.0f*params.range); // Generate a colour correspondence value - const uchar4 colour2 = c2.tex2D((int)screen.x, (int)screen.y); + const auto colour2 = c2.tex2D(screen.x, screen.y); const float cweight = ftl::cuda::colourWeighting(colour1, colour2, params.colour_smooth); const float weight = weightFunction<FUNCTION>(params, dweight, cweight); diff --git a/applications/reconstruct/src/main.cpp b/applications/reconstruct/src/main.cpp index 03c21b142..06111688a 100644 --- a/applications/reconstruct/src/main.cpp +++ b/applications/reconstruct/src/main.cpp @@ -36,6 +36,7 @@ #include <ftl/operators/filling.hpp> #include <ftl/operators/segmentation.hpp> #include <ftl/operators/mask.hpp> +#include <ftl/operators/antialiasing.hpp> #include <ftl/cuda/normals.hpp> #include <ftl/registration.hpp> @@ -236,8 +237,12 @@ static void run(ftl::Configurable *root) { int o = root->value("origin_pose", 0) % sources.size(); virt->setPose(sources[o]->getPose()); + auto *renderpipe = ftl::config::create<ftl::operators::Graph>(root, "render_pipe"); + renderpipe->append<ftl::operators::ColourChannels>("colour"); // Generate interpolation texture... + renderpipe->append<ftl::operators::FXAA>("antialiasing"); + // Generate virtual camera render when requested by streamer - virt->onRender([splat,virt,&scene_B,align](ftl::rgbd::Frame &out) { + virt->onRender([splat,virt,&scene_B,align,renderpipe](ftl::rgbd::Frame &out) { //virt->setTimestamp(scene_B.timestamp); // Do we need to convert Lab to BGR? if (align->isLabColour()) { @@ -247,6 +252,7 @@ static void run(ftl::Configurable *root) { } } splat->render(virt, out); + renderpipe->apply(out, out, virt, 0); }); stream->add(virt); diff --git a/components/codecs/include/ftl/codecs/channels.hpp b/components/codecs/include/ftl/codecs/channels.hpp index 3a486cb32..6673275fe 100644 --- a/components/codecs/include/ftl/codecs/channels.hpp +++ b/components/codecs/include/ftl/codecs/channels.hpp @@ -31,6 +31,7 @@ enum struct Channel : int { Support1 = 13, // 8UC4 (currently) Support2 = 14, // 8UC4 (currently) Segmentation = 15, // 32S? + ColourNormals = 16, // 8UC4 AudioLeft = 32, AudioRight = 33, diff --git a/components/common/cpp/include/ftl/cuda_common.hpp b/components/common/cpp/include/ftl/cuda_common.hpp index 116e26ec7..3ce2452dc 100644 --- a/components/common/cpp/include/ftl/cuda_common.hpp +++ b/components/common/cpp/include/ftl/cuda_common.hpp @@ -28,6 +28,22 @@ bool hasCompute(int major, int minor); int deviceCount(); +template <typename T> +struct Float; + +template <> struct Float<float> { typedef float type; }; +template <> struct Float<int> { typedef float type; }; +template <> struct Float<float4> { typedef float4 type; }; +template <> struct Float<uchar4> { typedef float4 type; }; +template <> struct Float<short2> { typedef float2 type; }; + +template <typename T> +struct ScaleValue; + +template <> struct ScaleValue<uchar4> { static constexpr float value = 255.0f; }; +template <> struct ScaleValue<float> { static constexpr float value = 1.0f; }; +template <> struct ScaleValue<float4> { static constexpr float value = 1.0f; }; + /** * Represent a CUDA texture object. Instances of this class can be used on both * host and device. A texture object base cannot be constructed directly, it @@ -89,7 +105,7 @@ class TextureObject : public TextureObjectBase { static_assert((16u % sizeof(T)) == 0, "Channel format must be aligned with 16 bytes"); __host__ __device__ TextureObject() : TextureObjectBase() {}; - explicit TextureObject(const cv::cuda::GpuMat &d); + explicit TextureObject(const cv::cuda::GpuMat &d, bool interpolated=false); explicit TextureObject(const cv::cuda::PtrStepSz<T> &d); TextureObject(T *ptr, int pitch, int width, int height); TextureObject(size_t width, size_t height); @@ -110,7 +126,8 @@ class TextureObject : public TextureObjectBase { #ifdef __CUDACC__ __device__ inline T tex2D(int u, int v) const { return ::tex2D<T>(texobj_, u, v); } - __device__ inline T tex2D(float u, float v) const { return ::tex2D<T>(texobj_, u, v); } + __device__ inline T tex2D(unsigned int u, unsigned int v) const { return ::tex2D<T>(texobj_, (int)u, (int)v); } + __device__ inline typename Float<T>::type tex2D(float u, float v) const { return ::tex2D<typename Float<T>::type>(texobj_, u, v) * ScaleValue<T>::value; } #endif __host__ __device__ inline const T &operator()(int u, int v) const { return reinterpret_cast<T*>(ptr_)[u+v*pitch2_]; } @@ -137,7 +154,7 @@ TextureObject<T> &TextureObject<T>::cast(TextureObjectBase &b) { * Create a 2D array texture from an OpenCV GpuMat object. */ template <typename T> -TextureObject<T>::TextureObject(const cv::cuda::GpuMat &d) { +TextureObject<T>::TextureObject(const cv::cuda::GpuMat &d, bool interpolated) { // GpuMat must have correct data type CHECK(d.type() == ftl::traits::OpenCVType<T>::value); @@ -153,7 +170,8 @@ TextureObject<T>::TextureObject(const cv::cuda::GpuMat &d) { cudaTextureDesc texDesc; // cppcheck-suppress memsetClassFloat memset(&texDesc, 0, sizeof(texDesc)); - texDesc.readMode = cudaReadModeElementType; + texDesc.readMode = (interpolated) ? cudaReadModeNormalizedFloat : cudaReadModeElementType; + if (interpolated) texDesc.filterMode = cudaFilterModeLinear; cudaTextureObject_t tex = 0; cudaSafeCall(cudaCreateTextureObject(&tex, &resDesc, &texDesc, NULL)); @@ -188,6 +206,7 @@ TextureObject<T>::TextureObject(const cv::cuda::PtrStepSz<T> &d) { // cppcheck-suppress memsetClassFloat memset(&texDesc, 0, sizeof(texDesc)); texDesc.readMode = cudaReadModeElementType; + //if (std::is_same<T,uchar4>::value) texDesc.filterMode = cudaFilterModeLinear; cudaTextureObject_t tex = 0; cudaSafeCall(cudaCreateTextureObject(&tex, &resDesc, &texDesc, NULL)); @@ -221,9 +240,10 @@ TextureObject<T>::TextureObject(T *ptr, int pitch, int width, int height) { // cppcheck-suppress memsetClassFloat memset(&texDesc, 0, sizeof(texDesc)); texDesc.readMode = cudaReadModeElementType; + //if (std::is_same<T,uchar4>::value) texDesc.filterMode = cudaFilterModeLinear; cudaTextureObject_t tex = 0; - cudaCreateTextureObject(&tex, &resDesc, &texDesc, NULL); + cudaSafeCall(cudaCreateTextureObject(&tex, &resDesc, &texDesc, NULL)); texobj_ = tex; pitch_ = pitch; pitch2_ = pitch_ / sizeof(T); @@ -255,7 +275,8 @@ TextureObject<T>::TextureObject(size_t width, size_t height) { // cppcheck-suppress memsetClassFloat memset(&texDesc, 0, sizeof(texDesc)); texDesc.readMode = cudaReadModeElementType; - cudaCreateTextureObject(&tex, &resDesc, &texDesc, NULL); + //if (std::is_same<T,uchar4>::value) texDesc.filterMode = cudaFilterModeLinear; + cudaSafeCall(cudaCreateTextureObject(&tex, &resDesc, &texDesc, NULL)); //} texobj_ = tex; diff --git a/components/operators/CMakeLists.txt b/components/operators/CMakeLists.txt index 734734256..f177aa591 100644 --- a/components/operators/CMakeLists.txt +++ b/components/operators/CMakeLists.txt @@ -16,6 +16,8 @@ set(OPERSRC src/segmentation.cpp src/mask.cu src/mask.cpp + src/antialiasing.cpp + src/antialiasing.cu ) if (HAVE_OPTFLOW) diff --git a/components/operators/include/ftl/operators/antialiasing.hpp b/components/operators/include/ftl/operators/antialiasing.hpp new file mode 100644 index 000000000..302631253 --- /dev/null +++ b/components/operators/include/ftl/operators/antialiasing.hpp @@ -0,0 +1,27 @@ +#ifndef _FTL_OPERATORS_ANTIALIASING_HPP_ +#define _FTL_OPERATORS_ANTIALIASING_HPP_ + +#include <ftl/operators/operator.hpp> +#include <ftl/cuda_common.hpp> + +namespace ftl { +namespace operators { + +/** + * Fast Approximate Anti-Aliasing by NVIDIA (2010) + */ +class FXAA : public ftl::operators::Operator { + public: + explicit FXAA(ftl::Configurable*); + ~FXAA(); + + inline Operator::Type type() const override { return Operator::Type::OneToOne; } + + bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override; + +}; + +} +} + +#endif // _FTL_OPERATORS_ANTIALIASING_HPP_ diff --git a/components/operators/src/antialiasing.cpp b/components/operators/src/antialiasing.cpp new file mode 100644 index 000000000..d3c63ce1b --- /dev/null +++ b/components/operators/src/antialiasing.cpp @@ -0,0 +1,22 @@ +#include <ftl/operators/antialiasing.hpp> +#include "antialiasing_cuda.hpp" + +using ftl::operators::FXAA; +using ftl::codecs::Channel; + +FXAA::FXAA(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) { + +} + +FXAA::~FXAA() { + +} + +bool FXAA::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *s, cudaStream_t stream) { + ftl::cuda::fxaa( + in.getTexture<uchar4>(Channel::Colour), + stream + ); + + return true; +} diff --git a/components/operators/src/antialiasing.cu b/components/operators/src/antialiasing.cu new file mode 100644 index 000000000..f0b64bd89 --- /dev/null +++ b/components/operators/src/antialiasing.cu @@ -0,0 +1,87 @@ +#include "antialiasing_cuda.hpp" + +#define T_PER_BLOCK 8 + +__device__ inline uchar4 toChar(const float4 rgba) { + return make_uchar4(rgba.x*255.0f, rgba.y*255.0f, rgba.z*255.0f, 255); +} + +__global__ void filter_fxaa2(ftl::cuda::TextureObject<uchar4> data) { + + int x = blockIdx.x*blockDim.x + threadIdx.x; + int y = blockIdx.y*blockDim.y + threadIdx.y; + + if(x >= data.width() || y >= data.height()) + { + return; + } + + uchar4 out_color; + cudaTextureObject_t texRef = data.cudaTexture(); + + const float FXAA_SPAN_MAX = 8.0f; + const float FXAA_REDUCE_MUL = 1.0f/8.0f; + const float FXAA_REDUCE_MIN = (1.0f/128.0f); + + float u = x + 0.5f; + float v = y + 0.5f; + + float4 rgbNW = tex2D<float4>( texRef, u-1.0f,v-1.0f); + float4 rgbNE = tex2D<float4>( texRef, u+1.0f,v-1.0f); + float4 rgbSW = tex2D<float4>( texRef, u-1.0f,v+1.0f); + float4 rgbSE = tex2D<float4>( texRef, u+1.0f,v+1.0f); + float4 rgbM = tex2D<float4>( texRef, u,v); + + const float4 luma = make_float4(0.299f, 0.587f, 0.114f,0.0f); + float lumaNW = dot(rgbNW, luma); + float lumaNE = dot(rgbNE, luma); + float lumaSW = dot(rgbSW, luma); + float lumaSE = dot(rgbSE, luma); + float lumaM = dot( rgbM, luma); + + float lumaMin = min(lumaM, min(min(lumaNW, lumaNE), min(lumaSW, lumaSE))); + float lumaMax = max(lumaM, max(max(lumaNW, lumaNE), max(lumaSW, lumaSE))); + + float2 dir; + dir.x = -((lumaNW + lumaNE) - (lumaSW + lumaSE)); + dir.y = ((lumaNW + lumaSW) - (lumaNE + lumaSE)); + + float dirReduce = max((lumaNW + lumaNE + lumaSW + lumaSE) * (0.25f * FXAA_REDUCE_MUL), FXAA_REDUCE_MIN); + + float rcpDirMin = 1.0f/(min(abs(dir.x), abs(dir.y)) + dirReduce); + + + float2 test = dir * rcpDirMin; + dir = clamp(test,-FXAA_SPAN_MAX,FXAA_SPAN_MAX); + + float4 rgbA = (1.0f/2.0f) * ( + tex2D<float4>( texRef,u+ dir.x * (1.0f/3.0f - 0.5f),v+ dir.y * (1.0f/3.0f - 0.5f))+ + tex2D<float4>( texRef,u+ dir.x * (2.0f/3.0f - 0.5f),v+ dir.y * (2.0f/3.0f - 0.5f))); + float4 rgbB = rgbA * (1.0f/2.0f) + (1.0f/4.0f) * ( + tex2D<float4>( texRef,u+ dir.x * (0.0f/3.0f - 0.5f),v+ dir.y * (0.0f/3.0f - 0.5f))+ + tex2D<float4>( texRef,u+ dir.x * (3.0f/3.0f - 0.5f),v+ dir.y * (3.0f/3.0f - 0.5f))); + float lumaB = dot(rgbB, luma); + + if((lumaB < lumaMin) || (lumaB > lumaMax)){ + out_color=toChar(rgbA); + } else { + out_color=toChar(rgbB); + } + + + //surf2Dwrite<uchar4>(out_color, surfaceWrite, x*sizeof(uchar4), y); + + data(x,y) = out_color; +} + +void ftl::cuda::fxaa(ftl::cuda::TextureObject<uchar4> &colour, cudaStream_t stream) { + const dim3 gridSize((colour.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (colour.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + filter_fxaa2<<<gridSize, blockSize, 0, stream>>>(colour); + cudaSafeCall( cudaGetLastError() ); + +#ifdef _DEBUG + cudaSafeCall(cudaDeviceSynchronize()); +#endif +} diff --git a/components/operators/src/antialiasing_cuda.hpp b/components/operators/src/antialiasing_cuda.hpp new file mode 100644 index 000000000..afe2c5246 --- /dev/null +++ b/components/operators/src/antialiasing_cuda.hpp @@ -0,0 +1,14 @@ +#ifndef _FTL_CUDA_ANTIALIASING_HPP_ +#define _FTL_CUDA_ANTIALIASING_HPP_ + +#include <ftl/cuda_common.hpp> + +namespace ftl { +namespace cuda { + +void fxaa(ftl::cuda::TextureObject<uchar4> &colour, cudaStream_t stream); + +} +} + +#endif diff --git a/components/operators/src/colours.cpp b/components/operators/src/colours.cpp index 474b783c4..9c6fff8b8 100644 --- a/components/operators/src/colours.cpp +++ b/components/operators/src/colours.cpp @@ -24,5 +24,8 @@ bool ColourChannels::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgb cv::cuda::cvtColor(temp_,col, cv::COLOR_BGR2BGRA, 0, cvstream); } + //in.resetTexture(Channel::Colour); + in.createTexture<uchar4>(Channel::Colour, true); + return true; } diff --git a/components/operators/src/mls.cu b/components/operators/src/mls.cu index d7770f134..70d801f2f 100644 --- a/components/operators/src/mls.cu +++ b/components/operators/src/mls.cu @@ -135,7 +135,7 @@ void ftl::cuda::mls_smooth( if (d0 < camera.minDepth || d0 > camera.maxDepth) return; float3 X = camera.screenToCam((int)(x),(int)(y),d0); - uchar4 c0 = colour_in.tex2D(x, y); + float4 c0 = colour_in.tex2D((float)x+0.5f, (float)y+0.5f); // Neighbourhood for (int v=-SEARCH_RADIUS; v<=SEARCH_RADIUS; ++v) { @@ -149,7 +149,7 @@ void ftl::cuda::mls_smooth( if (Ni.x+Ni.y+Ni.z == 0.0f) continue; - const uchar4 c = colour_in.tex2D(x+u, y+v); + const float4 c = colour_in.tex2D(float(x+u) + 0.5f, float(y+v) + 0.5f); const float cw = ftl::cuda::colourWeighting(c0,c,colour_smoothing); // Gauss approx weighting function using point distance @@ -259,7 +259,7 @@ __device__ inline int segmentID(int u, int v) { } float3 X = camera.screenToCam((int)(x),(int)(y),d0); - uchar4 c0 = colour_in.tex2D(x, y); + float4 c0 = colour_in.tex2D((float)x+0.5f, (float)y+0.5f); // Neighbourhood uchar4 base = region.tex2D(x,y); @@ -281,7 +281,7 @@ __device__ inline int segmentID(int u, int v) { if (Ni.x+Ni.y+Ni.z == 0.0f) continue; - const uchar4 c = colour_in.tex2D(x+u, y+v); + const float4 c = colour_in.tex2D(float(x+u) + 0.5f, float(y+v) + 0.5f); const float cw = ftl::cuda::colourWeighting(c0,c,colour_smoothing); // Allow missing point to borrow z value diff --git a/components/operators/src/segmentation.cu b/components/operators/src/segmentation.cu index baeda1023..3bfcbc0f5 100644 --- a/components/operators/src/segmentation.cu +++ b/components/operators/src/segmentation.cu @@ -12,6 +12,11 @@ __device__ inline float cross<uchar4>(uchar4 p1, uchar4 p2) { return max(max(__sad(p1.x,p2.x,0),__sad(p1.y,p2.y,0)), __sad(p1.z,p2.z,0)); } +template <> +__device__ inline float cross<float4>(float4 p1, float4 p2) { + return max(max(fabsf(p1.x - p2.x),fabsf(p1.y - p2.y)), fabsf(p1.z - p2.z)); +} + template <> __device__ inline float cross<float>(float p1, float p2) { return fabs(p1-p2); @@ -26,12 +31,12 @@ __device__ uchar4 calculate_support_region(const TextureObject<T> &img, int x, i uchar4 result = make_uchar4(0, 0, 0, 0); - T colour = img.tex2D(x,y); - T prev_colour = colour; + auto colour = img.tex2D((float)x+0.5f,(float)y+0.5f); + auto prev_colour = colour; int u; for (u=x-1; u >= x_min; --u) { - T next_colour = img.tex2D(u,y); + auto next_colour = img.tex2D((float)u+0.5f,(float)y+0.5f); if (cross(prev_colour, next_colour) > tau) { result.x = x - u - 1; break; @@ -42,7 +47,7 @@ __device__ uchar4 calculate_support_region(const TextureObject<T> &img, int x, i prev_colour = colour; for (u=x+1; u <= x_max; ++u) { - T next_colour = img.tex2D(u,y); + auto next_colour = img.tex2D((float)u+0.5f,(float)y+0.5f); if (cross(prev_colour, next_colour) > tau) { result.y = u - x - 1; break; @@ -54,7 +59,7 @@ __device__ uchar4 calculate_support_region(const TextureObject<T> &img, int x, i int v; prev_colour = colour; for (v=y-1; v >= y_min; --v) { - T next_colour = img.tex2D(x,v); + auto next_colour = img.tex2D((float)x+0.5f,(float)v+0.5f); if (cross(prev_colour, next_colour) > tau) { result.z = y - v - 1; break; @@ -65,7 +70,7 @@ __device__ uchar4 calculate_support_region(const TextureObject<T> &img, int x, i prev_colour = colour; for (v=y+1; v <= y_max; ++v) { - T next_colour = img.tex2D(x,v); + auto next_colour = img.tex2D((float)x+0.5f,(float)v+0.5f); if (cross(prev_colour, next_colour) > tau) { result.w = v - y - 1; break; @@ -147,10 +152,10 @@ __global__ void vis_support_region_kernel(TextureObject<uchar4> colour, TextureO for (int u=-baseY.x; u<=baseY.y; ++u) { if (x+u < 0 || y+v < 0 || x+u >= colour.width() || y+v >= colour.height()) continue; - uchar4 col = colour.tex2D(x+u, y+v); + auto col = colour.tex2D(float(x+u)+0.5f, float(y+v)+0.5f); colour(x+u, y+v) = (u==0 || v == 0) ? - make_uchar4(max(bcolour.x, col.x), max(bcolour.y, col.y), max(bcolour.z, col.z), 0) : - make_uchar4(max(acolour.x, col.x), max(acolour.y, col.y), max(acolour.z, col.z), 0); + make_uchar4(max(bcolour.x, (unsigned char)col.x), max(bcolour.y, (unsigned char)col.y), max(bcolour.z, (unsigned char)col.z), 0) : + make_uchar4(max(acolour.x, (unsigned char)col.x), max(acolour.y, (unsigned char)col.y), max(acolour.z, (unsigned char)col.z), 0); } } } @@ -198,7 +203,7 @@ __global__ void vis_bad_region_kernel( uchar4 base = region.tex2D(x,y); uchar4 baseD = dregion.tex2D(x,y); - uchar4 col = colour.tex2D(x,y); + auto col = colour.tex2D((float)x+0.5f,(float)y+0.5f); float d = depth.tex2D(x,y); if (baseD.x > base.x && baseD.y < base.y) { diff --git a/components/renderers/cpp/include/ftl/cuda/weighting.hpp b/components/renderers/cpp/include/ftl/cuda/weighting.hpp index bffff673d..b0c3f58b9 100644 --- a/components/renderers/cpp/include/ftl/cuda/weighting.hpp +++ b/components/renderers/cpp/include/ftl/cuda/weighting.hpp @@ -44,6 +44,20 @@ __device__ inline float colourDistance(uchar4 a, uchar4 b) { return ch*ch*ch*ch; } +/* + * Colour weighting as suggested in: + * C. Kuster et al. Spatio-Temporal Geometry Fusion for Multiple Hybrid Cameras using Moving Least Squares Surfaces. 2014. + * c = colour distance + */ + __device__ inline float colourWeighting(const float4 &a, const float4 &b, float h) { + const float3 delta = make_float3(a.x - b.x, a.y - b.y, a.z - b.z); + const float c = length(delta); + if (c >= h) return 0.0f; + float ch = c / h; + ch = 1.0f - ch*ch; + return ch*ch*ch*ch; +} + } } diff --git a/components/renderers/cpp/src/reprojection.cu b/components/renderers/cpp/src/reprojection.cu index e74b4bb60..f1a9022a5 100644 --- a/components/renderers/cpp/src/reprojection.cu +++ b/components/renderers/cpp/src/reprojection.cu @@ -80,7 +80,7 @@ __global__ void reprojection_kernel( const float3 camPos = poseInv * worldPos; if (camPos.z < camera.minDepth) return; if (camPos.z > camera.maxDepth) return; - const uint2 screenPos = camera.camToScreen<uint2>(camPos); + const float2 screenPos = camera.camToScreen<float2>(camPos); // Not on screen so stop now... if (screenPos.x >= depth_src.width() || screenPos.y >= depth_src.height()) return; @@ -91,8 +91,8 @@ __global__ void reprojection_kernel( ray = ray / length(ray); const float dotproduct = max(dot(ray,n),0.0f); - const float d2 = depth_src.tex2D((int)screenPos.x, (int)screenPos.y); - const A input = in.tex2D((int)screenPos.x, (int)screenPos.y); //generateInput(in.tex2D((int)screenPos.x, (int)screenPos.y), params, worldPos); + const float d2 = depth_src.tex2D(int(screenPos.x+0.5f), int(screenPos.y+0.5f)); + const auto input = in.tex2D(screenPos.x, screenPos.y); //generateInput(in.tex2D((int)screenPos.x, (int)screenPos.y), params, worldPos); // TODO: Z checks need to interpolate between neighbors if large triangles are used float weight = ftl::cuda::weighting(fabs(camPos.z - d2), 0.02f); diff --git a/components/renderers/cpp/src/tri_render.cpp b/components/renderers/cpp/src/tri_render.cpp index 6d60a5f66..72e4f0546 100644 --- a/components/renderers/cpp/src/tri_render.cpp +++ b/components/renderers/cpp/src/tri_render.cpp @@ -411,6 +411,7 @@ bool Triangular::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out) { out.create<GpuMat>(Channel::Depth, Format<float>(camera.width, camera.height)); out.create<GpuMat>(Channel::Colour, Format<uchar4>(camera.width, camera.height)); + out.createTexture<uchar4>(Channel::Colour, true); // Force interpolated colour if (scene_->frames.size() == 0) return false; @@ -504,7 +505,7 @@ bool Triangular::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out) { if (chan == Channel::Normals) { // Convert normal to single float value - temp_.create<GpuMat>(Channel::Colour, Format<uchar4>(camera.width, camera.height)).setTo(cv::Scalar(0,0,0,0), cvstream); + temp_.create<GpuMat>(Channel::Colour, Format<uchar4>(out.get<GpuMat>(Channel::Colour).size())).setTo(cv::Scalar(0,0,0,0), cvstream); ftl::cuda::normal_visualise(scene_->frames[aligned_source].getTexture<float4>(Channel::Normals), temp_.createTexture<uchar4>(Channel::Colour), light_pos_, light_diffuse_, @@ -532,15 +533,19 @@ bool Triangular::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out) { { // Just convert int depth to float depth //temp_.get<GpuMat>(Channel::Depth2).convertTo(out.get<GpuMat>(Channel::Depth), CV_32F, 1.0f / 100000.0f, cvstream); - } else if (chan == Channel::Normals) { + } else if (chan == Channel::ColourNormals) { // Visualise normals to RGBA - accum_.create<GpuMat>(Channel::Normals, Format<uchar4>(camera.width, camera.height)).setTo(cv::Scalar(0,0,0,0), cvstream); - ftl::cuda::normal_visualise(out.getTexture<float4>(Channel::Normals), accum_.createTexture<uchar4>(Channel::Normals), + out.create<GpuMat>(Channel::ColourNormals, Format<uchar4>(camera.width, camera.height)).setTo(cv::Scalar(0,0,0,0), cvstream); + + ftl::cuda::normal_visualise(out.getTexture<float4>(Channel::Normals), out.createTexture<uchar4>(Channel::ColourNormals), light_pos_, light_diffuse_, light_ambient_, stream_); - accum_.swapTo(Channels(Channel::Normals), out); + //accum_.swapTo(Channels(Channel::Normals), out); + //cv::cuda::swap(accum_.get<GpuMat>(Channel::Normals), out.get<GpuMat>(Channel::Normals)); + //out.resetTexture(Channel::Normals); + //accum_.resetTexture(Channel::Normals); } //else if (chan == Channel::Contribution) //{ diff --git a/components/rgbd-sources/include/ftl/rgbd/frame.hpp b/components/rgbd-sources/include/ftl/rgbd/frame.hpp index 52bbe9022..d7b8292ff 100644 --- a/components/rgbd-sources/include/ftl/rgbd/frame.hpp +++ b/components/rgbd-sources/include/ftl/rgbd/frame.hpp @@ -81,14 +81,14 @@ public: * argument to also create (or recreate) the associated GpuMat. */ template <typename T> - ftl::cuda::TextureObject<T> &createTexture(ftl::codecs::Channel c, const ftl::rgbd::Format<T> &f); + ftl::cuda::TextureObject<T> &createTexture(ftl::codecs::Channel c, const ftl::rgbd::Format<T> &f, bool interpolated=false); /** * Create a CUDA texture object for a channel. With this version the GpuMat * must already exist and be of the correct type. */ template <typename T> - ftl::cuda::TextureObject<T> &createTexture(ftl::codecs::Channel c); + ftl::cuda::TextureObject<T> &createTexture(ftl::codecs::Channel c, bool interpolated=false); void resetTexture(ftl::codecs::Channel c); @@ -200,7 +200,7 @@ ftl::cuda::TextureObject<T> &Frame::getTexture(ftl::codecs::Channel c) { } template <typename T> -ftl::cuda::TextureObject<T> &Frame::createTexture(ftl::codecs::Channel c, const ftl::rgbd::Format<T> &f) { +ftl::cuda::TextureObject<T> &Frame::createTexture(ftl::codecs::Channel c, const ftl::rgbd::Format<T> &f, bool interpolated) { if (!channels_.has(c)) channels_ += c; if (!gpu_.has(c)) gpu_ += c; @@ -221,18 +221,18 @@ ftl::cuda::TextureObject<T> &Frame::createTexture(ftl::codecs::Channel c, const if (m.tex.devicePtr() == nullptr) { //LOG(INFO) << "Creating texture object"; - m.tex = ftl::cuda::TextureObject<T>(m.gpu); + m.tex = ftl::cuda::TextureObject<T>(m.gpu, interpolated); } else if (m.tex.cvType() != ftl::traits::OpenCVType<T>::value || m.tex.width() != m.gpu.cols || m.tex.height() != m.gpu.rows) { LOG(INFO) << "Recreating texture object for '" << ftl::codecs::name(c) << "'"; m.tex.free(); - m.tex = ftl::cuda::TextureObject<T>(m.gpu); + m.tex = ftl::cuda::TextureObject<T>(m.gpu, interpolated); } return ftl::cuda::TextureObject<T>::cast(m.tex); } template <typename T> -ftl::cuda::TextureObject<T> &Frame::createTexture(ftl::codecs::Channel c) { +ftl::cuda::TextureObject<T> &Frame::createTexture(ftl::codecs::Channel c, bool interpolated) { if (!channels_.has(c)) throw ftl::exception("createTexture needs a format if the channel does not exist"); auto &m = _get(c); @@ -254,11 +254,11 @@ ftl::cuda::TextureObject<T> &Frame::createTexture(ftl::codecs::Channel c) { if (m.tex.devicePtr() == nullptr) { //LOG(INFO) << "Creating texture object"; - m.tex = ftl::cuda::TextureObject<T>(m.gpu); + m.tex = ftl::cuda::TextureObject<T>(m.gpu, interpolated); } else if (m.tex.cvType() != ftl::traits::OpenCVType<T>::value || m.tex.width() != m.gpu.cols || m.tex.height() != m.gpu.rows || m.tex.devicePtr() != m.gpu.data) { LOG(INFO) << "Recreating texture object for '" << ftl::codecs::name(c) << "'."; m.tex.free(); - m.tex = ftl::cuda::TextureObject<T>(m.gpu); + m.tex = ftl::cuda::TextureObject<T>(m.gpu, interpolated); } return ftl::cuda::TextureObject<T>::cast(m.tex); -- GitLab