diff --git a/components/renderers/cpp/include/ftl/cuda/warp.hpp b/components/renderers/cpp/include/ftl/cuda/warp.hpp new file mode 100644 index 0000000000000000000000000000000000000000..9164b0eeeb8b3ef606aef4930f55b38a1afacdc4 --- /dev/null +++ b/components/renderers/cpp/include/ftl/cuda/warp.hpp @@ -0,0 +1,48 @@ +#ifndef _FTL_CUDA_WARP_HPP_ +#define _FTL_CUDA_WARP_HPP_ + +#ifndef WARP_SIZE +#define WARP_SIZE 32 +#endif + +#define FULL_MASK 0xffffffff + +namespace ftl { +namespace cuda { + +__device__ inline float warpMin(float e) { + for (int i = WARP_SIZE/2; i > 0; i /= 2) { + const float other = __shfl_xor_sync(FULL_MASK, e, i, WARP_SIZE); + e = min(e, other); + } + return e; +} + +__device__ inline float warpMax(float e) { + for (int i = WARP_SIZE/2; i > 0; i /= 2) { + const float other = __shfl_xor_sync(FULL_MASK, e, i, WARP_SIZE); + e = max(e, other); + } + return e; +} + +__device__ inline float warpSum(float e) { + for (int i = WARP_SIZE/2; i > 0; i /= 2) { + const float other = __shfl_xor_sync(FULL_MASK, e, i, WARP_SIZE); + e += other; + } + return e; +} + +__device__ inline int warpSum(int e) { + for (int i = WARP_SIZE/2; i > 0; i /= 2) { + const float other = __shfl_xor_sync(FULL_MASK, e, i, WARP_SIZE); + e += other; + } + return e; +} + +} +} + +#endif // _FTL_CUDA_WARP_HPP_ diff --git a/components/renderers/cpp/include/ftl/render/splat_render.hpp b/components/renderers/cpp/include/ftl/render/splat_render.hpp index 40bfa459b35e850d2147f74e33eabfaaddb3a261..5f8a8ba4940e910efd1e9b6f5a0f0e900fde1ff7 100644 --- a/components/renderers/cpp/include/ftl/render/splat_render.hpp +++ b/components/renderers/cpp/include/ftl/render/splat_render.hpp @@ -45,6 +45,8 @@ class Splatter : public ftl::render::Renderer { bool clipping_; float norm_filter_; bool backcull_; + cv::Scalar background_; + bool splat_; }; } diff --git a/components/renderers/cpp/src/normals.cu b/components/renderers/cpp/src/normals.cu index ad6c34cf4cadcda6f46fa677f48502099ef1a013..eec540ccd1ef022b91d6fb003b3cfa1d62a0a0a6 100644 --- a/components/renderers/cpp/src/normals.cu +++ b/components/renderers/cpp/src/normals.cu @@ -20,7 +20,7 @@ __global__ void computeNormals_kernel(ftl::cuda::TextureObject<float4> output, const float3 MC = make_float3(input.tex2D((int)x+0, (int)y-1)); //[(y-1)*width+(x+0)]; const float3 CM = make_float3(input.tex2D((int)x-1, (int)y+0)); //[(y+0)*width+(x-1)]; - if(CC.x != MINF) { // && PC.x != MINF && CP.x != MINF && MC.x != MINF && CM.x != MINF) { + if(CC.x != MINF && PC.x != MINF && CP.x != MINF && MC.x != MINF && CM.x != MINF) { const float3 n = cross(PC-MC, CP-CM); const float l = length(n); diff --git a/components/renderers/cpp/src/splat_render.cpp b/components/renderers/cpp/src/splat_render.cpp index e66c11b651a6def941ed82ee767d76285ff86e34..26f8e893ee3846a06adadfa6d0fd42393d1e4c83 100644 --- a/components/renderers/cpp/src/splat_render.cpp +++ b/components/renderers/cpp/src/splat_render.cpp @@ -6,11 +6,14 @@ #include <opencv2/core/cuda_stream_accessor.hpp> +#include <string> + using ftl::render::Splatter; using ftl::rgbd::Channel; using ftl::rgbd::Channels; using ftl::rgbd::Format; using cv::cuda::GpuMat; +using std::stoul; static Eigen::Affine3d create_rotation_matrix(float ax, float ay, float az) { Eigen::Affine3d rx = @@ -22,6 +25,22 @@ static Eigen::Affine3d create_rotation_matrix(float ax, float ay, float az) { return rz * rx * ry; } +static cv::Scalar parseColour(const std::string &colour) { + std::string c = colour; + if (c[0] == '#') { + c.erase(0, 1); + unsigned long value = stoul(c.c_str(), nullptr, 16); + return cv::Scalar( + (value >> 0) & 0xff, + (value >> 8) & 0xff, + (value >> 16) & 0xff, + (value >> 24) & 0xff + ); + } + + return cv::Scalar(0,0,0,0); +} + Splatter::Splatter(nlohmann::json &config, ftl::rgbd::FrameSet *fs) : ftl::render::Renderer(config), scene_(fs) { if (config["clipping"].is_object()) { auto &c = config["clipping"]; @@ -59,6 +78,16 @@ Splatter::Splatter(nlohmann::json &config, ftl::rgbd::FrameSet *fs) : ftl::rende on("back_cull", [this](const ftl::config::Event &e) { backcull_ = value("back_cull", true); }); + + splat_ = value("splatting", true); + on("splatting", [this](const ftl::config::Event &e) { + splat_ = value("splatting", true); + }); + + background_ = parseColour(value("background", std::string("#e0e0e0"))); + on("background", [this](const ftl::config::Event &e) { + background_ = parseColour(value("background", std::string("#e0e0e0"))); + }); } Splatter::~Splatter() { @@ -73,12 +102,12 @@ void Splatter::renderChannel( cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream); temp_.get<GpuMat>(Channel::Depth).setTo(cv::Scalar(0x7FFFFFFF), cvstream); temp_.get<GpuMat>(Channel::Depth2).setTo(cv::Scalar(0x7FFFFFFF), cvstream); - temp_.get<GpuMat>(Channel::Colour).setTo(cv::Scalar(0.0f,0.0f,0.0f,0.0f), cvstream); - temp_.get<GpuMat>(Channel::Contribution).setTo(cv::Scalar(0.0f), cvstream); + //temp_.get<GpuMat>(Channel::Colour).setTo(cv::Scalar(0.0f,0.0f,0.0f,0.0f), cvstream); + //temp_.get<GpuMat>(Channel::Contribution).setTo(cv::Scalar(0.0f), cvstream); if (scene_->frames.size() < 1) return; - bool is_float = scene_->frames[0].get<GpuMat>(channel).type() == CV_32F; //ftl::rgbd::isFloatChannel(channel); - bool is_4chan = scene_->frames[0].get<GpuMat>(channel).type() == CV_32FC4; + bool is_float = out.get<GpuMat>(channel).type() == CV_32F; //ftl::rgbd::isFloatChannel(channel); + bool is_4chan = out.get<GpuMat>(channel).type() == CV_32FC4; // Render each camera into virtual view // TODO: Move out of renderChannel, this is a common step to all channels @@ -94,19 +123,43 @@ void Splatter::renderChannel( ftl::cuda::dibr_merge( f.createTexture<float4>(Channel::Points), f.createTexture<float4>(Channel::Normals), - temp_.getTexture<int>(Channel::Depth), + temp_.createTexture<int>(Channel::Depth2), params, backcull_, stream ); //LOG(INFO) << "DIBR DONE"; } - // TODO: Add the depth splatting step.. + //temp_.createTexture<float4>(Channel::Colour); + //temp_.createTexture<float>(Channel::Contribution); + out.create<GpuMat>(Channel::Normals, Format<float4>(params.camera.width, params.camera.height)); - temp_.createTexture<float4>(Channel::Colour); - temp_.createTexture<float>(Channel::Contribution); + // Create normals first + for (auto &f : scene_->frames) { + ftl::cuda::dibr_attribute( + f.createTexture<float4>(Channel::Normals), + f.createTexture<float4>(Channel::Points), + temp_.getTexture<int>(Channel::Depth2), + out.createTexture<float4>(Channel::Normals), + params, stream + ); + } + + //temp_.get<GpuMat>(Channel::Colour).setTo(cv::Scalar(0.0f,0.0f,0.0f,0.0f), cvstream); + //temp_.get<GpuMat>(Channel::Contribution).setTo(cv::Scalar(0.0f), cvstream); + + if (is_4chan) { + temp_.create<GpuMat>(Channel::Colour2, Format<float4>(params.camera.width, params.camera.height)); + temp_.get<GpuMat>(Channel::Colour2).setTo(cv::Scalar(0.0f,0.0f,0.0f,0.0f), cvstream); + } else if (is_float) { + temp_.create<GpuMat>(Channel::Colour2, Format<float>(params.camera.width, params.camera.height)); + temp_.get<GpuMat>(Channel::Colour2).setTo(cv::Scalar(0.0f), cvstream); + } else { + temp_.create<GpuMat>(Channel::Colour2, Format<uchar4>(params.camera.width, params.camera.height)); + temp_.get<GpuMat>(Channel::Colour2).setTo(cv::Scalar(0,0,0,0), cvstream); + } - // Accumulate attribute contributions for each pixel + // Create attribute first for (auto &f : scene_->frames) { // Convert colour from BGR to BGRA if needed if (f.get<GpuMat>(channel).type() == CV_8UC3) { @@ -116,70 +169,66 @@ void Splatter::renderChannel( cv::cuda::swap(col, tmp); cv::cuda::cvtColor(tmp,col, cv::COLOR_BGR2BGRA); } - - if (is_float) { - ftl::cuda::dibr_attribute( - f.createTexture<float>(channel), - f.createTexture<float4>(Channel::Points), - temp_.getTexture<int>(Channel::Depth), - temp_.getTexture<float4>(Channel::Colour), - temp_.getTexture<float>(Channel::Contribution), - params, stream - ); - } else if (is_4chan) { + + if (is_4chan) { ftl::cuda::dibr_attribute( f.createTexture<float4>(channel), f.createTexture<float4>(Channel::Points), - temp_.getTexture<int>(Channel::Depth), - temp_.getTexture<float4>(Channel::Colour), - temp_.getTexture<float>(Channel::Contribution), + temp_.getTexture<int>(Channel::Depth2), + (splat_) ? temp_.createTexture<float4>(Channel::Colour2) : out.createTexture<float4>(channel), params, stream ); - } else if (channel == Channel::Colour || channel == Channel::Right) { + } else if (is_float) { ftl::cuda::dibr_attribute( - f.createTexture<uchar4>(Channel::Colour), + f.createTexture<float>(channel), f.createTexture<float4>(Channel::Points), - temp_.getTexture<int>(Channel::Depth), - temp_.getTexture<float4>(Channel::Colour), - temp_.getTexture<float>(Channel::Contribution), + temp_.getTexture<int>(Channel::Depth2), + (splat_) ? temp_.createTexture<float>(Channel::Colour2) : out.createTexture<float>(channel), params, stream ); } else { ftl::cuda::dibr_attribute( f.createTexture<uchar4>(channel), f.createTexture<float4>(Channel::Points), - temp_.getTexture<int>(Channel::Depth), - temp_.getTexture<float4>(Channel::Colour), - temp_.getTexture<float>(Channel::Contribution), + temp_.getTexture<int>(Channel::Depth2), + (splat_) ? temp_.createTexture<uchar4>(Channel::Colour2) : out.createTexture<uchar4>(channel), params, stream ); } } - if (is_4chan) { - // Normalise attribute contributions - ftl::cuda::dibr_normalise( - temp_.createTexture<float4>(Channel::Colour), - out.createTexture<float4>(channel), - temp_.createTexture<float>(Channel::Contribution), - stream - ); - } else if (is_float) { - // Normalise attribute contributions - ftl::cuda::dibr_normalise( - temp_.createTexture<float4>(Channel::Colour), - out.createTexture<float>(channel), - temp_.createTexture<float>(Channel::Contribution), - stream - ); - } else { - // Normalise attribute contributions - ftl::cuda::dibr_normalise( - temp_.createTexture<float4>(Channel::Colour), - out.createTexture<uchar4>(channel), - temp_.createTexture<float>(Channel::Contribution), - stream - ); + //out.get<GpuMat>(Channel::Left).setTo(cv::Scalar(0,0,0,0), cvstream); + + // Now splat the points + if (splat_) { + if (is_4chan) { + ftl::cuda::splat( + out.getTexture<float4>(Channel::Normals), + temp_.getTexture<float4>(Channel::Colour2), + temp_.getTexture<int>(Channel::Depth2), + out.createTexture<float>(Channel::Depth), + out.createTexture<float4>(channel), + params, stream + ); + } else if (is_float) { + ftl::cuda::splat( + out.getTexture<float4>(Channel::Normals), + temp_.getTexture<float>(Channel::Colour2), + temp_.getTexture<int>(Channel::Depth2), + out.createTexture<float>(Channel::Depth), + out.createTexture<float>(channel), + params, stream + ); + } else { + ftl::cuda::splat( + out.getTexture<float4>(Channel::Normals), + temp_.getTexture<uchar4>(Channel::Colour2), + temp_.getTexture<int>(Channel::Depth2), + out.createTexture<float>(Channel::Depth), + out.createTexture<uchar4>(channel), + params, stream + ); + } } } @@ -196,38 +245,14 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cuda out.create<GpuMat>(Channel::Colour, Format<uchar4>(camera.width, camera.height)); // FIXME: Use source resolutions, not virtual resolution - temp_.create<GpuMat>(Channel::Colour, Format<float4>(camera.width, camera.height)); - temp_.create<GpuMat>(Channel::Colour2, Format<uchar4>(camera.width, camera.height)); - temp_.create<GpuMat>(Channel::Contribution, Format<float>(camera.width, camera.height)); + //temp_.create<GpuMat>(Channel::Colour, Format<float4>(camera.width, camera.height)); + //temp_.create<GpuMat>(Channel::Contribution, Format<float>(camera.width, camera.height)); temp_.create<GpuMat>(Channel::Depth, Format<int>(camera.width, camera.height)); temp_.create<GpuMat>(Channel::Depth2, Format<int>(camera.width, camera.height)); temp_.create<GpuMat>(Channel::Normals, Format<float4>(camera.width, camera.height)); cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream); - // Create buffers if they don't exist - /*if ((unsigned int)depth1_.width() != camera.width || (unsigned int)depth1_.height() != camera.height) { - depth1_ = ftl::cuda::TextureObject<int>(camera.width, camera.height); - } - if ((unsigned int)depth3_.width() != camera.width || (unsigned int)depth3_.height() != camera.height) { - depth3_ = ftl::cuda::TextureObject<int>(camera.width, camera.height); - } - if ((unsigned int)colour1_.width() != camera.width || (unsigned int)colour1_.height() != camera.height) { - colour1_ = ftl::cuda::TextureObject<uchar4>(camera.width, camera.height); - } - if ((unsigned int)colour_tmp_.width() != camera.width || (unsigned int)colour_tmp_.height() != camera.height) { - colour_tmp_ = ftl::cuda::TextureObject<float4>(camera.width, camera.height); - } - if ((unsigned int)normal1_.width() != camera.width || (unsigned int)normal1_.height() != camera.height) { - normal1_ = ftl::cuda::TextureObject<float4>(camera.width, camera.height); - } - if ((unsigned int)depth2_.width() != camera.width || (unsigned int)depth2_.height() != camera.height) { - depth2_ = ftl::cuda::TextureObject<float>(camera.width, camera.height); - } - if ((unsigned int)colour2_.width() != camera.width || (unsigned int)colour2_.height() != camera.height) { - colour2_ = ftl::cuda::TextureObject<uchar4>(camera.width, camera.height); - }*/ - // Parameters object to pass to CUDA describing the camera SplatParams params; params.m_flags = 0; @@ -242,7 +267,7 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cuda // Clear all channels to 0 or max depth out.get<GpuMat>(Channel::Depth).setTo(cv::Scalar(1000.0f), cvstream); - out.get<GpuMat>(Channel::Colour).setTo(cv::Scalar(76,76,76), cvstream); + out.get<GpuMat>(Channel::Colour).setTo(background_, cvstream); //LOG(INFO) << "Render ready: " << camera.width << "," << camera.height; @@ -289,7 +314,7 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cuda Channel chan = src->getChannel(); if (chan == Channel::Depth) { - temp_.get<GpuMat>(Channel::Depth).convertTo(out.get<GpuMat>(Channel::Depth), CV_32F, 1.0f / 1000.0f, cvstream); + //temp_.get<GpuMat>(Channel::Depth).convertTo(out.get<GpuMat>(Channel::Depth), CV_32F, 1.0f / 1000.0f, cvstream); } else if (chan == Channel::Normals) { out.create<GpuMat>(Channel::Normals, Format<float4>(camera.width, camera.height)); @@ -297,7 +322,8 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cuda renderChannel(params, out, Channel::Normals, stream); // Convert normal to single float value - ftl::cuda::normal_visualise(out.getTexture<float4>(Channel::Normals), temp_.getTexture<float>(Channel::Contribution), camera, params.m_viewMatrixInverse, stream); + temp_.create<GpuMat>(Channel::Contribution, Format<float>(camera.width, camera.height)); + ftl::cuda::normal_visualise(out.getTexture<float4>(Channel::Normals), temp_.createTexture<float>(Channel::Contribution), camera, params.m_viewMatrixInverse, stream); // Put in output as single float cv::cuda::swap(temp_.get<GpuMat>(Channel::Contribution), out.create<GpuMat>(Channel::Normals)); @@ -315,7 +341,7 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cuda params.m_viewMatrixInverse = MatrixConversion::toCUDA(matrix); out.create<GpuMat>(Channel::Right, Format<uchar4>(camera.width, camera.height)); - out.get<GpuMat>(Channel::Right).setTo(cv::Scalar(76,76,76), cvstream); + out.get<GpuMat>(Channel::Right).setTo(background_, cvstream); renderChannel(params, out, Channel::Right, stream); } else if (chan != Channel::None) { if (ftl::rgbd::isFloatChannel(chan)) { @@ -323,7 +349,7 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cuda out.get<GpuMat>(chan).setTo(cv::Scalar(0.0f), cvstream); } else { out.create<GpuMat>(chan, Format<uchar4>(camera.width, camera.height)); - out.get<GpuMat>(chan).setTo(cv::Scalar(76,76,76,255), cvstream); + out.get<GpuMat>(chan).setTo(background_, cvstream); } renderChannel(params, out, chan, stream); } diff --git a/components/renderers/cpp/src/splatter.cu b/components/renderers/cpp/src/splatter.cu index c91673756663d1e2c4aefea5e785ff52d25a487d..a4f5d97ff91b62561cfa4fda990c44fc7790c72f 100644 --- a/components/renderers/cpp/src/splatter.cu +++ b/components/renderers/cpp/src/splatter.cu @@ -4,6 +4,8 @@ #include <ftl/cuda_common.hpp> #include <ftl/cuda/weighting.hpp> +#include <ftl/cuda/intersections.hpp> +#include <ftl/cuda/warp.hpp> #define T_PER_BLOCK 8 #define UPSAMPLE_FACTOR 1.8f @@ -13,8 +15,17 @@ #define MAX_ITERATIONS 32 // Note: Must be multiple of 32 #define SPATIAL_SMOOTHING 0.005f +#define ENERGY_THRESHOLD 0.1f +#define SMOOTHING_MULTIPLIER_A 10.0f // For surface search +#define SMOOTHING_MULTIPLIER_B 4.0f // For z contribution +#define SMOOTHING_MULTIPLIER_C 2.0f // For colour contribution + +#define ACCUM_DIAMETER 8 + using ftl::cuda::TextureObject; using ftl::render::SplatParams; +using ftl::cuda::warpMin; +using ftl::cuda::warpSum; /* * Pass 1: Directly render each camera into virtual view but with no upsampling @@ -76,163 +87,59 @@ __device__ inline float4 make_float4(const uchar4 &c) { return make_float4(c.x,c.y,c.z,c.w); } - -#define ENERGY_THRESHOLD 0.1f -#define SMOOTHING_MULTIPLIER_A 10.0f // For surface search -#define SMOOTHING_MULTIPLIER_B 4.0f // For z contribution -#define SMOOTHING_MULTIPLIER_C 1.0f // For colour contribution - -#define ACCUM_DIAMETER 8 - -/* - * Pass 2: Accumulate attribute contributions if the points pass a visibility test. - */ -__global__ void dibr_attribute_contrib_kernel( - TextureObject<uchar4> colour_in, // Original colour image - TextureObject<float4> points, // Original 3D points - TextureObject<int> depth_in, // Virtual depth map - TextureObject<float4> colour_out, // Accumulated output - //TextureObject<float4> normal_out, - TextureObject<float> contrib_out, - SplatParams params) { - - //const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam]; - - const int tid = (threadIdx.x + threadIdx.y * blockDim.x); - //const int warp = tid / WARP_SIZE; - const int x = (blockIdx.x*blockDim.x + threadIdx.x) / WARP_SIZE; - const int y = blockIdx.y*blockDim.y + threadIdx.y; - - const float4 worldPos = points.tex2D(x, y); - //const float3 normal = make_float3(tex2D<float4>(camera.normal, x, y)); - if (worldPos.x == MINF) return; - //const float r = (camera.poseInverse * worldPos).z / camera.params.fx; - - const float3 camPos = params.m_viewMatrix * make_float3(worldPos); - if (camPos.z < params.camera.minDepth) return; - if (camPos.z > params.camera.maxDepth) return; - const uint2 screenPos = params.camera.camToScreen<uint2>(camPos); - - //const int upsample = 8; //min(UPSAMPLE_MAX, int((5.0f*r) * params.camera.fx / camPos.z)); - - // Not on screen so stop now... - if (screenPos.x >= depth_in.width() || screenPos.y >= depth_in.height()) return; - - // Is this point near the actual surface and therefore a contributor? - const float d = ((float)depth_in.tex2D((int)screenPos.x, (int)screenPos.y)/1000.0f); - //if (abs(d - camPos.z) > DEPTH_THRESHOLD) return; - - const float4 colour = (params.m_flags & ftl::render::kShowDisconMask && worldPos.w < 0.0f) ? - make_float4(0.0f,0.0f,255.0f,255.0f) : // Show discontinuity mask in red - make_float4(colour_in.tex2D(x, y)); - //const float4 normal = tex2D<float4>(camera.normal, x, y); - - // Each thread in warp takes an upsample point and updates corresponding depth buffer. - const int lane = tid % WARP_SIZE; - for (int i=lane; i<ACCUM_DIAMETER*ACCUM_DIAMETER; i+=WARP_SIZE) { - const float u = (i % ACCUM_DIAMETER) - (ACCUM_DIAMETER / 2); - const float v = (i / ACCUM_DIAMETER) - (ACCUM_DIAMETER / 2); - - // Use the depth buffer to determine this pixels 3D position in camera space - const float d = ((float)depth_in.tex2D(screenPos.x+u, screenPos.y+v)/1000.0f); - const float3 nearest = params.camera.screenToCam((int)(screenPos.x+u),(int)(screenPos.y+v),d); - - // What is contribution of our current point at this pixel? - const float weight = ftl::cuda::spatialWeighting(nearest, camPos, SMOOTHING_MULTIPLIER_C*(nearest.z/params.camera.fx)); - if (screenPos.x+u < colour_out.width() && screenPos.y+v < colour_out.height() && weight > 0.0f) { // TODO: Use confidence threshold here - const float4 wcolour = colour * weight; - //const float4 wnormal = normal * weight; - - //printf("Z %f\n", d); - - // Add this points contribution to the pixel buffer - atomicAdd((float*)&colour_out(screenPos.x+u, screenPos.y+v), wcolour.x); - atomicAdd((float*)&colour_out(screenPos.x+u, screenPos.y+v)+1, wcolour.y); - atomicAdd((float*)&colour_out(screenPos.x+u, screenPos.y+v)+2, wcolour.z); - atomicAdd((float*)&colour_out(screenPos.x+u, screenPos.y+v)+3, wcolour.w); - //atomicAdd((float*)&normal_out(screenPos.x+u, screenPos.y+v), wnormal.x); - //atomicAdd((float*)&normal_out(screenPos.x+u, screenPos.y+v)+1, wnormal.y); - //atomicAdd((float*)&normal_out(screenPos.x+u, screenPos.y+v)+2, wnormal.z); - //atomicAdd((float*)&normal_out(screenPos.x+u, screenPos.y+v)+3, wnormal.w); - atomicAdd(&contrib_out(screenPos.x+u, screenPos.y+v), weight); - } - } +__device__ inline float4 make_float4(const float4 &v) { + return v; } -__global__ void dibr_attribute_contrib_kernel( - TextureObject<float> colour_in, // Original colour image - TextureObject<float4> points, // Original 3D points - TextureObject<int> depth_in, // Virtual depth map - TextureObject<float4> colour_out, // Accumulated output - //TextureObject<float4> normal_out, - TextureObject<float> contrib_out, - SplatParams params) { - - //const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam]; - - const int tid = (threadIdx.x + threadIdx.y * blockDim.x); - //const int warp = tid / WARP_SIZE; - const int x = (blockIdx.x*blockDim.x + threadIdx.x) / WARP_SIZE; - const int y = blockIdx.y*blockDim.y + threadIdx.y; +template <typename T> +__device__ inline T make(); - const float3 worldPos = make_float3(points.tex2D(x, y)); - //const float3 normal = make_float3(tex2D<float4>(camera.normal, x, y)); - if (worldPos.x == MINF) return; - //const float r = (camera.poseInverse * worldPos).z / camera.params.fx; - - const float3 camPos = params.m_viewMatrix * worldPos; - if (camPos.z < params.camera.minDepth) return; - if (camPos.z > params.camera.maxDepth) return; - const uint2 screenPos = params.camera.camToScreen<uint2>(camPos); +template <> +__device__ inline uchar4 make() { + return make_uchar4(0,0,0,0); +} - const int upsample = 8; //min(UPSAMPLE_MAX, int((5.0f*r) * params.camera.fx / camPos.z)); +template <> +__device__ inline float4 make() { + return make_float4(0.0f,0.0f,0.0f,0.0f); +} - // Not on screen so stop now... - if (screenPos.x >= depth_in.width() || screenPos.y >= depth_in.height()) return; - - // Is this point near the actual surface and therefore a contributor? - const float d = ((float)depth_in.tex2D((int)screenPos.x, (int)screenPos.y)/1000.0f); - //if (abs(d - camPos.z) > DEPTH_THRESHOLD) return; +template <> +__device__ inline float make() { + return 0.0f; +} - // TODO:(Nick) Should just one thread load these to shared mem? - const float colour = (colour_in.tex2D(x, y)); - //const float4 normal = tex2D<float4>(camera.normal, x, y); +template <typename T> +__device__ inline T make(const float4 &); - // Each thread in warp takes an upsample point and updates corresponding depth buffer. - const int lane = tid % WARP_SIZE; - for (int i=lane; i<upsample*upsample; i+=WARP_SIZE) { - const float u = (i % upsample) - (upsample / 2); - const float v = (i / upsample) - (upsample / 2); +template <> +__device__ inline uchar4 make(const float4 &v) { + return make_uchar4((int)v.x, (int)v.y, (int)v.z, (int)v.w); +} - // Use the depth buffer to determine this pixels 3D position in camera space - const float d = ((float)depth_in.tex2D(screenPos.x+u, screenPos.y+v)/1000.0f); - const float3 nearest = params.camera.screenToCam((int)(screenPos.x+u),(int)(screenPos.y+v),d); - - // What is contribution of our current point at this pixel? - const float weight = ftl::cuda::spatialWeighting(nearest, camPos, SMOOTHING_MULTIPLIER_C*(nearest.z/params.camera.fx)); - if (screenPos.x+u < colour_out.width() && screenPos.y+v < colour_out.height() && weight > 0.0f) { // TODO: Use confidence threshold here - const float wcolour = colour * weight; - //const float4 wnormal = normal * weight; - - //printf("Z %f\n", d); +template <> +__device__ inline float4 make(const float4 &v) { + return v; +} - // Add this points contribution to the pixel buffer - atomicAdd((float*)&colour_out(screenPos.x+u, screenPos.y+v), wcolour); - atomicAdd(&contrib_out(screenPos.x+u, screenPos.y+v), weight); - } - } +template <> +__device__ inline float make(const float4 &v) { + return v.x; } /* - * Pass 2: Accumulate attribute contributions if the points pass a visibility test. + * Pass 1b: Expand splats to full size and merge */ - __global__ void dibr_attribute_contrib_kernel( - TextureObject<float4> colour_in, // Original colour image - TextureObject<float4> points, // Original 3D points + template <int SEARCH_DIAMETER, typename T> + __global__ void splat_kernel( + //TextureObject<float4> points, // Original 3D points + TextureObject<float4> normals, + TextureObject<T> in, TextureObject<int> depth_in, // Virtual depth map - TextureObject<float4> colour_out, // Accumulated output - //TextureObject<float4> normal_out, - TextureObject<float> contrib_out, + TextureObject<float> depth_out, // Accumulated output + TextureObject<T> out, + //ftl::rgbd::Camera camera, + //float4x4 pose_inv, SplatParams params) { //const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam]; @@ -242,130 +149,247 @@ __global__ void dibr_attribute_contrib_kernel( const int x = (blockIdx.x*blockDim.x + threadIdx.x) / WARP_SIZE; const int y = blockIdx.y*blockDim.y + threadIdx.y; - const float3 worldPos = make_float3(points.tex2D(x, y)); - //const float3 normal = make_float3(tex2D<float4>(camera.normal, x, y)); - if (worldPos.x == MINF) return; - //const float r = (camera.poseInverse * worldPos).z / camera.params.fx; + if (x < 0 || y < 0 || x >= depth_in.width() || y >= depth_in.height()) return; - const float3 camPos = params.m_viewMatrix * worldPos; - if (camPos.z < params.camera.minDepth) return; - if (camPos.z > params.camera.maxDepth) return; - const uint2 screenPos = params.camera.camToScreen<uint2>(camPos); + const float3 origin = params.m_viewMatrixInverse * make_float3(0.0f); + float3 ray = params.camera.screenToCam(x,y,1.0f); + ray = ray / length(ray); + const float scale = ray.z; + ray = params.m_viewMatrixInverse.getFloat3x3() * ray; - //const int upsample = 8; //min(UPSAMPLE_MAX, int((5.0f*r) * params.camera.fx / camPos.z)); + //float depth = 0.0f; + //float contrib = 0.0f; + float depth = 1000.0f; - // Not on screen so stop now... - if (screenPos.x >= depth_in.width() || screenPos.y >= depth_in.height()) return; - - // Is this point near the actual surface and therefore a contributor? - const float d = ((float)depth_in.tex2D((int)screenPos.x, (int)screenPos.y)/1000.0f); - //if (abs(d - camPos.z) > DEPTH_THRESHOLD) return; + struct Result { + float weight; + float depth; + T attr; + }; - // TODO:(Nick) Should just one thread load these to shared mem? - const float4 colour = (colour_in.tex2D(x, y)); - //const float4 normal = tex2D<float4>(camera.normal, x, y); + Result results[(SEARCH_DIAMETER*SEARCH_DIAMETER) / WARP_SIZE]; // Each thread in warp takes an upsample point and updates corresponding depth buffer. const int lane = tid % WARP_SIZE; - for (int i=lane; i<ACCUM_DIAMETER*ACCUM_DIAMETER; i+=WARP_SIZE) { - const float u = (i % ACCUM_DIAMETER) - (ACCUM_DIAMETER / 2); - const float v = (i / ACCUM_DIAMETER) - (ACCUM_DIAMETER / 2); + for (int i=lane; i<SEARCH_DIAMETER*SEARCH_DIAMETER; i+=WARP_SIZE) { + const float u = (i % SEARCH_DIAMETER) - (SEARCH_DIAMETER / 2); + const float v = (i / SEARCH_DIAMETER) - (SEARCH_DIAMETER / 2); + + results[i/WARP_SIZE] = {0.0f, 0.0f, make<T>()}; // Use the depth buffer to determine this pixels 3D position in camera space - const float d = ((float)depth_in.tex2D(screenPos.x+u, screenPos.y+v)/1000.0f); - const float3 nearest = params.camera.screenToCam((int)(screenPos.x+u),(int)(screenPos.y+v),d); - - // What is contribution of our current point at this pixel? - const float weight = ftl::cuda::spatialWeighting(nearest, camPos, SMOOTHING_MULTIPLIER_C*(nearest.z/params.camera.fx)); - if (screenPos.x+u < colour_out.width() && screenPos.y+v < colour_out.height() && weight > 0.0f) { // TODO: Use confidence threshold here - const float4 wcolour = colour * weight; - //const float4 wnormal = normal * weight; - - //printf("Z %f\n", d); - - // Add this points contribution to the pixel buffer - atomicAdd((float*)&colour_out(screenPos.x+u, screenPos.y+v), wcolour.x); - atomicAdd((float*)&colour_out(screenPos.x+u, screenPos.y+v)+1, wcolour.y); - atomicAdd((float*)&colour_out(screenPos.x+u, screenPos.y+v)+2, wcolour.z); - atomicAdd((float*)&colour_out(screenPos.x+u, screenPos.y+v)+3, wcolour.w); - //atomicAdd((float*)&normal_out(screenPos.x+u, screenPos.y+v), wnormal.x); - //atomicAdd((float*)&normal_out(screenPos.x+u, screenPos.y+v)+1, wnormal.y); - //atomicAdd((float*)&normal_out(screenPos.x+u, screenPos.y+v)+2, wnormal.z); - //atomicAdd((float*)&normal_out(screenPos.x+u, screenPos.y+v)+3, wnormal.w); - atomicAdd(&contrib_out(screenPos.x+u, screenPos.y+v), weight); + const float d = ((float)depth_in.tex2D(x+u, y+v)/1000.0f); + + if (d < params.camera.minDepth || d > params.camera.maxDepth) continue; + + const float3 camPos = params.camera.screenToCam((int)(x+u),(int)(y+v),d); + const float3 camPos2 = params.camera.screenToCam((int)(x),(int)(y),d); + //if (length(camPos - camPos2) > 2.0f*(camPos.z/params.camera.fx)) continue; + const float3 worldPos = params.m_viewMatrixInverse * camPos; + //const float3 camPos2 = pose_inv * worldPos; + //const uint2 screenPos = camera.camToScreen<uint2>(camPos2); + + //if (screenPos.x < points.width() && screenPos.y < points.height()) { + // Can now read points, normals and colours from source cam + + // What is contribution of our current point at this pixel? + //const float3 p = make_float3(points.tex2D((int)screenPos.x, (int)screenPos.y)); + //const float weight = ftl::cuda::spatialWeighting(worldPos, p, (camPos.z/params.camera.fx)); //*(camPos2.z/camera.fx)); + //if (weight <= 0.0f) continue; + + float3 n = make_float3(normals.tex2D((int)(x+u), (int)(y+v))); + const float l = length(n); + if (l == 0.0f) continue; + n /= l; + + // Does the ray intersect plane of splat? + float t = 1000.0f; + if (ftl::cuda::intersectPlane(n, worldPos, origin, ray, t)) { //} && fabs(t-camPos.z) < 0.01f) { + //t *= (params.m_viewMatrix.getFloat3x3() * ray).z; + t *= scale; + const float3 camPos3 = params.camera.screenToCam((int)(x),(int)(y),t); + const float weight = ftl::cuda::spatialWeighting(camPos, camPos3, 2.0f*(camPos3.z/params.camera.fx)); //*(camPos2.z/camera.fx)); + if (weight == 0.0f) continue; + //depth += t * weight; + //contrib += weight; + depth = min(depth, t); + results[i/WARP_SIZE] = {weight, t, in.tex2D((int)x+u, (int)y+v)}; //make_float2(t, weight); + //atomicMin(&depth_out(x,y), (int)(depth * 1000.0f)); + } + //} + } + + depth = warpMin(depth); + + float adepth = 0.0f; + float contrib = 0.0f; + float4 attr = make_float4(0.0f); + + // Loop over results array + for (int i=0; i<(SEARCH_DIAMETER*SEARCH_DIAMETER) / WARP_SIZE; ++i) { + if (results[i].depth - depth < 0.04f) { + adepth += results[i].depth * results[i].weight; + attr += make_float4(results[i].attr) * results[i].weight; + contrib += results[i].weight; } } + + // Sum all attributes and contributions + adepth = warpSum(adepth); + attr.x = warpSum(attr.x); + attr.y = warpSum(attr.y); + attr.z = warpSum(attr.z); + contrib = warpSum(contrib); + + if (lane == 0 && contrib > 0.0f) { + depth_out(x,y) = adepth / contrib; + out(x,y) = make<T>(attr / contrib); + } } -void ftl::cuda::dibr_attribute( - TextureObject<uchar4> &colour_in, // Original colour image - TextureObject<float4> &points, // Original 3D points +template <typename T> +void ftl::cuda::splat( + TextureObject<float4> &normals, + TextureObject<T> &colour_in, TextureObject<int> &depth_in, // Virtual depth map - TextureObject<float4> &colour_out, // Accumulated output - //TextureObject<float4> normal_out, - TextureObject<float> &contrib_out, - SplatParams ¶ms, cudaStream_t stream) { + TextureObject<float> &depth_out, + TextureObject<T> &colour_out, + const SplatParams ¶ms, cudaStream_t stream) { const dim3 gridSize((depth_in.width() + 2 - 1)/2, (depth_in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); - const dim3 blockSize(2*WARP_SIZE, T_PER_BLOCK); + const dim3 blockSize(2*WARP_SIZE, T_PER_BLOCK); - dibr_attribute_contrib_kernel<<<gridSize, blockSize, 0, stream>>>( + splat_kernel<8,T><<<gridSize, blockSize, 0, stream>>>( + normals, colour_in, - points, depth_in, + depth_out, colour_out, - contrib_out, params ); cudaSafeCall( cudaGetLastError() ); } -void ftl::cuda::dibr_attribute( - TextureObject<float> &colour_in, // Original colour image - TextureObject<float4> &points, // Original 3D points +template void ftl::cuda::splat<uchar4>( + TextureObject<float4> &normals, + TextureObject<uchar4> &colour_in, TextureObject<int> &depth_in, // Virtual depth map - TextureObject<float4> &colour_out, // Accumulated output - //TextureObject<float4> normal_out, - TextureObject<float> &contrib_out, - SplatParams ¶ms, cudaStream_t stream) { - const dim3 gridSize((depth_in.width() + 2 - 1)/2, (depth_in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); - const dim3 blockSize(2*WARP_SIZE, T_PER_BLOCK); + TextureObject<float> &depth_out, + TextureObject<uchar4> &colour_out, + const SplatParams ¶ms, cudaStream_t stream); + +template void ftl::cuda::splat<float4>( + TextureObject<float4> &normals, + TextureObject<float4> &colour_in, + TextureObject<int> &depth_in, // Virtual depth map + TextureObject<float> &depth_out, + TextureObject<float4> &colour_out, + const SplatParams ¶ms, cudaStream_t stream); + +template void ftl::cuda::splat<float>( + TextureObject<float4> &normals, + TextureObject<float> &colour_in, + TextureObject<int> &depth_in, // Virtual depth map + TextureObject<float> &depth_out, + TextureObject<float> &colour_out, + const SplatParams ¶ms, cudaStream_t stream); - dibr_attribute_contrib_kernel<<<gridSize, blockSize, 0, stream>>>( - colour_in, - points, - depth_in, - colour_out, - contrib_out, - params - ); - cudaSafeCall( cudaGetLastError() ); +//============================================================================== + +template <typename T> +__device__ inline T generateInput(const T &in, const SplatParams ¶ms, const float4 &worldPos) { + return in; +} + +template <> +__device__ inline uchar4 generateInput(const uchar4 &in, const SplatParams ¶ms, const float4 &worldPos) { + return (params.m_flags & ftl::render::kShowDisconMask && worldPos.w < 0.0f) ? + make_uchar4(0,0,255,255) : // Show discontinuity mask in red + in; } +/* + * Pass 2: Accumulate attribute contributions if the points pass a visibility test. + */ + template <typename T> +__global__ void dibr_attribute_contrib_kernel( + TextureObject<T> in, // Attribute input + TextureObject<float4> points, // Original 3D points + TextureObject<int> depth_in, // Virtual depth map + TextureObject<T> out, // Accumulated output + SplatParams params) { + + const int x = (blockIdx.x*blockDim.x + threadIdx.x); + const int y = blockIdx.y*blockDim.y + threadIdx.y; + + const float4 worldPos = points.tex2D(x, y); + if (worldPos.x == MINF || (!(params.m_flags & ftl::render::kShowDisconMask) && worldPos.w < 0.0f)) return; + + const float3 camPos = params.m_viewMatrix * make_float3(worldPos); + if (camPos.z < params.camera.minDepth) return; + if (camPos.z > params.camera.maxDepth) return; + const uint2 screenPos = params.camera.camToScreen<uint2>(camPos); + + // Not on screen so stop now... + if (screenPos.x >= depth_in.width() || screenPos.y >= depth_in.height()) return; + + // Is this point near the actual surface and therefore a contributor? + const int d = depth_in.tex2D((int)screenPos.x, (int)screenPos.y); + + const T input = generateInput(in.tex2D(x, y), params, worldPos); + + //const float3 nearest = params.camera.screenToCam((int)(screenPos.x),(int)(screenPos.y),d); + + //const float l = length(nearest - camPos); + if (d == (int)(camPos.z*1000.0f)) { + out(screenPos.x, screenPos.y) = input; + } +} + + +template <typename T> void ftl::cuda::dibr_attribute( - TextureObject<float4> &colour_in, // Original colour image + TextureObject<T> &in, TextureObject<float4> &points, // Original 3D points TextureObject<int> &depth_in, // Virtual depth map - TextureObject<float4> &colour_out, // Accumulated output - //TextureObject<float4> normal_out, - TextureObject<float> &contrib_out, + TextureObject<T> &out, // Accumulated output SplatParams ¶ms, cudaStream_t stream) { - const dim3 gridSize((depth_in.width() + 2 - 1)/2, (depth_in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); - const dim3 blockSize(2*WARP_SIZE, T_PER_BLOCK); + const dim3 gridSize((in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); dibr_attribute_contrib_kernel<<<gridSize, blockSize, 0, stream>>>( - colour_in, + in, points, depth_in, - colour_out, - contrib_out, + out, params ); cudaSafeCall( cudaGetLastError() ); } +template void ftl::cuda::dibr_attribute<uchar4>( + ftl::cuda::TextureObject<uchar4> &in, // Original colour image + ftl::cuda::TextureObject<float4> &points, // Original 3D points + ftl::cuda::TextureObject<int> &depth_in, // Virtual depth map + ftl::cuda::TextureObject<uchar4> &out, // Accumulated output + ftl::render::SplatParams ¶ms, cudaStream_t stream); + +template void ftl::cuda::dibr_attribute<float>( + ftl::cuda::TextureObject<float> &in, // Original colour image + ftl::cuda::TextureObject<float4> &points, // Original 3D points + ftl::cuda::TextureObject<int> &depth_in, // Virtual depth map + ftl::cuda::TextureObject<float> &out, // Accumulated output + ftl::render::SplatParams ¶ms, cudaStream_t stream); + +template void ftl::cuda::dibr_attribute<float4>( + ftl::cuda::TextureObject<float4> &in, // Original colour image + ftl::cuda::TextureObject<float4> &points, // Original 3D points + ftl::cuda::TextureObject<int> &depth_in, // Virtual depth map + ftl::cuda::TextureObject<float4> &out, // Accumulated output + ftl::render::SplatParams ¶ms, cudaStream_t stream); + //============================================================================== -__global__ void dibr_normalise_kernel( +/*__global__ void dibr_normalise_kernel( TextureObject<float4> colour_in, TextureObject<uchar4> colour_out, //TextureObject<float4> normals, @@ -447,4 +471,4 @@ void ftl::cuda::dibr_normalise(TextureObject<float4> &colour_in, TextureObject<f dibr_normalise_kernel<<<gridSize, blockSize, 0, stream>>>(colour_in, colour_out, contribs); cudaSafeCall( cudaGetLastError() ); -} +}*/ diff --git a/components/renderers/cpp/src/splatter_cuda.hpp b/components/renderers/cpp/src/splatter_cuda.hpp index a90e1445b2bdd45235efff301b16652e5b496f16..1888a586720d388567a6c4d48f4a29b6b50848ac 100644 --- a/components/renderers/cpp/src/splatter_cuda.hpp +++ b/components/renderers/cpp/src/splatter_cuda.hpp @@ -14,50 +14,22 @@ namespace cuda { bool culling, cudaStream_t stream); + template <typename T> + void splat( + ftl::cuda::TextureObject<float4> &normals, + ftl::cuda::TextureObject<T> &colour_in, + ftl::cuda::TextureObject<int> &depth_in, // Virtual depth map + ftl::cuda::TextureObject<float> &depth_out, + ftl::cuda::TextureObject<T> &colour_out, + const ftl::render::SplatParams ¶ms, cudaStream_t stream); + + template <typename T> void dibr_attribute( - ftl::cuda::TextureObject<uchar4> &in, // Original colour image + ftl::cuda::TextureObject<T> &in, // Original colour image ftl::cuda::TextureObject<float4> &points, // Original 3D points ftl::cuda::TextureObject<int> &depth_in, // Virtual depth map - ftl::cuda::TextureObject<float4> &out, // Accumulated output - //TextureObject<float4> normal_out, - ftl::cuda::TextureObject<float> &contrib_out, + ftl::cuda::TextureObject<T> &out, // Accumulated output ftl::render::SplatParams ¶ms, cudaStream_t stream); - - void dibr_attribute( - ftl::cuda::TextureObject<float> &in, // Original colour image - ftl::cuda::TextureObject<float4> &points, // Original 3D points - ftl::cuda::TextureObject<int> &depth_in, // Virtual depth map - ftl::cuda::TextureObject<float4> &out, // Accumulated output - //TextureObject<float4> normal_out, - ftl::cuda::TextureObject<float> &contrib_out, - ftl::render::SplatParams ¶ms, cudaStream_t stream); - - void dibr_attribute( - ftl::cuda::TextureObject<float4> &in, // Original colour image - ftl::cuda::TextureObject<float4> &points, // Original 3D points - ftl::cuda::TextureObject<int> &depth_in, // Virtual depth map - ftl::cuda::TextureObject<float4> &out, // Accumulated output - //TextureObject<float4> normal_out, - ftl::cuda::TextureObject<float> &contrib_out, - ftl::render::SplatParams ¶ms, cudaStream_t stream); - - void dibr_normalise( - ftl::cuda::TextureObject<float4> &in, - ftl::cuda::TextureObject<uchar4> &out, - ftl::cuda::TextureObject<float> &contribs, - cudaStream_t stream); - - void dibr_normalise( - ftl::cuda::TextureObject<float4> &in, - ftl::cuda::TextureObject<float> &out, - ftl::cuda::TextureObject<float> &contribs, - cudaStream_t stream); - - void dibr_normalise( - ftl::cuda::TextureObject<float4> &in, - ftl::cuda::TextureObject<float4> &out, - ftl::cuda::TextureObject<float> &contribs, - cudaStream_t stream); } }