diff --git a/components/renderers/cpp/CMakeLists.txt b/components/renderers/cpp/CMakeLists.txt index 45a6e30d890c405d45589effa6fbd1554472a56f..056bcf6434f5e4dcd5b5443611ee83e75d577ce9 100644 --- a/components/renderers/cpp/CMakeLists.txt +++ b/components/renderers/cpp/CMakeLists.txt @@ -6,6 +6,7 @@ add_library(ftlrender src/mask.cu src/screen.cu src/triangle_render.cu + src/reprojection.cu ) # These cause errors in CI build and are being removed from PCL in newer versions diff --git a/components/renderers/cpp/include/ftl/cuda/makers.hpp b/components/renderers/cpp/include/ftl/cuda/makers.hpp new file mode 100644 index 0000000000000000000000000000000000000000..7994caaca7f01cf68b9daae91019c75b7b0691c1 --- /dev/null +++ b/components/renderers/cpp/include/ftl/cuda/makers.hpp @@ -0,0 +1,66 @@ +#ifndef _FTL_CUDA_MAKERS_HPP_ +#define _FTL_CUDA_MAKERS_HPP_ + +#include <ftl/cuda_common.hpp> + +__device__ inline float4 make_float4(const uchar4 &c) { + return make_float4(c.x,c.y,c.z,c.w); +} + +__device__ inline float4 make_float4(const float4 &v) { + return v; +} + +template <typename T> +__device__ inline T make(); + +template <> +__device__ inline uchar4 make() { + return make_uchar4(0,0,0,0); +} + +template <> +__device__ inline float4 make() { + return make_float4(0.0f,0.0f,0.0f,0.0f); +} + +template <> +__device__ inline float make() { + return 0.0f; +} + +template <typename T> +__device__ inline T make(const float4 &); + +template <> +__device__ inline uchar4 make(const float4 &v) { + return make_uchar4((int)v.x, (int)v.y, (int)v.z, (int)v.w); +} + +template <> +__device__ inline float4 make(const float4 &v) { + return v; +} + +template <> +__device__ inline float make(const float4 &v) { + return v.x; +} + +template <typename T> +__device__ inline T make(const uchar4 &v); + +template <> +__device__ inline float4 make(const uchar4 &v) { + return make_float4((float)v.x, (float)v.y, (float)v.z, (float)v.w); +} + +template <typename T> +__device__ inline T make(float v); + +template <> +__device__ inline float make(float v) { + return v; +} + +#endif // _FTL_CUDA_MAKERS_HPP_ diff --git a/components/renderers/cpp/include/ftl/render/splat_render.hpp b/components/renderers/cpp/include/ftl/render/splat_render.hpp index 3b36e8ec98dd37aaf40b0e3a7e12cad6b3b5a14e..4b2d129f026bf29eceaeeebde9cf021d62ac030c 100644 --- a/components/renderers/cpp/include/ftl/render/splat_render.hpp +++ b/components/renderers/cpp/include/ftl/render/splat_render.hpp @@ -55,9 +55,13 @@ class Splatter : public ftl::render::Renderer { cudaStream_t stream_; float3 light_pos_; + //template <typename T> + //void __blendChannel(ftl::rgbd::Frame &, ftl::codecs::Channel in, ftl::codecs::Channel out, cudaStream_t); + //void _blendChannel(ftl::rgbd::Frame &, ftl::codecs::Channel in, ftl::codecs::Channel out, cudaStream_t); + template <typename T> - void __blendChannel(ftl::rgbd::Frame &, ftl::codecs::Channel in, ftl::codecs::Channel out, cudaStream_t); - void _blendChannel(ftl::rgbd::Frame &, ftl::codecs::Channel in, ftl::codecs::Channel out, cudaStream_t); + void __reprojectChannel(ftl::rgbd::Frame &, ftl::codecs::Channel in, ftl::codecs::Channel out, cudaStream_t); + void _reprojectChannel(ftl::rgbd::Frame &, ftl::codecs::Channel in, ftl::codecs::Channel out, cudaStream_t); void _dibr(cudaStream_t); }; diff --git a/components/renderers/cpp/src/reprojection.cu b/components/renderers/cpp/src/reprojection.cu new file mode 100644 index 0000000000000000000000000000000000000000..53ec03ebb0afc6df47f61a8f457856db66395864 --- /dev/null +++ b/components/renderers/cpp/src/reprojection.cu @@ -0,0 +1,154 @@ +#include <ftl/render/splat_params.hpp> +#include "splatter_cuda.hpp" +#include <ftl/rgbd/camera.hpp> +#include <ftl/cuda_common.hpp> + +#include <ftl/cuda/weighting.hpp> +#include <ftl/cuda/makers.hpp> + +#define T_PER_BLOCK 8 +#define ACCUM_DIAMETER 8 + +using ftl::cuda::TextureObject; +using ftl::render::SplatParams; +using ftl::rgbd::Camera; + +/*template <typename T> +__device__ inline T generateInput(const T &in, const SplatParams ¶ms, const float4 &worldPos) { + return in; +} + +template <> +__device__ inline uchar4 generateInput(const uchar4 &in, const SplatParams ¶ms, const float4 &worldPos) { + return (params.m_flags & ftl::render::kShowDisconMask && worldPos.w < 0.0f) ? + make_uchar4(0,0,255,255) : // Show discontinuity mask in red + in; +}*/ + +template <typename A, typename B> +__device__ inline B weightInput(const A &in, float weight) { + return in * weight; +} + +template <> +__device__ inline float4 weightInput(const uchar4 &in, float weight) { + return make_float4( + (float)in.x * weight, + (float)in.y * weight, + (float)in.z * weight, + (float)in.w * weight); +} + +template <typename T> +__device__ inline void accumulateOutput(TextureObject<T> &out, TextureObject<float> &contrib, const uint2 &pos, const T &in, float w) { + atomicAdd(&out(pos.x, pos.y), in); + atomicAdd(&contrib(pos.x, pos.y), w); +} + +template <> +__device__ inline void accumulateOutput(TextureObject<float4> &out, TextureObject<float> &contrib, const uint2 &pos, const float4 &in, float w) { + atomicAdd((float*)&out(pos.x, pos.y), in.x); + atomicAdd(((float*)&out(pos.x, pos.y))+1, in.y); + atomicAdd(((float*)&out(pos.x, pos.y))+2, in.z); + atomicAdd(((float*)&out(pos.x, pos.y))+3, in.w); + atomicAdd(&contrib(pos.x, pos.y), w); +} + +/* + * Pass 2: Accumulate attribute contributions if the points pass a visibility test. + */ + template <typename A, typename B> +__global__ void reprojection_kernel( + TextureObject<A> in, // Attribute input + TextureObject<float> depth_src, + TextureObject<int> depth_in, // Virtual depth map + TextureObject<B> out, // Accumulated output + TextureObject<float> contrib, + SplatParams params, + Camera camera, float4x4 poseInv) { + + const int x = (blockIdx.x*blockDim.x + threadIdx.x); + const int y = blockIdx.y*blockDim.y + threadIdx.y; + + const float d = (float)depth_in.tex2D((int)x, (int)y) / 1000.0f; + if (d < params.camera.minDepth || d > params.camera.maxDepth) return; + + const float3 worldPos = params.m_viewMatrixInverse * params.camera.screenToCam(x, y, d); + //if (worldPos.x == MINF || (!(params.m_flags & ftl::render::kShowDisconMask) && worldPos.w < 0.0f)) return; + + const float3 camPos = poseInv * worldPos; + if (camPos.z < camera.minDepth) return; + if (camPos.z > camera.maxDepth) return; + const uint2 screenPos = camera.camToScreen<uint2>(camPos); + + // Not on screen so stop now... + if (screenPos.x >= depth_src.width() || screenPos.y >= depth_src.height()) return; + + // Is this point near the actual surface and therefore a contributor? + + const float d2 = depth_src.tex2D((int)screenPos.x, (int)screenPos.y); + const A input = in.tex2D((int)screenPos.x, (int)screenPos.y); //generateInput(in.tex2D((int)screenPos.x, (int)screenPos.y), params, worldPos); + const float weight = 1.0f; //ftl::cuda::weighting(fabs(camPos.z - d2), 0.02f); + const B weighted = make<B>(input) * weight; //weightInput(input, weight); + + if (weight > 0.0f) { + accumulateOutput(out, contrib, make_uint2(x,y), weighted, weight); + //out(screenPos.x, screenPos.y) = input; + } +} + + +template <typename A, typename B> +void ftl::cuda::reproject( + TextureObject<A> &in, + TextureObject<float> &depth_src, // Original 3D points + TextureObject<int> &depth_in, // Virtual depth map + TextureObject<B> &out, // Accumulated output + TextureObject<float> &contrib, + const SplatParams ¶ms, + const Camera &camera, const float4x4 &poseInv, cudaStream_t stream) { + const dim3 gridSize((out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + reprojection_kernel<<<gridSize, blockSize, 0, stream>>>( + in, + depth_src, + depth_in, + out, + contrib, + params, + camera, + poseInv + ); + cudaSafeCall( cudaGetLastError() ); +} + +template void ftl::cuda::reproject( + ftl::cuda::TextureObject<uchar4> &in, // Original colour image + ftl::cuda::TextureObject<float> &depth_src, // Original 3D points + ftl::cuda::TextureObject<int> &depth_in, // Virtual depth map + ftl::cuda::TextureObject<float4> &out, // Accumulated output + ftl::cuda::TextureObject<float> &contrib, + const ftl::render::SplatParams ¶ms, + const ftl::rgbd::Camera &camera, + const float4x4 &poseInv, cudaStream_t stream); + +template void ftl::cuda::reproject( + ftl::cuda::TextureObject<float> &in, // Original colour image + ftl::cuda::TextureObject<float> &depth_src, // Original 3D points + ftl::cuda::TextureObject<int> &depth_in, // Virtual depth map + ftl::cuda::TextureObject<float> &out, // Accumulated output + ftl::cuda::TextureObject<float> &contrib, + const ftl::render::SplatParams ¶ms, + const ftl::rgbd::Camera &camera, + const float4x4 &poseInv, cudaStream_t stream); + +template void ftl::cuda::reproject( + ftl::cuda::TextureObject<float4> &in, // Original colour image + ftl::cuda::TextureObject<float> &depth_src, // Original 3D points + ftl::cuda::TextureObject<int> &depth_in, // Virtual depth map + ftl::cuda::TextureObject<float4> &out, // Accumulated output + ftl::cuda::TextureObject<float> &contrib, + const ftl::render::SplatParams ¶ms, + const ftl::rgbd::Camera &camera, + const float4x4 &poseInv, cudaStream_t stream); diff --git a/components/renderers/cpp/src/splat_render.cpp b/components/renderers/cpp/src/splat_render.cpp index 67f7620d3f7c2a4ee11b42ba9a6fdeefeef2dcf5..1de452f9304943cb8e7f7d418a519aa35a3ba99f 100644 --- a/components/renderers/cpp/src/splat_render.cpp +++ b/components/renderers/cpp/src/splat_render.cpp @@ -149,7 +149,7 @@ struct AccumSelector<float> { //static constexpr cv::Scalar value = cv::Scalar(0.0f); }; -template <typename T> +/*template <typename T> void Splatter::__blendChannel(ftl::rgbd::Frame &output, ftl::codecs::Channel in, ftl::codecs::Channel out, cudaStream_t stream) { cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream); temp_.create<GpuMat>( @@ -179,6 +179,51 @@ void Splatter::__blendChannel(ftl::rgbd::Frame &output, ftl::codecs::Channel in, ); } + ftl::cuda::dibr_normalise( + temp_.getTexture<typename AccumSelector<T>::type>(AccumSelector<T>::channel), + output.createTexture<T>(out), + temp_.getTexture<float>(Channel::Contribution), + stream + ); +}*/ + +template <typename T> +void Splatter::__reprojectChannel(ftl::rgbd::Frame &output, ftl::codecs::Channel in, ftl::codecs::Channel out, cudaStream_t stream) { + cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream); + temp_.create<GpuMat>( + AccumSelector<T>::channel, + Format<typename AccumSelector<T>::type>(params_.camera.width, params_.camera.height) + ).setTo(cv::Scalar(0.0f), cvstream); + temp_.get<GpuMat>(Channel::Contribution).setTo(cv::Scalar(0.0f), cvstream); + + temp_.createTexture<float>(Channel::Contribution); + + for (size_t i=0; i < scene_->frames.size(); ++i) { + auto &f = scene_->frames[i]; + auto *s = scene_->sources[i]; + + if (f.get<GpuMat>(in).type() == CV_8UC3) { + // Convert to 4 channel colour + auto &col = f.get<GpuMat>(in); + GpuMat tmp(col.size(), CV_8UC4); + cv::cuda::swap(col, tmp); + cv::cuda::cvtColor(tmp,col, cv::COLOR_BGR2BGRA); + } + + auto poseInv = MatrixConversion::toCUDA(s->getPose().cast<float>().inverse()); + + ftl::cuda::reproject( + f.createTexture<T>(in), + f.createTexture<float>(Channel::Depth), // TODO: Use depth? + temp_.getTexture<int>(Channel::Depth2), + temp_.createTexture<typename AccumSelector<T>::type>(AccumSelector<T>::channel), + temp_.getTexture<float>(Channel::Contribution), + params_, + s->parameters(), + poseInv, stream + ); + } + ftl::cuda::dibr_normalise( temp_.getTexture<typename AccumSelector<T>::type>(AccumSelector<T>::channel), output.createTexture<T>(out), @@ -187,7 +232,7 @@ void Splatter::__blendChannel(ftl::rgbd::Frame &output, ftl::codecs::Channel in, ); } -void Splatter::_blendChannel(ftl::rgbd::Frame &output, ftl::codecs::Channel in, ftl::codecs::Channel out, cudaStream_t stream) { +/*void Splatter::_blendChannel(ftl::rgbd::Frame &output, ftl::codecs::Channel in, ftl::codecs::Channel out, cudaStream_t stream) { int type = output.get<GpuMat>(out).type(); // == CV_32F; //ftl::rgbd::isFloatChannel(channel); switch (type) { @@ -196,6 +241,17 @@ void Splatter::_blendChannel(ftl::rgbd::Frame &output, ftl::codecs::Channel in, case CV_8UC4 : __blendChannel<uchar4>(output, in, out, stream); break; default : LOG(ERROR) << "Invalid output channel format"; } +}*/ + +void Splatter::_reprojectChannel(ftl::rgbd::Frame &output, ftl::codecs::Channel in, ftl::codecs::Channel out, cudaStream_t stream) { + int type = output.get<GpuMat>(out).type(); // == CV_32F; //ftl::rgbd::isFloatChannel(channel); + + switch (type) { + case CV_32F : __reprojectChannel<float>(output, in, out, stream); break; + case CV_32FC4 : __reprojectChannel<float4>(output, in, out, stream); break; + case CV_8UC4 : __reprojectChannel<uchar4>(output, in, out, stream); break; + default : LOG(ERROR) << "Invalid output channel format"; + } } void Splatter::_dibr(cudaStream_t stream) { @@ -253,14 +309,14 @@ void Splatter::_renderChannel( temp_.createTexture<float>(Channel::Contribution); // Generate initial normals for the splats - accum_.create<GpuMat>(Channel::Normals, Format<float4>(params_.camera.width, params_.camera.height)); - _blendChannel(accum_, Channel::Normals, Channel::Normals, stream); + //accum_.create<GpuMat>(Channel::Normals, Format<float4>(params_.camera.width, params_.camera.height)); + //_blendChannel(accum_, Channel::Normals, Channel::Normals, stream); // Put normals in camera space here... - ftl::cuda::transform_normals(accum_.getTexture<float4>(Channel::Normals), params_.m_viewMatrix.getFloat3x3(), stream); + //ftl::cuda::transform_normals(accum_.getTexture<float4>(Channel::Normals), params_.m_viewMatrix.getFloat3x3(), stream); // Estimate point density - accum_.create<GpuMat>(Channel::Density, Format<float>(params_.camera.width, params_.camera.height)); - _blendChannel(accum_, Channel::Depth, Channel::Density, stream); + //accum_.create<GpuMat>(Channel::Density, Format<float>(params_.camera.width, params_.camera.height)); + //_blendChannel(accum_, Channel::Depth, Channel::Density, stream); // FIXME: Using colour 2 in this way seems broken since it is already used if (is_4chan) { @@ -274,8 +330,10 @@ void Splatter::_renderChannel( accum_.get<GpuMat>(channel_out).setTo(cv::Scalar(0,0,0,0), cvstream); } + _reprojectChannel(out, channel_in, channel_out, stream); + //if (splat_) { - _blendChannel(accum_, channel_in, channel_out, stream); + //_blendChannel(accum_, channel_in, channel_out, stream); //} else { // _blendChannel(out, channel, channel, stream); //} @@ -313,11 +371,12 @@ void Splatter::_renderChannel( params_, stream ); }*/ - temp_.get<GpuMat>(Channel::Depth2).convertTo(out.get<GpuMat>(Channel::Depth), CV_32F, 1.0f / 1000.0f); + //temp_.get<GpuMat>(Channel::Depth2).convertTo(out.get<GpuMat>(Channel::Depth), CV_32F, 1.0f / 1000.0f); + //accum_.swapTo(Channels(channel_out), out); } else { // Swap accum frames directly to output. - accum_.swapTo(Channels(channel_out), out); - temp_.get<GpuMat>(Channel::Depth2).convertTo(out.get<GpuMat>(Channel::Depth), CV_32F, 1.0f / 1000.0f); + //accum_.swapTo(Channels(channel_out), out); + //temp_.get<GpuMat>(Channel::Depth2).convertTo(out.get<GpuMat>(Channel::Depth), CV_32F, 1.0f / 1000.0f); } } @@ -445,7 +504,9 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out) { if (chan == Channel::Depth) { + LOG(INFO) << "Copying depth"; temp_.get<GpuMat>(Channel::Depth2).convertTo(out.get<GpuMat>(Channel::Depth), CV_32F, 1.0f / 1000.0f, cvstream); + LOG(INFO) << "DEPTH COPIED"; } else if (chan == Channel::Normals) { out.create<GpuMat>(Channel::Normals, Format<float4>(camera.width, camera.height)); diff --git a/components/renderers/cpp/src/splatter.cu b/components/renderers/cpp/src/splatter.cu index 3a9270e542ee58e836410e0de598cbd4ab9e269c..e79b3d32038985743d9e657d6df6eb0aceb7231a 100644 --- a/components/renderers/cpp/src/splatter.cu +++ b/components/renderers/cpp/src/splatter.cu @@ -6,6 +6,7 @@ #include <ftl/cuda/weighting.hpp> #include <ftl/cuda/intersections.hpp> #include <ftl/cuda/warp.hpp> +#include <ftl/cuda/makers.hpp> #define T_PER_BLOCK 8 #define UPSAMPLE_FACTOR 1.8f @@ -85,65 +86,6 @@ void ftl::cuda::dibr_merge(TextureObject<float4> &points, TextureObject<float4> //============================================================================== -__device__ inline float4 make_float4(const uchar4 &c) { - return make_float4(c.x,c.y,c.z,c.w); -} - -__device__ inline float4 make_float4(const float4 &v) { - return v; -} - -template <typename T> -__device__ inline T make(); - -template <> -__device__ inline uchar4 make() { - return make_uchar4(0,0,0,0); -} - -template <> -__device__ inline float4 make() { - return make_float4(0.0f,0.0f,0.0f,0.0f); -} - -template <> -__device__ inline float make() { - return 0.0f; -} - -template <typename T> -__device__ inline T make(const float4 &); - -template <> -__device__ inline uchar4 make(const float4 &v) { - return make_uchar4((int)v.x, (int)v.y, (int)v.z, (int)v.w); -} - -template <> -__device__ inline float4 make(const float4 &v) { - return v; -} - -template <> -__device__ inline float make(const float4 &v) { - return v.x; -} - -template <typename T> -__device__ inline T make(const uchar4 &v); - -template <> -__device__ inline float4 make(const uchar4 &v) { - return make_float4((float)v.x, (float)v.y, (float)v.z, (float)v.w); -} - -template <typename T> -__device__ inline T make(float v); - -template <> -__device__ inline float make(float v) { - return v; -} /* * Pass 1b: Expand splats to full size and merge diff --git a/components/renderers/cpp/src/splatter_cuda.hpp b/components/renderers/cpp/src/splatter_cuda.hpp index fd88969931c3d37d3d47a6103f53bbebaf343ddd..2f036714a7a4c54f6932317d8d2bfb13408c8589 100644 --- a/components/renderers/cpp/src/splatter_cuda.hpp +++ b/components/renderers/cpp/src/splatter_cuda.hpp @@ -48,6 +48,17 @@ namespace cuda { ftl::cuda::TextureObject<float> &contrib, ftl::render::SplatParams ¶ms, cudaStream_t stream); + template <typename A, typename B> + void reproject( + ftl::cuda::TextureObject<A> &in, // Original colour image + ftl::cuda::TextureObject<float> &depth_src, // Original 3D points + ftl::cuda::TextureObject<int> &depth_in, // Virtual depth map + ftl::cuda::TextureObject<B> &out, // Accumulated output + ftl::cuda::TextureObject<float> &contrib, + const ftl::render::SplatParams ¶ms, + const ftl::rgbd::Camera &camera, + const float4x4 &poseInv, cudaStream_t stream); + template <typename A, typename B> void dibr_normalise( ftl::cuda::TextureObject<A> &in, diff --git a/components/renderers/cpp/src/triangle_render.cu b/components/renderers/cpp/src/triangle_render.cu index 21c565ccc8a042f45e95cf0c5e4b745b90430b74..4fa7b68acf29a49775c210107d58e056b5722702 100644 --- a/components/renderers/cpp/src/triangle_render.cu +++ b/components/renderers/cpp/src/triangle_render.cu @@ -128,6 +128,9 @@ float getZAtCoordinate(const float3 &barycentricCoord, const float (&tri)[3]) { v[1] = screen.tex2D(x+A,y); v[2] = screen.tex2D(x,y+B); + // Attempt to back face cull, but not great + //if ((v[1].x - v[0].x) * A < 0 || (v[2].y - v[0].y) * B < 0) return; + const int minX = min(v[0].x, min(v[1].x, v[2].x)); const int minY = min(v[0].y, min(v[1].y, v[2].y)); const int maxX = max(v[0].x, max(v[1].x, v[2].x)); diff --git a/components/rgbd-sources/src/frame.cpp b/components/rgbd-sources/src/frame.cpp index 212ca55375993789bb832e678fe4f1a8f22d5c48..3c9fe54d8c1177fd8ea6f2dedce87aec5c6b71b4 100644 --- a/components/rgbd-sources/src/frame.cpp +++ b/components/rgbd-sources/src/frame.cpp @@ -112,7 +112,7 @@ template<> cv::Mat& Frame::get(ftl::codecs::Channel channel) { // Add channel if not already there if (!channels_.has(channel)) { - throw ftl::exception("Frame channel does not exist"); + throw ftl::exception(ftl::Formatter() << "Frame channel does not exist: " << (int)channel); } return _get(channel).host; @@ -132,7 +132,7 @@ template<> cv::cuda::GpuMat& Frame::get(ftl::codecs::Channel channel) { // Add channel if not already there if (!channels_.has(channel)) { - throw ftl::exception("Frame channel does not exist"); + throw ftl::exception(ftl::Formatter() << "Frame channel does not exist: " << (int)channel); } return _get(channel).gpu; @@ -147,7 +147,7 @@ template<> const cv::Mat& Frame::get(ftl::codecs::Channel channel) const { LOG(FATAL) << "Getting GPU channel on CPU without explicit 'download'"; } - if (!channels_.has(channel)) throw ftl::exception("Frame channel does not exist"); + if (!channels_.has(channel)) throw ftl::exception(ftl::Formatter() << "Frame channel does not exist: " << (int)channel); return _get(channel).host; } @@ -163,7 +163,7 @@ template<> const cv::cuda::GpuMat& Frame::get(ftl::codecs::Channel channel) cons // Add channel if not already there if (!channels_.has(channel)) { - throw ftl::exception("Frame channel does not exist"); + throw ftl::exception(ftl::Formatter() << "Frame channel does not exist: " << (int)channel); } return _get(channel).gpu;