diff --git a/applications/gui/src/camera.cpp b/applications/gui/src/camera.cpp index fe99677391afe445b9a684d8b952c4994c479289..d4c85b5d2e5e72b6c6d6c296ebf2921a7d13c636 100644 --- a/applications/gui/src/camera.cpp +++ b/applications/gui/src/camera.cpp @@ -235,6 +235,7 @@ void ftl::gui::Camera::setChannel(Channel c) { channel_ = c; switch (c) { case Channel::Energy: + case Channel::Density: case Channel::Flow: case Channel::Confidence: case Channel::Normals: @@ -381,6 +382,7 @@ const GLTexture &ftl::gui::Camera::captureFrame() { visualizeEnergy(depth_, tmp, 1.0); texture_.update(tmp); break; + case Channel::Density: case Channel::Energy: if (depth_.rows == 0) { break; } visualizeEnergy(depth_, tmp, 10.0); diff --git a/applications/gui/src/media_panel.cpp b/applications/gui/src/media_panel.cpp index cb44400bb1c850669332376e0134f138b9c460f3..9aa81932da0eab2aa814846fccc9f107e176a7ff 100644 --- a/applications/gui/src/media_panel.cpp +++ b/applications/gui/src/media_panel.cpp @@ -191,6 +191,15 @@ MediaPanel::MediaPanel(ftl::gui::Screen *screen) : nanogui::Window(screen, ""), } }); + button = new Button(popup, "Density"); + button->setFlags(Button::RadioButton); + button->setCallback([this]() { + ftl::gui::Camera *cam = screen_->activeCamera(); + if (cam) { + cam->setChannel(Channel::Density); + } + }); + } MediaPanel::~MediaPanel() { diff --git a/applications/reconstruct/src/ilw/ilw.cu b/applications/reconstruct/src/ilw/ilw.cu index 831ff641b50dd7396a7282b99119a72164f6a34e..97ea74eebdc8f3b3d8aeec75f5153ce468808a33 100644 --- a/applications/reconstruct/src/ilw/ilw.cu +++ b/applications/reconstruct/src/ilw/ilw.cu @@ -195,14 +195,14 @@ __global__ void correspondence_energy_vector_kernel( const float avgcolour = totalcolour/(float)count; const float confidence = bestcolour / totalcolour; //bestcolour - avgcolour; - if (bestweight > 0.0f) { + //if (bestweight > 0.0f) { float old = conf.tex2D(x,y); - if (bestweight * confidence > old) { + if (bestweight * confidence >= old) { dout(x,y) = bestdepth; conf(x,y) = bestweight * confidence; } - } + //} } void ftl::cuda::correspondence( diff --git a/components/common/cpp/include/ftl/exception.hpp b/components/common/cpp/include/ftl/exception.hpp index 6719158bba5f5f53abfcdeb0fb39a5b5dda0d5f2..921e53493eda023a35af6f4c53b43ca2e26125d9 100644 --- a/components/common/cpp/include/ftl/exception.hpp +++ b/components/common/cpp/include/ftl/exception.hpp @@ -1,18 +1,49 @@ #ifndef _FTL_EXCEPTION_HPP_ #define _FTL_EXCEPTION_HPP_ +#include <sstream> + namespace ftl { +class Formatter { + public: + Formatter() {} + ~Formatter() {} + + template <typename Type> + inline Formatter & operator << (const Type & value) + { + stream_ << value; + return *this; + } + + inline std::string str() const { return stream_.str(); } + inline operator std::string () const { return stream_.str(); } + + enum ConvertToString + { + to_str + }; + inline std::string operator >> (ConvertToString) { return stream_.str(); } + +private: + std::stringstream stream_; + + Formatter(const Formatter &); + Formatter & operator = (Formatter &); +}; + class exception : public std::exception { public: - explicit exception(const char *msg) : msg_(msg) {}; + explicit exception(const char *msg) : msg_(msg) {} + explicit exception(const Formatter &msg) : msg_(msg.str()) {} const char * what () const throw () { - return msg_; + return msg_.c_str(); } private: - const char *msg_; + std::string msg_; }; } diff --git a/components/renderers/cpp/include/ftl/cuda/intersections.hpp b/components/renderers/cpp/include/ftl/cuda/intersections.hpp index 9cfdbc2544d9c1bd32c9f5e12a0a161f45c50d54..9008d68c66ac258d1dca651f7cc0e4a3230bdbda 100644 --- a/components/renderers/cpp/include/ftl/cuda/intersections.hpp +++ b/components/renderers/cpp/include/ftl/cuda/intersections.hpp @@ -49,8 +49,7 @@ __device__ inline bool intersectDisk(const float3 &n, const float3 &p0, float ra * @param l Normalised ray direction in camera space * @return Radius from centre of disk where intersection occurred. */ -__device__ inline float intersectDistance(const float3 &n, const float3 &p0, const float3 &l0, const float3 &l) { - float t = 0; +__device__ inline float intersectDistance(const float3 &n, const float3 &p0, const float3 &l0, const float3 &l, float &t) { if (intersectPlane(n, p0, l0, l, t)) { const float3 p = l0 + l * t; const float3 v = p - p0; diff --git a/components/renderers/cpp/include/ftl/cuda/normals.hpp b/components/renderers/cpp/include/ftl/cuda/normals.hpp index b9801743a5e505cf38a7b632127755f248e618c9..da2247723206cc9a1167ffbb0bc659ec847208c2 100644 --- a/components/renderers/cpp/include/ftl/cuda/normals.hpp +++ b/components/renderers/cpp/include/ftl/cuda/normals.hpp @@ -11,9 +11,19 @@ namespace cuda { void normals(ftl::cuda::TextureObject<float4> &output, ftl::cuda::TextureObject<float4> &temp, ftl::cuda::TextureObject<float4> &input, + int radius, + float smoothing, const ftl::rgbd::Camera &camera, const float3x3 &pose, cudaStream_t stream); +void normals(ftl::cuda::TextureObject<float4> &output, + ftl::cuda::TextureObject<float4> &temp, + ftl::cuda::TextureObject<int> &input, // Integer depth values + int radius, + float smoothing, + const ftl::rgbd::Camera &camera, + const float3x3 &pose_inv, const float3x3 &pose, cudaStream_t stream); + void normal_visualise(ftl::cuda::TextureObject<float4> &norm, ftl::cuda::TextureObject<uchar4> &output, const float3 &light, const uchar4 &diffuse, const uchar4 &ambient, @@ -24,6 +34,10 @@ void normal_filter(ftl::cuda::TextureObject<float4> &norm, const ftl::rgbd::Camera &camera, const float4x4 &pose, float thresh, cudaStream_t stream); +void transform_normals(ftl::cuda::TextureObject<float4> &norm, + const float3x3 &pose, + cudaStream_t stream); + } } diff --git a/components/renderers/cpp/include/ftl/render/splat_render.hpp b/components/renderers/cpp/include/ftl/render/splat_render.hpp index af6affb1020ee66b52796bb2cd0e26effa2d11ea..8fc0e10311668c3938b77c835ef31f8100905f64 100644 --- a/components/renderers/cpp/include/ftl/render/splat_render.hpp +++ b/components/renderers/cpp/include/ftl/render/splat_render.hpp @@ -26,7 +26,7 @@ class Splatter : public ftl::render::Renderer { //void setOutputDevice(int); protected: - void renderChannel(ftl::render::SplatParams ¶ms, ftl::rgbd::Frame &out, const ftl::rgbd::Channel &channel, cudaStream_t stream); + void _renderChannel(ftl::rgbd::Frame &out, ftl::rgbd::Channel channel_in, ftl::rgbd::Channel channel_out, cudaStream_t stream); private: int device_; @@ -40,6 +40,7 @@ class Splatter : public ftl::render::Renderer { //SplatParams params_; ftl::rgbd::Frame temp_; + ftl::rgbd::Frame accum_; ftl::rgbd::FrameSet *scene_; ftl::cuda::ClipSpace clip_; bool clipping_; @@ -50,6 +51,12 @@ class Splatter : public ftl::render::Renderer { float3 light_dir_; uchar4 light_diffuse_; uchar4 light_ambient_; + ftl::render::SplatParams params_; + + template <typename T> + void __blendChannel(ftl::rgbd::Frame &, ftl::rgbd::Channel in, ftl::rgbd::Channel out, cudaStream_t); + void _blendChannel(ftl::rgbd::Frame &, ftl::rgbd::Channel in, ftl::rgbd::Channel out, cudaStream_t); + void _dibr(cudaStream_t); }; } diff --git a/components/renderers/cpp/src/normals.cu b/components/renderers/cpp/src/normals.cu index 4d66869316bb3cc4f7f9bd56f889bf92ecc4635c..626015ce7ad9f9471079917b0678369bbb23f5cb 100644 --- a/components/renderers/cpp/src/normals.cu +++ b/components/renderers/cpp/src/normals.cu @@ -31,6 +31,38 @@ __global__ void computeNormals_kernel(ftl::cuda::TextureObject<float4> output, } } +__device__ inline bool isValid(const ftl::rgbd::Camera &camera, const float3 &d) { + return d.z >= camera.minDepth && d.z <= camera.maxDepth; +} + +__global__ void computeNormals_kernel(ftl::cuda::TextureObject<float4> output, + ftl::cuda::TextureObject<int> input, ftl::rgbd::Camera camera, float3x3 pose) { + const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; + const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; + + if(x >= input.width() || y >= input.height()) return; + + output(x,y) = make_float4(0, 0, 0, 0); + + if(x > 0 && x < input.width()-1 && y > 0 && y < input.height()-1) { + const float3 CC = camera.screenToCam(x+0, y+0, (float)input.tex2D((int)x+0, (int)y+0) / 1000.0f); + const float3 PC = camera.screenToCam(x+0, y+1, (float)input.tex2D((int)x+0, (int)y+1) / 1000.0f); + const float3 CP = camera.screenToCam(x+1, y+0, (float)input.tex2D((int)x+1, (int)y+0) / 1000.0f); + const float3 MC = camera.screenToCam(x+0, y-1, (float)input.tex2D((int)x+0, (int)y-1) / 1000.0f); + const float3 CM = camera.screenToCam(x-1, y+0, (float)input.tex2D((int)x-1, (int)y+0) / 1000.0f); + + //if(CC.z < && PC.x != MINF && CP.x != MINF && MC.x != MINF && CM.x != MINF) { + if (isValid(camera,CC) && isValid(camera,PC) && isValid(camera,CP) && isValid(camera,MC) && isValid(camera,CM)) { + const float3 n = cross(PC-MC, CP-CM); + const float l = length(n); + + if(l > 0.0f) { + output(x,y) = make_float4((n/-l), 1.0f); + } + } + } +} + template <int RADIUS> __global__ void smooth_normals_kernel(ftl::cuda::TextureObject<float4> norms, ftl::cuda::TextureObject<float4> output, @@ -74,9 +106,54 @@ __global__ void smooth_normals_kernel(ftl::cuda::TextureObject<float4> norms, output(x,y) = (contrib > 0.0f) ? make_float4(nsum, dot(nsum, ray)) : make_float4(0.0f); } +template <int RADIUS> +__global__ void smooth_normals_kernel(ftl::cuda::TextureObject<float4> norms, + ftl::cuda::TextureObject<float4> output, + ftl::cuda::TextureObject<int> depth, + ftl::rgbd::Camera camera, float3x3 pose, float smoothing) { + const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; + const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; + + if(x >= depth.width() || y >= depth.height()) return; + + const float3 p0 = camera.screenToCam(x,y, (float)depth.tex2D((int)x,(int)y) / 1000.0f); + float3 nsum = make_float3(0.0f); + float contrib = 0.0f; + + if (p0.z < camera.minDepth || p0.z > camera.maxDepth) return; + + for (int v=-RADIUS; v<=RADIUS; ++v) { + for (int u=-RADIUS; u<=RADIUS; ++u) { + const float3 p = camera.screenToCam(x+u,y+v, (float)depth.tex2D((int)x+u,(int)y+v) / 1000.0f); + if (p.z < camera.minDepth || p.z > camera.maxDepth) continue; + const float s = ftl::cuda::spatialWeighting(p0, p, smoothing); + //const float s = 1.0f; + + //if (s > 0.0f) { + const float4 n = norms.tex2D((int)x+u,(int)y+v); + if (n.w > 0.0f) { + nsum += make_float3(n) * s; + contrib += s; + } + //} + } + } + + // Compute dot product of normal with camera to obtain measure of how + // well this point faces the source camera, a measure of confidence + float3 ray = camera.screenToCam(x, y, 1.0f); + ray = ray / length(ray); + nsum /= contrib; + nsum /= length(nsum); + + output(x,y) = (contrib > 0.0f) ? make_float4(pose*nsum, 1.0f) : make_float4(0.0f); +} + void ftl::cuda::normals(ftl::cuda::TextureObject<float4> &output, ftl::cuda::TextureObject<float4> &temp, - ftl::cuda::TextureObject<float4> &input, + ftl::cuda::TextureObject<float4> &input, + int radius, + float smoothing, const ftl::rgbd::Camera &camera, const float3x3 &pose,cudaStream_t stream) { const dim3 gridSize((input.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (input.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); @@ -85,7 +162,12 @@ void ftl::cuda::normals(ftl::cuda::TextureObject<float4> &output, computeNormals_kernel<<<gridSize, blockSize, 0, stream>>>(temp, input); cudaSafeCall( cudaGetLastError() ); - smooth_normals_kernel<3><<<gridSize, blockSize, 0, stream>>>(temp, output, input, camera, pose, 0.04f); + switch (radius) { + case 9: smooth_normals_kernel<9><<<gridSize, blockSize, 0, stream>>>(temp, output, input, camera, pose, smoothing); break; + case 7: smooth_normals_kernel<7><<<gridSize, blockSize, 0, stream>>>(temp, output, input, camera, pose, smoothing); break; + case 5: smooth_normals_kernel<5><<<gridSize, blockSize, 0, stream>>>(temp, output, input, camera, pose, smoothing); break; + case 3: smooth_normals_kernel<3><<<gridSize, blockSize, 0, stream>>>(temp, output, input, camera, pose, smoothing); break; + } cudaSafeCall( cudaGetLastError() ); #ifdef _DEBUG @@ -94,6 +176,32 @@ void ftl::cuda::normals(ftl::cuda::TextureObject<float4> &output, #endif } +void ftl::cuda::normals(ftl::cuda::TextureObject<float4> &output, + ftl::cuda::TextureObject<float4> &temp, + ftl::cuda::TextureObject<int> &input, + int radius, + float smoothing, + const ftl::rgbd::Camera &camera, + const float3x3 &pose_inv, const float3x3 &pose,cudaStream_t stream) { + const dim3 gridSize((input.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (input.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + computeNormals_kernel<<<gridSize, blockSize, 0, stream>>>(temp, input, camera, pose); + cudaSafeCall( cudaGetLastError() ); + + switch (radius) { + case 7: smooth_normals_kernel<7><<<gridSize, blockSize, 0, stream>>>(temp, output, input, camera, pose, smoothing); + case 5: smooth_normals_kernel<5><<<gridSize, blockSize, 0, stream>>>(temp, output, input, camera, pose, smoothing); + case 3: smooth_normals_kernel<3><<<gridSize, blockSize, 0, stream>>>(temp, output, input, camera, pose, smoothing); + } + cudaSafeCall( cudaGetLastError() ); + + #ifdef _DEBUG + cudaSafeCall(cudaDeviceSynchronize()); + //cutilCheckMsg(__FUNCTION__); + #endif +} + //============================================================================== __global__ void vis_normals_kernel(ftl::cuda::TextureObject<float4> norm, @@ -179,3 +287,33 @@ void ftl::cuda::normal_filter(ftl::cuda::TextureObject<float4> &norm, //cutilCheckMsg(__FUNCTION__); #endif } + +//============================================================================== + +__global__ void transform_normals_kernel(ftl::cuda::TextureObject<float4> norm, + float3x3 pose) { + const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; + const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; + + if(x >= norm.width() || y >= norm.height()) return; + + float3 normal = pose * make_float3(norm.tex2D((int)x,(int)y)); + normal /= length(normal); + norm(x,y) = make_float4(normal, 0.0f); +} + +void ftl::cuda::transform_normals(ftl::cuda::TextureObject<float4> &norm, + const float3x3 &pose, + cudaStream_t stream) { + + const dim3 gridSize((norm.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (norm.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + transform_normals_kernel<<<gridSize, blockSize, 0, stream>>>(norm, pose); + + cudaSafeCall( cudaGetLastError() ); + #ifdef _DEBUG + cudaSafeCall(cudaDeviceSynchronize()); + //cutilCheckMsg(__FUNCTION__); + #endif +} diff --git a/components/renderers/cpp/src/splat_render.cpp b/components/renderers/cpp/src/splat_render.cpp index 1ccaf156cc31d7ba2097364e627d6d93500d293a..9b597e90e1af60d28b789f708392961b083e7436 100644 --- a/components/renderers/cpp/src/splat_render.cpp +++ b/components/renderers/cpp/src/splat_render.cpp @@ -126,23 +126,73 @@ Splatter::~Splatter() { } -void Splatter::renderChannel( - ftl::render::SplatParams ¶ms, ftl::rgbd::Frame &out, - const Channel &channel, cudaStream_t stream) -{ - if (channel == Channel::None) return; +template <typename T> +struct AccumSelector { + typedef float4 type; + static constexpr Channel channel = Channel::Colour; + //static constexpr cv::Scalar value = cv::Scalar(0.0f,0.0f,0.0f,0.0f); +}; + +template <> +struct AccumSelector<float> { + typedef float type; + static constexpr Channel channel = Channel::Colour2; + //static constexpr cv::Scalar value = cv::Scalar(0.0f); +}; + +template <typename T> +void Splatter::__blendChannel(ftl::rgbd::Frame &output, ftl::rgbd::Channel in, ftl::rgbd::Channel out, cudaStream_t stream) { cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream); - temp_.get<GpuMat>(Channel::Depth).setTo(cv::Scalar(0x7FFFFFFF), cvstream); - temp_.get<GpuMat>(Channel::Depth2).setTo(cv::Scalar(0x7FFFFFFF), cvstream); - //temp_.get<GpuMat>(Channel::Colour).setTo(cv::Scalar(0.0f,0.0f,0.0f,0.0f), cvstream); - //temp_.get<GpuMat>(Channel::Contribution).setTo(cv::Scalar(0.0f), cvstream); + temp_.create<GpuMat>( + AccumSelector<T>::channel, + Format<typename AccumSelector<T>::type>(params_.camera.width, params_.camera.height) + ).setTo(cv::Scalar(0.0f), cvstream); + temp_.get<GpuMat>(Channel::Contribution).setTo(cv::Scalar(0.0f), cvstream); - if (scene_->frames.size() < 1) return; - bool is_float = out.get<GpuMat>(channel).type() == CV_32F; //ftl::rgbd::isFloatChannel(channel); - bool is_4chan = out.get<GpuMat>(channel).type() == CV_32FC4; + temp_.createTexture<float>(Channel::Contribution); + + for (auto &f : scene_->frames) { + if (f.get<GpuMat>(in).type() == CV_8UC3) { + // Convert to 4 channel colour + auto &col = f.get<GpuMat>(in); + GpuMat tmp(col.size(), CV_8UC4); + cv::cuda::swap(col, tmp); + cv::cuda::cvtColor(tmp,col, cv::COLOR_BGR2BGRA); + } + + ftl::cuda::dibr_attribute( + f.createTexture<T>(in), + f.createTexture<float4>(Channel::Points), + temp_.getTexture<int>(Channel::Depth2), + temp_.createTexture<typename AccumSelector<T>::type>(AccumSelector<T>::channel), + temp_.getTexture<float>(Channel::Contribution), + params_, stream + ); + } + + ftl::cuda::dibr_normalise( + temp_.getTexture<typename AccumSelector<T>::type>(AccumSelector<T>::channel), + output.createTexture<T>(out), + temp_.getTexture<float>(Channel::Contribution), + stream + ); +} + +void Splatter::_blendChannel(ftl::rgbd::Frame &output, ftl::rgbd::Channel in, ftl::rgbd::Channel out, cudaStream_t stream) { + int type = output.get<GpuMat>(out).type(); // == CV_32F; //ftl::rgbd::isFloatChannel(channel); - // Render each camera into virtual view - // TODO: Move out of renderChannel, this is a common step to all channels + switch (type) { + case CV_32F : __blendChannel<float>(output, in, out, stream); break; + case CV_32FC4 : __blendChannel<float4>(output, in, out, stream); break; + case CV_8UC4 : __blendChannel<uchar4>(output, in, out, stream); break; + default : LOG(ERROR) << "Invalid output channel format"; + } +} + +void Splatter::_dibr(cudaStream_t stream) { + cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream); + temp_.get<GpuMat>(Channel::Depth2).setTo(cv::Scalar(0x7FFFFFFF), cvstream); + for (size_t i=0; i < scene_->frames.size(); ++i) { auto &f = scene_->frames[i]; auto *s = scene_->sources[i]; @@ -156,111 +206,91 @@ void Splatter::renderChannel( f.createTexture<float4>(Channel::Points), f.createTexture<float4>(Channel::Normals), temp_.createTexture<int>(Channel::Depth2), - params, backcull_, stream + params_, backcull_, stream ); //LOG(INFO) << "DIBR DONE"; } +} - //temp_.createTexture<float4>(Channel::Colour); - //temp_.createTexture<float>(Channel::Contribution); - out.create<GpuMat>(Channel::Normals, Format<float4>(params.camera.width, params.camera.height)); +void Splatter::_renderChannel( + ftl::rgbd::Frame &out, + Channel channel_in, Channel channel_out, cudaStream_t stream) +{ + if (channel_out == Channel::None || channel_in == Channel::None) return; + cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream); - // Create normals first - for (auto &f : scene_->frames) { - ftl::cuda::dibr_attribute( - f.createTexture<float4>(Channel::Normals), - f.createTexture<float4>(Channel::Points), - temp_.getTexture<int>(Channel::Depth2), - out.createTexture<float4>(Channel::Normals), - params, stream - ); - } + if (scene_->frames.size() < 1) return; + bool is_float = out.get<GpuMat>(channel_out).type() == CV_32F; //ftl::rgbd::isFloatChannel(channel); + bool is_4chan = out.get<GpuMat>(channel_out).type() == CV_32FC4; + + + temp_.createTexture<float4>(Channel::Colour); + temp_.createTexture<float>(Channel::Contribution); + + // Generate initial normals for the splats + accum_.create<GpuMat>(Channel::Normals, Format<float4>(params_.camera.width, params_.camera.height)); + _blendChannel(accum_, Channel::Normals, Channel::Normals, stream); + // Put normals in camera space here... + ftl::cuda::transform_normals(accum_.getTexture<float4>(Channel::Normals), params_.m_viewMatrix.getFloat3x3(), stream); - //temp_.get<GpuMat>(Channel::Colour).setTo(cv::Scalar(0.0f,0.0f,0.0f,0.0f), cvstream); - //temp_.get<GpuMat>(Channel::Contribution).setTo(cv::Scalar(0.0f), cvstream); + // Estimate point density + accum_.create<GpuMat>(Channel::Density, Format<float>(params_.camera.width, params_.camera.height)); + _blendChannel(accum_, Channel::Depth, Channel::Density, stream); + // FIXME: Using colour 2 in this way seems broken since it is already used if (is_4chan) { - temp_.create<GpuMat>(Channel::Colour2, Format<float4>(params.camera.width, params.camera.height)); - temp_.get<GpuMat>(Channel::Colour2).setTo(cv::Scalar(0.0f,0.0f,0.0f,0.0f), cvstream); + accum_.create<GpuMat>(channel_out, Format<float4>(params_.camera.width, params_.camera.height)); + accum_.get<GpuMat>(channel_out).setTo(cv::Scalar(0.0f,0.0f,0.0f,0.0f), cvstream); } else if (is_float) { - temp_.create<GpuMat>(Channel::Colour2, Format<float>(params.camera.width, params.camera.height)); - temp_.get<GpuMat>(Channel::Colour2).setTo(cv::Scalar(0.0f), cvstream); + accum_.create<GpuMat>(channel_out, Format<float>(params_.camera.width, params_.camera.height)); + accum_.get<GpuMat>(channel_out).setTo(cv::Scalar(0.0f), cvstream); } else { - temp_.create<GpuMat>(Channel::Colour2, Format<uchar4>(params.camera.width, params.camera.height)); - temp_.get<GpuMat>(Channel::Colour2).setTo(cv::Scalar(0,0,0,0), cvstream); + accum_.create<GpuMat>(channel_out, Format<uchar4>(params_.camera.width, params_.camera.height)); + accum_.get<GpuMat>(channel_out).setTo(cv::Scalar(0,0,0,0), cvstream); } - // Create attribute first - for (auto &f : scene_->frames) { - // Convert colour from BGR to BGRA if needed - if (f.get<GpuMat>(channel).type() == CV_8UC3) { - // Convert to 4 channel colour - auto &col = f.get<GpuMat>(Channel::Colour); - GpuMat tmp(col.size(), CV_8UC4); - cv::cuda::swap(col, tmp); - cv::cuda::cvtColor(tmp,col, cv::COLOR_BGR2BGRA); - } - - if (is_4chan) { - ftl::cuda::dibr_attribute( - f.createTexture<float4>(channel), - f.createTexture<float4>(Channel::Points), - temp_.getTexture<int>(Channel::Depth2), - (splat_) ? temp_.createTexture<float4>(Channel::Colour2) : out.createTexture<float4>(channel), - params, stream - ); - } else if (is_float) { - ftl::cuda::dibr_attribute( - f.createTexture<float>(channel), - f.createTexture<float4>(Channel::Points), - temp_.getTexture<int>(Channel::Depth2), - (splat_) ? temp_.createTexture<float>(Channel::Colour2) : out.createTexture<float>(channel), - params, stream - ); - } else { - ftl::cuda::dibr_attribute( - f.createTexture<uchar4>(channel), - f.createTexture<float4>(Channel::Points), - temp_.getTexture<int>(Channel::Depth2), - (splat_) ? temp_.createTexture<uchar4>(Channel::Colour2) : out.createTexture<uchar4>(channel), - params, stream - ); - } - } - - //out.get<GpuMat>(Channel::Left).setTo(cv::Scalar(0,0,0,0), cvstream); + //if (splat_) { + _blendChannel(accum_, channel_in, channel_out, stream); + //} else { + // _blendChannel(out, channel, channel, stream); + //} // Now splat the points if (splat_) { if (is_4chan) { ftl::cuda::splat( - out.getTexture<float4>(Channel::Normals), - temp_.getTexture<float4>(Channel::Colour2), + accum_.getTexture<float4>(Channel::Normals), + accum_.getTexture<float>(Channel::Density), + accum_.getTexture<float4>(channel_out), temp_.getTexture<int>(Channel::Depth2), out.createTexture<float>(Channel::Depth), - out.createTexture<float4>(channel), - params, stream + out.createTexture<float4>(channel_out), + params_, stream ); } else if (is_float) { ftl::cuda::splat( - out.getTexture<float4>(Channel::Normals), - temp_.getTexture<float>(Channel::Colour2), + accum_.getTexture<float4>(Channel::Normals), + accum_.getTexture<float>(Channel::Density), + accum_.getTexture<float>(channel_out), temp_.getTexture<int>(Channel::Depth2), out.createTexture<float>(Channel::Depth), - out.createTexture<float>(channel), - params, stream + out.createTexture<float>(channel_out), + params_, stream ); } else { ftl::cuda::splat( - out.getTexture<float4>(Channel::Normals), - temp_.getTexture<uchar4>(Channel::Colour2), + accum_.getTexture<float4>(Channel::Normals), + accum_.getTexture<float>(Channel::Density), + accum_.getTexture<uchar4>(channel_out), temp_.getTexture<int>(Channel::Depth2), out.createTexture<float>(Channel::Depth), - out.createTexture<uchar4>(channel), - params, stream + out.createTexture<uchar4>(channel_out), + params_, stream ); } + } else { + // Swap accum frames directly to output. } } @@ -277,8 +307,8 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cuda out.create<GpuMat>(Channel::Colour, Format<uchar4>(camera.width, camera.height)); // FIXME: Use source resolutions, not virtual resolution - //temp_.create<GpuMat>(Channel::Colour, Format<float4>(camera.width, camera.height)); - //temp_.create<GpuMat>(Channel::Contribution, Format<float>(camera.width, camera.height)); + temp_.create<GpuMat>(Channel::Colour, Format<float4>(camera.width, camera.height)); + temp_.create<GpuMat>(Channel::Contribution, Format<float>(camera.width, camera.height)); temp_.create<GpuMat>(Channel::Depth, Format<int>(camera.width, camera.height)); temp_.create<GpuMat>(Channel::Depth2, Format<int>(camera.width, camera.height)); temp_.create<GpuMat>(Channel::Normals, Format<float4>(camera.width, camera.height)); @@ -286,7 +316,7 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cuda cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream); // Parameters object to pass to CUDA describing the camera - SplatParams params; + SplatParams ¶ms = params_; params.m_flags = 0; if (value("show_discontinuity_mask", false)) params.m_flags |= ftl::render::kShowDisconMask; if (value("normal_weight_colours", true)) params.m_flags |= ftl::render::kNormalWeightColours; @@ -302,6 +332,7 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cuda //LOG(INFO) << "Render ready: " << camera.width << "," << camera.height; temp_.createTexture<int>(Channel::Depth); + //temp_.get<GpuMat>(Channel::Normals).setTo(cv::Scalar(0.0f,0.0f,0.0f,0.0f), cvstream); // First make sure each input has normals temp_.createTexture<float4>(Channel::Normals); @@ -332,7 +363,9 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cuda auto &g = f.get<GpuMat>(Channel::Colour); ftl::cuda::normals(f.createTexture<float4>(Channel::Normals, Format<float4>(g.cols, g.rows)), temp_.getTexture<float4>(Channel::Normals), // FIXME: Uses assumption of vcam res same as input res - f.getTexture<float4>(Channel::Points), s->parameters(), pose.getFloat3x3(), stream); + f.getTexture<float4>(Channel::Points), + 3, 0.04f, + s->parameters(), pose.getFloat3x3(), stream); if (norm_filter_ > -0.1f) { ftl::cuda::normal_filter(f.getTexture<float4>(Channel::Normals), f.getTexture<float4>(Channel::Points), s->parameters(), pose, norm_filter_, stream); @@ -340,7 +373,8 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cuda } } - renderChannel(params, out, Channel::Colour, stream); + _dibr(stream); + _renderChannel(out, Channel::Colour, Channel::Colour, stream); Channel chan = src->getChannel(); if (chan == Channel::Depth) @@ -350,12 +384,12 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cuda out.create<GpuMat>(Channel::Normals, Format<float4>(camera.width, camera.height)); // Render normal attribute - renderChannel(params, out, Channel::Normals, stream); + _renderChannel(out, Channel::Normals, Channel::Normals, stream); // Convert normal to single float value temp_.create<GpuMat>(Channel::Colour, Format<uchar4>(camera.width, camera.height)); ftl::cuda::normal_visualise(out.getTexture<float4>(Channel::Normals), temp_.createTexture<uchar4>(Channel::Colour), - make_float3(-0.3f, 0.2f, 1.0f), + make_float3(0.3f, 0.2f, 1.0f), light_diffuse_, light_ambient_, stream); @@ -367,6 +401,11 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cuda //{ // cv::cuda::swap(temp_.get<GpuMat>(Channel::Contribution), out.create<GpuMat>(Channel::Contribution)); //} + else if (chan == Channel::Density) { + out.create<GpuMat>(chan, Format<float>(camera.width, camera.height)); + out.get<GpuMat>(chan).setTo(cv::Scalar(0.0f), cvstream); + _renderChannel(out, Channel::Depth, Channel::Density, stream); + } else if (chan == Channel::Right) { Eigen::Affine3f transform(Eigen::Translation3f(camera.baseline,0.0f,0.0f)); @@ -376,7 +415,9 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cuda out.create<GpuMat>(Channel::Right, Format<uchar4>(camera.width, camera.height)); out.get<GpuMat>(Channel::Right).setTo(background_, cvstream); - renderChannel(params, out, Channel::Right, stream); + + _dibr(stream); // Need to re-dibr due to pose change + _renderChannel(out, Channel::Right, Channel::Right, stream); } else if (chan != Channel::None) { if (ftl::rgbd::isFloatChannel(chan)) { out.create<GpuMat>(chan, Format<float>(camera.width, camera.height)); @@ -385,7 +426,7 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cuda out.create<GpuMat>(chan, Format<uchar4>(camera.width, camera.height)); out.get<GpuMat>(chan).setTo(background_, cvstream); } - renderChannel(params, out, chan, stream); + _renderChannel(out, chan, chan, stream); } return true; diff --git a/components/renderers/cpp/src/splatter.cu b/components/renderers/cpp/src/splatter.cu index 3567d5c07dcdc819b93989992069b3244149855f..cd69098ea6f80c269f66deb935e652ffc33398c8 100644 --- a/components/renderers/cpp/src/splatter.cu +++ b/components/renderers/cpp/src/splatter.cu @@ -127,13 +127,30 @@ __device__ inline float make(const float4 &v) { return v.x; } +template <typename T> +__device__ inline T make(const uchar4 &v); + +template <> +__device__ inline float4 make(const uchar4 &v) { + return make_float4((float)v.x, (float)v.y, (float)v.z, (float)v.w); +} + +template <typename T> +__device__ inline T make(float v); + +template <> +__device__ inline float make(float v) { + return v; +} + /* * Pass 1b: Expand splats to full size and merge */ - template <int SEARCH_DIAMETER, typename T> + template <int SEARCH_RADIUS, typename T> __global__ void splat_kernel( //TextureObject<float4> points, // Original 3D points TextureObject<float4> normals, + TextureObject<float> density, TextureObject<T> in, TextureObject<int> depth_in, // Virtual depth map TextureObject<float> depth_out, // Accumulated output @@ -144,59 +161,64 @@ __device__ inline float make(const float4 &v) { //const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam]; - const int tid = (threadIdx.x + threadIdx.y * blockDim.x); + //const int tid = (threadIdx.x + threadIdx.y * blockDim.x); //const int warp = tid / WARP_SIZE; - const int x = (blockIdx.x*blockDim.x + threadIdx.x) / WARP_SIZE; + const int x = blockIdx.x*blockDim.x + threadIdx.x; const int y = blockIdx.y*blockDim.y + threadIdx.y; if (x < 0 || y < 0 || x >= depth_in.width() || y >= depth_in.height()) return; - const float3 origin = params.m_viewMatrixInverse * make_float3(0.0f); + //const float3 origin = params.m_viewMatrixInverse * make_float3(0.0f); float3 ray = params.camera.screenToCam(x,y,1.0f); ray = ray / length(ray); const float scale = ray.z; - ray = params.m_viewMatrixInverse.getFloat3x3() * ray; + //ray = params.m_viewMatrixInverse.getFloat3x3() * ray; //float depth = 0.0f; //float contrib = 0.0f; float depth = 1000.0f; + //float pdepth = 1000.0f; struct Result { float weight; float depth; - T attr; }; - Result results[(SEARCH_DIAMETER*SEARCH_DIAMETER) / WARP_SIZE]; + Result results[2*SEARCH_RADIUS+1][2*SEARCH_RADIUS+1]; // Each thread in warp takes an upsample point and updates corresponding depth buffer. - const int lane = tid % WARP_SIZE; - for (int i=lane; i<SEARCH_DIAMETER*SEARCH_DIAMETER; i+=WARP_SIZE) { - const float u = (i % SEARCH_DIAMETER) - (SEARCH_DIAMETER / 2); - const float v = (i / SEARCH_DIAMETER) - (SEARCH_DIAMETER / 2); + //const int lane = tid % WARP_SIZE; + //for (int i=lane; i<SEARCH_DIAMETER*SEARCH_DIAMETER; i+=WARP_SIZE) { + // const float u = (i % SEARCH_DIAMETER) - (SEARCH_DIAMETER / 2); + // const float v = (i / SEARCH_DIAMETER) - (SEARCH_DIAMETER / 2); + for (int v=-SEARCH_RADIUS; v<=SEARCH_RADIUS; ++v) { + for (int u=-SEARCH_RADIUS; u<=SEARCH_RADIUS; ++u) { - results[i/WARP_SIZE] = {0.0f, 0.0f, make<T>()}; + results[v+SEARCH_RADIUS][u+SEARCH_RADIUS] = {0.0f, 1000.0f}; // Use the depth buffer to determine this pixels 3D position in camera space const float d = ((float)depth_in.tex2D(x+u, y+v)/1000.0f); + const float3 n = make_float3(normals.tex2D((int)(x)+u, (int)(y)+v)); + const float dens = density.tex2D((int)(x)+u, (int)(y)+v); + if (d < params.camera.minDepth || d > params.camera.maxDepth) continue; - const float3 camPos = params.camera.screenToCam((int)(x+u),(int)(y+v),d); - const float3 camPos2 = params.camera.screenToCam((int)(x),(int)(y),d); - const float3 worldPos = params.m_viewMatrixInverse * camPos; + const float3 camPos = params.camera.screenToCam((int)(x)+u,(int)(y)+v,d); + //const float3 camPos2 = params.camera.screenToCam((int)(x),(int)(y),d); + //const float3 worldPos = params.m_viewMatrixInverse * camPos; - // Assumed to be normalised - float4 n = normals.tex2D((int)(x+u), (int)(y+v)); + + //if (length(make_float3(n)) == 0.0f) printf("BAD NORMAL\n"); // Does the ray intersect plane of splat? float t = 1000.0f; - if (ftl::cuda::intersectPlane(make_float3(n), worldPos, origin, ray, t)) { //} && fabs(t-camPos.z) < 0.01f) { + const float r = ftl::cuda::intersectDistance(n, camPos, make_float3(0.0f), ray, t); + //if (r != PINF) { //} && fabs(t-camPos.z) < 0.01f) { // Adjust from normalised ray back to original meters units t *= scale; - const float3 camPos3 = params.camera.screenToCam((int)(x),(int)(y),t); - float weight = ftl::cuda::spatialWeighting(camPos, camPos3, 2.0f*(camPos3.z/params.camera.fx)); + float weight = ftl::cuda::weighting(r, dens/params.camera.fx); // (1.0f/params.camera.fx) / (t/params.camera.fx) /* Buehler C. et al. 2001. Unstructured Lumigraph Rendering. */ /* Orts-Escolano S. et al. 2016. Holoportation: Virtual 3D teleportation in real-time. */ @@ -204,39 +226,45 @@ __device__ inline float make(const float4 &v) { // enough for our purposes if the alignment step prevents ghosting // TODO: Use depth and perhaps the neighbourhood consistency in: // Kuster C. et al. 2011. FreeCam: A hybrid camera system for interactive free-viewpoint video - if (params.m_flags & ftl::render::kNormalWeightColours) weight *= n.w * n.w; + //if (params.m_flags & ftl::render::kNormalWeightColours) weight *= n.w * n.w; //if (params.m_flags & ftl::render::kDepthWeightColours) weight *= ??? if (weight <= 0.0f) continue; depth = min(depth, t); - results[i/WARP_SIZE] = {weight, t, in.tex2D((int)x+u, (int)y+v)}; - } + results[v+SEARCH_RADIUS][u+SEARCH_RADIUS] = {weight, t}; + //} + } } - depth = warpMin(depth); + //depth = warpMin(depth); + //pdepth = warpMin(pdepth); float adepth = 0.0f; float contrib = 0.0f; float4 attr = make_float4(0.0f); // Loop over results array - for (int i=0; i<(SEARCH_DIAMETER*SEARCH_DIAMETER) / WARP_SIZE; ++i) { - if (results[i].depth - depth < 0.04f) { - adepth += results[i].depth * results[i].weight; - attr += make_float4(results[i].attr) * results[i].weight; - contrib += results[i].weight; - } + for (int v=-SEARCH_RADIUS; v<=SEARCH_RADIUS; ++v) { + for (int u=-SEARCH_RADIUS; u<=SEARCH_RADIUS; ++u) { + auto &result = results[v+SEARCH_RADIUS][u+SEARCH_RADIUS]; + float s = ftl::cuda::weighting(fabs(result.depth - depth), 0.04f); + //if (result.depth - depth < 0.04f) { + adepth += result.depth * result.weight * s; + attr += make_float4(in.tex2D((int)x+u, (int)y+v)) * result.weight * s; + contrib += result.weight * s; + //} + } } // Sum all attributes and contributions - adepth = warpSum(adepth); - attr.x = warpSum(attr.x); - attr.y = warpSum(attr.y); - attr.z = warpSum(attr.z); - contrib = warpSum(contrib); + //adepth = warpSum(adepth); + //attr.x = warpSum(attr.x); + //attr.y = warpSum(attr.y); + //attr.z = warpSum(attr.z); + //contrib = warpSum(contrib); - if (lane == 0 && contrib > 0.0f) { + if (contrib > 0.0f) { depth_out(x,y) = adepth / contrib; out(x,y) = make<T>(attr / contrib); } @@ -245,16 +273,18 @@ __device__ inline float make(const float4 &v) { template <typename T> void ftl::cuda::splat( TextureObject<float4> &normals, + TextureObject<float> &density, TextureObject<T> &colour_in, TextureObject<int> &depth_in, // Virtual depth map TextureObject<float> &depth_out, TextureObject<T> &colour_out, const SplatParams ¶ms, cudaStream_t stream) { - const dim3 gridSize((depth_in.width() + 2 - 1)/2, (depth_in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); - const dim3 blockSize(2*WARP_SIZE, T_PER_BLOCK); + const dim3 gridSize((depth_in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); - splat_kernel<8,T><<<gridSize, blockSize, 0, stream>>>( + splat_kernel<4,T><<<gridSize, blockSize, 0, stream>>>( normals, + density, colour_in, depth_in, depth_out, @@ -266,6 +296,7 @@ void ftl::cuda::splat( template void ftl::cuda::splat<uchar4>( TextureObject<float4> &normals, + TextureObject<float> &density, TextureObject<uchar4> &colour_in, TextureObject<int> &depth_in, // Virtual depth map TextureObject<float> &depth_out, @@ -273,7 +304,8 @@ template void ftl::cuda::splat<uchar4>( const SplatParams ¶ms, cudaStream_t stream); template void ftl::cuda::splat<float4>( - TextureObject<float4> &normals, + TextureObject<float4> &normals, + TextureObject<float> &density, TextureObject<float4> &colour_in, TextureObject<int> &depth_in, // Virtual depth map TextureObject<float> &depth_out, @@ -281,7 +313,8 @@ template void ftl::cuda::splat<float4>( const SplatParams ¶ms, cudaStream_t stream); template void ftl::cuda::splat<float>( - TextureObject<float4> &normals, + TextureObject<float4> &normals, + TextureObject<float> &density, TextureObject<float> &colour_in, TextureObject<int> &depth_in, // Virtual depth map TextureObject<float> &depth_out, @@ -302,15 +335,45 @@ __device__ inline uchar4 generateInput(const uchar4 &in, const SplatParams ¶ in; } +template <typename A, typename B> +__device__ inline B weightInput(const A &in, float weight) { + return in * weight; +} + +template <> +__device__ inline float4 weightInput(const uchar4 &in, float weight) { + return make_float4( + (float)in.x * weight, + (float)in.y * weight, + (float)in.z * weight, + (float)in.w * weight); +} + +template <typename T> +__device__ inline void accumulateOutput(TextureObject<T> &out, TextureObject<float> &contrib, const uint2 &pos, const T &in, float w) { + atomicAdd(&out(pos.x, pos.y), in); + atomicAdd(&contrib(pos.x, pos.y), w); +} + +template <> +__device__ inline void accumulateOutput(TextureObject<float4> &out, TextureObject<float> &contrib, const uint2 &pos, const float4 &in, float w) { + atomicAdd((float*)&out(pos.x, pos.y), in.x); + atomicAdd(((float*)&out(pos.x, pos.y))+1, in.y); + atomicAdd(((float*)&out(pos.x, pos.y))+2, in.z); + atomicAdd(((float*)&out(pos.x, pos.y))+3, in.w); + atomicAdd(&contrib(pos.x, pos.y), w); +} + /* * Pass 2: Accumulate attribute contributions if the points pass a visibility test. */ - template <typename T> + template <typename A, typename B> __global__ void dibr_attribute_contrib_kernel( - TextureObject<T> in, // Attribute input + TextureObject<A> in, // Attribute input TextureObject<float4> points, // Original 3D points TextureObject<int> depth_in, // Virtual depth map - TextureObject<T> out, // Accumulated output + TextureObject<B> out, // Accumulated output + TextureObject<float> contrib, SplatParams params) { const int x = (blockIdx.x*blockDim.x + threadIdx.x); @@ -328,25 +391,26 @@ __global__ void dibr_attribute_contrib_kernel( if (screenPos.x >= depth_in.width() || screenPos.y >= depth_in.height()) return; // Is this point near the actual surface and therefore a contributor? - const int d = depth_in.tex2D((int)screenPos.x, (int)screenPos.y); + const float d = (float)depth_in.tex2D((int)screenPos.x, (int)screenPos.y) / 1000.0f; - const T input = generateInput(in.tex2D(x, y), params, worldPos); + const A input = generateInput(in.tex2D(x, y), params, worldPos); + const float weight = ftl::cuda::weighting(fabs(camPos.z - d), 0.02f); + const B weighted = make<B>(input) * weight; //weightInput(input, weight); - //const float3 nearest = params.camera.screenToCam((int)(screenPos.x),(int)(screenPos.y),d); - - //const float l = length(nearest - camPos); - if (d == (int)(camPos.z*1000.0f)) { - out(screenPos.x, screenPos.y) = input; + if (weight > 0.0f) { + accumulateOutput(out, contrib, screenPos, weighted, weight); + //out(screenPos.x, screenPos.y) = input; } } -template <typename T> +template <typename A, typename B> void ftl::cuda::dibr_attribute( - TextureObject<T> &in, + TextureObject<A> &in, TextureObject<float4> &points, // Original 3D points TextureObject<int> &depth_in, // Virtual depth map - TextureObject<T> &out, // Accumulated output + TextureObject<B> &out, // Accumulated output + TextureObject<float> &contrib, SplatParams ¶ms, cudaStream_t stream) { const dim3 gridSize((in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); @@ -355,115 +419,68 @@ void ftl::cuda::dibr_attribute( in, points, depth_in, - out, + out, + contrib, params ); cudaSafeCall( cudaGetLastError() ); } -template void ftl::cuda::dibr_attribute<uchar4>( +template void ftl::cuda::dibr_attribute( ftl::cuda::TextureObject<uchar4> &in, // Original colour image ftl::cuda::TextureObject<float4> &points, // Original 3D points ftl::cuda::TextureObject<int> &depth_in, // Virtual depth map - ftl::cuda::TextureObject<uchar4> &out, // Accumulated output + ftl::cuda::TextureObject<float4> &out, // Accumulated output + ftl::cuda::TextureObject<float> &contrib, ftl::render::SplatParams ¶ms, cudaStream_t stream); -template void ftl::cuda::dibr_attribute<float>( +template void ftl::cuda::dibr_attribute( ftl::cuda::TextureObject<float> &in, // Original colour image ftl::cuda::TextureObject<float4> &points, // Original 3D points ftl::cuda::TextureObject<int> &depth_in, // Virtual depth map ftl::cuda::TextureObject<float> &out, // Accumulated output + ftl::cuda::TextureObject<float> &contrib, ftl::render::SplatParams ¶ms, cudaStream_t stream); -template void ftl::cuda::dibr_attribute<float4>( +template void ftl::cuda::dibr_attribute( ftl::cuda::TextureObject<float4> &in, // Original colour image ftl::cuda::TextureObject<float4> &points, // Original 3D points ftl::cuda::TextureObject<int> &depth_in, // Virtual depth map ftl::cuda::TextureObject<float4> &out, // Accumulated output + ftl::cuda::TextureObject<float> &contrib, ftl::render::SplatParams ¶ms, cudaStream_t stream); //============================================================================== -/*__global__ void dibr_normalise_kernel( - TextureObject<float4> colour_in, - TextureObject<uchar4> colour_out, - //TextureObject<float4> normals, - TextureObject<float> contribs) { - const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; - const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; - - if (x < colour_in.width() && y < colour_in.height()) { - const float4 colour = colour_in.tex2D((int)x,(int)y); - //const float4 normal = normals.tex2D((int)x,(int)y); - const float contrib = contribs.tex2D((int)x,(int)y); - - if (contrib > 0.0f) { - colour_out(x,y) = make_uchar4(colour.x / contrib, colour.y / contrib, colour.z / contrib, 0); - //normals(x,y) = normal / contrib; - } - } -} - -__global__ void dibr_normalise_kernel( - TextureObject<float4> colour_in, - TextureObject<float> colour_out, - //TextureObject<float4> normals, - TextureObject<float> contribs) { - const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; - const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; - - if (x < colour_in.width() && y < colour_in.height()) { - const float4 colour = colour_in.tex2D((int)x,(int)y); - //const float4 normal = normals.tex2D((int)x,(int)y); - const float contrib = contribs.tex2D((int)x,(int)y); - - if (contrib > 0.0f) { - colour_out(x,y) = colour.x / contrib; - //normals(x,y) = normal / contrib; - } - } -} - +template <typename A, typename B> __global__ void dibr_normalise_kernel( - TextureObject<float4> colour_in, - TextureObject<float4> colour_out, - //TextureObject<float4> normals, + TextureObject<A> in, + TextureObject<B> out, TextureObject<float> contribs) { const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; - if (x < colour_in.width() && y < colour_in.height()) { - const float4 colour = colour_in.tex2D((int)x,(int)y); + if (x < in.width() && y < in.height()) { + const A a = in.tex2D((int)x,(int)y); //const float4 normal = normals.tex2D((int)x,(int)y); const float contrib = contribs.tex2D((int)x,(int)y); if (contrib > 0.0f) { - colour_out(x,y) = make_float4(colour.x / contrib, colour.y / contrib, colour.z / contrib, 0); + out(x,y) = make<B>(a / contrib); //normals(x,y) = normal / contrib; } } } -void ftl::cuda::dibr_normalise(TextureObject<float4> &colour_in, TextureObject<uchar4> &colour_out, TextureObject<float> &contribs, cudaStream_t stream) { - const dim3 gridSize((colour_in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (colour_in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); +template <typename A, typename B> +void ftl::cuda::dibr_normalise(TextureObject<A> &in, TextureObject<B> &out, TextureObject<float> &contribs, cudaStream_t stream) { + const dim3 gridSize((in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); - dibr_normalise_kernel<<<gridSize, blockSize, 0, stream>>>(colour_in, colour_out, contribs); + dibr_normalise_kernel<<<gridSize, blockSize, 0, stream>>>(in, out, contribs); cudaSafeCall( cudaGetLastError() ); } -void ftl::cuda::dibr_normalise(TextureObject<float4> &colour_in, TextureObject<float> &colour_out, TextureObject<float> &contribs, cudaStream_t stream) { - const dim3 gridSize((colour_in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (colour_in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); - const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); - - dibr_normalise_kernel<<<gridSize, blockSize, 0, stream>>>(colour_in, colour_out, contribs); - cudaSafeCall( cudaGetLastError() ); -} - -void ftl::cuda::dibr_normalise(TextureObject<float4> &colour_in, TextureObject<float4> &colour_out, TextureObject<float> &contribs, cudaStream_t stream) { - const dim3 gridSize((colour_in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (colour_in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); - const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); - - dibr_normalise_kernel<<<gridSize, blockSize, 0, stream>>>(colour_in, colour_out, contribs); - cudaSafeCall( cudaGetLastError() ); -}*/ +template void ftl::cuda::dibr_normalise<float4,uchar4>(TextureObject<float4> &in, TextureObject<uchar4> &out, TextureObject<float> &contribs, cudaStream_t stream); +template void ftl::cuda::dibr_normalise<float,float>(TextureObject<float> &in, TextureObject<float> &out, TextureObject<float> &contribs, cudaStream_t stream); +template void ftl::cuda::dibr_normalise<float4,float4>(TextureObject<float4> &in, TextureObject<float4> &out, TextureObject<float> &contribs, cudaStream_t stream); diff --git a/components/renderers/cpp/src/splatter_cuda.hpp b/components/renderers/cpp/src/splatter_cuda.hpp index 1888a586720d388567a6c4d48f4a29b6b50848ac..684b0de5597f79ab8df78188d6552c08b5734c9a 100644 --- a/components/renderers/cpp/src/splatter_cuda.hpp +++ b/components/renderers/cpp/src/splatter_cuda.hpp @@ -17,19 +17,28 @@ namespace cuda { template <typename T> void splat( ftl::cuda::TextureObject<float4> &normals, + ftl::cuda::TextureObject<float> &density, ftl::cuda::TextureObject<T> &colour_in, ftl::cuda::TextureObject<int> &depth_in, // Virtual depth map ftl::cuda::TextureObject<float> &depth_out, ftl::cuda::TextureObject<T> &colour_out, const ftl::render::SplatParams ¶ms, cudaStream_t stream); - template <typename T> + template <typename A, typename B> void dibr_attribute( - ftl::cuda::TextureObject<T> &in, // Original colour image + ftl::cuda::TextureObject<A> &in, // Original colour image ftl::cuda::TextureObject<float4> &points, // Original 3D points ftl::cuda::TextureObject<int> &depth_in, // Virtual depth map - ftl::cuda::TextureObject<T> &out, // Accumulated output + ftl::cuda::TextureObject<B> &out, // Accumulated output + ftl::cuda::TextureObject<float> &contrib, ftl::render::SplatParams ¶ms, cudaStream_t stream); + + template <typename A, typename B> + void dibr_normalise( + ftl::cuda::TextureObject<A> &in, + ftl::cuda::TextureObject<B> &out, + ftl::cuda::TextureObject<float> &contribs, + cudaStream_t stream); } } diff --git a/components/rgbd-sources/include/ftl/rgbd/channels.hpp b/components/rgbd-sources/include/ftl/rgbd/channels.hpp index 467df0b1eead1518acab99306a0729aa5fcc5849..e00647271574584a1305fc86b41491a93e88a88a 100644 --- a/components/rgbd-sources/include/ftl/rgbd/channels.hpp +++ b/components/rgbd-sources/include/ftl/rgbd/channels.hpp @@ -25,6 +25,7 @@ enum struct Channel : int { Flow, // 32F Energy, // 32F Mask, // 32U + Density, // 32F LeftGray, RightGray, Overlay1 @@ -106,6 +107,7 @@ inline bool isFloatChannel(ftl::rgbd::Channel chan) { case Channel::Depth : //case Channel::Normals : case Channel::Confidence: + case Channel::Density: case Channel::Energy : return true; default : return false; } diff --git a/components/rgbd-sources/include/ftl/rgbd/frame.hpp b/components/rgbd-sources/include/ftl/rgbd/frame.hpp index 338674745392a1c180b65af9c8a1d35d37d1c916..45bf3f4a9b8f862e3d70c22abfcddb096855d7c2 100644 --- a/components/rgbd-sources/include/ftl/rgbd/frame.hpp +++ b/components/rgbd-sources/include/ftl/rgbd/frame.hpp @@ -181,12 +181,13 @@ template <> cv::cuda::GpuMat &Frame::create(ftl::rgbd::Channel c); template <typename T> ftl::cuda::TextureObject<T> &Frame::getTexture(ftl::rgbd::Channel c) { - if (!channels_.has(c)) throw ftl::exception("Texture channel does not exist"); + if (!channels_.has(c)) throw ftl::exception(ftl::Formatter() << "Texture channel does not exist: " << (int)c); if (!gpu_.has(c)) throw ftl::exception("Texture channel is not on GPU"); auto &m = _get(c); if (m.tex.cvType() != ftl::traits::OpenCVType<T>::value || m.tex.width() != m.gpu.cols || m.tex.height() != m.gpu.rows || m.gpu.type() != m.tex.cvType()) { + LOG(ERROR) << "Texture has not been created for channel = " << (int)c; throw ftl::exception("Texture has not been created properly for this channel"); }