diff --git a/components/renderers/cpp/src/splatter_cuda.hpp b/components/renderers/cpp/src/splatter_cuda.hpp
index c81977cf38a4380df4143d2cd8d0609c69de8897..53b9f675e9f26144e9f2d31fff9c889b3ef58720 100644
--- a/components/renderers/cpp/src/splatter_cuda.hpp
+++ b/components/renderers/cpp/src/splatter_cuda.hpp
@@ -117,6 +117,20 @@ namespace cuda {
 		ftl::cuda::TextureObject<uchar4> &colour,
 		ftl::cuda::TextureObject<int> &mask,
 		int id, uchar4 style, cudaStream_t stream);
+
+	/**
+	 * Scale an integer depth texture to float and merge it into an existing
+	 * float depth texture, keeping the smaller of the two values per pixel.
+	 *
+	 * @param depth_in  Integer depth samples; each is multiplied by alpha.
+	 * @param depth_out Float depth, updated in place with the merged result.
+	 * @param alpha     Multiplier applied to every integer depth sample.
+	 * @param stream    CUDA stream the conversion kernel is launched on.
+	 */
+	void merge_convert_depth(
+		ftl::cuda::TextureObject<int> &depth_in,
+		ftl::cuda::TextureObject<float> &depth_out,
+		float alpha, cudaStream_t stream);
 }
 }
 
diff --git a/components/renderers/cpp/src/tri_render.cpp b/components/renderers/cpp/src/tri_render.cpp index 5d38742cf979575fd24388689b6cb5899821af65..05edcb2efab4aa2ea124ca167175c852864b2c86 100644 --- a/components/renderers/cpp/src/tri_render.cpp +++ b/components/renderers/cpp/src/tri_render.cpp @@ -374,7 +374,8 @@ void Triangular::_mesh(ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream } // Convert from int depth to float depth - temp_.get<GpuMat>(Channel::Depth2).convertTo(out.get<GpuMat>(Channel::Depth), CV_32F, 1.0f / 100000.0f, cvstream); + //temp_.get<GpuMat>(Channel::Depth2).convertTo(out.get<GpuMat>(Channel::Depth), CV_32F, 1.0f / 100000.0f, cvstream); + ftl::cuda::merge_convert_depth(temp_.getTexture<int>(Channel::Depth2), out.createTexture<float>(Channel::Depth), 1.0f / 100000.0f, stream_); //filters_->filter(out, src, stream); @@ -470,25 +471,8 @@ bool Triangular::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out) { //scene_->upload(Channel::Colour + Channel::Depth, stream_); const auto &camera = src->parameters(); - //cudaSafeCall(cudaSetDevice(scene_->getCUDADevice())); - - // Create all the required channels - - out.create<GpuMat>(Channel::Depth, Format<float>(camera.width, camera.height)); - out.create<GpuMat>(Channel::Colour, Format<uchar4>(camera.width, camera.height)); - out.createTexture<uchar4>(Channel::Colour, true); 
// Force interpolated colour - - - if (scene_->frames.size() == 0) return false; - auto &g = scene_->frames[0].get<GpuMat>(Channel::Colour); - - temp_.create<GpuMat>(Channel::Colour, Format<float4>(camera.width, camera.height)); - temp_.create<GpuMat>(Channel::Contribution, Format<float>(camera.width, camera.height)); - temp_.create<GpuMat>(Channel::Depth, Format<int>(camera.width, camera.height)); - temp_.create<GpuMat>(Channel::Depth2, Format<int>(camera.width, camera.height)); - temp_.create<GpuMat>(Channel::Normals, Format<float4>(camera.width, camera.height)); //g.cols, g.rows)); - cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream_); + //cudaSafeCall(cudaSetDevice(scene_->getCUDADevice())); // Parameters object to pass to CUDA describing the camera SplatParams &params = params_; @@ -500,20 +484,36 @@ bool Triangular::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out) { params.m_viewMatrix = MatrixConversion::toCUDA(src->getPose().cast<float>().inverse()); params.m_viewMatrixInverse = MatrixConversion::toCUDA(src->getPose().cast<float>()); params.camera = camera; - // Clear all channels to 0 or max depth - out.get<GpuMat>(Channel::Depth).setTo(cv::Scalar(1000.0f), cvstream); + // Create all the required channels + + if (!out.hasChannel(Channel::Depth)) { + out.create<GpuMat>(Channel::Depth, Format<float>(camera.width, camera.height)); + out.create<GpuMat>(Channel::Colour, Format<uchar4>(camera.width, camera.height)); + out.createTexture<uchar4>(Channel::Colour, true); // Force interpolated colour - if (env_image_.empty() || !value("environment_enabled", false)) { - out.get<GpuMat>(Channel::Colour).setTo(background_, cvstream); - } else { - auto pose = params.m_viewMatrixInverse.getFloat3x3(); - ftl::cuda::equirectangular_reproject( - env_tex_, - out.createTexture<uchar4>(Channel::Colour, true), - camera, pose, stream_); + out.get<GpuMat>(Channel::Depth).setTo(cv::Scalar(1000.0f), cvstream); + + if (env_image_.empty() || 
!value("environment_enabled", false)) { + out.get<GpuMat>(Channel::Colour).setTo(background_, cvstream); + } else { + auto pose = params.m_viewMatrixInverse.getFloat3x3(); + ftl::cuda::equirectangular_reproject( + env_tex_, + out.createTexture<uchar4>(Channel::Colour, true), + camera, pose, stream_); + } } + if (scene_->frames.size() == 0) return false; + auto &g = scene_->frames[0].get<GpuMat>(Channel::Colour); + + temp_.create<GpuMat>(Channel::Colour, Format<float4>(camera.width, camera.height)); + temp_.create<GpuMat>(Channel::Contribution, Format<float>(camera.width, camera.height)); + temp_.create<GpuMat>(Channel::Depth, Format<int>(camera.width, camera.height)); + temp_.create<GpuMat>(Channel::Depth2, Format<int>(camera.width, camera.height)); + temp_.create<GpuMat>(Channel::Normals, Format<float4>(camera.width, camera.height)); //g.cols, g.rows)); + + //LOG(INFO) << "Render ready: " << camera.width << "," << camera.height; bool show_discon = value("show_discontinuity_mask", false); 
diff --git a/components/renderers/cpp/src/triangle_render.cu b/components/renderers/cpp/src/triangle_render.cu
index 7311e50b9bbbd7b7ba78f106e360998b937646d8..2e1966c4252fcf751639cc70a94dbfbdccd43b3b 100644
--- a/components/renderers/cpp/src/triangle_render.cu
+++ b/components/renderers/cpp/src/triangle_render.cu
@@ -165,6 +165,61 @@ void ftl::cuda::triangle_render1(TextureObject<float> &depth_in, TextureObject<i
 	cudaSafeCall( cudaGetLastError() );
 }
 
+// ==== Merge convert ===========
+
+/**
+ * Scale an integer depth buffer to float and merge it into an existing
+ * float depth buffer, keeping the smaller of the two values per pixel.
+ *
+ * Each thread handles the one pixel (x,y) given by its block and thread
+ * indices; a thread that falls outside either buffer does nothing.
+ *
+ * @param depth_in  Integer depth samples; each is multiplied by alpha.
+ * @param depth_out Float depth, read and then overwritten in place with
+ *                  min(depth_in * alpha, depth_out).
+ * @param alpha     Multiplier applied to every integer depth sample.
+ */
+__global__ void merge_convert_kernel(
+		TextureObject<int> depth_in,
+		TextureObject<float> depth_out,
+		float alpha) {
+	const int x = blockIdx.x*blockDim.x + threadIdx.x;
+	const int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	// Skip threads that fall outside the input buffer.
+	if (x < 0 || x >= depth_in.width() || y < 0 || y >= depth_in.height()) return;
+
+	// depth_out is written below, so also skip pixels it does not contain in
+	// case the two buffers differ in size.
+	if (x >= depth_out.width() || y >= depth_out.height()) return;
+
+	// Each thread reads its own depth_out pixel before writing it, and no
+	// other thread touches that pixel.
+	const float a = float(depth_in.tex2D(x,y))*alpha;
+	const float b = depth_out.tex2D(x,y);
+	depth_out(x,y) = min(a,b);
+}
+
+/**
+ * Launch merge_convert_kernel on stream with one thread per depth_in pixel.
+ *
+ * The launch is asynchronous with respect to the host; only launch errors
+ * are checked here via cudaGetLastError().
+ *
+ * @param depth_in  Integer depth samples; its size determines the grid.
+ * @param depth_out Float depth, updated in place with the merged result.
+ * @param alpha     Multiplier applied to every integer depth sample.
+ * @param stream    CUDA stream the kernel is launched on.
+ */
+void ftl::cuda::merge_convert_depth(TextureObject<int> &depth_in, TextureObject<float> &depth_out, float alpha, cudaStream_t stream) {
+	const dim3 gridSize((depth_in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
+
+	merge_convert_kernel<<<gridSize, blockSize, 0, stream>>>(depth_in, depth_out, alpha);
+	// Reports launch errors only; faults inside the kernel surface at a later synchronising call.
+	cudaSafeCall( cudaGetLastError() );
+}
+
 // ==== BLENDER ========
 
 /*