diff --git a/components/renderers/cpp/src/CUDARender.cpp b/components/renderers/cpp/src/CUDARender.cpp index c573d23b1fdcf0f035be865883213b8c842f9a27..d44a8cfd102b4a1bf90d250ded75a543f09e01b2 100644 --- a/components/renderers/cpp/src/CUDARender.cpp +++ b/components/renderers/cpp/src/CUDARender.cpp @@ -386,8 +386,8 @@ void CUDARender::_mesh(ftl::rgbd::Frame &out, const Eigen::Matrix4d &t, cudaStre auto transform = pose_ * MatrixConversion::toCUDA(t.cast<float>() * f.getPose().cast<float>()); ftl::cuda::reverse_verify( - out.getTexture<float>(_getDepthChannel()), - f.getTexture<float>(Channel::Depth), + out.create<cv::cuda::GpuMat>(_getDepthChannel()), + f.get<cv::cuda::GpuMat>(Channel::Depth), transformR, transform, params_.camera, diff --git a/components/renderers/cpp/src/splatter_cuda.hpp b/components/renderers/cpp/src/splatter_cuda.hpp index e8be62b96f28baa390823575ad8bcd3572a6a278..aee5d0bc7dab2357c34713ec4eb628d141b0f248 100644 --- a/components/renderers/cpp/src/splatter_cuda.hpp +++ b/components/renderers/cpp/src/splatter_cuda.hpp @@ -171,8 +171,8 @@ namespace cuda { float factor, cudaStream_t stream); void reverse_verify( - ftl::cuda::TextureObject<float> &depth_in, - ftl::cuda::TextureObject<float> &depth_original, + cv::cuda::GpuMat &depth_in, + const cv::cuda::GpuMat &depth_original, const float4x4 &transformR, const float4x4 &transform, const ftl::rgbd::Camera &vintrin, diff --git a/components/renderers/cpp/src/triangle_render.cu b/components/renderers/cpp/src/triangle_render.cu index 6bc257aa01a347f642249234c6503da9b69ffa87..ff20436a95fe10f27fe9b881146a7ffe6f3e4d91 100644 --- a/components/renderers/cpp/src/triangle_render.cu +++ b/components/renderers/cpp/src/triangle_render.cu @@ -227,8 +227,10 @@ void ftl::cuda::merge_convert_depth(TextureObject<int> &depth_in, TextureObject< // ==== Reverse Verify Result ================================================== __global__ void reverse_check_kernel( - TextureObject<float> depth_in, - TextureObject<float> depth_original, + float* __restrict__ depth_in, + const float* __restrict__ depth_original, + int pitch4, + int opitch4, float4x4 transformR, float4x4 transform, ftl::rgbd::Camera vintrin, @@ -237,9 +239,9 @@ __global__ void reverse_check_kernel( const int x = blockIdx.x*blockDim.x + threadIdx.x; const int y = blockIdx.y*blockDim.y + threadIdx.y; - if (x < 0 || x >= depth_in.width() || y < 0 || y >= depth_in.height()) return; + if (x < 0 || x >= vintrin.width || y < 0 || y >= vintrin.height) return; - float d = depth_in.tex2D(x,y); + float d = depth_in[y*pitch4+x]; // FIXME: This is dangerous, need to check through alternates instead while (true) { @@ -249,26 +251,23 @@ __global__ void reverse_check_kernel( int oy = spos.y; if (campos.z > 0.0f && ox >= 0 && ox < ointrin.width && oy >= 0 && oy < ointrin.height) { - float d2 = depth_original.tex2D(ox,oy); - - if (!(d2 < ointrin.maxDepth && d2 - campos.z > d2*0.001f)) { - //printf("Original %f, %f\n", d2, campos.z); - //depth_in(x,y) = 1.5f; //(transform * ointrin.screenToCam(ox,oy,d2)).z; - //d = 0.0f; - break; - } - d += 0.002f; + float d2 = depth_original[oy*opitch4+ox]; + if (!(d2 < ointrin.maxDepth && d2 - campos.z > d2*0.001f)) break; + d += 0.001f; } else break; } - depth_in(x,y) = d; + depth_in[y*pitch4+x] = d; } -void ftl::cuda::reverse_verify(TextureObject<float> &depth_in, TextureObject<float> &depth_original, const float4x4 &transformR, const float4x4 &transform, const ftl::rgbd::Camera &vintrin, const ftl::rgbd::Camera &ointrin, cudaStream_t stream) { - const dim3 gridSize((depth_in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); - const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); +void ftl::cuda::reverse_verify(cv::cuda::GpuMat &depth_in, const cv::cuda::GpuMat &depth_original, const float4x4 &transformR, const float4x4 &transform, const ftl::rgbd::Camera &vintrin, const ftl::rgbd::Camera &ointrin, cudaStream_t stream) { + static constexpr int THREADS_X = 8; + static constexpr int THREADS_Y = 8; + + const dim3 gridSize((depth_in.cols + THREADS_X - 1)/THREADS_X, (depth_in.rows + THREADS_Y - 1)/THREADS_Y); + const dim3 blockSize(THREADS_X, THREADS_Y); - reverse_check_kernel<<<gridSize, blockSize, 0, stream>>>(depth_in, depth_original, transformR, transform, vintrin, ointrin); + reverse_check_kernel<<<gridSize, blockSize, 0, stream>>>(depth_in.ptr<float>(), depth_original.ptr<float>(), depth_in.step1(), depth_original.step1(), transformR, transform, vintrin, ointrin); cudaSafeCall( cudaGetLastError() ); }