#include <ftl/cuda/points.hpp> #define T_PER_BLOCK 8 template <int RADIUS> __global__ void point_cloud_kernel(ftl::cuda::TextureObject<float4> output, ftl::cuda::TextureObject<float> depth, ftl::rgbd::Camera params, float4x4 pose) { const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; if (x < params.width && y < params.height) { output(x,y) = make_float4(MINF, MINF, MINF, MINF); const float d = depth.tex2D((int)x, (int)y); // Calculate depth between 0.0 and 1.0 float p = (d - params.minDepth) / (params.maxDepth - params.minDepth); if (d >= params.minDepth && d <= params.maxDepth) { /* Orts-Escolano S. et al. 2016. Holoportation: Virtual 3D teleportation in real-time. */ // Is there a discontinuity nearby? for (int u=-RADIUS; u<=RADIUS; ++u) { for (int v=-RADIUS; v<=RADIUS; ++v) { // If yes, the flag using w = -1 if (fabs(depth.tex2D((int)x+u, (int)y+v) - d) > 0.1f) p = -1.0f; } } output(x,y) = make_float4(pose * params.screenToCam(x, y, d), p); } } } template <> __global__ void point_cloud_kernel<0>(ftl::cuda::TextureObject<float4> output, ftl::cuda::TextureObject<float> depth, ftl::rgbd::Camera params, float4x4 pose) { const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; if (x < params.width && y < params.height) { output(x,y) = make_float4(MINF, MINF, MINF, MINF); float d = depth.tex2D((int)x, (int)y); if (d >= params.minDepth && d <= params.maxDepth) { output(x,y) = make_float4(pose * params.screenToCam(x, y, d), d); } } } void ftl::cuda::point_cloud(ftl::cuda::TextureObject<float4> &output, ftl::cuda::TextureObject<float> &depth, const ftl::rgbd::Camera ¶ms, const float4x4 &pose, uint discon, cudaStream_t stream) { const dim3 gridSize((params.width + T_PER_BLOCK - 1)/T_PER_BLOCK, (params.height + T_PER_BLOCK - 1)/T_PER_BLOCK); const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); switch (discon) { case 4 : point_cloud_kernel<4><<<gridSize, blockSize, 0, stream>>>(output, depth, params, pose); break; case 3 : point_cloud_kernel<3><<<gridSize, blockSize, 0, stream>>>(output, depth, params, pose); break; case 2 : point_cloud_kernel<2><<<gridSize, blockSize, 0, stream>>>(output, depth, params, pose); break; case 1 : point_cloud_kernel<1><<<gridSize, blockSize, 0, stream>>>(output, depth, params, pose); break; default: point_cloud_kernel<0><<<gridSize, blockSize, 0, stream>>>(output, depth, params, pose); } cudaSafeCall( cudaGetLastError() ); #ifdef _DEBUG cudaSafeCall(cudaDeviceSynchronize()); #endif } //============================================================================== __device__ bool isClipped(const float4 &p, const ftl::cuda::ClipSpace &clip) { const float3 tp = clip.origin * make_float3(p); return fabs(tp.x) > clip.size.x || fabs(tp.y) > clip.size.y || fabs(tp.z) > clip.size.z; } __global__ void clipping_kernel(ftl::cuda::TextureObject<float4> points, ftl::cuda::ClipSpace clip) { const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; if (x < points.width() && y < points.height()) { float4 p = points(x,y); if (isClipped(p, clip)) { points(x,y) = make_float4(MINF, MINF, MINF, MINF); } } } __global__ void clipping_kernel(ftl::cuda::TextureObject<float> depth, ftl::rgbd::Camera camera, ftl::cuda::ClipSpace clip) { const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; if (x < depth.width() && y < depth.height()) { float d = depth(x,y); float4 p = make_float4(camera.screenToCam(x,y,d), 0.0f); if (isClipped(p, clip)) { depth(x,y) = 0.0f; } } } void ftl::cuda::clipping(ftl::cuda::TextureObject<float4> &points, const ClipSpace &clip, cudaStream_t stream) { const dim3 gridSize((points.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (points.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); clipping_kernel<<<gridSize, blockSize, 0, stream>>>(points, clip); cudaSafeCall( cudaGetLastError() ); } void ftl::cuda::clipping(ftl::cuda::TextureObject<float> &depth, const ftl::rgbd::Camera &camera, const ClipSpace &clip, cudaStream_t stream) { const dim3 gridSize((depth.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); clipping_kernel<<<gridSize, blockSize, 0, stream>>>(depth, camera, clip); cudaSafeCall( cudaGetLastError() ); }