#include <ftl/operators/mask_cuda.hpp> #define T_PER_BLOCK 8 using ftl::cuda::Mask; /* OLD VERSION */ __global__ void discontinuity_kernel(ftl::cuda::TextureObject<uint8_t> mask_out, ftl::cuda::TextureObject<uchar4> support, ftl::cuda::TextureObject<float> depth, const cv::Size size, const double minDepth, const double maxDepth, float depthCoef, int radius) { const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; if (x < size.width && y < size.height) { Mask mask(0); const float d = depth.tex2D((int)x, (int)y); // Multiples of pixel size at given depth //const float threshold = (depthCoef / ((depthCoef / d) - (radius+disconDisparities-1))) - d; const float threshold = depthCoef * d; // Where depthCoef = 1 / focal * N, N = number of pixel distances equal to a discon. if (d > minDepth && d < maxDepth) { /* Orts-Escolano S. et al. 2016. Holoportation: Virtual 3D teleportation in real-time. */ // If colour cross support region terminates within the requested // radius, and the absolute depth difference on the other side is // greater than threshold, then is is a discontinuity. // Repeat for left, right, up and down. const uchar4 sup = support.tex2D((int)x, (int)y); if (sup.x <= radius) { float dS = depth.tex2D((int)x - sup.x - radius, (int)y); if (fabs(dS - d) > threshold) mask.isDiscontinuity(true); } if (sup.y <= radius) { float dS = depth.tex2D((int)x + sup.y + radius, (int)y); if (fabs(dS - d) > threshold) mask.isDiscontinuity(true); } if (sup.z <= radius) { float dS = depth.tex2D((int)x, (int)y - sup.z - radius); if (fabs(dS - d) > threshold) mask.isDiscontinuity(true); } if (sup.w <= radius) { float dS = depth.tex2D((int)x, (int)y + sup.w + radius); if (fabs(dS - d) > threshold) mask.isDiscontinuity(true); } // FIXME: The above results in a cross formation, need to test all 8 directions } mask_out(x,y) = (int)mask; } } /* New / Current version */ __global__ void discontinuity_kernel(ftl::cuda::TextureObject<uint8_t> mask_out, ftl::cuda::TextureObject<uchar4> support, ftl::cuda::TextureObject<float> depth, const cv::Size size, const double minDepth, const double maxDepth, float depthCoef, float discon_thresh, float noise_thresh, float area_max) { const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; if (x < size.width && y < size.height) { Mask mask(0); const float d = depth.tex2D((int)x, (int)y); // Multiples of pixel size at given depth //const float threshold = (depthCoef / ((depthCoef / d) - (radius+disconDisparities-1))) - d; const float threshold = depthCoef * d; // Where depthCoef = 1 / focal * N, N = number of pixel distances equal to a discon. if (d > minDepth && d < maxDepth) { /* Orts-Escolano S. et al. 2016. Holoportation: Virtual 3D teleportation in real-time. * This paper just says to remove values around discontinuities. */ // Find max change in depth gradient in each direction const float g1 = fabsf((depth.tex2D(x-1, y) - d) - (d - depth.tex2D(x+1,y))); const float g2 = fabsf((depth.tex2D(x, y-1) - d) - (d - depth.tex2D(x,y+1))); const float g3 = fabsf((depth.tex2D(x-1, y-1) - d) - (d - depth.tex2D(x+1,y+1))); const float g4 = fabsf((depth.tex2D(x+1, y-1) - d) - (d - depth.tex2D(x-1,y+1))); const float g = max(g1,max(g2,(max(g3,g4)))); // Calculate support window area //const uchar4 sup = support.tex2D((int)x, (int)y); const uchar4 sup = getScaledTex2D(x, y, support, depth); const float supx = min(sup.x,sup.y); const float supy = min(sup.z,sup.w); const float area = supx * supy; float grad_weight = min(1.0f, g / threshold); float area_weight = min(1.0f, area / area_max); if (grad_weight * (1.0f - area_weight) > discon_thresh) mask.isDiscontinuity(true); if (grad_weight * (area_weight) > noise_thresh) mask.isNoise(true); } mask_out(x,y) = (int)mask; } } void ftl::cuda::discontinuity( ftl::cuda::TextureObject<uint8_t> &mask_out, ftl::cuda::TextureObject<uchar4> &support, ftl::cuda::TextureObject<float> &depth, const cv::Size size, const double minDepth, const double maxDepth, int discon, float depthCoef, cudaStream_t stream) { const dim3 gridSize((size.width + T_PER_BLOCK - 1)/T_PER_BLOCK, (size.height + T_PER_BLOCK - 1)/T_PER_BLOCK); const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); discontinuity_kernel<<<gridSize, blockSize, 0, stream>>>(mask_out, support, depth, size, minDepth, maxDepth, depthCoef, discon); cudaSafeCall( cudaGetLastError() ); #ifdef _DEBUG cudaSafeCall(cudaDeviceSynchronize()); #endif } void ftl::cuda::discontinuity( ftl::cuda::TextureObject<uint8_t> &mask_out, ftl::cuda::TextureObject<uchar4> &support, ftl::cuda::TextureObject<float> &depth, const cv::Size size, const double minDepth, const double maxDepth, float depthCoef, float discon_thresh, float noise_thresh, float area_max, cudaStream_t stream) { const dim3 gridSize((size.width + T_PER_BLOCK - 1)/T_PER_BLOCK, (size.height + T_PER_BLOCK - 1)/T_PER_BLOCK); const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); discontinuity_kernel<<<gridSize, blockSize, 0, stream>>>(mask_out, support, depth, size, minDepth, maxDepth, depthCoef, discon_thresh, noise_thresh, area_max); cudaSafeCall( cudaGetLastError() ); #ifdef _DEBUG cudaSafeCall(cudaDeviceSynchronize()); #endif } // ============================================================================= template <int RADIUS> __global__ void cull_mask_kernel(ftl::cuda::TextureObject<uint8_t> mask, ftl::cuda::TextureObject<float> depth, uint8_t id) { const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; if (x < depth.width()-RADIUS && y < depth.height()-RADIUS) { bool isdiscon = false; #pragma unroll for (int v=-RADIUS; v<=RADIUS; ++v) { #pragma unroll for (int u=-RADIUS; u<=RADIUS; ++u) { Mask m(mask.tex2D((int)x+u,(int)y+v)); isdiscon = isdiscon || m.is(id); } } if (isdiscon) { depth(x,y) = 0.0f; } } } void ftl::cuda::cull_mask(ftl::cuda::TextureObject<uint8_t> &mask, ftl::cuda::TextureObject<float> &depth, uint8_t id, unsigned int radius, cudaStream_t stream) { const dim3 gridSize((depth.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); switch (radius) { case 0 : cull_mask_kernel<0><<<gridSize, blockSize, 0, stream>>>(mask, depth, id); break; case 1 : cull_mask_kernel<1><<<gridSize, blockSize, 0, stream>>>(mask, depth, id); break; case 2 : cull_mask_kernel<2><<<gridSize, blockSize, 0, stream>>>(mask, depth, id); break; case 3 : cull_mask_kernel<3><<<gridSize, blockSize, 0, stream>>>(mask, depth, id); break; case 4 : cull_mask_kernel<4><<<gridSize, blockSize, 0, stream>>>(mask, depth, id); break; case 5 : cull_mask_kernel<5><<<gridSize, blockSize, 0, stream>>>(mask, depth, id); break; default: break; } cudaSafeCall( cudaGetLastError() ); #ifdef _DEBUG cudaSafeCall(cudaDeviceSynchronize()); #endif }