Newer
Older
#include <ftl/operators/mask_cuda.hpp>
#define T_PER_BLOCK 8
using ftl::cuda::Mask;
/* OLD VERSION */
__global__ void discontinuity_kernel(ftl::cuda::TextureObject<uint8_t> mask_out,
ftl::cuda::TextureObject<uchar4> support,
ftl::cuda::TextureObject<float> depth,
const cv::Size size, const double minDepth, const double maxDepth,
const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
Mask mask(0);
const float d = depth.tex2D((int)x, (int)y);
// Multiples of pixel size at given depth
//const float threshold = (depthCoef / ((depthCoef / d) - (radius+disconDisparities-1))) - d;
const float threshold = depthCoef * d; // Where depthCoef = 1 / focal * N, N = number of pixel distances equal to a discon.
if (d > minDepth && d < maxDepth) {
/* Orts-Escolano S. et al. 2016. Holoportation: Virtual 3D teleportation in real-time. */
// If colour cross support region terminates within the requested
// radius, and the absolute depth difference on the other side is
// greater than threshold, then is is a discontinuity.
// Repeat for left, right, up and down.
const uchar4 sup = support.tex2D((int)x, (int)y);
if (sup.x <= radius) {
float dS = depth.tex2D((int)x - sup.x - radius, (int)y);
if (fabs(dS - d) > threshold) mask.isDiscontinuity(true);
}
if (sup.y <= radius) {
float dS = depth.tex2D((int)x + sup.y + radius, (int)y);
if (fabs(dS - d) > threshold) mask.isDiscontinuity(true);
}
if (sup.z <= radius) {
float dS = depth.tex2D((int)x, (int)y - sup.z - radius);
if (fabs(dS - d) > threshold) mask.isDiscontinuity(true);
}
if (sup.w <= radius) {
float dS = depth.tex2D((int)x, (int)y + sup.w + radius);
if (fabs(dS - d) > threshold) mask.isDiscontinuity(true);
}
// FIXME: The above results in a cross formation, need to test all 8 directions
/* New / Current version */
__global__ void discontinuity_kernel(ftl::cuda::TextureObject<uint8_t> mask_out,
ftl::cuda::TextureObject<uchar4> support,
ftl::cuda::TextureObject<float> depth,
const cv::Size size, const double minDepth, const double maxDepth,
float depthCoef, float discon_thresh, float noise_thresh, float area_max) {
const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
if (x < size.width && y < size.height) {
Mask mask(0);
const float d = depth.tex2D((int)x, (int)y);
// Multiples of pixel size at given depth
//const float threshold = (depthCoef / ((depthCoef / d) - (radius+disconDisparities-1))) - d;
const float threshold = depthCoef * d; // Where depthCoef = 1 / focal * N, N = number of pixel distances equal to a discon.
if (d > minDepth && d < maxDepth) {
/* Orts-Escolano S. et al. 2016. Holoportation: Virtual 3D teleportation in real-time.
* This paper just says to remove values around discontinuities. */
// Find max change in depth gradient in each direction
const float g1 = fabsf((depth.tex2D(x-1, y) - d) - (d - depth.tex2D(x+1,y)));
const float g2 = fabsf((depth.tex2D(x, y-1) - d) - (d - depth.tex2D(x,y+1)));
const float g3 = fabsf((depth.tex2D(x-1, y-1) - d) - (d - depth.tex2D(x+1,y+1)));
const float g4 = fabsf((depth.tex2D(x+1, y-1) - d) - (d - depth.tex2D(x-1,y+1)));
const float g = max(g1,max(g2,(max(g3,g4))));
// Calculate support window area
//const uchar4 sup = support.tex2D((int)x, (int)y);
const uchar4 sup = getScaledTex2D(x, y, support, depth);
const float supx = min(sup.x,sup.y);
const float supy = min(sup.z,sup.w);
const float area = supx * supy;
float grad_weight = min(1.0f, g / threshold);
float area_weight = min(1.0f, area / area_max);
if (grad_weight * (1.0f - area_weight) > discon_thresh) mask.isDiscontinuity(true);
if (grad_weight * (area_weight) > noise_thresh) mask.isNoise(true);
}
mask_out(x,y) = (int)mask;
}
}
void ftl::cuda::discontinuity( ftl::cuda::TextureObject<uint8_t> &mask_out, ftl::cuda::TextureObject<uchar4> &support,
ftl::cuda::TextureObject<float> &depth,
const cv::Size size, const double minDepth, const double maxDepth,
int discon, float depthCoef, cudaStream_t stream) {
const dim3 gridSize((size.width + T_PER_BLOCK - 1)/T_PER_BLOCK, (size.height + T_PER_BLOCK - 1)/T_PER_BLOCK);
const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
discontinuity_kernel<<<gridSize, blockSize, 0, stream>>>(mask_out, support, depth, size, minDepth, maxDepth, depthCoef, discon);
cudaSafeCall( cudaGetLastError() );
#ifdef _DEBUG
cudaSafeCall(cudaDeviceSynchronize());
#endif
}
void ftl::cuda::discontinuity( ftl::cuda::TextureObject<uint8_t> &mask_out, ftl::cuda::TextureObject<uchar4> &support,
ftl::cuda::TextureObject<float> &depth,
const cv::Size size, const double minDepth, const double maxDepth,
float depthCoef, float discon_thresh, float noise_thresh, float area_max, cudaStream_t stream) {
const dim3 gridSize((size.width + T_PER_BLOCK - 1)/T_PER_BLOCK, (size.height + T_PER_BLOCK - 1)/T_PER_BLOCK);
const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
discontinuity_kernel<<<gridSize, blockSize, 0, stream>>>(mask_out, support, depth, size, minDepth, maxDepth, depthCoef, discon_thresh, noise_thresh, area_max);
cudaSafeCall( cudaGetLastError() );
#ifdef _DEBUG
cudaSafeCall(cudaDeviceSynchronize());
#endif
}
// =============================================================================
template <int RADIUS>
__global__ void cull_mask_kernel(ftl::cuda::TextureObject<uint8_t> mask, ftl::cuda::TextureObject<float> depth, uint8_t id) {
const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
if (x < depth.width()-RADIUS && y < depth.height()-RADIUS) {
bool isdiscon = false;
#pragma unroll
for (int v=-RADIUS; v<=RADIUS; ++v) {
#pragma unroll
for (int u=-RADIUS; u<=RADIUS; ++u) {
Mask m(mask.tex2D((int)x+u,(int)y+v));
isdiscon = isdiscon || m.is(id);
}
}
if (isdiscon) {
depth(x,y) = 0.0f;
}
void ftl::cuda::cull_mask(ftl::cuda::TextureObject<uint8_t> &mask, ftl::cuda::TextureObject<float> &depth, uint8_t id, unsigned int radius, cudaStream_t stream) {
const dim3 gridSize((depth.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
switch (radius) {
case 0 : cull_mask_kernel<0><<<gridSize, blockSize, 0, stream>>>(mask, depth, id); break;
case 1 : cull_mask_kernel<1><<<gridSize, blockSize, 0, stream>>>(mask, depth, id); break;
case 2 : cull_mask_kernel<2><<<gridSize, blockSize, 0, stream>>>(mask, depth, id); break;
case 3 : cull_mask_kernel<3><<<gridSize, blockSize, 0, stream>>>(mask, depth, id); break;
case 4 : cull_mask_kernel<4><<<gridSize, blockSize, 0, stream>>>(mask, depth, id); break;
case 5 : cull_mask_kernel<5><<<gridSize, blockSize, 0, stream>>>(mask, depth, id); break;
default: break;
cudaSafeCall( cudaGetLastError() );
#ifdef _DEBUG
cudaSafeCall(cudaDeviceSynchronize());
#endif
}