Newer
Older
#include <ftl/render/splat_params.hpp>
#include "splatter_cuda.hpp"
#include <ftl/rgbd/camera.hpp>
#include <ftl/cuda_common.hpp>
#include <ftl/cuda/intersections.hpp>
#include <ftl/cuda/warp.hpp>
#define T_PER_BLOCK 8
#define UPSAMPLE_FACTOR 1.8f
#define WARP_SIZE 32
#define DEPTH_THRESHOLD 0.05f
#define UPSAMPLE_MAX 60
#define MAX_ITERATIONS 32 // Note: Must be multiple of 32
#define SPATIAL_SMOOTHING 0.005f
#define ENERGY_THRESHOLD 0.1f
#define SMOOTHING_MULTIPLIER_A 10.0f // For surface search
#define SMOOTHING_MULTIPLIER_B 4.0f // For z contribution
#define SMOOTHING_MULTIPLIER_C 2.0f // For colour contribution
#define ACCUM_DIAMETER 8
using ftl::cuda::TextureObject;
using ftl::render::SplatParams;
using ftl::cuda::warpMin;
using ftl::cuda::warpSum;
/*
* Pass 1: Directly render each camera into virtual view but with no upsampling
* for sparse points.
*/
template <bool CULLING>
__global__ void dibr_merge_kernel(TextureObject<float4> points,
TextureObject<float4> normals,
TextureObject<int> depth, SplatParams params) {
const int x = blockIdx.x*blockDim.x + threadIdx.x;
const int y = blockIdx.y*blockDim.y + threadIdx.y;
const float4 worldPos = points.tex2D(x, y);
if (worldPos.x == MINF || (!(params.m_flags & ftl::render::kShowDisconMask) && worldPos.w < 0.0f)) return;
// Compile time enable/disable of culling back facing points
if (CULLING) {
float3 ray = params.m_viewMatrixInverse.getFloat3x3() * params.camera.screenToCam(x,y,1.0f);
ray = ray / length(ray);
float3 n = make_float3(normals.tex2D((int)x,(int)y));
float l = length(n);
if (l == 0) {
return;
}
n /= l;
const float facing = dot(ray, n);
if (facing <= 0.0f) return;
}
// Find the virtual screen position of current point
const float3 camPos = params.m_viewMatrix * make_float3(worldPos);
if (camPos.z < params.camera.minDepth) return;
if (camPos.z > params.camera.maxDepth) return;
const float d = camPos.z;
const uint2 screenPos = params.camera.camToScreen<uint2>(camPos);
const unsigned int cx = screenPos.x;
const unsigned int cy = screenPos.y;
if (d > params.camera.minDepth && d < params.camera.maxDepth && cx < depth.width() && cy < depth.height()) {
// Transform estimated point to virtual cam space and output z
atomicMin(&depth(cx,cy), d * 1000.0f);
}
}
void ftl::cuda::dibr_merge(TextureObject<float4> &points, TextureObject<float4> &normals, TextureObject<int> &depth, SplatParams params, bool culling, cudaStream_t stream) {
const dim3 gridSize((depth.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
if (culling) dibr_merge_kernel<true><<<gridSize, blockSize, 0, stream>>>(points, normals, depth, params);
else dibr_merge_kernel<false><<<gridSize, blockSize, 0, stream>>>(points, normals, depth, params);
//==============================================================================
__device__ inline float4 make_float4(const uchar4 &c) {
return make_float4(c.x,c.y,c.z,c.w);
}
__device__ inline float4 make_float4(const float4 &v) {
return v;
template <typename T>
__device__ inline T make();
template <>
__device__ inline uchar4 make() {
return make_uchar4(0,0,0,0);
}
template <>
__device__ inline float4 make() {
return make_float4(0.0f,0.0f,0.0f,0.0f);
}
template <>
__device__ inline float make() {
return 0.0f;
}
template <typename T>
__device__ inline T make(const float4 &);
template <>
__device__ inline uchar4 make(const float4 &v) {
return make_uchar4((int)v.x, (int)v.y, (int)v.z, (int)v.w);
}
template <>
__device__ inline float4 make(const float4 &v) {
return v;
}
template <>
__device__ inline float make(const float4 &v) {
return v.x;
template <typename T>
__device__ inline T make(const uchar4 &v);
template <>
__device__ inline float4 make(const uchar4 &v) {
return make_float4((float)v.x, (float)v.y, (float)v.z, (float)v.w);
}
template <typename T>
__device__ inline T make(float v);
template <>
__device__ inline float make(float v) {
return v;
}
* Pass 1b: Expand splats to full size and merge
template <int SEARCH_DIAMETER, typename T>
__global__ void splat_kernel(
//TextureObject<float4> points, // Original 3D points
TextureObject<float4> normals,
TextureObject<float> density,
TextureObject<int> depth_in, // Virtual depth map
TextureObject<float> depth_out, // Accumulated output
TextureObject<T> out,
//ftl::rgbd::Camera camera,
//float4x4 pose_inv,
SplatParams params) {
//const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam];
const int tid = (threadIdx.x + threadIdx.y * blockDim.x);
//const int warp = tid / WARP_SIZE;
const int x = (blockIdx.x*blockDim.x + threadIdx.x) / WARP_SIZE;
const int y = blockIdx.y*blockDim.y + threadIdx.y;
if (x < 0 || y < 0 || x >= depth_in.width() || y >= depth_in.height()) return;
const float3 origin = params.m_viewMatrixInverse * make_float3(0.0f);
float3 ray = params.camera.screenToCam(x,y,1.0f);
ray = ray / length(ray);
const float scale = ray.z;
ray = params.m_viewMatrixInverse.getFloat3x3() * ray;
//float depth = 0.0f;
//float contrib = 0.0f;
float depth = 1000.0f;
struct Result {
float weight;
float depth;
T attr;
};
Result results[(SEARCH_DIAMETER*SEARCH_DIAMETER) / WARP_SIZE];
// Each thread in warp takes an upsample point and updates corresponding depth buffer.
const int lane = tid % WARP_SIZE;
for (int i=lane; i<SEARCH_DIAMETER*SEARCH_DIAMETER; i+=WARP_SIZE) {
const float u = (i % SEARCH_DIAMETER) - (SEARCH_DIAMETER / 2);
const float v = (i / SEARCH_DIAMETER) - (SEARCH_DIAMETER / 2);
results[i/WARP_SIZE] = {0.0f, 0.0f, make<T>()};
// Use the depth buffer to determine this pixels 3D position in camera space
const float d = ((float)depth_in.tex2D(x+u, y+v)/1000.0f);
if (d < params.camera.minDepth || d > params.camera.maxDepth) continue;
const float3 camPos = params.camera.screenToCam((int)(x)+u,(int)(y)+v,d);
const float3 camPos2 = params.camera.screenToCam((int)(x),(int)(y),d);
const float3 worldPos = params.m_viewMatrixInverse * camPos;
// Assumed to be normalised
float4 n = normals.tex2D((int)(x)+u, (int)(y)+v);
n /= length(n);
//if (length(make_float3(n)) == 0.0f) printf("BAD NORMAL\n");
// Does the ray intersect plane of splat?
float t = 1000.0f;
const float r = ftl::cuda::intersectDistance(make_float3(n), worldPos, origin, ray, t);
if (r != PINF) { //} && fabs(t-camPos.z) < 0.01f) {
// Adjust from normalised ray back to original meters units
t *= scale;
const float dens = density.tex2D((int)(x)+u, (int)(y)+v);
float weight = ftl::cuda::weighting(r, dens/params.camera.fx); // (1.0f/params.camera.fx) / (t/params.camera.fx)
/* Buehler C. et al. 2001. Unstructured Lumigraph Rendering. */
/* Orts-Escolano S. et al. 2016. Holoportation: Virtual 3D teleportation in real-time. */
// This is the simple naive colour weighting. It might be good
// enough for our purposes if the alignment step prevents ghosting
// TODO: Use depth and perhaps the neighbourhood consistency in:
// Kuster C. et al. 2011. FreeCam: A hybrid camera system for interactive free-viewpoint video
if (params.m_flags & ftl::render::kNormalWeightColours) weight *= n.w * n.w;
//if (params.m_flags & ftl::render::kDepthWeightColours) weight *= ???
if (weight <= 0.0f) continue;
//depth = min(depth, t);
if (t < depth) {
pdepth = depth;
depth = t;
}
results[i/WARP_SIZE] = {weight, t, in.tex2D((int)x+u, (int)y+v)};
}
pdepth = warpMin(pdepth);
float adepth = 0.0f;
float contrib = 0.0f;
float4 attr = make_float4(0.0f);
// Loop over results array
for (int i=0; i<(SEARCH_DIAMETER*SEARCH_DIAMETER) / WARP_SIZE; ++i) {
if (results[i].depth - depth < 0.04f) {
adepth += results[i].depth * results[i].weight;
attr += make_float4(results[i].attr) * results[i].weight;
contrib += results[i].weight;
// Sum all attributes and contributions
adepth = warpSum(adepth);
attr.x = warpSum(attr.x);
attr.y = warpSum(attr.y);
attr.z = warpSum(attr.z);
contrib = warpSum(contrib);
if (lane == 0 && contrib > 0.0f) {
depth_out(x,y) = adepth / contrib;
out(x,y) = make<T>(attr / contrib);
}
template <typename T>
void ftl::cuda::splat(
TextureObject<float4> &normals,
TextureObject<float> &density,
TextureObject<int> &depth_in, // Virtual depth map
TextureObject<float> &depth_out,
TextureObject<T> &colour_out,
const SplatParams ¶ms, cudaStream_t stream) {
const dim3 gridSize((depth_in.width() + 2 - 1)/2, (depth_in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
const dim3 blockSize(2*WARP_SIZE, T_PER_BLOCK);
splat_kernel<8,T><<<gridSize, blockSize, 0, stream>>>(
normals,
colour_out,
params
);
cudaSafeCall( cudaGetLastError() );
}
template void ftl::cuda::splat<uchar4>(
TextureObject<float4> &normals,
TextureObject<float> &density,
TextureObject<int> &depth_in, // Virtual depth map
TextureObject<float> &depth_out,
TextureObject<uchar4> &colour_out,
const SplatParams ¶ms, cudaStream_t stream);
template void ftl::cuda::splat<float4>(
TextureObject<float4> &normals,
TextureObject<float> &density,
TextureObject<float4> &colour_in,
TextureObject<int> &depth_in, // Virtual depth map
TextureObject<float> &depth_out,
TextureObject<float4> &colour_out,
const SplatParams ¶ms, cudaStream_t stream);
template void ftl::cuda::splat<float>(
TextureObject<float4> &normals,
TextureObject<float> &density,
TextureObject<float> &colour_in,
TextureObject<int> &depth_in, // Virtual depth map
TextureObject<float> &depth_out,
TextureObject<float> &colour_out,
const SplatParams ¶ms, cudaStream_t stream);
//==============================================================================
template <typename T>
__device__ inline T generateInput(const T &in, const SplatParams ¶ms, const float4 &worldPos) {
return in;
}
template <>
__device__ inline uchar4 generateInput(const uchar4 &in, const SplatParams ¶ms, const float4 &worldPos) {
return (params.m_flags & ftl::render::kShowDisconMask && worldPos.w < 0.0f) ?
make_uchar4(0,0,255,255) : // Show discontinuity mask in red
in;
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
template <typename A, typename B>
__device__ inline B weightInput(const A &in, float weight) {
return in * weight;
}
template <>
__device__ inline float4 weightInput(const uchar4 &in, float weight) {
return make_float4(
(float)in.x * weight,
(float)in.y * weight,
(float)in.z * weight,
(float)in.w * weight);
}
template <typename T>
__device__ inline void accumulateOutput(TextureObject<T> &out, TextureObject<float> &contrib, const uint2 &pos, const T &in, float w) {
atomicAdd(&out(pos.x, pos.y), in);
atomicAdd(&contrib(pos.x, pos.y), w);
}
template <>
__device__ inline void accumulateOutput(TextureObject<float4> &out, TextureObject<float> &contrib, const uint2 &pos, const float4 &in, float w) {
atomicAdd((float*)&out(pos.x, pos.y), in.x);
atomicAdd(((float*)&out(pos.x, pos.y))+1, in.y);
atomicAdd(((float*)&out(pos.x, pos.y))+2, in.z);
atomicAdd(((float*)&out(pos.x, pos.y))+3, in.w);
atomicAdd(&contrib(pos.x, pos.y), w);
}
/*
* Pass 2: Accumulate attribute contributions if the points pass a visibility test.
*/
template <typename A, typename B>
TextureObject<A> in, // Attribute input
TextureObject<float4> points, // Original 3D points
TextureObject<int> depth_in, // Virtual depth map
TextureObject<B> out, // Accumulated output
TextureObject<float> contrib,
SplatParams params) {
const int x = (blockIdx.x*blockDim.x + threadIdx.x);
const int y = blockIdx.y*blockDim.y + threadIdx.y;
const float4 worldPos = points.tex2D(x, y);
if (worldPos.x == MINF || (!(params.m_flags & ftl::render::kShowDisconMask) && worldPos.w < 0.0f)) return;
const float3 camPos = params.m_viewMatrix * make_float3(worldPos);
if (camPos.z < params.camera.minDepth) return;
if (camPos.z > params.camera.maxDepth) return;
const uint2 screenPos = params.camera.camToScreen<uint2>(camPos);
// Not on screen so stop now...
if (screenPos.x >= depth_in.width() || screenPos.y >= depth_in.height()) return;
// Is this point near the actual surface and therefore a contributor?
const float d = (float)depth_in.tex2D((int)screenPos.x, (int)screenPos.y) / 1000.0f;
const A input = generateInput(in.tex2D(x, y), params, worldPos);
const float weight = ftl::cuda::weighting(fabs(camPos.z - d), 0.02f);
const B weighted = make<B>(input) * weight; //weightInput(input, weight);
if (weight > 0.0f) {
accumulateOutput(out, contrib, screenPos, weighted, weight);
//out(screenPos.x, screenPos.y) = input;
template <typename A, typename B>
TextureObject<float4> &points, // Original 3D points
TextureObject<int> &depth_in, // Virtual depth map
TextureObject<B> &out, // Accumulated output
TextureObject<float> &contrib,
const dim3 gridSize((in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
dibr_attribute_contrib_kernel<<<gridSize, blockSize, 0, stream>>>(
params
);
cudaSafeCall( cudaGetLastError() );
}
template void ftl::cuda::dibr_attribute(
ftl::cuda::TextureObject<uchar4> &in, // Original colour image
ftl::cuda::TextureObject<float4> &points, // Original 3D points
ftl::cuda::TextureObject<int> &depth_in, // Virtual depth map
ftl::cuda::TextureObject<float4> &out, // Accumulated output
ftl::cuda::TextureObject<float> &contrib,
ftl::render::SplatParams ¶ms, cudaStream_t stream);
template void ftl::cuda::dibr_attribute(
ftl::cuda::TextureObject<float> &in, // Original colour image
ftl::cuda::TextureObject<float4> &points, // Original 3D points
ftl::cuda::TextureObject<int> &depth_in, // Virtual depth map
ftl::cuda::TextureObject<float> &out, // Accumulated output
ftl::cuda::TextureObject<float> &contrib,
ftl::render::SplatParams ¶ms, cudaStream_t stream);
template void ftl::cuda::dibr_attribute(
ftl::cuda::TextureObject<float4> &in, // Original colour image
ftl::cuda::TextureObject<float4> &points, // Original 3D points
ftl::cuda::TextureObject<int> &depth_in, // Virtual depth map
ftl::cuda::TextureObject<float4> &out, // Accumulated output
ftl::cuda::TextureObject<float> &contrib,
ftl::render::SplatParams ¶ms, cudaStream_t stream);
//==============================================================================
template <typename A, typename B>
TextureObject<A> in,
TextureObject<B> out,
TextureObject<float> contribs) {
const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
if (x < in.width() && y < in.height()) {
const A a = in.tex2D((int)x,(int)y);
//const float4 normal = normals.tex2D((int)x,(int)y);
const float contrib = contribs.tex2D((int)x,(int)y);
if (contrib > 0.0f) {
out(x,y) = make<B>(a / contrib);
//normals(x,y) = normal / contrib;
}
}
}
template <typename A, typename B>
void ftl::cuda::dibr_normalise(TextureObject<A> &in, TextureObject<B> &out, TextureObject<float> &contribs, cudaStream_t stream) {
const dim3 gridSize((in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
dibr_normalise_kernel<<<gridSize, blockSize, 0, stream>>>(in, out, contribs);
template void ftl::cuda::dibr_normalise<float4,uchar4>(TextureObject<float4> &in, TextureObject<uchar4> &out, TextureObject<float> &contribs, cudaStream_t stream);
template void ftl::cuda::dibr_normalise<float,float>(TextureObject<float> &in, TextureObject<float> &out, TextureObject<float> &contribs, cudaStream_t stream);
template void ftl::cuda::dibr_normalise<float4,float4>(TextureObject<float4> &in, TextureObject<float4> &out, TextureObject<float> &contribs, cudaStream_t stream);