Skip to content
Snippets Groups Projects
Commit b0fec3cd authored by Nicolas Pope's avatar Nicolas Pope
Browse files

Resolve #135 with principal surface splats

parent 645edb44
No related branches found
No related tags found
No related merge requests found
Showing
with 1215 additions and 616 deletions
...@@ -10,7 +10,7 @@ set(REPSRC ...@@ -10,7 +10,7 @@ set(REPSRC
src/garbage.cu src/garbage.cu
src/integrators.cu src/integrators.cu
#src/ray_cast_sdf.cu #src/ray_cast_sdf.cu
src/splat_render.cu src/voxel_render.cu
src/camera_util.cu src/camera_util.cu
src/voxel_hash.cu src/voxel_hash.cu
src/voxel_hash.cpp src/voxel_hash.cpp
...@@ -19,6 +19,7 @@ set(REPSRC ...@@ -19,6 +19,7 @@ set(REPSRC
#src/virtual_source.cpp #src/virtual_source.cpp
src/splat_render.cpp src/splat_render.cpp
src/dibr.cu src/dibr.cu
src/mls.cu
src/depth_camera.cu src/depth_camera.cu
src/depth_camera.cpp src/depth_camera.cpp
) )
......
...@@ -406,6 +406,12 @@ inline __host__ __device__ float length(float3 v) ...@@ -406,6 +406,12 @@ inline __host__ __device__ float length(float3 v)
return sqrtf(dot(v, v)); return sqrtf(dot(v, v));
} }
// Squared Euclidean length of a float3 — avoids the sqrtf cost of length()
// when only relative distances are compared.
inline __host__ __device__ float length2(const float3 &v)
{
	return v.x*v.x + v.y*v.y + v.z*v.z;
}
// normalize // normalize
inline __host__ __device__ float3 normalize(float3 v) inline __host__ __device__ float3 normalize(float3 v)
{ {
......
...@@ -47,6 +47,21 @@ void ftl::cuda::clear_depth(const ftl::cuda::TextureObject<int> &depth, cudaStre ...@@ -47,6 +47,21 @@ void ftl::cuda::clear_depth(const ftl::cuda::TextureObject<int> &depth, cudaStre
clear_depth_kernel<<<clear_gridSize, clear_blockSize, 0, stream>>>(depth); clear_depth_kernel<<<clear_gridSize, clear_blockSize, 0, stream>>>(depth);
} }
// Kernel: set every pixel of a float texture buffer to 0.0f.
// Launched with one thread per pixel; threads past the image edge exit early.
__global__ void clear_to_zero_kernel(ftl::cuda::TextureObject<float> depth) {
	const unsigned int px = blockIdx.x*blockDim.x + threadIdx.x;
	const unsigned int py = blockIdx.y*blockDim.y + threadIdx.y;

	if (px >= depth.width() || py >= depth.height()) return;  // grid overshoot guard
	depth(px,py) = 0.0f;
}
// Host wrapper: asynchronously zero a float depth buffer on the given stream.
void ftl::cuda::clear_to_zero(const ftl::cuda::TextureObject<float> &depth, cudaStream_t stream) {
	const dim3 threads(T_PER_BLOCK, T_PER_BLOCK);
	// Ceil-divide so partial edge tiles are still covered.
	const dim3 blocks((depth.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
	clear_to_zero_kernel<<<blocks, threads, 0, stream>>>(depth);
}
__global__ void clear_points_kernel(ftl::cuda::TextureObject<float4> depth) { __global__ void clear_points_kernel(ftl::cuda::TextureObject<float4> depth) {
const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
...@@ -79,6 +94,21 @@ void ftl::cuda::clear_colour(const ftl::cuda::TextureObject<uchar4> &depth, cuda ...@@ -79,6 +94,21 @@ void ftl::cuda::clear_colour(const ftl::cuda::TextureObject<uchar4> &depth, cuda
clear_colour_kernel<<<clear_gridSize, clear_blockSize, 0, stream>>>(depth); clear_colour_kernel<<<clear_gridSize, clear_blockSize, 0, stream>>>(depth);
} }
// Kernel: set every pixel of a float4 buffer to (0,0,0,0).
// One thread per pixel; out-of-range threads do nothing.
__global__ void clear_colour_kernel(ftl::cuda::TextureObject<float4> depth) {
	const unsigned int px = blockIdx.x*blockDim.x + threadIdx.x;
	const unsigned int py = blockIdx.y*blockDim.y + threadIdx.y;

	if (px >= depth.width() || py >= depth.height()) return;  // grid overshoot guard
	depth(px,py) = make_float4(0.0f);
}
// Host wrapper: asynchronously zero a float4 colour buffer on the given stream.
void ftl::cuda::clear_colour(const ftl::cuda::TextureObject<float4> &depth, cudaStream_t stream) {
	const dim3 threads(T_PER_BLOCK, T_PER_BLOCK);
	// Ceil-divide so partial edge tiles are still covered.
	const dim3 blocks((depth.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
	clear_colour_kernel<<<blocks, threads, 0, stream>>>(depth);
}
// ===== Type convert ===== // ===== Type convert =====
template <typename A, typename B> template <typename A, typename B>
...@@ -103,331 +133,6 @@ void ftl::cuda::int_to_float(const ftl::cuda::TextureObject<int> &in, ftl::cuda: ...@@ -103,331 +133,6 @@ void ftl::cuda::int_to_float(const ftl::cuda::TextureObject<int> &in, ftl::cuda:
convert_kernel<int,float><<<gridSize, blockSize, 0, stream>>>(in, out, scale); convert_kernel<int,float><<<gridSize, blockSize, 0, stream>>>(in, out, scale);
} }
/// ===== MLS Smooth
// TODO:(Nick) Put this in a common location (used in integrators.cu)
extern __device__ float spatialWeighting(float r);
extern __device__ float spatialWeighting(float r, float h);
/*
* Kim, K., Chalidabhongse, T. H., Harwood, D., & Davis, L. (2005).
* Real-time foreground-background segmentation using codebook model.
* Real-Time Imaging. https://doi.org/10.1016/j.rti.2004.12.004
*/
/*
 * Kim, K., Chalidabhongse, T. H., Harwood, D., & Davis, L. (2005).
 * Real-time foreground-background segmentation using codebook model.
 * Real-Time Imaging. https://doi.org/10.1016/j.rti.2004.12.004
 *
 * Colour distortion of pb relative to the direction of pa.
 * Fixes vs. previous version: float intrinsics (pow/sqrt promoted to double
 * on device), guard against divide-by-zero when pa is black, and clamp the
 * sqrtf operand which can go slightly negative from rounding.
 */
__device__ float colordiffFloat(const uchar4 &pa, const uchar4 &pb) {
	const float x_2 = pb.x * pb.x + pb.y * pb.y + pb.z * pb.z;
	const float v_2 = pa.x * pa.x + pa.y * pa.y + pa.z * pa.z;
	if (v_2 == 0.0f) return sqrtf(x_2);  // pa is black: distortion is |pb|
	const float xv = pb.x * pa.x + pb.y * pa.y + pb.z * pa.z;
	const float p_2 = (xv * xv) / v_2;
	return sqrtf(fmaxf(x_2 - p_2, 0.0f));
}
// Plain Euclidean distance between two colours in RGB space (alpha ignored).
__device__ float colordiffFloat2(const uchar4 &pa, const uchar4 &pb) {
	const float dx = (float)pa.x - (float)pb.x;
	const float dy = (float)pa.y - (float)pb.y;
	const float dz = (float)pa.z - (float)pb.z;
	return length(make_float3(dx, dy, dz));
}
/*
* Colour weighting as suggested in:
* C. Kuster et al. Spatio-Temporal Geometry Fusion for Multiple Hybrid Cameras using Moving Least Squares Surfaces. 2014.
* c = colour distance
*/
/*
 * Colour weighting as suggested in:
 * C. Kuster et al. Spatio-Temporal Geometry Fusion for Multiple Hybrid Cameras
 * using Moving Least Squares Surfaces. 2014.
 * c = colour distance; quartic falloff reaching 0 at the smoothing radius h.
 */
__device__ float colourWeighting(float c) {
	const float h = c_hashParams.m_colourSmoothing;
	if (c >= h) return 0.0f;
	const float q = 1.0f - (c/h)*(c/h);
	return q*q*q*q;
}
#define WINDOW_RADIUS 5
/**
 * Accumulate an MLS (moving-least-squares) position estimate from one
 * camera's depth map, sampling a (2*WINDOW_RADIUS+1)^2 window around the
 * projection of the query point. Samples are weighted by spatial distance
 * and colour similarity.
 *
 * @param cam  Index into the constant-memory camera array c_cameras.
 * @param mPos World-space query position.
 * @param c1   Colour at the query point, for the colour similarity weight.
 * @param wpos In/out: weighted sum of world-space positions accumulated here.
 * @return Sum of the weights added to wpos (0 if nothing contributed).
 *
 * Fix vs. previous version: removed the unused local `pos`.
 */
__device__ float mlsCamera(int cam, const float3 &mPos, uchar4 c1, float3 &wpos) {
	const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam];

	const float3 pf = camera.poseInverse * mPos;  // query point in camera space
	const uint2 screenPos = make_uint2(camera.params.cameraToKinectScreenInt(pf));
	float weights = 0.0f;

	for (int v=-WINDOW_RADIUS; v<=WINDOW_RADIUS; ++v) {
		for (int u=-WINDOW_RADIUS; u<=WINDOW_RADIUS; ++u) {
			// NOTE(review): no explicit screen-bounds check here — presumably the
			// texture addressing mode handles out-of-range reads; confirm.
			float depth = tex2D<float>(camera.depth, screenPos.x+u, screenPos.y+v);
			const float3 camPos = camera.params.kinectDepthToSkeleton(screenPos.x+u, screenPos.y+v, depth);
			float weight = spatialWeighting(length(pf - camPos));

			if (weight > 0.0f) {
				uchar4 c2 = tex2D<uchar4>(camera.colour, screenPos.x+u, screenPos.y+v);
				weight *= colourWeighting(colordiffFloat2(c1,c2));

				if (weight > 0.0f) {
					wpos += weight* (camera.pose * camPos);
					weights += weight;
				}
			}
		}
	}

	return weights;
}
/**
 * MLS position accumulation from one camera, using colour only as a binary
 * gate (accepted/rejected) rather than as a multiplicative weight, and also
 * rejecting samples whose normals face away from the query normal.
 *
 * @param cam  Index into the constant-memory camera array c_cameras.
 * @param mPos World-space query position.
 * @param c1   Query colour (gates samples via colourWeighting > 0).
 * @param norm Query normal; samples with dot(norm, sample normal) <= 0 are skipped.
 * @param wpos In/out: weighted world-space position sum is accumulated here.
 * @param h    Spatial smoothing radius passed to spatialWeighting.
 * @return Sum of accepted sample weights (0 if nothing contributed).
 */
__device__ float mlsCameraNoColour(int cam, const float3 &mPos, uchar4 c1, const float4 &norm, float3 &wpos, float h) {
	const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam];

	// Query point in this camera's coordinate frame.
	const float3 pf = camera.poseInverse * mPos;
	float3 pos = make_float3(0.0f, 0.0f, 0.0f);
	const uint2 screenPos = make_uint2(camera.params.cameraToKinectScreenInt(pf));
	float weights = 0.0f;

	//#pragma unroll
	for (int v=-WINDOW_RADIUS; v<=WINDOW_RADIUS; ++v) {
		for (int u=-WINDOW_RADIUS; u<=WINDOW_RADIUS; ++u) {
			//if (screenPos.x+u < width && screenPos.y+v < height) { //on creen
			float depth = tex2D<float>(camera.depth, screenPos.x+u, screenPos.y+v);
			const float3 camPos = camera.params.kinectDepthToSkeleton(screenPos.x+u, screenPos.y+v, depth);

			// TODO:(Nick) dot product of normals < 0 means the point
			// should be ignored with a weight of 0 since it is facing the wrong direction
			// May be good to simply weight using the dot product to give
			// a stronger weight to those whose normals are closer

			float weight = spatialWeighting(length(pf - camPos), h);

			if (weight > 0.0f) {
				float4 n2 = tex2D<float4>(camera.normal, screenPos.x+u, screenPos.y+v);
				// Only accept samples facing the same way as the query normal.
				if (dot(make_float3(norm), make_float3(n2)) > 0.0f) {

					uchar4 c2 = tex2D<uchar4>(camera.colour, screenPos.x+u, screenPos.y+v);

					// Colour acts as a hard gate here, not a weight multiplier.
					if (colourWeighting(colordiffFloat2(c1,c2)) > 0.0f) {
						// Accumulate in camera space; transformed to world space once below.
						pos += weight*camPos; // (camera.pose * camPos);
						weights += weight;
					}
				}
			}
			//}
		}
	}

	// Transform the weighted mean to world space, then re-scale by total weight
	// so the caller can combine contributions from several cameras.
	if (weights > 0.0f) wpos += (camera.pose * (pos / weights)) * weights;

	return weights;
}
/**
 * Variant of mlsCamera that keeps only the single best-weighted sample from
 * the window (winner-takes-all) instead of a weighted average.
 *
 * @param cam  Index into the constant-memory camera array c_cameras.
 * @param mPos World-space query position.
 * @param c1   Query colour for the colour similarity weight.
 * @param wpos In/out: the best sample's weighted world position is added here.
 * @return The weight of the best sample found (0 if none).
 */
__device__ float mlsCameraBest(int cam, const float3 &mPos, uchar4 c1, float3 &wpos) {
	const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam];

	// Query point in this camera's coordinate frame.
	const float3 pf = camera.poseInverse * mPos;
	float3 pos = make_float3(0.0f, 0.0f, 0.0f);
	const uint2 screenPos = make_uint2(camera.params.cameraToKinectScreenInt(pf));
	float weights = 0.0f;

	//#pragma unroll
	for (int v=-WINDOW_RADIUS; v<=WINDOW_RADIUS; ++v) {
		for (int u=-WINDOW_RADIUS; u<=WINDOW_RADIUS; ++u) {
			//if (screenPos.x+u < width && screenPos.y+v < height) { //on screen
			float depth = tex2D<float>(camera.depth, screenPos.x+u, screenPos.y+v);
			const float3 camPos = camera.params.kinectDepthToSkeleton(screenPos.x+u, screenPos.y+v, depth);
			float weight = spatialWeighting(length(pf - camPos));

			if (weight > 0.0f) {
				uchar4 c2 = tex2D<uchar4>(camera.colour, screenPos.x+u, screenPos.y+v);
				weight *= colourWeighting(colordiffFloat2(c1,c2));

				// Track the maximum-weight sample rather than accumulating.
				if (weight > weights) {
					pos = weight* (camera.pose * camPos);
					weights = weight;
				}
			}
			//}
		}
	}

	wpos += pos;
	//wpos += (camera.pose * pos);

	return weights;
}
/**
 * Single-sample MLS contribution using the camera's precomputed world-point
 * map: reads only the one point the query projects onto.
 *
 * @param cam  Index into the constant-memory camera array c_cameras.
 * @param mPos World-space query position.
 * @param c1   Unused; kept for signature parity with the other mlsCamera* helpers.
 * @param wpos In/out: weighted world position is added here when valid.
 * @return The weight contributed (0 for invalid points or zero weight).
 *
 * Fix vs. previous version: removed the unused local `pos` and the redundant
 * `weights` accumulator (only one sample is ever taken).
 */
__device__ float mlsCameraPoint(int cam, const float3 &mPos, uchar4 c1, float3 &wpos) {
	const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam];

	const float3 pf = camera.poseInverse * mPos;  // query in camera space
	const uint2 screenPos = make_uint2(camera.params.cameraToKinectScreenInt(pf));

	const float3 worldPos = make_float3(tex2D<float4>(camera.points, screenPos.x, screenPos.y));
	if (worldPos.z == MINF) return 0.0f;  // invalid / missing point

	const float weight = spatialWeighting(length(mPos - worldPos));
	if (weight > 0.0f) {
		wpos += weight * worldPos;
		return weight;
	}
	return 0.0f;
}
/**
 * Per-pixel MLS smoothing for camera `cam`: back-projects each valid depth
 * pixel to world space, optionally clips it against the world bounding box,
 * refines it against all cameras with mlsCameraNoColour, and writes the
 * result into `output` when the accumulated confidence passes the threshold.
 *
 * One thread per output pixel.
 *
 * Fixes vs. previous version: guard the division by `weights` (previously
 * produced NaN positions when no camera contributed), signed loop counter
 * to match the `int numcams` bound, removed unused `wnorm`.
 */
__global__ void mls_smooth_kernel(ftl::cuda::TextureObject<float4> output, HashParams hashParams, int numcams, int cam) {
	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;

	const int width = output.width();
	const int height = output.height();

	const DepthCameraCUDA &mainCamera = c_cameras[cam];

	if (x < width && y < height) {

		const float depth = tex2D<float>(mainCamera.depth, x, y);
		const uchar4 c1 = tex2D<uchar4>(mainCamera.colour, x, y);
		const float4 norm = tex2D<float4>(mainCamera.normal, x, y);

		float3 wpos = make_float3(0.0f);
		float weights = 0.0f;

		// Only process depths inside the sensor's valid range.
		if (depth >= mainCamera.params.m_sensorDepthWorldMin && depth <= mainCamera.params.m_sensorDepthWorldMax) {
			float3 mPos = mainCamera.pose * mainCamera.params.kinectDepthToSkeleton(x, y, depth);

			// Optional world-space clipping box.
			if ((!(hashParams.m_flags & ftl::voxhash::kFlagClipping)) || (mPos.x > hashParams.m_minBounds.x && mPos.x < hashParams.m_maxBounds.x &&
					mPos.y > hashParams.m_minBounds.y && mPos.y < hashParams.m_maxBounds.y &&
					mPos.z > hashParams.m_minBounds.z && mPos.z < hashParams.m_maxBounds.z)) {

				if (hashParams.m_flags & ftl::voxhash::kFlagMLS) {
					for (int cam2=0; cam2<numcams; ++cam2) {
						weights += mlsCameraNoColour(cam2, mPos, c1, norm, wpos, c_hashParams.m_spatialSmoothing);
					}
					// FIX: avoid NaN when no camera contributed any weight.
					if (weights > 0.0f) wpos /= weights;
				} else {
					// MLS disabled: pass the raw position through with full confidence.
					weights = 1000.0f;
					wpos = mPos;
				}

				if (weights >= hashParams.m_confidenceThresh) output(x,y) = make_float4(wpos, 0.0f);
			}
		}
	}
}
// Host wrapper: launch the MLS smoothing kernel for one camera, one thread
// per output pixel, on the given stream. Synchronises only in debug builds.
void ftl::cuda::mls_smooth(TextureObject<float4> &output, const HashParams &hashParams, int numcams, int cam, cudaStream_t stream) {
	const dim3 threads(T_PER_BLOCK, T_PER_BLOCK);
	const dim3 grid((output.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (output.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);

	mls_smooth_kernel<<<grid, threads, 0, stream>>>(output, hashParams, numcams, cam);

#ifdef _DEBUG
	cudaSafeCall(cudaDeviceSynchronize());
#endif
}
#define RESAMPLE_RADIUS 7
/**
 * Resample an integer (millimetre) depth buffer into a float (metre) depth
 * buffer, filling holes. For each output pixel it scans a window of valid
 * input depths, clusters them into up to 5 depth "layers" (depths within
 * 100mm of each other), records which quadrants of the window each layer
 * appears in, and accepts the nearest layer seen in at least 3 quadrants —
 * i.e. a depth surrounded on most sides by support.
 *
 * One thread per output pixel. `colourin`, `hashParams`, `numcams` and
 * `params` are currently unused by the active code path.
 */
__global__ void mls_resample_kernel(ftl::cuda::TextureObject<int> depthin, ftl::cuda::TextureObject<uchar4> colourin, ftl::cuda::TextureObject<float> depthout, HashParams hashParams, int numcams, SplatParams params) {
	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;

	const int width = depthin.width();
	const int height = depthin.height();

	if (x < width && y < height) {

		//const int depth = depthin.tex2D((int)x, (int)y);
		//if (depth != 0x7FFFFFFF) {
		//	depthout(x,y) = (float)depth / 1000.0f;
		//	return;
		//}

		// One depth layer: representative depth (mm) and a 4-bit quadrant mask.
		struct map_t {
			int d;
			int quad;
		};
		map_t mappings[5];
		int mapidx = 0;

		for (int v=-RESAMPLE_RADIUS; v<=RESAMPLE_RADIUS; ++v) {
			for (int u=-RESAMPLE_RADIUS; u<=RESAMPLE_RADIUS; ++u) {
				const int depth = depthin.tex2D((int)x+u, (int)y+v);
				const uchar4 c1 = colourin.tex2D((int)x+u, (int)y+v);

				// 0x7FFFFFFF marks an invalid / empty depth pixel.
				if (depth != 0x7FFFFFFF) {
					int i=0;
					// Merge into an existing layer if within 100mm of it,
					// flagging which quadrant (u,v sign pair) the sample is in.
					for (i=0; i<mapidx; ++i) {
						if (abs(mappings[i].d - depth) < 100) {
							if (u < 0 && v < 0) mappings[i].quad |= 0x1;
							if (u > 0 && v < 0) mappings[i].quad |= 0x2;
							if (u > 0 && v > 0) mappings[i].quad |= 0x4;
							if (u < 0 && v > 0) mappings[i].quad |= 0x8;
							break;
						}
					}
					// Otherwise start a new layer (up to 5; extras are dropped).
					if (i == mapidx && i < 5) {
						mappings[mapidx].d = depth;
						mappings[mapidx].quad = 0;
						if (u < 0 && v < 0) mappings[mapidx].quad |= 0x1;
						if (u > 0 && v < 0) mappings[mapidx].quad |= 0x2;
						if (u > 0 && v > 0) mappings[mapidx].quad |= 0x4;
						if (u < 0 && v > 0) mappings[mapidx].quad |= 0x8;
						++mapidx;
					} else {
						//printf("EXCEEDED\n");
					}
				}
			}
		}

		// Pick the nearest layer that has support in at least 3 quadrants.
		int bestdepth = 1000000;
		//int count = 0;
		for (int i=0; i<mapidx; ++i) {
			if (__popc(mappings[i].quad) >= 3 && mappings[i].d < bestdepth) bestdepth = mappings[i].d;
			//if (mappings[i].quad == 15 && mappings[i].d < bestdepth) bestdepth = mappings[i].d;
			//if (mappings[i].quad == 15) count ++;
		}

		//depthout(x,y) = (mapidx == 5) ? 3.0f : 0.0f;
		// Convert accepted depth from millimetres to metres; pixels with no
		// accepted layer are left untouched.
		if (bestdepth < 1000000) {
			depthout(x,y) = (float)bestdepth / 1000.0f;
		}
	}
}
// Host wrapper: launch the hole-filling depth resample kernel, one thread
// per input pixel, on the given stream. Synchronises only in debug builds.
void ftl::cuda::mls_resample(const TextureObject<int> &depthin, const TextureObject<uchar4> &colourin, TextureObject<float> &depthout, const HashParams &hashParams, int numcams, const SplatParams &params, cudaStream_t stream) {
	const dim3 threads(T_PER_BLOCK, T_PER_BLOCK);
	const dim3 grid((depthin.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depthin.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);

	mls_resample_kernel<<<grid, threads, 0, stream>>>(depthin, colourin, depthout, hashParams, numcams, params);

#ifdef _DEBUG
	cudaSafeCall(cudaDeviceSynchronize());
#endif
}
/// ===== Median Filter ====== /// ===== Median Filter ======
#define WINDOW_SIZE 3 #define WINDOW_SIZE 3
......
...@@ -10,8 +10,10 @@ namespace cuda { ...@@ -10,8 +10,10 @@ namespace cuda {
void clear_depth(const TextureObject<float> &depth, cudaStream_t stream); void clear_depth(const TextureObject<float> &depth, cudaStream_t stream);
void clear_depth(const TextureObject<int> &depth, cudaStream_t stream); void clear_depth(const TextureObject<int> &depth, cudaStream_t stream);
void clear_to_zero(const ftl::cuda::TextureObject<float> &depth, cudaStream_t stream);
void clear_points(const ftl::cuda::TextureObject<float4> &depth, cudaStream_t stream); void clear_points(const ftl::cuda::TextureObject<float4> &depth, cudaStream_t stream);
void clear_colour(const ftl::cuda::TextureObject<uchar4> &depth, cudaStream_t stream); void clear_colour(const ftl::cuda::TextureObject<uchar4> &depth, cudaStream_t stream);
void clear_colour(const ftl::cuda::TextureObject<float4> &depth, cudaStream_t stream);
void median_filter(const ftl::cuda::TextureObject<int> &in, ftl::cuda::TextureObject<float> &out, cudaStream_t stream); void median_filter(const ftl::cuda::TextureObject<int> &in, ftl::cuda::TextureObject<float> &out, cudaStream_t stream);
...@@ -21,7 +23,7 @@ void float_to_int(const ftl::cuda::TextureObject<float> &in, ftl::cuda::TextureO ...@@ -21,7 +23,7 @@ void float_to_int(const ftl::cuda::TextureObject<float> &in, ftl::cuda::TextureO
void mls_smooth(TextureObject<float4> &output, const ftl::voxhash::HashParams &hashParams, int numcams, int cam, cudaStream_t stream); void mls_smooth(TextureObject<float4> &output, const ftl::voxhash::HashParams &hashParams, int numcams, int cam, cudaStream_t stream);
void mls_resample(const TextureObject<int> &depthin, const TextureObject<uchar4> &colourin, TextureObject<float> &depthout, const ftl::voxhash::HashParams &hashParams, int numcams, const ftl::render::SplatParams &params, cudaStream_t stream); void mls_render_depth(const TextureObject<int> &input, TextureObject<int> &output, const ftl::render::SplatParams &params, int numcams, cudaStream_t stream);
void hole_fill(const TextureObject<int> &depth_in, const TextureObject<float> &depth_out, const DepthCameraParams &params, cudaStream_t stream); void hole_fill(const TextureObject<int> &depth_in, const TextureObject<float> &depth_out, const DepthCameraParams &params, cudaStream_t stream);
......
This diff is collapsed.
#include <ftl/cuda_common.hpp>
#include <ftl/cuda_util.hpp>
#include <ftl/depth_camera.hpp>
#include "depth_camera_cuda.hpp"
#include "mls_cuda.hpp"
#define T_PER_BLOCK 16
#define MINF __int_as_float(0xff800000)
using ftl::voxhash::DepthCameraCUDA;
using ftl::voxhash::HashData;
using ftl::voxhash::HashParams;
using ftl::cuda::TextureObject;
using ftl::render::SplatParams;
extern __constant__ ftl::voxhash::DepthCameraCUDA c_cameras[MAX_CAMERAS];
extern __constant__ HashParams c_hashParams;
/// ===== MLS Smooth
/*
* Kim, K., Chalidabhongse, T. H., Harwood, D., & Davis, L. (2005).
* Real-time foreground-background segmentation using codebook model.
* Real-Time Imaging. https://doi.org/10.1016/j.rti.2004.12.004
*/
/*
 * Kim, K., Chalidabhongse, T. H., Harwood, D., & Davis, L. (2005).
 * Real-time foreground-background segmentation using codebook model.
 * Real-Time Imaging. https://doi.org/10.1016/j.rti.2004.12.004
 *
 * Colour distortion of pb relative to the direction of pa.
 * Fixes vs. previous version: float intrinsics (pow/sqrt promoted to double
 * on device), guard against divide-by-zero when pa is black, and clamp the
 * sqrtf operand which can go slightly negative from rounding.
 */
__device__ float colordiffFloat(const uchar4 &pa, const uchar4 &pb) {
	const float x_2 = pb.x * pb.x + pb.y * pb.y + pb.z * pb.z;
	const float v_2 = pa.x * pa.x + pa.y * pa.y + pa.z * pa.z;
	if (v_2 == 0.0f) return sqrtf(x_2);  // pa is black: distortion is |pb|
	const float xv = pb.x * pa.x + pb.y * pa.y + pb.z * pa.z;
	const float p_2 = (xv * xv) / v_2;
	return sqrtf(fmaxf(x_2 - p_2, 0.0f));
}
// Plain Euclidean distance between two colours in RGB space (alpha ignored).
__device__ float colordiffFloat2(const uchar4 &pa, const uchar4 &pb) {
	const float dx = (float)pa.x - (float)pb.x;
	const float dy = (float)pa.y - (float)pb.y;
	const float dz = (float)pa.z - (float)pb.z;
	return length(make_float3(dx, dy, dz));
}
/*
* Colour weighting as suggested in:
* C. Kuster et al. Spatio-Temporal Geometry Fusion for Multiple Hybrid Cameras using Moving Least Squares Surfaces. 2014.
* c = colour distance
*/
/*
 * Colour weighting as suggested in:
 * C. Kuster et al. Spatio-Temporal Geometry Fusion for Multiple Hybrid Cameras
 * using Moving Least Squares Surfaces. 2014.
 * c = colour distance; quartic falloff reaching 0 at the smoothing radius h.
 */
__device__ float colourWeighting(float c) {
	const float h = c_hashParams.m_colourSmoothing;
	if (c >= h) return 0.0f;
	const float q = 1.0f - (c/h)*(c/h);
	return q*q*q*q;
}
#define WINDOW_RADIUS 5
/**
 * Accumulate an MLS (moving-least-squares) position estimate from one
 * camera's depth map, sampling a (2*WINDOW_RADIUS+1)^2 window around the
 * projection of the query point. Samples are weighted by spatial distance
 * and colour similarity.
 *
 * @param cam  Index into the constant-memory camera array c_cameras.
 * @param mPos World-space query position.
 * @param c1   Colour at the query point, for the colour similarity weight.
 * @param wpos In/out: weighted sum of world-space positions accumulated here.
 * @return Sum of the weights added to wpos (0 if nothing contributed).
 *
 * Fix vs. previous version: removed the unused local `pos`.
 */
__device__ float mlsCamera(int cam, const float3 &mPos, uchar4 c1, float3 &wpos) {
	const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam];

	const float3 pf = camera.poseInverse * mPos;  // query point in camera space
	const uint2 screenPos = make_uint2(camera.params.cameraToKinectScreenInt(pf));
	float weights = 0.0f;

	for (int v=-WINDOW_RADIUS; v<=WINDOW_RADIUS; ++v) {
		for (int u=-WINDOW_RADIUS; u<=WINDOW_RADIUS; ++u) {
			// NOTE(review): no explicit screen-bounds check here — presumably the
			// texture addressing mode handles out-of-range reads; confirm.
			float depth = tex2D<float>(camera.depth, screenPos.x+u, screenPos.y+v);
			const float3 camPos = camera.params.kinectDepthToSkeleton(screenPos.x+u, screenPos.y+v, depth);
			float weight = ftl::cuda::spatialWeighting(length(pf - camPos), c_hashParams.m_spatialSmoothing);

			if (weight > 0.0f) {
				uchar4 c2 = tex2D<uchar4>(camera.colour, screenPos.x+u, screenPos.y+v);
				weight *= colourWeighting(colordiffFloat2(c1,c2));

				if (weight > 0.0f) {
					wpos += weight* (camera.pose * camPos);
					weights += weight;
				}
			}
		}
	}

	return weights;
}
/**
 * MLS position accumulation from one camera, using colour only as a binary
 * gate (accepted/rejected) rather than as a multiplicative weight, and also
 * rejecting samples whose normals face away from the query normal.
 *
 * @param cam  Index into the constant-memory camera array c_cameras.
 * @param mPos World-space query position.
 * @param c1   Query colour (gates samples via colourWeighting > 0).
 * @param norm Query normal; samples with dot(norm, sample normal) <= 0 are skipped.
 * @param wpos In/out: weighted world-space position sum is accumulated here.
 * @param h    Spatial smoothing radius passed to spatialWeighting.
 * @return Sum of accepted sample weights (0 if nothing contributed).
 */
__device__ float mlsCameraNoColour(int cam, const float3 &mPos, uchar4 c1, const float4 &norm, float3 &wpos, float h) {
	const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam];

	// Query point in this camera's coordinate frame.
	const float3 pf = camera.poseInverse * mPos;
	float3 pos = make_float3(0.0f, 0.0f, 0.0f);
	const uint2 screenPos = make_uint2(camera.params.cameraToKinectScreenInt(pf));
	float weights = 0.0f;

	//#pragma unroll
	for (int v=-WINDOW_RADIUS; v<=WINDOW_RADIUS; ++v) {
		for (int u=-WINDOW_RADIUS; u<=WINDOW_RADIUS; ++u) {
			//if (screenPos.x+u < width && screenPos.y+v < height) { //on creen
			float depth = tex2D<float>(camera.depth, screenPos.x+u, screenPos.y+v);
			const float3 camPos = camera.params.kinectDepthToSkeleton(screenPos.x+u, screenPos.y+v, depth);

			// TODO:(Nick) dot product of normals < 0 means the point
			// should be ignored with a weight of 0 since it is facing the wrong direction
			// May be good to simply weight using the dot product to give
			// a stronger weight to those whose normals are closer

			float weight = ftl::cuda::spatialWeighting(length(pf - camPos), h);

			if (weight > 0.0f) {
				float4 n2 = tex2D<float4>(camera.normal, screenPos.x+u, screenPos.y+v);
				// Only accept samples facing the same way as the query normal.
				if (dot(make_float3(norm), make_float3(n2)) > 0.0f) {

					uchar4 c2 = tex2D<uchar4>(camera.colour, screenPos.x+u, screenPos.y+v);

					// Colour acts as a hard gate here, not a weight multiplier.
					if (colourWeighting(colordiffFloat2(c1,c2)) > 0.0f) {
						// Accumulate in camera space; transformed to world space once below.
						pos += weight*camPos; // (camera.pose * camPos);
						weights += weight;
					}
				}
			}
			//}
		}
	}

	// Transform the weighted mean to world space, then re-scale by total weight
	// so the caller can combine contributions from several cameras.
	if (weights > 0.0f) wpos += (camera.pose * (pos / weights)) * weights;

	return weights;
}
/**
 * Per-pixel MLS smoothing for camera `cam`: back-projects each valid depth
 * pixel to world space, optionally clips it against the world bounding box,
 * refines it against all cameras with mlsCameraNoColour, and writes the
 * result into `output` when the accumulated confidence passes the threshold.
 *
 * One thread per output pixel.
 *
 * Fixes vs. previous version: guard the division by `weights` (previously
 * produced NaN positions when no camera contributed), signed loop counter
 * to match the `int numcams` bound, removed unused `wnorm`.
 */
__global__ void mls_smooth_kernel(ftl::cuda::TextureObject<float4> output, HashParams hashParams, int numcams, int cam) {
	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;

	const int width = output.width();
	const int height = output.height();

	const DepthCameraCUDA &mainCamera = c_cameras[cam];

	if (x < width && y < height) {

		const float depth = tex2D<float>(mainCamera.depth, x, y);
		const uchar4 c1 = tex2D<uchar4>(mainCamera.colour, x, y);
		const float4 norm = tex2D<float4>(mainCamera.normal, x, y);

		float3 wpos = make_float3(0.0f);
		float weights = 0.0f;

		// Only process depths inside the sensor's valid range.
		if (depth >= mainCamera.params.m_sensorDepthWorldMin && depth <= mainCamera.params.m_sensorDepthWorldMax) {
			float3 mPos = mainCamera.pose * mainCamera.params.kinectDepthToSkeleton(x, y, depth);

			// Optional world-space clipping box.
			if ((!(hashParams.m_flags & ftl::voxhash::kFlagClipping)) || (mPos.x > hashParams.m_minBounds.x && mPos.x < hashParams.m_maxBounds.x &&
					mPos.y > hashParams.m_minBounds.y && mPos.y < hashParams.m_maxBounds.y &&
					mPos.z > hashParams.m_minBounds.z && mPos.z < hashParams.m_maxBounds.z)) {

				if (hashParams.m_flags & ftl::voxhash::kFlagMLS) {
					for (int cam2=0; cam2<numcams; ++cam2) {
						weights += mlsCameraNoColour(cam2, mPos, c1, norm, wpos, c_hashParams.m_spatialSmoothing);
					}
					// FIX: avoid NaN when no camera contributed any weight.
					if (weights > 0.0f) wpos /= weights;
				} else {
					// MLS disabled: pass the raw position through with full confidence.
					weights = 1000.0f;
					wpos = mPos;
				}

				if (weights >= hashParams.m_confidenceThresh) output(x,y) = make_float4(wpos, 0.0f);
			}
		}
	}
}
// Host wrapper: launch the MLS smoothing kernel for one camera, one thread
// per output pixel, on the given stream. Synchronises only in debug builds.
void ftl::cuda::mls_smooth(TextureObject<float4> &output, const HashParams &hashParams, int numcams, int cam, cudaStream_t stream) {
	const dim3 threads(T_PER_BLOCK, T_PER_BLOCK);
	const dim3 grid((output.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (output.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);

	mls_smooth_kernel<<<grid, threads, 0, stream>>>(output, hashParams, numcams, cam);

#ifdef _DEBUG
	cudaSafeCall(cudaDeviceSynchronize());
#endif
}
// ===== Render Depth using MLS ================================================
#define MAX_UPSAMPLE 5
#define SAMPLE_BUFFER ((2*MAX_UPSAMPLE+1)*(2*MAX_UPSAMPLE+1))
#define WARP_SIZE 32
#define BLOCK_WIDTH 4
#define MLS_RADIUS 5
#define MLS_WIDTH (2*MLS_RADIUS+1)
#define MLS_SAMPLES (MLS_WIDTH*MLS_WIDTH)
/**
 * STUB: intended MLS depth rendering kernel. The entire body is commented
 * out (it references `mainCamera`/`hashParams` that are not parameters here,
 * so it would not compile as-is), making this kernel a no-op. Kept as a
 * sketch of the planned implementation.
 */
__global__ void mls_render_depth_kernel(const TextureObject<int> input, TextureObject<int> output, SplatParams params, int numcams) {
	/*const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;

	const int width = output.width();
	const int height = output.height();

	if (x < width && y < height) {

		const float depth = tex2D<float>(mainCamera.depth, x, y);
		const uchar4 c1 = tex2D<uchar4>(mainCamera.colour, x, y);
		const float4 norm = tex2D<float4>(mainCamera.normal, x, y);
		//if (x == 400 && y == 200) printf("NORMX: %f\n", norm.x);

		float3 wpos = make_float3(0.0f);
		float3 wnorm = make_float3(0.0f);
		float weights = 0.0f;

		if (depth >= mainCamera.params.m_sensorDepthWorldMin && depth <= mainCamera.params.m_sensorDepthWorldMax) {
			float3 mPos = mainCamera.pose * mainCamera.params.kinectDepthToSkeleton(x, y, depth);

			if ((!(hashParams.m_flags & ftl::voxhash::kFlagClipping)) || (mPos.x > hashParams.m_minBounds.x && mPos.x < hashParams.m_maxBounds.x &&
					mPos.y > hashParams.m_minBounds.y && mPos.y < hashParams.m_maxBounds.y &&
					mPos.z > hashParams.m_minBounds.z && mPos.z < hashParams.m_maxBounds.z)) {

				if (hashParams.m_flags & ftl::voxhash::kFlagMLS) {
					for (uint cam2=0; cam2<numcams; ++cam2) {
						//if (cam2 == cam) weights += mlsCameraNoColour(cam2, mPos, c1, wpos, c_hashParams.m_spatialSmoothing*0.1f); //weights += 0.5*mlsCamera(cam2, mPos, c1, wpos);
						weights += mlsCameraNoColour(cam2, mPos, c1, norm, wpos, c_hashParams.m_spatialSmoothing); //*((cam == cam2)? 0.1f : 5.0f));

						// Previous approach
						//if (cam2 == cam) continue;
						//weights += mlsCameraBest(cam2, mPos, c1, wpos);
					}
					wpos /= weights;
				} else {
					weights = 1000.0f;
					wpos = mPos;
				}

				//output(x,y) = (weights >= hashParams.m_confidenceThresh) ? make_float4(wpos, 0.0f) : make_float4(MINF,MINF,MINF,MINF);
				//if (weights >= hashParams.m_confidenceThresh) output(x,y) = make_float4(wpos, 0.0f);

				const uint2 screenPos = make_uint2(mainCamera.params.cameraToKinectScreenInt(mainCamera.poseInverse * wpos));
				if (screenPos.x < output.width() && screenPos.y < output.height()) {
					output(screenPos.x,screenPos.y) = (weights >= hashParams.m_confidenceThresh) ? make_float4(wpos, 0.0f) : make_float4(MINF,MINF,MINF,MINF);
				}
			}
		}
	}*/
}
// Host wrapper for mls_render_depth_kernel.
// NOTE(review): the kernel body is currently entirely commented out, so this
// launch performs no work on `output`.
void ftl::cuda::mls_render_depth(const TextureObject<int> &input, TextureObject<int> &output, const SplatParams &params, int numcams, cudaStream_t stream) {
	// One thread per output pixel; ceil-divide grid covers partial edge tiles.
	const dim3 gridSize((output.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (output.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);

	mls_render_depth_kernel<<<gridSize, blockSize, 0, stream>>>(input, output, params, numcams);

#ifdef _DEBUG
	// Debug builds synchronise to surface asynchronous kernel errors here.
	cudaSafeCall(cudaDeviceSynchronize());
#endif
}
#ifndef _FTL_MLS_CUDA_HPP_
#define _FTL_MLS_CUDA_HPP_
#include <ftl/cuda_util.hpp>
#include <ftl/cuda_common.hpp>
#include <ftl/cuda_matrix_util.hpp>
#include "splat_params.hpp"
// Widen a uchar4 colour to float3 (alpha channel is dropped).
__device__ inline float3 make_float3(const uchar4 &c) {
	return make_float3(float(c.x), float(c.y), float(c.z));
}
// Narrow a float3 colour back to uchar4 with opaque alpha (255).
// NOTE(review): plain float->uchar conversion — no rounding and no clamping;
// assumes each channel is already within [0,255]. Confirm callers guarantee
// this, otherwise values wrap/truncate.
__device__ inline uchar4 make_uchar4(const float3 &c) {
	return make_uchar4(c.x,c.y,c.z,255);
}
namespace ftl {
namespace cuda {
/*
* Guennebaud, G.; Gross, M. Algebraic point set surfaces. ACMTransactions on Graphics Vol. 26, No. 3, Article No. 23, 2007.
* Used in: FusionMLS: Highly dynamic 3D reconstruction with consumer-grade RGB-D cameras
* r = distance between points
* h = smoothing parameter in meters (default 4cm)
*/
/*
 * Guennebaud, G.; Gross, M. Algebraic point set surfaces. ACM Transactions on
 * Graphics Vol. 26, No. 3, Article No. 23, 2007.
 * Used in: FusionMLS: Highly dynamic 3D reconstruction with consumer-grade
 * RGB-D cameras.
 * r = distance between points; h = smoothing radius in metres (default 4cm).
 * Quartic falloff reaching exactly 0 at r == h.
 */
__device__ inline float spatialWeighting(float r, float h) {
	if (r >= h) return 0.0f;
	const float q = 1.0f - (r/h)*(r/h);
	return q*q*q*q;
}
__device__ float colourWeighting(float c);
struct fragment {
float3 point;
float3 normal;
uchar4 colour;
};
/**
 * Bilinear-style interpolation of a point map at fractional coordinate uv,
 * blending the four surrounding texels with weights (1 - distance to uv).
 *
 * NOTE(review): the diagonal texel distance can exceed 1, making its weight
 * negative rather than zero; and if all four weights sum to ~0 the division
 * below divides by zero — confirm uv is always strictly inside the texel cell.
 */
__device__ inline float3 upsampled_point(cudaTextureObject_t pointset, const float2 &uv) {
	float3 R = make_float3(0.0f, 0.0f, 0.0f);
	// Top-left neighbour.
	const float3 P1 = make_float3(tex2D<float4>(pointset, int(uv.x), int(uv.y)));
	const float D1 = 1.0f - length(uv - make_float2(int(uv.x), int(uv.y)));
	R += D1 * P1;
	// Bottom-left neighbour.
	const float3 P2 = make_float3(tex2D<float4>(pointset, int(uv.x), int(uv.y+1.0f)));
	const float D2 = 1.0f - length(uv - make_float2(int(uv.x), int(uv.y+1.0f)));
	R += D2 * P2;
	// Top-right neighbour.
	const float3 P3 = make_float3(tex2D<float4>(pointset, int(uv.x+1.0f), int(uv.y)));
	const float D3 = 1.0f - length(uv - make_float2(int(uv.x+1.0f), int(uv.y)));
	R += D3 * P3;
	// Bottom-right neighbour.
	const float3 P4 = make_float3(tex2D<float4>(pointset, int(uv.x+1.0f), int(uv.y+1.0f)));
	const float D4 = 1.0f - length(uv - make_float2(int(uv.x+1.0f), int(uv.y+1.0f)));
	R += D4 * P4;

	// R is the centroid of surrounding points.
	R /= (D1+D2+D3+D4);

	// FIXME: Should not use centroid but instead sample the surface at this point
	// Use plane estimate at point to get "centroid" and then do the spatial weighted sample?

	return R;
}
/**
 * Overload of upsampled_point that interpolates position, normal and colour
 * together from the four texels surrounding uv, returning them as a fragment.
 * Uses the same (1 - distance) weighting as the position-only overload.
 *
 * NOTE(review): same caveats as the position-only overload — the diagonal
 * weight can go negative and a near-zero weight sum divides by zero.
 */
__device__ inline fragment upsampled_point(cudaTextureObject_t pointset,
		cudaTextureObject_t normals, cudaTextureObject_t colours, const float2 &uv) {
	float3 R = make_float3(0.0f, 0.0f, 0.0f);  // weighted position sum
	float3 N = make_float3(0.0f, 0.0f, 0.0f);  // weighted normal sum
	float3 C = make_float3(0.0f, 0.0f, 0.0f);  // weighted colour sum

	// TODO:(Nick) Don't upsample points if distance is too great

	// Top-left neighbour.
	const float3 P1 = make_float3(tex2D<float4>(pointset, int(uv.x), int(uv.y)));
	const float D1 = 1.0f - length(uv - make_float2(int(uv.x), int(uv.y)));
	R += D1 * P1;
	N += D1 * make_float3(tex2D<float4>(normals, int(uv.x), int(uv.y)));
	C += D1 * make_float3(tex2D<uchar4>(colours, int(uv.x), int(uv.y)));

	// Bottom-left neighbour.
	const float3 P2 = make_float3(tex2D<float4>(pointset, int(uv.x), int(uv.y+1.0f)));
	const float D2 = 1.0f - length(uv - make_float2(int(uv.x), int(uv.y+1.0f)));
	R += D2 * P2;
	N += D2 * make_float3(tex2D<float4>(normals, int(uv.x), int(uv.y+1.0f)));
	C += D2 * make_float3(tex2D<uchar4>(colours, int(uv.x), int(uv.y+1.0f)));

	// Top-right neighbour.
	const float3 P3 = make_float3(tex2D<float4>(pointset, int(uv.x+1.0f), int(uv.y)));
	const float D3 = 1.0f - length(uv - make_float2(int(uv.x+1.0f), int(uv.y)));
	R += D3 * P3;
	N += D3 * make_float3(tex2D<float4>(normals, int(uv.x+1.0f), int(uv.y)));
	C += D3 * make_float3(tex2D<uchar4>(colours, int(uv.x+1.0f), int(uv.y)));

	// Bottom-right neighbour.
	const float3 P4 = make_float3(tex2D<float4>(pointset, int(uv.x+1.0f), int(uv.y+1.0f)));
	const float D4 = 1.0f - length(uv - make_float2(int(uv.x+1.0f), int(uv.y+1.0f)));
	R += D4 * P4;
	N += D4 * make_float3(tex2D<float4>(normals, int(uv.x+1.0f), int(uv.y+1.0f)));
	C += D4 * make_float3(tex2D<uchar4>(colours, int(uv.x+1.0f), int(uv.y+1.0f)));

	// Normalise all three channels by the total weight.
	return {R / (D1+D2+D3+D4), N / (D1+D2+D3+D4), make_uchar4(C / (D1+D2+D3+D4))};
}
/**
 * Project a world-space point into the render view and atomically keep the
 * nearest depth (in millimetres) per output pixel. Points behind the near
 * plane are discarded.
 *
 * NOTE(review): `d * 1000.0f` is a float implicitly converted to int for
 * atomicMin — truncation toward zero; confirm that is the intended rounding.
 */
__device__ inline void render_depth(ftl::cuda::TextureObject<int> &depth, ftl::render::SplatParams &params, const float3 &worldPos) {
	// Transform into the render camera's view space; z is the depth.
	const float3 camPos = params.m_viewMatrix * worldPos;
	const float d = camPos.z;
	if (d < params.camera.m_sensorDepthWorldMin) return;

	// Project to screen coordinates (truncated to the containing pixel).
	const float2 screenPosf = params.camera.cameraToKinectScreenFloat(camPos);
	const uint2 screenPos = make_uint2(make_int2(screenPosf));

	const unsigned int cx = screenPos.x;
	const unsigned int cy = screenPos.y;
	if (cx < depth.width() && cy < depth.height()) {
		// Multiple splats may land on one pixel; keep the closest.
		atomicMin(&depth(cx,cy), d * 1000.0f);
	}
}
/**
 * Second-pass fragment write: re-projects the fragment and writes its colour
 * and normal only if this fragment won the preceding render_depth depth test
 * (i.e. the stored integer depth equals this fragment's depth in mm).
 */
__device__ inline void render_fragment(
		ftl::cuda::TextureObject<int> &depth_in,
		ftl::cuda::TextureObject<float4> &normal_out,
		ftl::cuda::TextureObject<uchar4> &colour_out,
		ftl::render::SplatParams &params, const fragment &frag) {
	// Same projection as render_depth so depths are comparable.
	const float3 camPos = params.m_viewMatrix * frag.point;
	const float d = camPos.z;
	if (d < params.camera.m_sensorDepthWorldMin) return;

	const float2 screenPosf = params.camera.cameraToKinectScreenFloat(camPos);
	const uint2 screenPos = make_uint2(make_int2(screenPosf));

	const unsigned int cx = screenPos.x;
	const unsigned int cy = screenPos.y;
	if (cx < depth_in.width() && cy < depth_in.height()) {
		// Only the fragment that passed the atomicMin depth test writes.
		if (depth_in(cx,cy) == (int)(d * 1000.0f)) {
			colour_out(cx,cy) = frag.colour;
			normal_out(cx,cy) = make_float4(frag.normal, 0.0f);
		}
	}
}
/**
 * Estimate the point set surface location near to a given point, working in
 * screen space: every sample in an RxR window is weighted by the screen-space
 * distance between its projection and the target pixel.
 *
 * @tparam R        Window radius in pixels.
 * @param pointset  Texture of world-space points (float4).
 * @param suv       Target position in floating-point screen coordinates.
 * @param uv        Integer pixel around which to sample.
 * @param params    Render parameters (view matrix + camera intrinsics).
 * @param smoothing Spatial weighting falloff scale.
 * @return Weighted centroid in camera space, or (0,0,0) if no sample had weight.
 */
template <int R>
__device__ float3 screen_centroid(
		cudaTextureObject_t pointset,
		const float2 &suv,
		const int2 &uv,
		const ftl::render::SplatParams &params,
		float smoothing) {
	float3 centroid = make_float3(0.0f, 0.0f, 0.0f);
	float totalWeight = 0.0f;

	for (int dy=-R; dy<=R; ++dy) {
		for (int dx=-R; dx<=R; ++dx) {
			// Transform sample into camera space, then weight by how close its
			// projection lands to the requested screen position.
			const float3 camPoint = params.m_viewMatrix * make_float3(tex2D<float4>(pointset, uv.x+dx, uv.y+dy));
			const float w = ftl::cuda::spatialWeighting(length(suv - params.camera.cameraToKinectScreenFloat(camPoint)), smoothing);
			centroid += w * camPoint;
			totalWeight += w;
		}
	}

	return (totalWeight > 0.0f) ? centroid / totalWeight : centroid;
}
/**
 * Estimate a point set surface point from an existing point in the set,
 * as the MLS weighted average of all points in an RxR texture window
 * centred on uv, anchored at the point stored at uv itself.
 *
 * @tparam R         Window radius in pixels.
 * @param pointset   Texture of world-space points (float4).
 * @param uv         Integer pixel of the anchor point.
 * @param estPoint   Output surface estimate; left untouched if total weight is 0.
 * @param smoothing  Spatial weighting falloff scale.
 * @return Sum of sample weights (0 means no estimate was produced).
 */
template <int R>
__device__ float mls_point_surface(
		cudaTextureObject_t pointset,
		const int2 &uv,
		float3 &estPoint,
		float smoothing) {
	float3 pos = make_float3(0.0f, 0.0f, 0.0f);
	float weights = 0.0f;

	// The point at uv anchors the spatial weighting for the whole window.
	const float3 nearPoint = make_float3(tex2D<float4>(pointset, uv.x, uv.y));

	for (int v=-R; v<=R; ++v) {
		for (int u=-R; u<=R; ++u) {
			const float3 samplePoint = make_float3(tex2D<float4>(pointset, uv.x+u, uv.y+v));
			const float weight = ftl::cuda::spatialWeighting(length(nearPoint - samplePoint), smoothing);
			pos += weight*samplePoint;
			weights += weight;
		}
	}

	if (weights > 0.0f) estPoint = pos / weights;
	return weights;
}
/**
 * Estimate the point set surface location near to a given point: MLS
 * weighted average of all points in an RxR texture window centred on uv,
 * weighted by distance to the supplied nearPoint.
 *
 * @tparam R         Window radius in pixels.
 * @param pointset   Texture of world-space points (float4).
 * @param uv         Integer pixel around which to sample.
 * @param nearPoint  Anchor point for the spatial weighting.
 * @param estPoint   Output surface estimate; left untouched if total weight is 0.
 * @param smoothing  Spatial weighting falloff scale.
 * @return Sum of sample weights (0 means no estimate was produced).
 */
template <int R>
__device__ float mls_point_surface(
		cudaTextureObject_t pointset,
		const int2 &uv,
		const float3 &nearPoint,
		float3 &estPoint,
		float smoothing) {
	float3 accum = make_float3(0.0f, 0.0f, 0.0f);
	float totalWeight = 0.0f;

	for (int dy=-R; dy<=R; ++dy) {
		for (int dx=-R; dx<=R; ++dx) {
			const float3 sample = make_float3(tex2D<float4>(pointset, uv.x+dx, uv.y+dy));
			const float w = ftl::cuda::spatialWeighting(length(nearPoint - sample), smoothing);
			accum += w * sample;
			totalWeight += w;
		}
	}

	if (totalWeight > 0.0f) estPoint = accum / totalWeight;
	return totalWeight;
}
/**
 * Calculate the point sample energy: the total spatial weight of all points
 * in an RxR texture window relative to nearPoint.
 *
 * @tparam R         Window radius in pixels.
 * @param pointset   Texture of world-space points (float4).
 * @param uv         Integer pixel around which to sample.
 * @param nearPoint  Anchor point for the spatial weighting.
 * @param smoothing  Spatial weighting falloff scale.
 * @return Sum of sample weights.
 */
template <int R>
__device__ float mls_point_energy(
		cudaTextureObject_t pointset,
		const int2 &uv,
		const float3 &nearPoint,
		float smoothing) {
	float energy = 0.0f;

	for (int dy=-R; dy<=R; ++dy) {
		for (int dx=-R; dx<=R; ++dx) {
			const float3 sample = make_float3(tex2D<float4>(pointset, uv.x+dx, uv.y+dy));
			energy += ftl::cuda::spatialWeighting(length(nearPoint - sample), smoothing);
		}
	}

	return energy;
}
/**
 * Calculate the point sample energy for a fixed-size array of points rather
 * than a texture window: total spatial weight relative to nearPoint.
 *
 * @tparam M         Number of points in the array (compile-time constant).
 * @param pointset   Array of world-space points.
 * @param nearPoint  Anchor point for the spatial weighting.
 * @param smoothing  Spatial weighting falloff scale.
 * @return Sum of sample weights.
 */
template <int M>
__device__ float mls_point_energy(
		const float3 (&pointset)[M],
		const float3 &nearPoint,
		float smoothing) {
	float energy = 0.0f;

	for (int i=0; i<M; ++i) {
		energy += ftl::cuda::spatialWeighting(length(nearPoint - pointset[i]), smoothing);
	}

	return energy;
}
/**
 * Estimate a point set surface location near an existing point and return
 * also weighted estimates of the normal and colour at that point. The point
 * stored at uv anchors the spatial weighting.
 *
 * @tparam R          Window radius in pixels.
 * @param pointset    Texture of world-space points (float4).
 * @param normalset   Texture of per-point normals (float4).
 * @param colourset   Texture of per-point colours (uchar4).
 * @param uv          Integer pixel of the anchor point.
 * @param estPoint    Output surface estimate.
 * @param estNormal   Output (unnormalised) weighted-average normal.
 * @param estColour   Output weighted-average colour, alpha forced to 255.
 * @param smoothing   Spatial weighting falloff scale.
 * @return Sum of sample weights; outputs untouched when this is 0.
 */
template <int R>
__device__ float mls_point_surface(
		cudaTextureObject_t pointset,
		cudaTextureObject_t normalset,
		cudaTextureObject_t colourset,
		const int2 &uv,
		float3 &estPoint,
		float3 &estNormal,
		uchar4 &estColour,
		float smoothing) {
	float3 posAccum = make_float3(0.0f, 0.0f, 0.0f);
	float3 normAccum = make_float3(0.0f, 0.0f, 0.0f);
	float3 colAccum = make_float3(0.0f, 0.0f, 0.0f);
	float totalWeight = 0.0f;

	// The point at uv anchors the spatial weighting for the whole window.
	const float3 nearPoint = make_float3(tex2D<float4>(pointset, uv.x, uv.y));

	for (int dy=-R; dy<=R; ++dy) {
		for (int dx=-R; dx<=R; ++dx) {
			const float3 sample = make_float3(tex2D<float4>(pointset, uv.x+dx, uv.y+dy));
			const float w = spatialWeighting(length(nearPoint - sample), smoothing);

			// Skip zero-weight samples: avoids the normal/colour texture reads.
			if (w <= 0.0f) continue;

			posAccum += w * sample;
			totalWeight += w;
			normAccum += w * make_float3(tex2D<float4>(normalset, uv.x+dx, uv.y+dy));
			const uchar4 c = tex2D<uchar4>(colourset, uv.x+dx, uv.y+dy);
			colAccum += w * make_float3(c.x, c.y, c.z);
		}
	}

	if (totalWeight > 0.0f) {
		estPoint = posAccum / totalWeight;
		estNormal = normAccum / totalWeight;
		estColour = make_uchar4(colAccum.x / totalWeight, colAccum.y / totalWeight, colAccum.z / totalWeight, 255);
	}
	return totalWeight;
}
/**
 * Estimate a point set surface location near a given point (nearPoint) and
 * return also weighted estimates of the normal and colour at that point.
 *
 * @tparam R          Window radius in pixels.
 * @param pointset    Texture of world-space points (float4).
 * @param normalset   Texture of per-point normals (float4).
 * @param colourset   Texture of per-point colours (uchar4).
 * @param uv          Integer pixel around which to sample.
 * @param nearPoint   Anchor point for the spatial weighting.
 * @param estPoint    Output surface estimate.
 * @param estNormal   Output (unnormalised) weighted-average normal.
 * @param estColour   Output weighted-average colour, alpha forced to 255.
 * @param smoothing   Spatial weighting falloff scale.
 * @return Sum of sample weights; outputs untouched when this is 0.
 */
template <int R>
__device__ float mls_point_surface(
		cudaTextureObject_t pointset,
		cudaTextureObject_t normalset,
		cudaTextureObject_t colourset,
		const int2 &uv,
		const float3 &nearPoint,
		float3 &estPoint,
		float3 &estNormal,
		uchar4 &estColour,
		float smoothing) {
	float3 posAccum = make_float3(0.0f, 0.0f, 0.0f);
	float3 normAccum = make_float3(0.0f, 0.0f, 0.0f);
	float3 colAccum = make_float3(0.0f, 0.0f, 0.0f);
	float totalWeight = 0.0f;

	for (int dy=-R; dy<=R; ++dy) {
		for (int dx=-R; dx<=R; ++dx) {
			const float3 sample = make_float3(tex2D<float4>(pointset, uv.x+dx, uv.y+dy));
			const float w = spatialWeighting(length(nearPoint - sample), smoothing);

			// Skip zero-weight samples: avoids the normal/colour texture reads.
			if (w <= 0.0f) continue;

			posAccum += w * sample;
			totalWeight += w;
			normAccum += w * make_float3(tex2D<float4>(normalset, uv.x+dx, uv.y+dy));
			const uchar4 c = tex2D<uchar4>(colourset, uv.x+dx, uv.y+dy);
			colAccum += w * make_float3(c.x, c.y, c.z);
		}
	}

	if (totalWeight > 0.0f) {
		estPoint = posAccum / totalWeight;
		estNormal = normAccum / totalWeight;
		estColour = make_uchar4(colAccum.x / totalWeight, colAccum.y / totalWeight, colAccum.z / totalWeight, 255);
	}
	return totalWeight;
}
}
}
#endif // _FTL_MLS_CUDA_HPP_
...@@ -9,6 +9,7 @@ namespace ftl { ...@@ -9,6 +9,7 @@ namespace ftl {
namespace render { namespace render {
static const uint kShowBlockBorders = 0x0001; static const uint kShowBlockBorders = 0x0001;
static const uint kNoSplatting = 0x0002;
struct __align__(16) SplatParams { struct __align__(16) SplatParams {
float4x4 m_viewMatrix; float4x4 m_viewMatrix;
...@@ -16,6 +17,7 @@ struct __align__(16) SplatParams { ...@@ -16,6 +17,7 @@ struct __align__(16) SplatParams {
uint m_flags; uint m_flags;
float voxelSize; float voxelSize;
float depthThreshold;
DepthCameraParams camera; DepthCameraParams camera;
}; };
......
...@@ -24,9 +24,18 @@ void Splatter::render(ftl::rgbd::Source *src, cudaStream_t stream) { ...@@ -24,9 +24,18 @@ void Splatter::render(ftl::rgbd::Source *src, cudaStream_t stream) {
if ((unsigned int)depth1_.width() != camera.width || (unsigned int)depth1_.height() != camera.height) { if ((unsigned int)depth1_.width() != camera.width || (unsigned int)depth1_.height() != camera.height) {
depth1_ = ftl::cuda::TextureObject<int>(camera.width, camera.height); depth1_ = ftl::cuda::TextureObject<int>(camera.width, camera.height);
} }
if ((unsigned int)depth3_.width() != camera.width || (unsigned int)depth3_.height() != camera.height) {
depth3_ = ftl::cuda::TextureObject<int>(camera.width, camera.height);
}
if ((unsigned int)colour1_.width() != camera.width || (unsigned int)colour1_.height() != camera.height) { if ((unsigned int)colour1_.width() != camera.width || (unsigned int)colour1_.height() != camera.height) {
colour1_ = ftl::cuda::TextureObject<uchar4>(camera.width, camera.height); colour1_ = ftl::cuda::TextureObject<uchar4>(camera.width, camera.height);
} }
if ((unsigned int)colour_tmp_.width() != camera.width || (unsigned int)colour_tmp_.height() != camera.height) {
colour_tmp_ = ftl::cuda::TextureObject<float4>(camera.width, camera.height);
}
if ((unsigned int)normal1_.width() != camera.width || (unsigned int)normal1_.height() != camera.height) {
normal1_ = ftl::cuda::TextureObject<float4>(camera.width, camera.height);
}
if ((unsigned int)depth2_.width() != camera.width || (unsigned int)depth2_.height() != camera.height) { if ((unsigned int)depth2_.width() != camera.width || (unsigned int)depth2_.height() != camera.height) {
depth2_ = ftl::cuda::TextureObject<float>(camera.width, camera.height); depth2_ = ftl::cuda::TextureObject<float>(camera.width, camera.height);
} }
...@@ -56,30 +65,31 @@ void Splatter::render(ftl::rgbd::Source *src, cudaStream_t stream) { ...@@ -56,30 +65,31 @@ void Splatter::render(ftl::rgbd::Source *src, cudaStream_t stream) {
if (scene_->value("voxels", false)) { if (scene_->value("voxels", false)) {
// TODO:(Nick) Stereo for voxel version // TODO:(Nick) Stereo for voxel version
ftl::cuda::isosurface_point_image(scene_->getHashData(), depth1_, params, stream); ftl::cuda::isosurface_point_image(scene_->getHashData(), depth1_, params, stream);
ftl::cuda::splat_points(depth1_, depth2_, params, stream); //ftl::cuda::splat_points(depth1_, depth2_, params, stream);
ftl::cuda::dibr(depth2_, colour1_, scene_->cameraCount(), params, stream); //ftl::cuda::dibr(depth2_, colour1_, scene_->cameraCount(), params, stream);
src->writeFrames(colour1_, depth2_, stream); src->writeFrames(colour1_, depth2_, stream);
} else { } else {
//ftl::cuda::clear_colour(colour1_, stream);
ftl::cuda::clear_depth(depth1_, stream); ftl::cuda::clear_depth(depth1_, stream);
ftl::cuda::clear_depth(depth3_, stream);
ftl::cuda::clear_depth(depth2_, stream); ftl::cuda::clear_depth(depth2_, stream);
ftl::cuda::dibr(depth1_, colour1_, scene_->cameraCount(), params, stream); ftl::cuda::clear_colour(colour2_, stream);
//ftl::cuda::hole_fill(depth1_, depth2_, params.camera, stream); ftl::cuda::dibr(depth1_, colour1_, normal1_, depth2_, colour_tmp_, scene_->cameraCount(), params, stream);
// Splat the depth from first DIBR to expand the points // Step 1: Put all points into virtual view to gather them
ftl::cuda::splat_points(depth1_, depth2_, params, stream); //ftl::cuda::dibr_raw(depth1_, scene_->cameraCount(), params, stream);
// Alternative to above... // Step 2: For each point, use a warp to do MLS and up sample
//ftl::cuda::mls_resample(depth1_, colour1_, depth2_, scene_->getHashParams(), scene_->cameraCount(), params, stream); //ftl::cuda::mls_render_depth(depth1_, depth3_, params, scene_->cameraCount(), stream);
// Use reverse sampling to obtain more colour details
// Should use nearest neighbor point depths to verify which camera to use
//ftl::cuda::dibr(depth2_, colour1_, scene_->cameraCount(), params, stream);
if (src->getChannel() == ftl::rgbd::kChanDepth) { if (src->getChannel() == ftl::rgbd::kChanDepth) {
ftl::cuda::int_to_float(depth1_, depth2_, 1.0f / 1000.0f, stream); //ftl::cuda::int_to_float(depth1_, depth2_, 1.0f / 1000.0f, stream);
src->writeFrames(colour1_, depth2_, stream); if (src->value("splatting", false)) {
//ftl::cuda::splat_points(depth1_, colour1_, normal1_, depth2_, colour2_, params, stream);
src->writeFrames(colour2_, depth2_, stream);
} else {
ftl::cuda::int_to_float(depth1_, depth2_, 1.0f / 1000.0f, stream);
src->writeFrames(colour1_, depth2_, stream);
}
} else if (src->getChannel() == ftl::rgbd::kChanRight) { } else if (src->getChannel() == ftl::rgbd::kChanRight) {
// Adjust pose to right eye position // Adjust pose to right eye position
Eigen::Affine3f transform(Eigen::Translation3f(camera.baseline,0.0f,0.0f)); Eigen::Affine3f transform(Eigen::Translation3f(camera.baseline,0.0f,0.0f));
...@@ -88,10 +98,16 @@ void Splatter::render(ftl::rgbd::Source *src, cudaStream_t stream) { ...@@ -88,10 +98,16 @@ void Splatter::render(ftl::rgbd::Source *src, cudaStream_t stream) {
params.m_viewMatrixInverse = MatrixConversion::toCUDA(matrix); params.m_viewMatrixInverse = MatrixConversion::toCUDA(matrix);
ftl::cuda::clear_depth(depth1_, stream); ftl::cuda::clear_depth(depth1_, stream);
ftl::cuda::dibr(depth1_, colour2_, scene_->cameraCount(), params, stream); ftl::cuda::dibr(depth1_, colour1_, normal1_, depth2_, colour_tmp_, scene_->cameraCount(), params, stream);
src->writeFrames(colour1_, colour2_, stream); src->writeFrames(colour1_, colour2_, stream);
} else { } else {
src->writeFrames(colour1_, depth2_, stream); if (src->value("splatting", false)) {
//ftl::cuda::splat_points(depth1_, colour1_, normal1_, depth2_, colour2_, params, stream);
src->writeFrames(colour2_, depth2_, stream);
} else {
ftl::cuda::int_to_float(depth1_, depth2_, 1.0f / 1000.0f, stream);
src->writeFrames(colour1_, depth2_, stream);
}
} }
} }
......
...@@ -33,9 +33,12 @@ class Splatter { ...@@ -33,9 +33,12 @@ class Splatter {
private: private:
int device_; int device_;
ftl::cuda::TextureObject<int> depth1_; ftl::cuda::TextureObject<int> depth1_;
ftl::cuda::TextureObject<int> depth3_;
ftl::cuda::TextureObject<uchar4> colour1_; ftl::cuda::TextureObject<uchar4> colour1_;
ftl::cuda::TextureObject<float4> colour_tmp_;
ftl::cuda::TextureObject<float> depth2_; ftl::cuda::TextureObject<float> depth2_;
ftl::cuda::TextureObject<uchar4> colour2_; ftl::cuda::TextureObject<uchar4> colour2_;
ftl::cuda::TextureObject<float4> normal1_;
SplatParams params_; SplatParams params_;
ftl::voxhash::SceneRep *scene_; ftl::voxhash::SceneRep *scene_;
}; };
......
...@@ -10,6 +10,80 @@ ...@@ -10,6 +10,80 @@
namespace ftl { namespace ftl {
namespace cuda { namespace cuda {
__device__ inline bool intersectPlane(const float3 &n, const float3 &p0, const float3 &l0, const float3 &l, float &t) {
// assuming vectors are all normalized
float denom = dot(n, l);
if (denom > 1e-6) {
t = dot(p0 - l0, n) / denom;
return (t >= 0);
}
return false;
}
__device__ inline bool intersectPlane(const float3 &n, const float3 &p0, const float3 &l, float &t) {
// assuming vectors are all normalized
float denom = dot(n, l);
if (denom > 1e-6) {
t = dot(p0, n) / denom;
return (t >= 0);
}
return false;
}
__device__ inline bool intersectDisk(const float3 &n, const float3 &p0, float radius, const float3 &l0, const float3 &l) {
float t = 0;
if (intersectPlane(n, p0, l0, l, t)) {
float3 p = l0 + l * t;
float3 v = p - p0;
float d2 = dot(v, v);
return (sqrt(d2) <= radius);
// or you can use the following optimisation (and precompute radius^2)
// return d2 <= radius2; // where radius2 = radius * radius
}
return false;
}
/**
* Get the radius of a ray intersection with a disk.
* @param n Normalised normal of disk.
* @param p0 Centre of disk in camera space
* @param l Normalised ray direction in camera space
* @return Radius from centre of disk where intersection occurred.
*/
__device__ inline float intersectDistance(const float3 &n, const float3 &p0, const float3 &l0, const float3 &l) {
float t = 0;
if (intersectPlane(n, p0, l0, l, t)) {
const float3 p = l0 + l * t;
const float3 v = p - p0;
const float d2 = dot(v, v);
return sqrt(d2);
// or you can use the following optimisation (and precompute radius^2)
// return d2 <= radius2; // where radius2 = radius * radius
}
return PINF;
}
/**
* Get the radius of a ray intersection with a disk.
* @param n Normalised normal of disk.
* @param p0 Centre of disk in camera space
* @param l Normalised ray direction in camera space
* @return Radius from centre of disk where intersection occurred.
*/
__device__ inline float intersectDistance(const float3 &n, const float3 &p0, const float3 &l) {
float t = 0;
if (intersectPlane(n, p0, l, t)) {
const float3 p = l * t;
const float3 v = p - p0;
const float d2 = dot(v, v);
return sqrt(d2);
// or you can use the following optimisation (and precompute radius^2)
// return d2 <= radius2; // where radius2 = radius * radius
}
return PINF;
}
/** /**
* NOTE: Not strictly isosurface currently since it includes the internals * NOTE: Not strictly isosurface currently since it includes the internals
* of objects up to at most truncation depth. * of objects up to at most truncation depth.
...@@ -26,11 +100,22 @@ void isosurface_point_image(const ftl::voxhash::HashData& hashData, ...@@ -26,11 +100,22 @@ void isosurface_point_image(const ftl::voxhash::HashData& hashData,
// TODO: isosurface_point_cloud // TODO: isosurface_point_cloud
void splat_points(const ftl::cuda::TextureObject<int> &depth_in, void splat_points(const ftl::cuda::TextureObject<int> &depth_in,
const ftl::cuda::TextureObject<uchar4> &colour_in,
const ftl::cuda::TextureObject<float4> &normal_in,
const ftl::cuda::TextureObject<float> &depth_out, const ftl::cuda::TextureObject<float> &depth_out,
const ftl::render::SplatParams &params, cudaStream_t stream); const ftl::cuda::TextureObject<uchar4> &colour_out, const ftl::render::SplatParams &params, cudaStream_t stream);
void dibr(const ftl::cuda::TextureObject<int> &depth_out, void dibr(const ftl::cuda::TextureObject<int> &depth_out,
const ftl::cuda::TextureObject<uchar4> &colour_out, int numcams, const ftl::cuda::TextureObject<uchar4> &colour_out,
const ftl::cuda::TextureObject<float4> &normal_out,
const ftl::cuda::TextureObject<float> &confidence_out,
const ftl::cuda::TextureObject<float4> &tmp_colour, int numcams,
const ftl::render::SplatParams &params, cudaStream_t stream);
/**
* Directly render input depth maps to virtual view with clipping.
*/
void dibr_raw(const ftl::cuda::TextureObject<int> &depth_out, int numcams,
const ftl::render::SplatParams &params, cudaStream_t stream); const ftl::render::SplatParams &params, cudaStream_t stream);
void dibr(const ftl::cuda::TextureObject<float> &depth_out, void dibr(const ftl::cuda::TextureObject<float> &depth_out,
......
#include <ftl/voxel_hash.hpp> #include <ftl/voxel_hash.hpp>
#include <loguru.hpp>
using ftl::voxhash::HashData; using ftl::voxhash::HashData;
using ftl::voxhash::HashParams; using ftl::voxhash::HashParams;
......
...@@ -258,143 +258,4 @@ void ftl::cuda::isosurface_point_image(const ftl::voxhash::HashData& hashData, ...@@ -258,143 +258,4 @@ void ftl::cuda::isosurface_point_image(const ftl::voxhash::HashData& hashData,
#endif #endif
} }
// ---- Pass 2: Expand the point splats ----------------------------------------
#define SPLAT_RADIUS 7
#define SPLAT_BOUNDS (2*SPLAT_RADIUS+T_PER_BLOCK+1)
#define SPLAT_BUFFER_SIZE (SPLAT_BOUNDS*SPLAT_BOUNDS)
#define MAX_VALID 100
/**
 * Squared Euclidean distance between two points; avoids the sqrt needed for
 * a true distance when only relative comparison is required.
 */
__device__ float distance2(float3 a, float3 b) {
	const float dx = a.x-b.x;
	const float dy = a.y-b.y;
	const float dz = a.z-b.z;
	return dx*dx + dy*dy + dz*dz;
}
/**
 * Expand each projected depth point into a splat. Each output pixel searches
 * a (2*SPLAT_RADIUS+1)^2 neighbourhood of back-projected camera-space points
 * (staged in shared memory with an apron) and takes depth from the nearest
 * point whose splat (one voxel radius) covers the pixel.
 *
 * Launch: 2D grid of T_PER_BLOCK x T_PER_BLOCK blocks, one thread per output
 * pixel (see ftl::cuda::splat_points). Requires SPLAT_BUFFER_SIZE float3s of
 * static shared memory per block.
 */
__global__ void splatting_kernel(
		TextureObject<int> depth_in,
		TextureObject<float> depth_out, SplatParams params) {
	// Read an NxN region and
	// - interpolate a depth value for this pixel
	// - interpolate an rgb value for this pixel
	// Must respect depth discontinuities.
	// How much influence a given neighbour has depends on its depth value

	__shared__ float3 positions[SPLAT_BUFFER_SIZE];

	// Flat thread index within the block.
	// NOTE(review): row stride uses blockDim.y, not blockDim.x — correct only
	// while the block is square (T_PER_BLOCK x T_PER_BLOCK); confirm if the
	// launch configuration ever changes.
	const int i = threadIdx.y*blockDim.y + threadIdx.x;
	const int bx = blockIdx.x*blockDim.x;
	const int by = blockIdx.y*blockDim.y;
	const int x = bx + threadIdx.x;
	const int y = by + threadIdx.y;

	// const float camMinDepth = params.camera.m_sensorDepthWorldMin;
	// const float camMaxDepth = params.camera.m_sensorDepthWorldMax;

	// Cooperative load: back-project the block-plus-apron depth window into
	// camera-space points in shared memory. The stride is T_PER_BLOCK (not the
	// full thread count), so elements may be written redundantly by several
	// threads — each writes the same value, so the result is still correct.
	for (int j=i; j<SPLAT_BUFFER_SIZE; j+=T_PER_BLOCK) {
		const unsigned int sx = (j % SPLAT_BOUNDS)+bx-SPLAT_RADIUS;
		const unsigned int sy = (j / SPLAT_BOUNDS)+by-SPLAT_RADIUS;
		if (sx >= depth_in.width() || sy >= depth_in.height()) {
			// Out-of-bounds neighbours become a far sentinel point.
			positions[j] = make_float3(1000.0f,1000.0f, 1000.0f);
		} else {
			// depth_in holds integer millimetres; convert to metres before back-projection.
			positions[j] = params.camera.kinectDepthToSkeleton(sx, sy, (float)depth_in.tex2D((int)sx,(int)sy) / 1000.0f);
		}
	}

	// All threads must finish staging before any thread reads the window.
	__syncthreads();

	if (x >= depth_in.width() || y >= depth_in.height()) return;

	const float voxelSquared = params.voxelSize*params.voxelSize;
	float mindepth = 1000.0f;
	int minidx = -1;
	// float3 minpos;

	//float3 validPos[MAX_VALID];
	int validIndices[MAX_VALID];
	int validix = 0;

	// Find the nearest neighbour point whose splat covers this pixel's ray.
	for (int v=-SPLAT_RADIUS; v<=SPLAT_RADIUS; ++v) {
		for (int u=-SPLAT_RADIUS; u<=SPLAT_RADIUS; ++u) {
			//const int idx = (threadIdx.y+v)*SPLAT_BOUNDS+threadIdx.x+u;
			const int idx = (threadIdx.y+v+SPLAT_RADIUS)*SPLAT_BOUNDS+threadIdx.x+u+SPLAT_RADIUS;

			float3 posp = positions[idx];
			const float d = posp.z;
			//if (d < camMinDepth || d > camMaxDepth) continue;

			// This pixel's ray evaluated at the neighbour's depth.
			float3 pos = params.camera.kinectDepthToSkeleton(x, y, d);
			float dist = distance2(pos, posp);

			if (dist < voxelSquared) {
				// Valid so check for minimum
				//validPos[validix] = pos;
				//validIndices[validix++] = idx;
				if (d < mindepth) {
					mindepth = d;
					minidx = idx;
					// minpos = pos;
				}
			}
		}
	}

	if (minidx == -1) {
		// No splat covers this pixel: mark it empty.
		depth_out(x,y) = 0.0f;
		//colour_out(x,y) = make_uchar4(76,76,82,255);
		return;
	}

	//float3 colour = make_float3(0.0f, 0.0f, 0.0f);
	float depth = 0.0f;
	float contrib = 0.0f;
	float3 pos = params.camera.kinectDepthToSkeleton(x, y, mindepth); // TODO:(Nick) Mindepth assumption is poor choice.

	// Interpolate depth from the winning neighbour only; the loop over all
	// valid neighbours is left commented out.
	//for (int j=0; j<validix; ++j) {
		const int idx = minidx; //validIndices[j];

		float3 posp = positions[idx];
		//float3 pos = params.camera.kinectDepthToSkeleton(x, y, posp.z);
		// NOTE(review): "/ 2*params.voxelSize" parses as (.../2)*voxelSize,
		// not division by (2*voxelSize) — confirm the intended scaling.
		float3 delta = (posp - pos) / 2*params.voxelSize;
		float dist = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;

		// It contributes to pixel
		if (dist < 1.0f && fabs(posp.z - mindepth) < 2*params.voxelSize) {
			// sx/sy only feed the commented-out colour sampling below.
			const unsigned int sx = (idx % SPLAT_BOUNDS)+bx-SPLAT_RADIUS;
			const unsigned int sy = (idx / SPLAT_BOUNDS)+by-SPLAT_RADIUS;

			// Fast and simple trilinear interpolation
			float c = fabs((1.0f - delta.x) * (1.0f - delta.y) * (1.0f - delta.z));
			//uchar4 col = colour_in.tex2D((int)sx, (int)sy);
			//colour.x += col.x*c;
			//colour.y += col.y*c;
			//colour.z += col.z*c;
			contrib += c;
			depth += posp.z * c;
		}
	//}

	// Normalise
	//colour.x /= contrib;
	//colour.y /= contrib;
	//colour.z /= contrib;
	// NOTE(review): if the contribution test above fails, contrib is 0 and
	// this division yields NaN written to depth_out — confirm downstream handling.
	depth /= contrib;

	depth_out(x,y) = depth;
	//colour_out(x,y) = make_uchar4(colour.x, colour.y, colour.z, 255);
}
// Host-side launcher: expands the integer millimetre depth buffer (depth_in)
// into a splatted float depth map (depth_out) on the supplied stream.
void ftl::cuda::splat_points(const TextureObject<int> &depth_in,
		const TextureObject<float> &depth_out, const SplatParams &params, cudaStream_t stream)
{
	// One thread per output pixel, T_PER_BLOCK x T_PER_BLOCK blocks,
	// ceiling-divided grid so the image edge is covered.
	const dim3 gridSize((depth_in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);

	splatting_kernel<<<gridSize, blockSize, 0, stream>>>(depth_in, depth_out, params);
	cudaSafeCall( cudaGetLastError() );  // catches launch-configuration errors

#ifdef _DEBUG
	// Debug builds synchronise to surface asynchronous kernel faults here.
	cudaSafeCall(cudaDeviceSynchronize());
#endif
}
...@@ -68,7 +68,9 @@ class Configurable { ...@@ -68,7 +68,9 @@ class Configurable {
template <typename T> template <typename T>
T value(const std::string &name, T def) { T value(const std::string &name, T def) {
auto r = get<T>(name); auto r = get<T>(name);
return (r) ? *r : def; if (r) return *r;
(*config_)[name] = def;
return def;
} }
/** /**
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment