Skip to content
Snippets Groups Projects
Commit 6fde05a5 authored by Nicolas Pope's avatar Nicolas Pope
Browse files

WIP MLS partially working

parent 60a420bc
No related branches found
No related tags found
1 merge request: !71 "Implements #130 for optional MLS merging"
...@@ -73,6 +73,7 @@ struct __align__(16) HashEntry ...@@ -73,6 +73,7 @@ struct __align__(16) HashEntry
{ {
HashEntryHead head; HashEntryHead head;
uint voxels[16]; // 512 bits, 1 bit per voxel uint voxels[16]; // 512 bits, 1 bit per voxel
uint validity[16]; // Is the voxel valid, 512 bit
/*__device__ void operator=(const struct HashEntry& e) { /*__device__ void operator=(const struct HashEntry& e) {
((long long*)this)[0] = ((const long long*)&e)[0]; ((long long*)this)[0] = ((const long long*)&e)[0];
......
...@@ -47,10 +47,10 @@ __device__ bool colordiff(const uchar4 &pa, const uchar3 &pb, float epsilon) { ...@@ -47,10 +47,10 @@ __device__ bool colordiff(const uchar4 &pa, const uchar3 &pb, float epsilon) {
*/ */
// Radial MLS support weight: (1 - (r/h)^2)^4 for r < h, zero outside the
// smoothing radius h (c_hashParams.m_spatialSmoothing). Compactly supported,
// smoothly decaying to 0 at r == h.
__device__ float spatialWeighting(float r) {
	const float h = c_hashParams.m_spatialSmoothing;
	if (r >= h) return 0.0f;	// outside the kernel support
	const float q = r / h;
	const float t = 1.0f - q*q;
	return t*t*t*t;
}
...@@ -103,6 +103,10 @@ __global__ void integrateDepthMapsKernel(HashData hashData, HashParams hashParam ...@@ -103,6 +103,10 @@ __global__ void integrateDepthMapsKernel(HashData hashData, HashParams hashParam
//printf("screen pos %d\n", color.x); //printf("screen pos %d\n", color.x);
//return; //return;
// TODO:(Nick) Accumulate weighted positions
// TODO:(Nick) Accumulate weighted normals
// TODO:(Nick) Accumulate weights
// Depth is within accepted max distance from camera // Depth is within accepted max distance from camera
if (depth > 0.01f && depth < hashParams.m_maxIntegrationDistance) { // valid depth and color (Nick: removed colour check) if (depth > 0.01f && depth < hashParams.m_maxIntegrationDistance) { // valid depth and color (Nick: removed colour check)
//camdepths[count] = depth; //camdepths[count] = depth;
...@@ -181,6 +185,102 @@ __global__ void integrateDepthMapsKernel(HashData hashData, HashParams hashParam ...@@ -181,6 +185,102 @@ __global__ void integrateDepthMapsKernel(HashData hashData, HashParams hashParam
} }
} }
#define WINDOW_RADIUS 7

/**
 * Moving-Least-Squares surface estimation over allocated SDF blocks.
 *
 * For every voxel of every compactified hash block, accumulates spatially
 * weighted world positions (and normals) from a (2*WINDOW_RADIUS+1)^2 window
 * in each camera's depth map, derives a signed-distance estimate, and stores
 * per-block solid/validity bit masks plus a surface flag.
 *
 * Launch expectations: one 1D CUDA block per SDF block (grid-stride over
 * NUM_CUDA_BLOCKS); blockDim.x must equal the voxel count of an SDF block
 * (512, matching the 16 x 32-bit mask words) -- TODO confirm launch config.
 */
__global__ void integrateMLSKernel(HashData hashData, HashParams hashParams, int numcams) {
	// Per-warp ballot results, 1 bit per voxel (512 bits = 16 words).
	__shared__ uint voxels[16];
	__shared__ uint valid[16];

	const uint i = threadIdx.x; // linear voxel index inside an SDF block
	const int3 po = make_int3(hashData.delinearizeVoxelIndex(i));

	// Stride over all allocated blocks
	for (int bi=blockIdx.x; bi<*hashData.d_hashCompactifiedCounter; bi+=NUM_CUDA_BLOCKS) {

		// World-space position of this thread's voxel.
		const int3 pi_base = hashData.SDFBlockToVirtualVoxelPos(make_int3(hashData.d_hashCompactified[bi]->head.posXYZ));
		const int3 pi = pi_base + po;
		const float3 pfb = hashData.virtualVoxelPosToWorld(pi);

		// Weighted accumulators over all cameras and window samples.
		float3 wpos = make_float3(0.0f);
		float3 wnorm = make_float3(0.0f);	// accumulated but not yet used for sdf below
		float weights = 0.0f;

		for (int cam=0; cam<numcams; ++cam) {
			const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam];
			const int height = (int)camera.params.m_imageHeight;
			const int width = (int)camera.params.m_imageWidth;

			const float3 pf = camera.poseInverse * pfb;
			const uint2 screenPos = make_uint2(camera.params.cameraToKinectScreenInt(pf));

			for (int v=-WINDOW_RADIUS; v<=WINDOW_RADIUS; ++v) {
				for (int u=-WINDOW_RADIUS; u<=WINDOW_RADIUS; ++u) {
					// Explicit signed bounds check. The original relied on
					// unsigned wrap-around of screenPos.x+u to reject
					// negative coordinates, which is fragile.
					const int sx = (int)screenPos.x + u;
					const int sy = (int)screenPos.y + v;

					if (sx >= 0 && sx < width && sy >= 0 && sy < height) { //on screen
						const float depth = tex2D<float>(camera.depth, sx, sy);
						const float4 normal = tex2D<float4>(camera.normal, sx, sy);
						const float3 worldPos = camera.pose * camera.params.kinectDepthToSkeleton(sx, sy, depth);
						const float weight = spatialWeighting(length(pfb - worldPos));

						wpos += weight*worldPos;
						wnorm += weight*make_float3(normal);
						weights += weight;
					}
				}
			}
		}

		// Guard the normalisation: with zero support the old code divided by
		// zero, producing a NaN sdf that the ballot below classified as
		// *solid* (NaN >= 0.0f is false). Treat unsupported voxels as empty;
		// they also fail the weights >= 2 validity threshold regardless.
		float sdf = 1000.0f;	// large positive => empty
		if (weights > 0.0f) {
			wpos /= weights;
			wnorm /= weights;
			sdf = length(pfb - wpos); //wnorm.x * (pfb.x - wpos.x) + wnorm.y * (pfb.y - wpos.y) + wnorm.z * (pfb.z - wpos.z);
		}

		//printf("WEIGHTS: %f\n", weights);

		// Calculate voxel sign values across a warp (full-warp masks: every
		// lane reaches these ballots).
		const int warpNum = i / WARP_SIZE;
		const uint solid_ballot = __ballot_sync(0xFFFFFFFF, (sdf >= 0.0f) ? 0 : 1);
		const uint valid_ballot = __ballot_sync(0xFFFFFFFF, (weights >= 2.0f) ? 1 : 0);

		// Aggregate each warp result into the shared voxel masks.
		if (i % WARP_SIZE == 0) {
			voxels[warpNum] = solid_ballot;
			valid[warpNum] = valid_ballot;
		}

		__syncthreads();

		// Work out if block is occupied or not and save voxel masks
		// TODO:(Nick) Is it faster to do this in a separate garbage kernel?
		if (i < 16) {
			const uint v = voxels[i];
			hashData.d_hashCompactified[bi]->voxels[i] = v;
			hashData.d_hashCompactified[bi]->validity[i] = valid[i];
			
			// Surface iff the block is neither all-solid nor all-empty.
			// Mask covers lanes 0-15 only; lanes 16-31 do not participate.
			const uint mask = 0x0000FFFF;
			const uint b1 = __ballot_sync(mask, v == 0xFFFFFFFF);
			const uint b2 = __ballot_sync(mask, v == 0);
			if (i == 0) {
				if (b1 != mask && b2 != mask) hashData.d_hashCompactified[bi]->head.flags |= ftl::voxhash::kFlagSurface;
				else hashData.d_hashCompactified[bi]->head.flags &= ~ftl::voxhash::kFlagSurface;
			}
		}

		// Barrier before the next stride iteration: without it, next
		// iteration's warp leaders can overwrite voxels[]/valid[] while the
		// i < 16 readers above are still consuming them (shared-memory race).
		__syncthreads();
	}
}
void ftl::cuda::integrateDepthMaps(HashData& hashData, const HashParams& hashParams, int numcams, cudaStream_t stream) { void ftl::cuda::integrateDepthMaps(HashData& hashData, const HashParams& hashParams, int numcams, cudaStream_t stream) {
...@@ -189,7 +289,7 @@ const dim3 gridSize(NUM_CUDA_BLOCKS, 1); ...@@ -189,7 +289,7 @@ const dim3 gridSize(NUM_CUDA_BLOCKS, 1);
const dim3 blockSize(threadsPerBlock, 1); const dim3 blockSize(threadsPerBlock, 1);
//if (hashParams.m_numOccupiedBlocks > 0) { //this guard is important if there is no depth in the current frame (i.e., no blocks were allocated) //if (hashParams.m_numOccupiedBlocks > 0) { //this guard is important if there is no depth in the current frame (i.e., no blocks were allocated)
integrateDepthMapsKernel << <gridSize, blockSize, 0, stream >> >(hashData, hashParams, numcams); integrateMLSKernel << <gridSize, blockSize, 0, stream >> >(hashData, hashParams, numcams);
//} //}
//cudaSafeCall( cudaGetLastError() ); //cudaSafeCall( cudaGetLastError() );
......
...@@ -79,6 +79,7 @@ __device__ inline bool getVoxel(uint *voxels, int ix) { ...@@ -79,6 +79,7 @@ __device__ inline bool getVoxel(uint *voxels, int ix) {
__global__ void isosurface_image_kernel(ftl::voxhash::HashData hashData, TextureObject<uint> depth, SplatParams params) { __global__ void isosurface_image_kernel(ftl::voxhash::HashData hashData, TextureObject<uint> depth, SplatParams params) {
// TODO:(Nick) Reduce bank conflicts by aligning these // TODO:(Nick) Reduce bank conflicts by aligning these
__shared__ uint voxels[16]; __shared__ uint voxels[16];
__shared__ uint valid[16];
__shared__ ftl::voxhash::HashEntryHead block; __shared__ ftl::voxhash::HashEntryHead block;
// Stride over all allocated blocks // Stride over all allocated blocks
...@@ -88,7 +89,10 @@ __global__ void isosurface_image_kernel(ftl::voxhash::HashData hashData, Texture ...@@ -88,7 +89,10 @@ __global__ void isosurface_image_kernel(ftl::voxhash::HashData hashData, Texture
const uint i = threadIdx.x; //inside of an SDF block const uint i = threadIdx.x; //inside of an SDF block
if (i == 0) block = hashData.d_hashCompactified[bi]->head; if (i == 0) block = hashData.d_hashCompactified[bi]->head;
if (i < 16) voxels[i] = hashData.d_hashCompactified[bi]->voxels[i]; if (i < 16) {
voxels[i] = hashData.d_hashCompactified[bi]->voxels[i];
valid[i] = hashData.d_hashCompactified[bi]->validity[i];
}
// Make sure all hash entries are cached // Make sure all hash entries are cached
__syncthreads(); __syncthreads();
...@@ -118,7 +122,7 @@ __global__ void isosurface_image_kernel(ftl::voxhash::HashData hashData, Texture ...@@ -118,7 +122,7 @@ __global__ void isosurface_image_kernel(ftl::voxhash::HashData hashData, Texture
bool is_surface = false; //((params.m_flags & ftl::render::kShowBlockBorders) && edgeX + edgeY + edgeZ >= 2); bool is_surface = false; //((params.m_flags & ftl::render::kShowBlockBorders) && edgeX + edgeY + edgeZ >= 2);
//if (is_surface) color = make_uchar4(255,(vp.x == 0 && vp.y == 0 && vp.z == 0) ? 255 : 0,0,255); //if (is_surface) color = make_uchar4(255,(vp.x == 0 && vp.y == 0 && vp.z == 0) ? 255 : 0,0,255);
if (!is_surface && v) continue; if (!getVoxel(valid, i)) continue;
//if (vp.z == 7) voxels[j].color = make_uchar3(0,255,(voxels[j].sdf < 0.0f) ? 255 : 0); //if (vp.z == 7) voxels[j].color = make_uchar3(0,255,(voxels[j].sdf < 0.0f) ? 255 : 0);
...@@ -136,7 +140,7 @@ __global__ void isosurface_image_kernel(ftl::voxhash::HashData hashData, Texture ...@@ -136,7 +140,7 @@ __global__ void isosurface_image_kernel(ftl::voxhash::HashData hashData, Texture
//if (uvi.x == 8 || uvi.z == 8 || uvi.y == 8) continue; //if (uvi.x == 8 || uvi.z == 8 || uvi.y == 8) continue;
const bool vox = getVoxel(voxels, hashData.linearizeVoxelPos(uvi)); const bool vox = getVoxel(voxels, hashData.linearizeVoxelPos(uvi));
if (vox) { if (getVoxel(valid, hashData.linearizeVoxelPos(uvi))) {
is_surface = true; is_surface = true;
// Should break but is slower? // Should break but is slower?
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment