Commit fc934762 authored by Nicolas Pope

Resolves #63 bucket removal

parent 7668ea66
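In summary, this commit removes the fixed-size hash buckets (HASH_BUCKET_SIZE entries per bucket) and the HANDLE_COLLISIONS linked-list handling from the voxel hash: the table now stores one HashEntry per hash slot, and colliding blocks are chained through each entry's offset field with a small bounded walk (COLLISION_LIST_SIZE in the new voxel_hash.cu). Below is a minimal host-side sketch of that lookup scheme using the same spatial hash; the types and names are illustrative only and are not part of the commit.

// Hypothetical illustration (not part of the commit): one entry per hash slot,
// collisions resolved by following each entry's offset, capped at a few steps.
#include <cstdint>
#include <cstdio>
#include <vector>

struct Entry {
	int x, y, z;   // SDF block position
	int offset;    // 0 = end of chain, otherwise jump to another slot
	int ptr;       // -2 when the slot is free (FREE_ENTRY in the real code)
};

static const int kFree = -2;
static const int kMaxChain = 5;   // mirrors COLLISION_LIST_SIZE

static uint32_t hashPos(int x, int y, int z, size_t numBuckets) {
	const int p0 = 73856093, p1 = 19349669, p2 = 83492791;
	int res = ((x * p0) ^ (y * p1) ^ (z * p2)) % (int)numBuckets;
	if (res < 0) res += (int)numBuckets;
	return (uint32_t)res;
}

// Follow the offset chain from the hashed slot; return the slot index or -1.
static int findEntry(const std::vector<Entry> &table, int x, int y, int z) {
	size_t i = hashPos(x, y, z, table.size());
	for (int iter = 0; iter < kMaxChain; ++iter) {
		const Entry &e = table[i];
		if (e.ptr != kFree && e.x == x && e.y == y && e.z == z) return (int)i;
		if (e.offset == 0) break;
		i = (i + e.offset) % table.size();
	}
	return -1;
}

int main() {
	std::vector<Entry> table(8, Entry{0, 0, 0, 0, kFree});
	uint32_t h = hashPos(1, 2, 3, table.size());
	table[h] = Entry{1, 2, 3, 0, 0};   // pretend a block was allocated at this slot
	std::printf("found at slot %d\n", findEntry(table, 1, 2, 3));
	return 0;
}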
@@ -11,6 +11,8 @@ set(REPSRC
 	src/integrators.cu
 	src/ray_cast_sdf.cu
 	src/camera_util.cu
+	src/voxel_hash.cu
+	src/voxel_hash.cpp
 	src/ray_cast_sdf.cpp
 	src/registration.cpp
 	src/virtual_source.cpp
@@ -662,6 +662,11 @@ inline __host__ __device__ void operator/=(int3 &a, int s)
 	a.x /= s; a.y /= s; a.z /= s;
 }

+// Comparison
+inline __host__ __device__ bool operator==(const int3 &a, const int3 &b) {
+	return a.x == b.x && a.y == b.y && a.z == b.z;
+}
+
 // clamp
 inline __device__ __host__ int clamp(int f, int a, int b)
 {
@@ -36,9 +36,7 @@ typedef signed char schar;
 #include <ftl/depth_camera.hpp>

-#define HANDLE_COLLISIONS
 #define SDF_BLOCK_SIZE 8
-#define HASH_BUCKET_SIZE 10

 #ifndef MINF
 #define MINF __int_as_float(0xff800000)
@@ -109,94 +107,30 @@ struct HashData {
 		m_bIsOnGPU = false;
 	}

-	__host__
-	void allocate(const HashParams& params, bool dataOnGPU = true) {
-		m_bIsOnGPU = dataOnGPU;
-		if (m_bIsOnGPU) {
cudaSafeCall(cudaMalloc(&d_heap, sizeof(unsigned int) * params.m_numSDFBlocks));
cudaSafeCall(cudaMalloc(&d_heapCounter, sizeof(unsigned int)));
cudaSafeCall(cudaMalloc(&d_hash, sizeof(HashEntry)* params.m_hashNumBuckets * params.m_hashBucketSize));
cudaSafeCall(cudaMalloc(&d_hashDecision, sizeof(int)* params.m_hashNumBuckets * params.m_hashBucketSize));
cudaSafeCall(cudaMalloc(&d_hashDecisionPrefix, sizeof(int)* params.m_hashNumBuckets * params.m_hashBucketSize));
cudaSafeCall(cudaMalloc(&d_hashCompactified, sizeof(HashEntry)* params.m_hashNumBuckets * params.m_hashBucketSize));
cudaSafeCall(cudaMalloc(&d_hashCompactifiedCounter, sizeof(int)));
cudaSafeCall(cudaMalloc(&d_SDFBlocks, sizeof(Voxel) * params.m_numSDFBlocks * params.m_SDFBlockSize*params.m_SDFBlockSize*params.m_SDFBlockSize));
cudaSafeCall(cudaMalloc(&d_hashBucketMutex, sizeof(int)* params.m_hashNumBuckets));
} else {
d_heap = new unsigned int[params.m_numSDFBlocks];
d_heapCounter = new unsigned int[1];
d_hash = new HashEntry[params.m_hashNumBuckets * params.m_hashBucketSize];
d_hashDecision = new int[params.m_hashNumBuckets * params.m_hashBucketSize];
d_hashDecisionPrefix = new int[params.m_hashNumBuckets * params.m_hashBucketSize];
d_hashCompactified = new HashEntry[params.m_hashNumBuckets * params.m_hashBucketSize];
d_hashCompactifiedCounter = new int[1];
d_SDFBlocks = new Voxel[params.m_numSDFBlocks * params.m_SDFBlockSize*params.m_SDFBlockSize*params.m_SDFBlockSize];
d_hashBucketMutex = new int[params.m_hashNumBuckets];
}
updateParams(params);
}
__host__
void updateParams(const HashParams& params) {
if (m_bIsOnGPU) {
updateConstantHashParams(params);
}
}
__host__
void free() {
if (m_bIsOnGPU) {
cudaSafeCall(cudaFree(d_heap));
cudaSafeCall(cudaFree(d_heapCounter));
cudaSafeCall(cudaFree(d_hash));
cudaSafeCall(cudaFree(d_hashDecision));
cudaSafeCall(cudaFree(d_hashDecisionPrefix));
cudaSafeCall(cudaFree(d_hashCompactified));
cudaSafeCall(cudaFree(d_hashCompactifiedCounter));
cudaSafeCall(cudaFree(d_SDFBlocks));
cudaSafeCall(cudaFree(d_hashBucketMutex));
} else {
if (d_heap) delete[] d_heap;
if (d_heapCounter) delete[] d_heapCounter;
if (d_hash) delete[] d_hash;
if (d_hashDecision) delete[] d_hashDecision;
if (d_hashDecisionPrefix) delete[] d_hashDecisionPrefix;
if (d_hashCompactified) delete[] d_hashCompactified;
if (d_hashCompactifiedCounter) delete[] d_hashCompactifiedCounter;
if (d_SDFBlocks) delete[] d_SDFBlocks;
if (d_hashBucketMutex) delete[] d_hashBucketMutex;
}
		d_hash = NULL;
d_heap = NULL;
d_heapCounter = NULL;
d_hashDecision = NULL;
d_hashDecisionPrefix = NULL;
d_hashCompactified = NULL;
d_hashCompactifiedCounter = NULL;
d_SDFBlocks = NULL;
d_hashBucketMutex = NULL;
}
	__host__
HashData copyToCPU() const {
HashParams params;
		HashData hashData;
		hashData.allocate(params, false); //allocate the data on the CPU
		cudaSafeCall(cudaMemcpy(hashData.d_heap, d_heap, sizeof(unsigned int) * params.m_numSDFBlocks, cudaMemcpyDeviceToHost));
		cudaSafeCall(cudaMemcpy(hashData.d_heapCounter, d_heapCounter, sizeof(unsigned int), cudaMemcpyDeviceToHost));
cudaSafeCall(cudaMemcpy(hashData.d_hash, d_hash, sizeof(HashEntry)* params.m_hashNumBuckets * params.m_hashBucketSize, cudaMemcpyDeviceToHost));
cudaSafeCall(cudaMemcpy(hashData.d_hashDecision, d_hashDecision, sizeof(int)*params.m_hashNumBuckets * params.m_hashBucketSize, cudaMemcpyDeviceToHost));
cudaSafeCall(cudaMemcpy(hashData.d_hashDecisionPrefix, d_hashDecisionPrefix, sizeof(int)*params.m_hashNumBuckets * params.m_hashBucketSize, cudaMemcpyDeviceToHost));
cudaSafeCall(cudaMemcpy(hashData.d_hashCompactified, d_hashCompactified, sizeof(HashEntry)* params.m_hashNumBuckets * params.m_hashBucketSize, cudaMemcpyDeviceToHost));
cudaSafeCall(cudaMemcpy(hashData.d_hashCompactifiedCounter, d_hashCompactifiedCounter, sizeof(unsigned int), cudaMemcpyDeviceToHost));
cudaSafeCall(cudaMemcpy(hashData.d_SDFBlocks, d_SDFBlocks, sizeof(Voxel) * params.m_numSDFBlocks * params.m_SDFBlockSize*params.m_SDFBlockSize*params.m_SDFBlockSize, cudaMemcpyDeviceToHost));
cudaSafeCall(cudaMemcpy(hashData.d_hashBucketMutex, d_hashBucketMutex, sizeof(int)* params.m_hashNumBuckets, cudaMemcpyDeviceToHost));
		return hashData; //TODO MATTHIAS look at this (i.e,. when does memory get destroyed ; if it's in the destructer it would kill everything here
	}
+	/**
+	 * Create all the data structures, either on GPU or system memory.
+	 */
+	__host__ void allocate(const HashParams& params, bool dataOnGPU = true);
+
+	__host__ void updateParams(const HashParams& params);
+
+	__host__ void free();
+
+	/**
+	 * Download entire hash table from GPU to CPU memory.
+	 */
+	__host__ HashData download() const;
+
+	/**
+	 * Upload entire hash table from CPU to GPU memory.
+	 */
+	__host__ HashData upload() const;
+
+	__host__ size_t getAllocatedBlocks() const;
+	__host__ size_t getFreeBlocks() const;
+	__host__ size_t getCollisionCount() const;
@@ -218,8 +152,8 @@ struct HashData {
 		const int p1 = 19349669;
 		const int p2 = 83492791;

-		int res = ((virtualVoxelPos.x * p0) ^ (virtualVoxelPos.y * p1) ^ (virtualVoxelPos.z * p2)) % c_hashParams.m_hashNumBuckets;
-		if (res < 0) res += c_hashParams.m_hashNumBuckets;
+		int res = ((virtualVoxelPos.x * p0) ^ (virtualVoxelPos.y * p1) ^ (virtualVoxelPos.z * p2)) % params().m_hashNumBuckets;
+		if (res < 0) res += params().m_hashNumBuckets;
 		return (uint)res;
 	}
@@ -261,26 +195,26 @@ struct HashData {
 		out.color.z = (v0.weight > 0) ? (uchar)(c0.z * factor0 + c1.z * factor1) : c1.z;*/

 		out.sdf = (v0.sdf * (float)v0.weight + v1.sdf * (float)v1.weight) / ((float)v0.weight + (float)v1.weight);
-		out.weight = min(c_hashParams.m_integrationWeightMax, (unsigned int)v0.weight + (unsigned int)v1.weight);
+		out.weight = min(params().m_integrationWeightMax, (unsigned int)v0.weight + (unsigned int)v1.weight);
 	}

 	//! returns the truncation of the SDF for a given distance value
 	__device__
 	float getTruncation(float z) const {
-		return c_hashParams.m_truncation + c_hashParams.m_truncScale * z;
+		return params().m_truncation + params().m_truncScale * z;
 	}

 	__device__
 	float3 worldToVirtualVoxelPosFloat(const float3& pos) const {
-		return pos / c_hashParams.m_virtualVoxelSize;
+		return pos / params().m_virtualVoxelSize;
 	}

 	__device__
 	int3 worldToVirtualVoxelPos(const float3& pos) const {
 		//const float3 p = pos*g_VirtualVoxelResolutionScalar;
-		const float3 p = pos / c_hashParams.m_virtualVoxelSize;
+		const float3 p = pos / params().m_virtualVoxelSize;
 		return make_int3(p+make_float3(sign(p))*0.5f);
 	}
@@ -304,7 +238,7 @@ struct HashData {
 	__device__
 	float3 virtualVoxelPosToWorld(const int3& pos) const {
-		return make_float3(pos)*c_hashParams.m_virtualVoxelSize;
+		return make_float3(pos)*params().m_virtualVoxelSize;
 	}

 	__device__
@@ -438,98 +372,15 @@ struct HashData {
 	//! returns the hash entry for a given sdf block id; if there was no hash entry the returned entry will have a ptr with FREE_ENTRY set
 	__device__
-	HashEntry getHashEntryForSDFBlockPos(const int3& sdfBlock) const
{
uint h = computeHashPos(sdfBlock); //hash bucket
uint hp = h * HASH_BUCKET_SIZE; //hash position
HashEntry entry;
entry.pos = sdfBlock;
entry.offset = 0;
entry.ptr = FREE_ENTRY;
for (uint j = 0; j < HASH_BUCKET_SIZE; j++) {
uint i = j + hp;
HashEntry curr = d_hash[i];
if (curr.pos.x == entry.pos.x && curr.pos.y == entry.pos.y && curr.pos.z == entry.pos.z && curr.ptr != FREE_ENTRY) {
return curr;
}
}
#ifdef HANDLE_COLLISIONS
const uint idxLastEntryInBucket = (h+1)*HASH_BUCKET_SIZE - 1;
int i = idxLastEntryInBucket; //start with the last entry of the current bucket
HashEntry curr;
//traverse list until end: memorize idx at list end and memorize offset from last element of bucket to list end
unsigned int maxIter = 0;
uint g_MaxLoopIterCount = c_hashParams.m_hashMaxCollisionLinkedListSize;
#pragma unroll 1
while (maxIter < g_MaxLoopIterCount) {
curr = d_hash[i];
if (curr.pos.x == entry.pos.x && curr.pos.y == entry.pos.y && curr.pos.z == entry.pos.z && curr.ptr != FREE_ENTRY) {
return curr;
}
if (curr.offset == 0) { //we have found the end of the list
break;
}
i = idxLastEntryInBucket + curr.offset; //go to next element in the list
i %= (HASH_BUCKET_SIZE * c_hashParams.m_hashNumBuckets); //check for overflow
maxIter++;
}
#endif
return entry;
	}
+	HashEntry getHashEntryForSDFBlockPos(const int3& sdfBlock) const;
	//for histogram (no collision traversal)
	__device__
-	unsigned int getNumHashEntriesPerBucket(unsigned int bucketID) {
unsigned int h = 0;
for (uint i = 0; i < HASH_BUCKET_SIZE; i++) {
if (d_hash[bucketID*HASH_BUCKET_SIZE+i].ptr != FREE_ENTRY) {
h++;
}
}
return h;
	}
+	unsigned int getNumHashEntriesPerBucket(unsigned int bucketID);
	//for histogram (collisions traversal only)
	__device__
-	unsigned int getNumHashLinkedList(unsigned int bucketID) {
unsigned int listLen = 0;
#ifdef HANDLE_COLLISIONS
const uint idxLastEntryInBucket = (bucketID+1)*HASH_BUCKET_SIZE - 1;
unsigned int i = idxLastEntryInBucket; //start with the last entry of the current bucket
//int offset = 0;
HashEntry curr; curr.offset = 0;
//traverse list until end: memorize idx at list end and memorize offset from last element of bucket to list end
unsigned int maxIter = 0;
uint g_MaxLoopIterCount = c_hashParams.m_hashMaxCollisionLinkedListSize;
#pragma unroll 1
while (maxIter < g_MaxLoopIterCount) {
//offset = curr.offset;
//curr = getHashEntry(g_Hash, i);
curr = d_hash[i];
if (curr.offset == 0) { //we have found the end of the list
break;
}
i = idxLastEntryInBucket + curr.offset; //go to next element in the list
i %= (HASH_BUCKET_SIZE * c_hashParams.m_hashNumBuckets); //check for overflow
listLen++;
maxIter++;
}
#endif
return listLen;
	}
+	unsigned int getNumHashLinkedList(unsigned int bucketID);

	__device__
@@ -547,287 +398,15 @@ struct HashData {
 	//pos in SDF block coordinates
 	__device__
-	void allocBlock(const int3& pos, const uchar frame) {
uint h = computeHashPos(pos); //hash bucket
uint hp = h * HASH_BUCKET_SIZE; //hash position
int firstEmpty = -1;
for (uint j = 0; j < HASH_BUCKET_SIZE; j++) {
uint i = j + hp;
HashEntry& curr = d_hash[i];
//in that case the SDF-block is already allocated and corresponds to the current position -> exit thread
if (curr.pos.x == pos.x && curr.pos.y == pos.y && curr.pos.z == pos.z && curr.ptr != FREE_ENTRY) {
//curr.flags = frame; // Flag block as valid in this frame (Nick)
return;
}
//store the first FREE_ENTRY hash entry
if (firstEmpty == -1 && curr.ptr == FREE_ENTRY) {
firstEmpty = i;
}
}
#ifdef HANDLE_COLLISIONS
//updated variables as after the loop
const uint idxLastEntryInBucket = (h+1)*HASH_BUCKET_SIZE - 1; //get last index of bucket
uint i = idxLastEntryInBucket; //start with the last entry of the current bucket
//int offset = 0;
HashEntry curr; curr.offset = 0;
//traverse list until end: memorize idx at list end and memorize offset from last element of bucket to list end
//int k = 0;
unsigned int maxIter = 0;
uint g_MaxLoopIterCount = c_hashParams.m_hashMaxCollisionLinkedListSize;
#pragma unroll 1
while (maxIter < g_MaxLoopIterCount) {
//offset = curr.offset;
curr = d_hash[i]; //TODO MATTHIAS do by reference
if (curr.pos.x == pos.x && curr.pos.y == pos.y && curr.pos.z == pos.z && curr.ptr != FREE_ENTRY) {
//curr.flags = frame; // Flag block as valid in this frame (Nick)
return;
}
if (curr.offset == 0) { //we have found the end of the list
break;
}
i = idxLastEntryInBucket + curr.offset; //go to next element in the list
i %= (HASH_BUCKET_SIZE * c_hashParams.m_hashNumBuckets); //check for overflow
maxIter++;
}
#endif
if (firstEmpty != -1) { //if there is an empty entry and we haven't allocated the current entry before
//int prevValue = 0;
//InterlockedExchange(d_hashBucketMutex[h], LOCK_ENTRY, prevValue); //lock the hash bucket
int prevValue = atomicExch(&d_hashBucketMutex[h], LOCK_ENTRY);
if (prevValue != LOCK_ENTRY) { //only proceed if the bucket has been locked
HashEntry& entry = d_hash[firstEmpty];
entry.pos = pos;
entry.offset = NO_OFFSET;
entry.flags = 0; // Flag block as valid in this frame (Nick)
entry.ptr = consumeHeap() * SDF_BLOCK_SIZE*SDF_BLOCK_SIZE*SDF_BLOCK_SIZE; //memory alloc
}
return;
}
#ifdef HANDLE_COLLISIONS
//if (i != idxLastEntryInBucket) return;
int offset = 0;
//linear search for free entry
maxIter = 0;
#pragma unroll 1
while (maxIter < g_MaxLoopIterCount) {
offset++;
i = (idxLastEntryInBucket + offset) % (HASH_BUCKET_SIZE * c_hashParams.m_hashNumBuckets); //go to next hash element
if ((offset % HASH_BUCKET_SIZE) == 0) continue; //cannot insert into a last bucket element (would conflict with other linked lists)
curr = d_hash[i];
//if (curr.pos.x == pos.x && curr.pos.y == pos.y && curr.pos.z == pos.z && curr.ptr != FREE_ENTRY) {
// return;
//}
if (curr.ptr == FREE_ENTRY) { //this is the first free entry
//int prevValue = 0;
//InterlockedExchange(g_HashBucketMutex[h], LOCK_ENTRY, prevValue); //lock the original hash bucket
int prevValue = atomicExch(&d_hashBucketMutex[h], LOCK_ENTRY);
if (prevValue != LOCK_ENTRY) {
HashEntry lastEntryInBucket = d_hash[idxLastEntryInBucket];
h = i / HASH_BUCKET_SIZE;
//InterlockedExchange(g_HashBucketMutex[h], LOCK_ENTRY, prevValue); //lock the hash bucket where we have found a free entry
prevValue = atomicExch(&d_hashBucketMutex[h], LOCK_ENTRY);
if (prevValue != LOCK_ENTRY) { //only proceed if the bucket has been locked
HashEntry& entry = d_hash[i];
entry.pos = pos;
entry.offset = lastEntryInBucket.offset;
entry.flags = 0; // Flag block as valid in this frame (Nick)
entry.ptr = consumeHeap() * SDF_BLOCK_SIZE*SDF_BLOCK_SIZE*SDF_BLOCK_SIZE; //memory alloc
lastEntryInBucket.offset = offset;
d_hash[idxLastEntryInBucket] = lastEntryInBucket;
//setHashEntry(g_Hash, idxLastEntryInBucket, lastEntryInBucket);
}
}
return; //bucket was already locked
}
maxIter++;
}
#endif
	}
+	void allocBlock(const int3& pos, const uchar frame);
	//!inserts a hash entry without allocating any memory: used by streaming: TODO MATTHIAS check the atomics in this function
	__device__
-	bool insertHashEntry(HashEntry entry)
{
uint h = computeHashPos(entry.pos);
uint hp = h * HASH_BUCKET_SIZE;
for (uint j = 0; j < HASH_BUCKET_SIZE; j++) {
uint i = j + hp;
//const HashEntry& curr = d_hash[i];
int prevWeight = 0;
//InterlockedCompareExchange(hash[3*i+2], FREE_ENTRY, LOCK_ENTRY, prevWeight);
prevWeight = atomicCAS(&d_hash[i].ptr, FREE_ENTRY, LOCK_ENTRY);
if (prevWeight == FREE_ENTRY) {
d_hash[i] = entry;
//setHashEntry(hash, i, entry);
return true;
}
}
#ifdef HANDLE_COLLISIONS
//updated variables as after the loop
const uint idxLastEntryInBucket = (h+1)*HASH_BUCKET_SIZE - 1; //get last index of bucket
uint i = idxLastEntryInBucket; //start with the last entry of the current bucket
HashEntry curr;
unsigned int maxIter = 0;
//[allow_uav_condition]
uint g_MaxLoopIterCount = c_hashParams.m_hashMaxCollisionLinkedListSize;
#pragma unroll 1
while (maxIter < g_MaxLoopIterCount) { //traverse list until end // why find the end? we you are inserting at the start !!!
//curr = getHashEntry(hash, i);
curr = d_hash[i]; //TODO MATTHIAS do by reference
if (curr.offset == 0) break; //we have found the end of the list
i = idxLastEntryInBucket + curr.offset; //go to next element in the list
i %= (HASH_BUCKET_SIZE * c_hashParams.m_hashNumBuckets); //check for overflow
maxIter++;
}
maxIter = 0;
int offset = 0;
#pragma unroll 1
while (maxIter < g_MaxLoopIterCount) { //linear search for free entry
offset++;
uint i = (idxLastEntryInBucket + offset) % (HASH_BUCKET_SIZE * c_hashParams.m_hashNumBuckets); //go to next hash element
if ((offset % HASH_BUCKET_SIZE) == 0) continue; //cannot insert into a last bucket element (would conflict with other linked lists)
int prevWeight = 0;
//InterlockedCompareExchange(hash[3*i+2], FREE_ENTRY, LOCK_ENTRY, prevWeight); //check for a free entry
uint* d_hashUI = (uint*)d_hash;
prevWeight = prevWeight = atomicCAS(&d_hashUI[3*idxLastEntryInBucket+1], (uint)FREE_ENTRY, (uint)LOCK_ENTRY);
if (prevWeight == FREE_ENTRY) { //if free entry found set prev->next = curr & curr->next = prev->next
//[allow_uav_condition]
//while(hash[3*idxLastEntryInBucket+2] == LOCK_ENTRY); // expects setHashEntry to set the ptr last, required because pos.z is packed into the same value -> prev->next = curr -> might corrput pos.z
HashEntry lastEntryInBucket = d_hash[idxLastEntryInBucket]; //get prev (= lastEntry in Bucket)
int newOffsetPrev = (offset << 16) | (lastEntryInBucket.pos.z & 0x0000ffff); //prev->next = curr (maintain old z-pos)
int oldOffsetPrev = 0;
//InterlockedExchange(hash[3*idxLastEntryInBucket+1], newOffsetPrev, oldOffsetPrev); //set prev offset atomically
uint* d_hashUI = (uint*)d_hash;
oldOffsetPrev = prevWeight = atomicExch(&d_hashUI[3*idxLastEntryInBucket+1], newOffsetPrev);
entry.offset = oldOffsetPrev >> 16; //remove prev z-pos from old offset
//setHashEntry(hash, i, entry); //sets the current hashEntry with: curr->next = prev->next
d_hash[i] = entry;
return true;
}
maxIter++;
}
#endif
return false;
	}
+	bool insertHashEntry(HashEntry entry);
	//! deletes a hash entry position for a given sdfBlock index (returns true uppon successful deletion; otherwise returns false)
	__device__
-	bool deleteHashEntryElement(const int3& sdfBlock) {
uint h = computeHashPos(sdfBlock); //hash bucket
uint hp = h * HASH_BUCKET_SIZE; //hash position
for (uint j = 0; j < HASH_BUCKET_SIZE; j++) {
uint i = j + hp;
const HashEntry& curr = d_hash[i];
if (curr.pos.x == sdfBlock.x && curr.pos.y == sdfBlock.y && curr.pos.z == sdfBlock.z && curr.ptr != FREE_ENTRY) {
#ifndef HANDLE_COLLISIONS
const uint linBlockSize = SDF_BLOCK_SIZE * SDF_BLOCK_SIZE * SDF_BLOCK_SIZE;
appendHeap(curr.ptr / linBlockSize);
//heapAppend.Append(curr.ptr / linBlockSize);
deleteHashEntry(i);
return true;
#endif
#ifdef HANDLE_COLLISIONS
if (curr.offset != 0) { //if there was a pointer set it to the next list element
//int prevValue = 0;
//InterlockedExchange(bucketMutex[h], LOCK_ENTRY, prevValue); //lock the hash bucket
int prevValue = atomicExch(&d_hashBucketMutex[h], LOCK_ENTRY);
if (prevValue == LOCK_ENTRY) return false;
if (prevValue != LOCK_ENTRY) {
const uint linBlockSize = SDF_BLOCK_SIZE * SDF_BLOCK_SIZE * SDF_BLOCK_SIZE;
appendHeap(curr.ptr / linBlockSize);
//heapAppend.Append(curr.ptr / linBlockSize);
int nextIdx = (i + curr.offset) % (HASH_BUCKET_SIZE*c_hashParams.m_hashNumBuckets);
//setHashEntry(hash, i, getHashEntry(hash, nextIdx));
d_hash[i] = d_hash[nextIdx];
deleteHashEntry(nextIdx);
return true;
}
} else {
const uint linBlockSize = SDF_BLOCK_SIZE * SDF_BLOCK_SIZE * SDF_BLOCK_SIZE;
appendHeap(curr.ptr / linBlockSize);
//heapAppend.Append(curr.ptr / linBlockSize);
deleteHashEntry(i);
return true;
}
#endif //HANDLE_COLLSISION
}
}
#ifdef HANDLE_COLLISIONS
const uint idxLastEntryInBucket = (h+1)*HASH_BUCKET_SIZE - 1;
int i = idxLastEntryInBucket;
HashEntry curr;
curr = d_hash[i];
int prevIdx = i;
i = idxLastEntryInBucket + curr.offset; //go to next element in the list
i %= (HASH_BUCKET_SIZE * c_hashParams.m_hashNumBuckets); //check for overflow
unsigned int maxIter = 0;
uint g_MaxLoopIterCount = c_hashParams.m_hashMaxCollisionLinkedListSize;
#pragma unroll 1
while (maxIter < g_MaxLoopIterCount) {
curr = d_hash[i];
//found that dude that we need/want to delete
if (curr.pos.x == sdfBlock.x && curr.pos.y == sdfBlock.y && curr.pos.z == sdfBlock.z && curr.ptr != FREE_ENTRY) {
//int prevValue = 0;
//InterlockedExchange(bucketMutex[h], LOCK_ENTRY, prevValue); //lock the hash bucket
int prevValue = atomicExch(&d_hashBucketMutex[h], LOCK_ENTRY);
if (prevValue == LOCK_ENTRY) return false;
if (prevValue != LOCK_ENTRY) {
const uint linBlockSize = SDF_BLOCK_SIZE * SDF_BLOCK_SIZE * SDF_BLOCK_SIZE;
appendHeap(curr.ptr / linBlockSize);
//heapAppend.Append(curr.ptr / linBlockSize);
deleteHashEntry(i);
HashEntry prev = d_hash[prevIdx];
prev.offset = curr.offset;
//setHashEntry(hash, prevIdx, prev);
d_hash[prevIdx] = prev;
return true;
}
}
if (curr.offset == 0) { //we have found the end of the list
return false; //should actually never happen because we need to find that guy before
}
prevIdx = i;
i = idxLastEntryInBucket + curr.offset; //go to next element in the list
i %= (HASH_BUCKET_SIZE * c_hashParams.m_hashNumBuckets); //check for overflow
maxIter++;
}
#endif // HANDLE_COLLSISION
return false;
	}
+	bool deleteHashEntryElement(const int3& sdfBlock);
#endif //CUDACC
@@ -21,8 +21,8 @@ struct __align__(16) HashParams {
 	float4x4 m_rigidTransformInverse;

 	unsigned int m_hashNumBuckets;
-	unsigned int m_hashBucketSize;
-	unsigned int m_hashMaxCollisionLinkedListSize;
+	unsigned int m_deprecated1;
+	unsigned int m_deprecated2; //m_hashMaxCollisionLinkedListSize;
 	unsigned int m_numSDFBlocks;
 	int m_SDFBlockSize;
@@ -47,16 +47,8 @@ class SceneRep : public ftl::Configurable {
 	// Mark voxels as surfaces
 	// void isosurface();

-	/**
-	 * Note: lastRigidTransform appears to be the estimated camera pose.
-	 * Note: bitMask can be nullptr if not streaming out voxels from GPU
-	 */
-	//void integrate(const Eigen::Matrix4f& lastRigidTransform, const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, unsigned int* d_bitMask);
-
 	void setLastRigidTransform(const Eigen::Matrix4f& lastRigidTransform);
-	//void setLastRigidTransformAndCompactify(const Eigen::Matrix4f& lastRigidTransform, const DepthCameraData& depthCameraData);

 	const Eigen::Matrix4f getLastRigidTransform() const;
@@ -68,7 +68,7 @@ __global__ void compactifyVisibleKernel(HashData hashData, HashParams hashParams
 	//const HashParams& hashParams = c_hashParams;
 	const unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x;
 #ifdef COMPACTIFY_HASH_SIMPLE
-	if (idx < hashParams.m_hashNumBuckets * HASH_BUCKET_SIZE) {
+	if (idx < hashParams.m_hashNumBuckets) {
 		if (hashData.d_hash[idx].ptr != FREE_ENTRY) {
 			if (hashData.isSDFBlockInCameraFrustumApprox(hashParams, camera, hashData.d_hash[idx].pos))
 			{
@@ -83,7 +83,7 @@ __global__ void compactifyVisibleKernel(HashData hashData, HashParams hashParams
 	__syncthreads();

 	int addrLocal = -1;
-	if (idx < hashParams.m_hashNumBuckets * HASH_BUCKET_SIZE) {
+	if (idx < hashParams.m_hashNumBuckets) {
 		if (hashData.d_hash[idx].ptr != FREE_ENTRY) {
 			if (hashData.isSDFBlockInCameraFrustumApprox(hashParams, camera, hashData.d_hash[idx].pos))
 			{
@@ -109,7 +109,7 @@ __global__ void compactifyVisibleKernel(HashData hashData, HashParams hashParams
 void ftl::cuda::compactifyVisible(HashData& hashData, const HashParams& hashParams, const DepthCameraParams &camera, cudaStream_t stream) {
 	const unsigned int threadsPerBlock = COMPACTIFY_HASH_THREADS_PER_BLOCK;
-	const dim3 gridSize((HASH_BUCKET_SIZE * hashParams.m_hashNumBuckets + threadsPerBlock - 1) / threadsPerBlock, 1);
+	const dim3 gridSize((hashParams.m_hashNumBuckets + threadsPerBlock - 1) / threadsPerBlock, 1);
 	const dim3 blockSize(threadsPerBlock, 1);

 	cudaSafeCall(cudaMemsetAsync(hashData.d_hashCompactifiedCounter, 0, sizeof(int),stream));
@@ -129,7 +129,7 @@ __global__ void compactifyAllocatedKernel(HashData hashData)
 	const HashParams& hashParams = c_hashParams;
 	const unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x;
 #ifdef COMPACTIFY_HASH_SIMPLE
-	if (idx < hashParams.m_hashNumBuckets * HASH_BUCKET_SIZE) {
+	if (idx < hashParams.m_hashNumBuckets) {
 		if (hashData.d_hash[idx].ptr != FREE_ENTRY) {
 			int addr = atomicAdd(hashData.d_hashCompactifiedCounter, 1);
 			hashData.d_hashCompactified[addr] = hashData.d_hash[idx];
@@ -141,7 +141,7 @@ __global__ void compactifyAllocatedKernel(HashData hashData)
 	__syncthreads();

 	int addrLocal = -1;
-	if (idx < hashParams.m_hashNumBuckets * HASH_BUCKET_SIZE) {
+	if (idx < hashParams.m_hashNumBuckets) {
 		if (hashData.d_hash[idx].ptr != FREE_ENTRY) {
 			addrLocal = atomicAdd(&localCounter, 1);
 		}
@@ -164,7 +164,7 @@ __global__ void compactifyAllocatedKernel(HashData hashData)
 void ftl::cuda::compactifyAllocated(HashData& hashData, const HashParams& hashParams, cudaStream_t stream) {
 	const unsigned int threadsPerBlock = COMPACTIFY_HASH_THREADS_PER_BLOCK;
-	const dim3 gridSize((HASH_BUCKET_SIZE * hashParams.m_hashNumBuckets + threadsPerBlock - 1) / threadsPerBlock, 1);
+	const dim3 gridSize((hashParams.m_hashNumBuckets + threadsPerBlock - 1) / threadsPerBlock, 1);
 	const dim3 blockSize(threadsPerBlock, 1);

 	cudaSafeCall(cudaMemsetAsync(hashData.d_hashCompactifiedCounter, 0, sizeof(int), stream));
@@ -98,7 +98,7 @@ __global__ void resetHashKernel(HashData hashData)
 {
 	const HashParams& hashParams = c_hashParams;
 	const unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x;
-	if (idx < hashParams.m_hashNumBuckets * HASH_BUCKET_SIZE) {
+	if (idx < hashParams.m_hashNumBuckets) {
 		hashData.deleteHashEntry(hashData.d_hash[idx]);
 		hashData.deleteHashEntry(hashData.d_hashCompactified[idx]);
 	}
@@ -133,7 +133,7 @@ extern "C" void resetCUDA(HashData& hashData, const HashParams& hashParams)
 {
 	//resetting the hash
-	const dim3 gridSize((HASH_BUCKET_SIZE * hashParams.m_hashNumBuckets + (T_PER_BLOCK*T_PER_BLOCK) - 1)/(T_PER_BLOCK*T_PER_BLOCK), 1);
+	const dim3 gridSize((hashParams.m_hashNumBuckets + (T_PER_BLOCK*T_PER_BLOCK) - 1)/(T_PER_BLOCK*T_PER_BLOCK), 1);
 	const dim3 blockSize((T_PER_BLOCK*T_PER_BLOCK), 1);
 	resetHashKernel<<<gridSize, blockSize>>>(hashData);
#include <ftl/voxel_hash.hpp>
using ftl::voxhash::HashData;
using ftl::voxhash::HashParams;
void HashData::allocate(const HashParams& params, bool dataOnGPU) {
m_bIsOnGPU = dataOnGPU;
if (m_bIsOnGPU) {
cudaSafeCall(cudaMalloc(&d_heap, sizeof(unsigned int) * params.m_numSDFBlocks));
cudaSafeCall(cudaMalloc(&d_heapCounter, sizeof(unsigned int)));
cudaSafeCall(cudaMalloc(&d_hash, sizeof(HashEntry)* params.m_hashNumBuckets));
cudaSafeCall(cudaMalloc(&d_hashDecision, sizeof(int)* params.m_hashNumBuckets));
cudaSafeCall(cudaMalloc(&d_hashDecisionPrefix, sizeof(int)* params.m_hashNumBuckets));
cudaSafeCall(cudaMalloc(&d_hashCompactified, sizeof(HashEntry)* params.m_hashNumBuckets));
cudaSafeCall(cudaMalloc(&d_hashCompactifiedCounter, sizeof(int)));
cudaSafeCall(cudaMalloc(&d_SDFBlocks, sizeof(Voxel) * params.m_numSDFBlocks * params.m_SDFBlockSize*params.m_SDFBlockSize*params.m_SDFBlockSize));
cudaSafeCall(cudaMalloc(&d_hashBucketMutex, sizeof(int)* params.m_hashNumBuckets));
} else {
d_heap = new unsigned int[params.m_numSDFBlocks];
d_heapCounter = new unsigned int[1];
d_hash = new HashEntry[params.m_hashNumBuckets];
d_hashDecision = new int[params.m_hashNumBuckets];
d_hashDecisionPrefix = new int[params.m_hashNumBuckets];
d_hashCompactified = new HashEntry[params.m_hashNumBuckets];
d_hashCompactifiedCounter = new int[1];
d_SDFBlocks = new Voxel[params.m_numSDFBlocks * params.m_SDFBlockSize*params.m_SDFBlockSize*params.m_SDFBlockSize];
d_hashBucketMutex = new int[params.m_hashNumBuckets];
}
updateParams(params);
}
void HashData::updateParams(const HashParams& params) {
if (m_bIsOnGPU) {
updateConstantHashParams(params);
}
}
void HashData::free() {
if (m_bIsOnGPU) {
cudaSafeCall(cudaFree(d_heap));
cudaSafeCall(cudaFree(d_heapCounter));
cudaSafeCall(cudaFree(d_hash));
cudaSafeCall(cudaFree(d_hashDecision));
cudaSafeCall(cudaFree(d_hashDecisionPrefix));
cudaSafeCall(cudaFree(d_hashCompactified));
cudaSafeCall(cudaFree(d_hashCompactifiedCounter));
cudaSafeCall(cudaFree(d_SDFBlocks));
cudaSafeCall(cudaFree(d_hashBucketMutex));
} else {
if (d_heap) delete[] d_heap;
if (d_heapCounter) delete[] d_heapCounter;
if (d_hash) delete[] d_hash;
if (d_hashDecision) delete[] d_hashDecision;
if (d_hashDecisionPrefix) delete[] d_hashDecisionPrefix;
if (d_hashCompactified) delete[] d_hashCompactified;
if (d_hashCompactifiedCounter) delete[] d_hashCompactifiedCounter;
if (d_SDFBlocks) delete[] d_SDFBlocks;
if (d_hashBucketMutex) delete[] d_hashBucketMutex;
}
d_hash = NULL;
d_heap = NULL;
d_heapCounter = NULL;
d_hashDecision = NULL;
d_hashDecisionPrefix = NULL;
d_hashCompactified = NULL;
d_hashCompactifiedCounter = NULL;
d_SDFBlocks = NULL;
d_hashBucketMutex = NULL;
}
HashData HashData::download() const {
if (!m_bIsOnGPU) return *this;
HashParams params;
HashData hashData;
hashData.allocate(params, false); //allocate the data on the CPU
cudaSafeCall(cudaMemcpy(hashData.d_heap, d_heap, sizeof(unsigned int) * params.m_numSDFBlocks, cudaMemcpyDeviceToHost));
cudaSafeCall(cudaMemcpy(hashData.d_heapCounter, d_heapCounter, sizeof(unsigned int), cudaMemcpyDeviceToHost));
cudaSafeCall(cudaMemcpy(hashData.d_hash, d_hash, sizeof(HashEntry)* params.m_hashNumBuckets, cudaMemcpyDeviceToHost));
cudaSafeCall(cudaMemcpy(hashData.d_hashDecision, d_hashDecision, sizeof(int)*params.m_hashNumBuckets, cudaMemcpyDeviceToHost));
cudaSafeCall(cudaMemcpy(hashData.d_hashDecisionPrefix, d_hashDecisionPrefix, sizeof(int)*params.m_hashNumBuckets, cudaMemcpyDeviceToHost));
cudaSafeCall(cudaMemcpy(hashData.d_hashCompactified, d_hashCompactified, sizeof(HashEntry)* params.m_hashNumBuckets, cudaMemcpyDeviceToHost));
cudaSafeCall(cudaMemcpy(hashData.d_hashCompactifiedCounter, d_hashCompactifiedCounter, sizeof(unsigned int), cudaMemcpyDeviceToHost));
cudaSafeCall(cudaMemcpy(hashData.d_SDFBlocks, d_SDFBlocks, sizeof(Voxel) * params.m_numSDFBlocks * params.m_SDFBlockSize*params.m_SDFBlockSize*params.m_SDFBlockSize, cudaMemcpyDeviceToHost));
cudaSafeCall(cudaMemcpy(hashData.d_hashBucketMutex, d_hashBucketMutex, sizeof(int)* params.m_hashNumBuckets, cudaMemcpyDeviceToHost));
return hashData;
}
HashData HashData::upload() const {
if (m_bIsOnGPU) return *this;
HashParams params;
HashData hashData;
hashData.allocate(params, false); //allocate the data on the CPU
cudaSafeCall(cudaMemcpy(hashData.d_heap, d_heap, sizeof(unsigned int) * params.m_numSDFBlocks, cudaMemcpyHostToDevice));
cudaSafeCall(cudaMemcpy(hashData.d_heapCounter, d_heapCounter, sizeof(unsigned int), cudaMemcpyHostToDevice));
cudaSafeCall(cudaMemcpy(hashData.d_hash, d_hash, sizeof(HashEntry)* params.m_hashNumBuckets, cudaMemcpyHostToDevice));
cudaSafeCall(cudaMemcpy(hashData.d_hashDecision, d_hashDecision, sizeof(int)*params.m_hashNumBuckets, cudaMemcpyHostToDevice));
cudaSafeCall(cudaMemcpy(hashData.d_hashDecisionPrefix, d_hashDecisionPrefix, sizeof(int)*params.m_hashNumBuckets, cudaMemcpyHostToDevice));
cudaSafeCall(cudaMemcpy(hashData.d_hashCompactified, d_hashCompactified, sizeof(HashEntry)* params.m_hashNumBuckets, cudaMemcpyHostToDevice));
cudaSafeCall(cudaMemcpy(hashData.d_hashCompactifiedCounter, d_hashCompactifiedCounter, sizeof(unsigned int), cudaMemcpyHostToDevice));
cudaSafeCall(cudaMemcpy(hashData.d_SDFBlocks, d_SDFBlocks, sizeof(Voxel) * params.m_numSDFBlocks * params.m_SDFBlockSize*params.m_SDFBlockSize*params.m_SDFBlockSize, cudaMemcpyHostToDevice));
cudaSafeCall(cudaMemcpy(hashData.d_hashBucketMutex, d_hashBucketMutex, sizeof(int)* params.m_hashNumBuckets, cudaMemcpyHostToDevice));
return hashData;
}
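For illustration only (not code from this commit), the new download() is intended to let host code copy the whole table out of GPU memory and inspect it, for example to count allocated entries. The helper below is a hypothetical sketch; it assumes the hash data and parameters were allocated and populated elsewhere, and the function name is invented.

// Hypothetical usage sketch: count allocated hash entries on the CPU after a download().
#include <ftl/voxel_hash.hpp>

size_t countAllocated(const ftl::voxhash::HashData &gpuHash,
                      const ftl::voxhash::HashParams &params) {
	ftl::voxhash::HashData cpuHash = gpuHash.download();  // host copy of all buffers
	size_t allocated = 0;
	for (unsigned int i = 0; i < params.m_hashNumBuckets; ++i) {
		if (cpuHash.d_hash[i].ptr != FREE_ENTRY) ++allocated;
	}
	cpuHash.free();  // release the temporary host copy
	return allocated;
}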
#include <ftl/voxel_hash.hpp>
using namespace ftl::voxhash;
#define COLLISION_LIST_SIZE 5
//! returns the hash entry for a given sdf block id; if there was no hash entry the returned entry will have a ptr with FREE_ENTRY set
__device__
HashEntry HashData::getHashEntryForSDFBlockPos(const int3& sdfBlock) const {
uint h = computeHashPos(sdfBlock); //hash
int3 pos = sdfBlock;
HashEntry curr;
int i = h;
unsigned int maxIter = 0;
#pragma unroll 2
while (maxIter < COLLISION_LIST_SIZE) {
curr = d_hash[i];
if (curr.pos == pos && curr.ptr != FREE_ENTRY) return curr;
if (curr.offset == 0) break;
i += curr.offset; //go to next element in the list
i %= (params().m_hashNumBuckets); //check for overflow
++maxIter;
}
// Could not find so return dummy
curr.pos = pos;
curr.ptr = FREE_ENTRY;
return curr;
}
//for histogram (collisions traversal only)
__device__
unsigned int HashData::getNumHashLinkedList(unsigned int bucketID) {
unsigned int listLen = 0;
unsigned int i = bucketID; //start with the last entry of the current bucket
HashEntry curr; curr.offset = 0;
unsigned int maxIter = 0;
#pragma unroll 2
while (maxIter < COLLISION_LIST_SIZE) {
curr = d_hash[i];
if (curr.offset == 0) break;
i += curr.offset; //go to next element in the list
i %= (params().m_hashNumBuckets); //check for overflow
++listLen;
++maxIter;
}
return listLen;
}
//pos in SDF block coordinates
__device__
void HashData::allocBlock(const int3& pos, const uchar frame) {
uint h = computeHashPos(pos); //hash bucket
uint i = h;
HashEntry curr; curr.offset = 0;
unsigned int maxIter = 0;
#pragma unroll 2
while (maxIter < COLLISION_LIST_SIZE) {
//offset = curr.offset;
curr = d_hash[i]; //TODO MATTHIAS do by reference
if (curr.pos == pos && curr.ptr != FREE_ENTRY) return;
if (curr.offset == 0) break;
i += curr.offset; //go to next element in the list
i %= (params().m_hashNumBuckets); //check for overflow
++maxIter;
}
// Limit reached...
if (curr.offset != 0) return;
int j = i+1;
while (maxIter < COLLISION_LIST_SIZE) {
//offset = curr.offset;
curr = d_hash[j]; //TODO MATTHIAS do by reference
if (curr.ptr == FREE_ENTRY) {
int prevValue = atomicExch(&d_hashBucketMutex[i], LOCK_ENTRY);
if (prevValue != LOCK_ENTRY) {
//InterlockedExchange(g_HashBucketMutex[h], LOCK_ENTRY, prevValue); //lock the hash bucket where we have found a free entry
prevValue = atomicExch(&d_hashBucketMutex[j], LOCK_ENTRY);
if (prevValue != LOCK_ENTRY) { //only proceed if the bucket has been locked
HashEntry& entry = d_hash[j];
entry.pos = pos;
entry.offset = 0;
entry.flags = 0; // Flag block as valid in this frame (Nick)
entry.ptr = consumeHeap() * SDF_BLOCK_SIZE*SDF_BLOCK_SIZE*SDF_BLOCK_SIZE; //memory alloc
d_hash[i].offset = j-i;
//setHashEntry(g_Hash, idxLastEntryInBucket, lastEntryInBucket);
}
}
return; //bucket was already locked
}
++j;
j %= (params().m_hashNumBuckets); //check for overflow
++maxIter;
}
}
//!inserts a hash entry without allocating any memory: used by streaming: TODO MATTHIAS check the atomics in this function
/*__device__
bool HashData::insertHashEntry(HashEntry entry)
{
uint h = computeHashPos(entry.pos);
uint hp = h * HASH_BUCKET_SIZE;
for (uint j = 0; j < HASH_BUCKET_SIZE; j++) {
uint i = j + hp;
//const HashEntry& curr = d_hash[i];
int prevWeight = 0;
//InterlockedCompareExchange(hash[3*i+2], FREE_ENTRY, LOCK_ENTRY, prevWeight);
prevWeight = atomicCAS(&d_hash[i].ptr, FREE_ENTRY, LOCK_ENTRY);
if (prevWeight == FREE_ENTRY) {
d_hash[i] = entry;
//setHashEntry(hash, i, entry);
return true;
}
}
#ifdef HANDLE_COLLISIONS
//updated variables as after the loop
const uint idxLastEntryInBucket = (h+1)*HASH_BUCKET_SIZE - 1; //get last index of bucket
uint i = idxLastEntryInBucket; //start with the last entry of the current bucket
HashEntry curr;
unsigned int maxIter = 0;
//[allow_uav_condition]
uint g_MaxLoopIterCount = params().m_hashMaxCollisionLinkedListSize;
#pragma unroll 1
while (maxIter < g_MaxLoopIterCount) { //traverse list until end // why find the end? we you are inserting at the start !!!
//curr = getHashEntry(hash, i);
curr = d_hash[i]; //TODO MATTHIAS do by reference
if (curr.offset == 0) break; //we have found the end of the list
i = idxLastEntryInBucket + curr.offset; //go to next element in the list
i %= (HASH_BUCKET_SIZE * params().m_hashNumBuckets); //check for overflow
maxIter++;
}
maxIter = 0;
int offset = 0;
#pragma unroll 1
while (maxIter < g_MaxLoopIterCount) { //linear search for free entry
offset++;
uint i = (idxLastEntryInBucket + offset) % (HASH_BUCKET_SIZE * params().m_hashNumBuckets); //go to next hash element
if ((offset % HASH_BUCKET_SIZE) == 0) continue; //cannot insert into a last bucket element (would conflict with other linked lists)
int prevWeight = 0;
//InterlockedCompareExchange(hash[3*i+2], FREE_ENTRY, LOCK_ENTRY, prevWeight); //check for a free entry
uint* d_hashUI = (uint*)d_hash;
prevWeight = prevWeight = atomicCAS(&d_hashUI[3*idxLastEntryInBucket+1], (uint)FREE_ENTRY, (uint)LOCK_ENTRY);
if (prevWeight == FREE_ENTRY) { //if free entry found set prev->next = curr & curr->next = prev->next
//[allow_uav_condition]
//while(hash[3*idxLastEntryInBucket+2] == LOCK_ENTRY); // expects setHashEntry to set the ptr last, required because pos.z is packed into the same value -> prev->next = curr -> might corrput pos.z
HashEntry lastEntryInBucket = d_hash[idxLastEntryInBucket]; //get prev (= lastEntry in Bucket)
int newOffsetPrev = (offset << 16) | (lastEntryInBucket.pos.z & 0x0000ffff); //prev->next = curr (maintain old z-pos)
int oldOffsetPrev = 0;
//InterlockedExchange(hash[3*idxLastEntryInBucket+1], newOffsetPrev, oldOffsetPrev); //set prev offset atomically
uint* d_hashUI = (uint*)d_hash;
oldOffsetPrev = prevWeight = atomicExch(&d_hashUI[3*idxLastEntryInBucket+1], newOffsetPrev);
entry.offset = oldOffsetPrev >> 16; //remove prev z-pos from old offset
//setHashEntry(hash, i, entry); //sets the current hashEntry with: curr->next = prev->next
d_hash[i] = entry;
return true;
}
maxIter++;
}
#endif
return false;
}*/
//! deletes a hash entry position for a given sdfBlock index (returns true uppon successful deletion; otherwise returns false)
__device__
bool HashData::deleteHashEntryElement(const int3& sdfBlock) {
uint h = computeHashPos(sdfBlock); //hash bucket
int i = h;
int prev = -1;
HashEntry curr;
unsigned int maxIter = 0;
#pragma unroll 2
while (maxIter < COLLISION_LIST_SIZE) {
curr = d_hash[i];
//found that dude that we need/want to delete
if (curr.pos == sdfBlock && curr.ptr != FREE_ENTRY) {
//int prevValue = 0;
//InterlockedExchange(bucketMutex[h], LOCK_ENTRY, prevValue); //lock the hash bucket
int prevValue = atomicExch(&d_hashBucketMutex[i], LOCK_ENTRY);
if (prevValue == LOCK_ENTRY) return false;
if (prevValue != LOCK_ENTRY) {
prevValue = (prev >= 0) ? atomicExch(&d_hashBucketMutex[prev], LOCK_ENTRY) : 0;
if (prevValue == LOCK_ENTRY) return false;
if (prevValue != LOCK_ENTRY) {
const uint linBlockSize = SDF_BLOCK_SIZE * SDF_BLOCK_SIZE * SDF_BLOCK_SIZE;
appendHeap(curr.ptr / linBlockSize);
deleteHashEntry(i);
if (prev >= 0) {
d_hash[prev].offset = curr.offset;
}
return true;
}
}
}
if (curr.offset == 0) { //we have found the end of the list
return false; //should actually never happen because we need to find that guy before
}
prev = i;
i += curr.offset; //go to next element in the list
i %= (params().m_hashNumBuckets); //check for overflow
++maxIter;
}
return false;
}
\ No newline at end of file
@@ -263,7 +263,7 @@ unsigned int SceneRep::getHeapFreeCount() {
 //! debug only!
 void SceneRep::debugHash() {
-	HashEntry* hashCPU = new HashEntry[m_hashParams.m_hashBucketSize*m_hashParams.m_hashNumBuckets];
+	HashEntry* hashCPU = new HashEntry[m_hashParams.m_hashNumBuckets];
 	unsigned int* heapCPU = new unsigned int[m_hashParams.m_numSDFBlocks];
 	unsigned int heapCounterCPU;
@@ -271,7 +271,7 @@ void SceneRep::debugHash() {
 	heapCounterCPU++;	//points to the first free entry: number of blocks is one more

 	cudaSafeCall(cudaMemcpy(heapCPU, m_hashData.d_heap, sizeof(unsigned int)*m_hashParams.m_numSDFBlocks, cudaMemcpyDeviceToHost));
-	cudaSafeCall(cudaMemcpy(hashCPU, m_hashData.d_hash, sizeof(HashEntry)*m_hashParams.m_hashBucketSize*m_hashParams.m_hashNumBuckets, cudaMemcpyDeviceToHost));
+	cudaSafeCall(cudaMemcpy(hashCPU, m_hashData.d_hash, sizeof(HashEntry)*m_hashParams.m_hashNumBuckets, cudaMemcpyDeviceToHost));

 	//Check for duplicates
 	class myint3Voxel {
@@ -316,7 +316,7 @@ void SceneRep::debugHash() {
 	std::list<myint3Voxel> l;
 	//std::vector<myint3Voxel> v;

-	for (unsigned int i = 0; i < m_hashParams.m_hashBucketSize*m_hashParams.m_hashNumBuckets; i++) {
+	for (unsigned int i = 0; i < m_hashParams.m_hashNumBuckets; i++) {
 		if (hashCPU[i].ptr == -1) {
 			numMinusOne++;
 		}
@@ -376,8 +376,6 @@ HashParams SceneRep::_parametersFromConfig() {
 	params.m_rigidTransform.setIdentity();
 	params.m_rigidTransformInverse.setIdentity();
 	params.m_hashNumBuckets = value("hashNumBuckets", 100000);
-	params.m_hashBucketSize = HASH_BUCKET_SIZE;
-	params.m_hashMaxCollisionLinkedListSize = value("hashMaxCollisionLinkedListSize", 7);
 	params.m_SDFBlockSize = SDF_BLOCK_SIZE;
 	params.m_numSDFBlocks = value("hashNumSDFBlocks",500000);
 	params.m_virtualVoxelSize = value("SDFVoxelSize", 0.006f);