From fc934762b7a1d19cf25fd2226049f43b17ced261 Mon Sep 17 00:00:00 2001 From: Nicolas Pope <nicolas.pope@utu.fi> Date: Sun, 30 Jun 2019 09:05:05 +0300 Subject: [PATCH] Resolves #63 bucket removal --- applications/reconstruct/CMakeLists.txt | 2 + .../include/ftl/cuda_operators.hpp | 5 + .../reconstruct/include/ftl/voxel_hash.hpp | 485 ++---------------- .../include/ftl/voxel_hash_params.hpp | 4 +- .../reconstruct/include/ftl/voxel_scene.hpp | 8 - applications/reconstruct/src/compactors.cu | 12 +- .../reconstruct/src/scene_rep_hash_sdf.cu | 4 +- applications/reconstruct/src/voxel_hash.cpp | 109 ++++ applications/reconstruct/src/voxel_hash.cu | 240 +++++++++ applications/reconstruct/src/voxel_scene.cpp | 8 +- 10 files changed, 401 insertions(+), 476 deletions(-) create mode 100644 applications/reconstruct/src/voxel_hash.cpp create mode 100644 applications/reconstruct/src/voxel_hash.cu diff --git a/applications/reconstruct/CMakeLists.txt b/applications/reconstruct/CMakeLists.txt index 4fa1cba34..906d7ebda 100644 --- a/applications/reconstruct/CMakeLists.txt +++ b/applications/reconstruct/CMakeLists.txt @@ -11,6 +11,8 @@ set(REPSRC src/integrators.cu src/ray_cast_sdf.cu src/camera_util.cu + src/voxel_hash.cu + src/voxel_hash.cpp src/ray_cast_sdf.cpp src/registration.cpp src/virtual_source.cpp diff --git a/applications/reconstruct/include/ftl/cuda_operators.hpp b/applications/reconstruct/include/ftl/cuda_operators.hpp index eeb6f26c2..21e109b89 100644 --- a/applications/reconstruct/include/ftl/cuda_operators.hpp +++ b/applications/reconstruct/include/ftl/cuda_operators.hpp @@ -662,6 +662,11 @@ inline __host__ __device__ void operator/=(int3 &a, int s) a.x /= s; a.y /= s; a.z /= s; } +// Comparison +inline __host__ __device__ bool operator==(const int3 &a, const int3 &b) { + return a.x == b.x && a.y == b.y && a.z == b.z; +} + // clamp inline __device__ __host__ int clamp(int f, int a, int b) { diff --git a/applications/reconstruct/include/ftl/voxel_hash.hpp b/applications/reconstruct/include/ftl/voxel_hash.hpp index de8d0636f..4a8e8ee80 100644 --- a/applications/reconstruct/include/ftl/voxel_hash.hpp +++ b/applications/reconstruct/include/ftl/voxel_hash.hpp @@ -36,9 +36,7 @@ typedef signed char schar; #include <ftl/depth_camera.hpp> -#define HANDLE_COLLISIONS #define SDF_BLOCK_SIZE 8 -#define HASH_BUCKET_SIZE 10 #ifndef MINF #define MINF __int_as_float(0xff800000) @@ -109,94 +107,30 @@ struct HashData { m_bIsOnGPU = false; } - __host__ - void allocate(const HashParams& params, bool dataOnGPU = true) { - m_bIsOnGPU = dataOnGPU; - if (m_bIsOnGPU) { - cudaSafeCall(cudaMalloc(&d_heap, sizeof(unsigned int) * params.m_numSDFBlocks)); - cudaSafeCall(cudaMalloc(&d_heapCounter, sizeof(unsigned int))); - cudaSafeCall(cudaMalloc(&d_hash, sizeof(HashEntry)* params.m_hashNumBuckets * params.m_hashBucketSize)); - cudaSafeCall(cudaMalloc(&d_hashDecision, sizeof(int)* params.m_hashNumBuckets * params.m_hashBucketSize)); - cudaSafeCall(cudaMalloc(&d_hashDecisionPrefix, sizeof(int)* params.m_hashNumBuckets * params.m_hashBucketSize)); - cudaSafeCall(cudaMalloc(&d_hashCompactified, sizeof(HashEntry)* params.m_hashNumBuckets * params.m_hashBucketSize)); - cudaSafeCall(cudaMalloc(&d_hashCompactifiedCounter, sizeof(int))); - cudaSafeCall(cudaMalloc(&d_SDFBlocks, sizeof(Voxel) * params.m_numSDFBlocks * params.m_SDFBlockSize*params.m_SDFBlockSize*params.m_SDFBlockSize)); - cudaSafeCall(cudaMalloc(&d_hashBucketMutex, sizeof(int)* params.m_hashNumBuckets)); - } else { - d_heap = new unsigned int[params.m_numSDFBlocks]; - 
d_heapCounter = new unsigned int[1]; - d_hash = new HashEntry[params.m_hashNumBuckets * params.m_hashBucketSize]; - d_hashDecision = new int[params.m_hashNumBuckets * params.m_hashBucketSize]; - d_hashDecisionPrefix = new int[params.m_hashNumBuckets * params.m_hashBucketSize]; - d_hashCompactified = new HashEntry[params.m_hashNumBuckets * params.m_hashBucketSize]; - d_hashCompactifiedCounter = new int[1]; - d_SDFBlocks = new Voxel[params.m_numSDFBlocks * params.m_SDFBlockSize*params.m_SDFBlockSize*params.m_SDFBlockSize]; - d_hashBucketMutex = new int[params.m_hashNumBuckets]; - } + /** + * Create all the data structures, either on GPU or system memory. + */ + __host__ void allocate(const HashParams& params, bool dataOnGPU = true); - updateParams(params); - } + __host__ void updateParams(const HashParams& params); - __host__ - void updateParams(const HashParams& params) { - if (m_bIsOnGPU) { - updateConstantHashParams(params); - } - } + __host__ void free(); - __host__ - void free() { - if (m_bIsOnGPU) { - cudaSafeCall(cudaFree(d_heap)); - cudaSafeCall(cudaFree(d_heapCounter)); - cudaSafeCall(cudaFree(d_hash)); - cudaSafeCall(cudaFree(d_hashDecision)); - cudaSafeCall(cudaFree(d_hashDecisionPrefix)); - cudaSafeCall(cudaFree(d_hashCompactified)); - cudaSafeCall(cudaFree(d_hashCompactifiedCounter)); - cudaSafeCall(cudaFree(d_SDFBlocks)); - cudaSafeCall(cudaFree(d_hashBucketMutex)); - } else { - if (d_heap) delete[] d_heap; - if (d_heapCounter) delete[] d_heapCounter; - if (d_hash) delete[] d_hash; - if (d_hashDecision) delete[] d_hashDecision; - if (d_hashDecisionPrefix) delete[] d_hashDecisionPrefix; - if (d_hashCompactified) delete[] d_hashCompactified; - if (d_hashCompactifiedCounter) delete[] d_hashCompactifiedCounter; - if (d_SDFBlocks) delete[] d_SDFBlocks; - if (d_hashBucketMutex) delete[] d_hashBucketMutex; - } + /** + * Download entire hash table from GPU to CPU memory. + */ + __host__ HashData download() const; - d_hash = NULL; - d_heap = NULL; - d_heapCounter = NULL; - d_hashDecision = NULL; - d_hashDecisionPrefix = NULL; - d_hashCompactified = NULL; - d_hashCompactifiedCounter = NULL; - d_SDFBlocks = NULL; - d_hashBucketMutex = NULL; - } + /** + * Upload entire hash table from CPU to GPU memory. 
+ */ + __host__ HashData upload() const; - __host__ - HashData copyToCPU() const { - HashParams params; - - HashData hashData; - hashData.allocate(params, false); //allocate the data on the CPU - cudaSafeCall(cudaMemcpy(hashData.d_heap, d_heap, sizeof(unsigned int) * params.m_numSDFBlocks, cudaMemcpyDeviceToHost)); - cudaSafeCall(cudaMemcpy(hashData.d_heapCounter, d_heapCounter, sizeof(unsigned int), cudaMemcpyDeviceToHost)); - cudaSafeCall(cudaMemcpy(hashData.d_hash, d_hash, sizeof(HashEntry)* params.m_hashNumBuckets * params.m_hashBucketSize, cudaMemcpyDeviceToHost)); - cudaSafeCall(cudaMemcpy(hashData.d_hashDecision, d_hashDecision, sizeof(int)*params.m_hashNumBuckets * params.m_hashBucketSize, cudaMemcpyDeviceToHost)); - cudaSafeCall(cudaMemcpy(hashData.d_hashDecisionPrefix, d_hashDecisionPrefix, sizeof(int)*params.m_hashNumBuckets * params.m_hashBucketSize, cudaMemcpyDeviceToHost)); - cudaSafeCall(cudaMemcpy(hashData.d_hashCompactified, d_hashCompactified, sizeof(HashEntry)* params.m_hashNumBuckets * params.m_hashBucketSize, cudaMemcpyDeviceToHost)); - cudaSafeCall(cudaMemcpy(hashData.d_hashCompactifiedCounter, d_hashCompactifiedCounter, sizeof(unsigned int), cudaMemcpyDeviceToHost)); - cudaSafeCall(cudaMemcpy(hashData.d_SDFBlocks, d_SDFBlocks, sizeof(Voxel) * params.m_numSDFBlocks * params.m_SDFBlockSize*params.m_SDFBlockSize*params.m_SDFBlockSize, cudaMemcpyDeviceToHost)); - cudaSafeCall(cudaMemcpy(hashData.d_hashBucketMutex, d_hashBucketMutex, sizeof(int)* params.m_hashNumBuckets, cudaMemcpyDeviceToHost)); - - return hashData; //TODO MATTHIAS look at this (i.e,. when does memory get destroyed ; if it's in the destructer it would kill everything here - } + __host__ size_t getAllocatedBlocks() const; + + __host__ size_t getFreeBlocks() const; + + __host__ size_t getCollisionCount() const; @@ -218,8 +152,8 @@ struct HashData { const int p1 = 19349669; const int p2 = 83492791; - int res = ((virtualVoxelPos.x * p0) ^ (virtualVoxelPos.y * p1) ^ (virtualVoxelPos.z * p2)) % c_hashParams.m_hashNumBuckets; - if (res < 0) res += c_hashParams.m_hashNumBuckets; + int res = ((virtualVoxelPos.x * p0) ^ (virtualVoxelPos.y * p1) ^ (virtualVoxelPos.z * p2)) % params().m_hashNumBuckets; + if (res < 0) res += params().m_hashNumBuckets; return (uint)res; } @@ -261,26 +195,26 @@ struct HashData { out.color.z = (v0.weight > 0) ? (uchar)(c0.z * factor0 + c1.z * factor1) : c1.z;*/ out.sdf = (v0.sdf * (float)v0.weight + v1.sdf * (float)v1.weight) / ((float)v0.weight + (float)v1.weight); - out.weight = min(c_hashParams.m_integrationWeightMax, (unsigned int)v0.weight + (unsigned int)v1.weight); + out.weight = min(params().m_integrationWeightMax, (unsigned int)v0.weight + (unsigned int)v1.weight); } //! 
returns the truncation of the SDF for a given distance value __device__ float getTruncation(float z) const { - return c_hashParams.m_truncation + c_hashParams.m_truncScale * z; + return params().m_truncation + params().m_truncScale * z; } __device__ float3 worldToVirtualVoxelPosFloat(const float3& pos) const { - return pos / c_hashParams.m_virtualVoxelSize; + return pos / params().m_virtualVoxelSize; } __device__ int3 worldToVirtualVoxelPos(const float3& pos) const { //const float3 p = pos*g_VirtualVoxelResolutionScalar; - const float3 p = pos / c_hashParams.m_virtualVoxelSize; + const float3 p = pos / params().m_virtualVoxelSize; return make_int3(p+make_float3(sign(p))*0.5f); } @@ -304,7 +238,7 @@ struct HashData { __device__ float3 virtualVoxelPosToWorld(const int3& pos) const { - return make_float3(pos)*c_hashParams.m_virtualVoxelSize; + return make_float3(pos)*params().m_virtualVoxelSize; } __device__ @@ -438,98 +372,15 @@ struct HashData { //! returns the hash entry for a given sdf block id; if there was no hash entry the returned entry will have a ptr with FREE_ENTRY set __device__ - HashEntry getHashEntryForSDFBlockPos(const int3& sdfBlock) const - { - uint h = computeHashPos(sdfBlock); //hash bucket - uint hp = h * HASH_BUCKET_SIZE; //hash position - - HashEntry entry; - entry.pos = sdfBlock; - entry.offset = 0; - entry.ptr = FREE_ENTRY; - - for (uint j = 0; j < HASH_BUCKET_SIZE; j++) { - uint i = j + hp; - HashEntry curr = d_hash[i]; - if (curr.pos.x == entry.pos.x && curr.pos.y == entry.pos.y && curr.pos.z == entry.pos.z && curr.ptr != FREE_ENTRY) { - return curr; - } - } - -#ifdef HANDLE_COLLISIONS - const uint idxLastEntryInBucket = (h+1)*HASH_BUCKET_SIZE - 1; - int i = idxLastEntryInBucket; //start with the last entry of the current bucket - HashEntry curr; - //traverse list until end: memorize idx at list end and memorize offset from last element of bucket to list end - - unsigned int maxIter = 0; - uint g_MaxLoopIterCount = c_hashParams.m_hashMaxCollisionLinkedListSize; - #pragma unroll 1 - while (maxIter < g_MaxLoopIterCount) { - curr = d_hash[i]; - - if (curr.pos.x == entry.pos.x && curr.pos.y == entry.pos.y && curr.pos.z == entry.pos.z && curr.ptr != FREE_ENTRY) { - return curr; - } - - if (curr.offset == 0) { //we have found the end of the list - break; - } - i = idxLastEntryInBucket + curr.offset; //go to next element in the list - i %= (HASH_BUCKET_SIZE * c_hashParams.m_hashNumBuckets); //check for overflow - - maxIter++; - } -#endif - return entry; - } + HashEntry getHashEntryForSDFBlockPos(const int3& sdfBlock) const; //for histogram (no collision traversal) __device__ - unsigned int getNumHashEntriesPerBucket(unsigned int bucketID) { - unsigned int h = 0; - for (uint i = 0; i < HASH_BUCKET_SIZE; i++) { - if (d_hash[bucketID*HASH_BUCKET_SIZE+i].ptr != FREE_ENTRY) { - h++; - } - } - return h; - } + unsigned int getNumHashEntriesPerBucket(unsigned int bucketID); //for histogram (collisions traversal only) __device__ - unsigned int getNumHashLinkedList(unsigned int bucketID) { - unsigned int listLen = 0; - -#ifdef HANDLE_COLLISIONS - const uint idxLastEntryInBucket = (bucketID+1)*HASH_BUCKET_SIZE - 1; - unsigned int i = idxLastEntryInBucket; //start with the last entry of the current bucket - //int offset = 0; - HashEntry curr; curr.offset = 0; - //traverse list until end: memorize idx at list end and memorize offset from last element of bucket to list end - - unsigned int maxIter = 0; - uint g_MaxLoopIterCount = c_hashParams.m_hashMaxCollisionLinkedListSize; - #pragma 
unroll 1 - while (maxIter < g_MaxLoopIterCount) { - //offset = curr.offset; - //curr = getHashEntry(g_Hash, i); - curr = d_hash[i]; - - if (curr.offset == 0) { //we have found the end of the list - break; - } - i = idxLastEntryInBucket + curr.offset; //go to next element in the list - i %= (HASH_BUCKET_SIZE * c_hashParams.m_hashNumBuckets); //check for overflow - listLen++; - - maxIter++; - } -#endif - - return listLen; - } - + unsigned int getNumHashLinkedList(unsigned int bucketID); __device__ @@ -547,287 +398,15 @@ struct HashData { //pos in SDF block coordinates __device__ - void allocBlock(const int3& pos, const uchar frame) { - - - uint h = computeHashPos(pos); //hash bucket - uint hp = h * HASH_BUCKET_SIZE; //hash position - - int firstEmpty = -1; - for (uint j = 0; j < HASH_BUCKET_SIZE; j++) { - uint i = j + hp; - HashEntry& curr = d_hash[i]; - - //in that case the SDF-block is already allocated and corresponds to the current position -> exit thread - if (curr.pos.x == pos.x && curr.pos.y == pos.y && curr.pos.z == pos.z && curr.ptr != FREE_ENTRY) { - //curr.flags = frame; // Flag block as valid in this frame (Nick) - return; - } - - //store the first FREE_ENTRY hash entry - if (firstEmpty == -1 && curr.ptr == FREE_ENTRY) { - firstEmpty = i; - } - } - - -#ifdef HANDLE_COLLISIONS - //updated variables as after the loop - const uint idxLastEntryInBucket = (h+1)*HASH_BUCKET_SIZE - 1; //get last index of bucket - uint i = idxLastEntryInBucket; //start with the last entry of the current bucket - //int offset = 0; - HashEntry curr; curr.offset = 0; - //traverse list until end: memorize idx at list end and memorize offset from last element of bucket to list end - //int k = 0; - - unsigned int maxIter = 0; - uint g_MaxLoopIterCount = c_hashParams.m_hashMaxCollisionLinkedListSize; - #pragma unroll 1 - while (maxIter < g_MaxLoopIterCount) { - //offset = curr.offset; - curr = d_hash[i]; //TODO MATTHIAS do by reference - if (curr.pos.x == pos.x && curr.pos.y == pos.y && curr.pos.z == pos.z && curr.ptr != FREE_ENTRY) { - //curr.flags = frame; // Flag block as valid in this frame (Nick) - return; - } - if (curr.offset == 0) { //we have found the end of the list - break; - } - i = idxLastEntryInBucket + curr.offset; //go to next element in the list - i %= (HASH_BUCKET_SIZE * c_hashParams.m_hashNumBuckets); //check for overflow - - maxIter++; - } -#endif - - if (firstEmpty != -1) { //if there is an empty entry and we haven't allocated the current entry before - //int prevValue = 0; - //InterlockedExchange(d_hashBucketMutex[h], LOCK_ENTRY, prevValue); //lock the hash bucket - int prevValue = atomicExch(&d_hashBucketMutex[h], LOCK_ENTRY); - if (prevValue != LOCK_ENTRY) { //only proceed if the bucket has been locked - HashEntry& entry = d_hash[firstEmpty]; - entry.pos = pos; - entry.offset = NO_OFFSET; - entry.flags = 0; // Flag block as valid in this frame (Nick) - entry.ptr = consumeHeap() * SDF_BLOCK_SIZE*SDF_BLOCK_SIZE*SDF_BLOCK_SIZE; //memory alloc - } - return; - } - -#ifdef HANDLE_COLLISIONS - //if (i != idxLastEntryInBucket) return; - int offset = 0; - //linear search for free entry - - maxIter = 0; - #pragma unroll 1 - while (maxIter < g_MaxLoopIterCount) { - offset++; - i = (idxLastEntryInBucket + offset) % (HASH_BUCKET_SIZE * c_hashParams.m_hashNumBuckets); //go to next hash element - if ((offset % HASH_BUCKET_SIZE) == 0) continue; //cannot insert into a last bucket element (would conflict with other linked lists) - curr = d_hash[i]; - //if (curr.pos.x == pos.x && curr.pos.y == pos.y && 
curr.pos.z == pos.z && curr.ptr != FREE_ENTRY) { - // return; - //} - if (curr.ptr == FREE_ENTRY) { //this is the first free entry - //int prevValue = 0; - //InterlockedExchange(g_HashBucketMutex[h], LOCK_ENTRY, prevValue); //lock the original hash bucket - int prevValue = atomicExch(&d_hashBucketMutex[h], LOCK_ENTRY); - if (prevValue != LOCK_ENTRY) { - HashEntry lastEntryInBucket = d_hash[idxLastEntryInBucket]; - h = i / HASH_BUCKET_SIZE; - //InterlockedExchange(g_HashBucketMutex[h], LOCK_ENTRY, prevValue); //lock the hash bucket where we have found a free entry - prevValue = atomicExch(&d_hashBucketMutex[h], LOCK_ENTRY); - if (prevValue != LOCK_ENTRY) { //only proceed if the bucket has been locked - HashEntry& entry = d_hash[i]; - entry.pos = pos; - entry.offset = lastEntryInBucket.offset; - entry.flags = 0; // Flag block as valid in this frame (Nick) - entry.ptr = consumeHeap() * SDF_BLOCK_SIZE*SDF_BLOCK_SIZE*SDF_BLOCK_SIZE; //memory alloc - - lastEntryInBucket.offset = offset; - d_hash[idxLastEntryInBucket] = lastEntryInBucket; - //setHashEntry(g_Hash, idxLastEntryInBucket, lastEntryInBucket); - } - } - return; //bucket was already locked - } - - maxIter++; - } -#endif - } + void allocBlock(const int3& pos, const uchar frame); - //!inserts a hash entry without allocating any memory: used by streaming: TODO MATTHIAS check the atomics in this function __device__ - bool insertHashEntry(HashEntry entry) - { - uint h = computeHashPos(entry.pos); - uint hp = h * HASH_BUCKET_SIZE; - - for (uint j = 0; j < HASH_BUCKET_SIZE; j++) { - uint i = j + hp; - //const HashEntry& curr = d_hash[i]; - int prevWeight = 0; - //InterlockedCompareExchange(hash[3*i+2], FREE_ENTRY, LOCK_ENTRY, prevWeight); - prevWeight = atomicCAS(&d_hash[i].ptr, FREE_ENTRY, LOCK_ENTRY); - if (prevWeight == FREE_ENTRY) { - d_hash[i] = entry; - //setHashEntry(hash, i, entry); - return true; - } - } - -#ifdef HANDLE_COLLISIONS - //updated variables as after the loop - const uint idxLastEntryInBucket = (h+1)*HASH_BUCKET_SIZE - 1; //get last index of bucket - - uint i = idxLastEntryInBucket; //start with the last entry of the current bucket - HashEntry curr; - - unsigned int maxIter = 0; - //[allow_uav_condition] - uint g_MaxLoopIterCount = c_hashParams.m_hashMaxCollisionLinkedListSize; - #pragma unroll 1 - while (maxIter < g_MaxLoopIterCount) { //traverse list until end // why find the end? we you are inserting at the start !!! 
- //curr = getHashEntry(hash, i); - curr = d_hash[i]; //TODO MATTHIAS do by reference - if (curr.offset == 0) break; //we have found the end of the list - i = idxLastEntryInBucket + curr.offset; //go to next element in the list - i %= (HASH_BUCKET_SIZE * c_hashParams.m_hashNumBuckets); //check for overflow - - maxIter++; - } - - maxIter = 0; - int offset = 0; - #pragma unroll 1 - while (maxIter < g_MaxLoopIterCount) { //linear search for free entry - offset++; - uint i = (idxLastEntryInBucket + offset) % (HASH_BUCKET_SIZE * c_hashParams.m_hashNumBuckets); //go to next hash element - if ((offset % HASH_BUCKET_SIZE) == 0) continue; //cannot insert into a last bucket element (would conflict with other linked lists) - - int prevWeight = 0; - //InterlockedCompareExchange(hash[3*i+2], FREE_ENTRY, LOCK_ENTRY, prevWeight); //check for a free entry - uint* d_hashUI = (uint*)d_hash; - prevWeight = prevWeight = atomicCAS(&d_hashUI[3*idxLastEntryInBucket+1], (uint)FREE_ENTRY, (uint)LOCK_ENTRY); - if (prevWeight == FREE_ENTRY) { //if free entry found set prev->next = curr & curr->next = prev->next - //[allow_uav_condition] - //while(hash[3*idxLastEntryInBucket+2] == LOCK_ENTRY); // expects setHashEntry to set the ptr last, required because pos.z is packed into the same value -> prev->next = curr -> might corrput pos.z - - HashEntry lastEntryInBucket = d_hash[idxLastEntryInBucket]; //get prev (= lastEntry in Bucket) - - int newOffsetPrev = (offset << 16) | (lastEntryInBucket.pos.z & 0x0000ffff); //prev->next = curr (maintain old z-pos) - int oldOffsetPrev = 0; - //InterlockedExchange(hash[3*idxLastEntryInBucket+1], newOffsetPrev, oldOffsetPrev); //set prev offset atomically - uint* d_hashUI = (uint*)d_hash; - oldOffsetPrev = prevWeight = atomicExch(&d_hashUI[3*idxLastEntryInBucket+1], newOffsetPrev); - entry.offset = oldOffsetPrev >> 16; //remove prev z-pos from old offset - - //setHashEntry(hash, i, entry); //sets the current hashEntry with: curr->next = prev->next - d_hash[i] = entry; - return true; - } - - maxIter++; - } -#endif - - return false; - } - - + bool insertHashEntry(HashEntry entry); //! 
deletes a hash entry position for a given sdfBlock index (returns true uppon successful deletion; otherwise returns false) __device__ - bool deleteHashEntryElement(const int3& sdfBlock) { - uint h = computeHashPos(sdfBlock); //hash bucket - uint hp = h * HASH_BUCKET_SIZE; //hash position - - for (uint j = 0; j < HASH_BUCKET_SIZE; j++) { - uint i = j + hp; - const HashEntry& curr = d_hash[i]; - if (curr.pos.x == sdfBlock.x && curr.pos.y == sdfBlock.y && curr.pos.z == sdfBlock.z && curr.ptr != FREE_ENTRY) { -#ifndef HANDLE_COLLISIONS - const uint linBlockSize = SDF_BLOCK_SIZE * SDF_BLOCK_SIZE * SDF_BLOCK_SIZE; - appendHeap(curr.ptr / linBlockSize); - //heapAppend.Append(curr.ptr / linBlockSize); - deleteHashEntry(i); - return true; -#endif -#ifdef HANDLE_COLLISIONS - if (curr.offset != 0) { //if there was a pointer set it to the next list element - //int prevValue = 0; - //InterlockedExchange(bucketMutex[h], LOCK_ENTRY, prevValue); //lock the hash bucket - int prevValue = atomicExch(&d_hashBucketMutex[h], LOCK_ENTRY); - if (prevValue == LOCK_ENTRY) return false; - if (prevValue != LOCK_ENTRY) { - const uint linBlockSize = SDF_BLOCK_SIZE * SDF_BLOCK_SIZE * SDF_BLOCK_SIZE; - appendHeap(curr.ptr / linBlockSize); - //heapAppend.Append(curr.ptr / linBlockSize); - int nextIdx = (i + curr.offset) % (HASH_BUCKET_SIZE*c_hashParams.m_hashNumBuckets); - //setHashEntry(hash, i, getHashEntry(hash, nextIdx)); - d_hash[i] = d_hash[nextIdx]; - deleteHashEntry(nextIdx); - return true; - } - } else { - const uint linBlockSize = SDF_BLOCK_SIZE * SDF_BLOCK_SIZE * SDF_BLOCK_SIZE; - appendHeap(curr.ptr / linBlockSize); - //heapAppend.Append(curr.ptr / linBlockSize); - deleteHashEntry(i); - return true; - } -#endif //HANDLE_COLLSISION - } - } -#ifdef HANDLE_COLLISIONS - const uint idxLastEntryInBucket = (h+1)*HASH_BUCKET_SIZE - 1; - int i = idxLastEntryInBucket; - HashEntry curr; - curr = d_hash[i]; - int prevIdx = i; - i = idxLastEntryInBucket + curr.offset; //go to next element in the list - i %= (HASH_BUCKET_SIZE * c_hashParams.m_hashNumBuckets); //check for overflow - - unsigned int maxIter = 0; - uint g_MaxLoopIterCount = c_hashParams.m_hashMaxCollisionLinkedListSize; - - #pragma unroll 1 - while (maxIter < g_MaxLoopIterCount) { - curr = d_hash[i]; - //found that dude that we need/want to delete - if (curr.pos.x == sdfBlock.x && curr.pos.y == sdfBlock.y && curr.pos.z == sdfBlock.z && curr.ptr != FREE_ENTRY) { - //int prevValue = 0; - //InterlockedExchange(bucketMutex[h], LOCK_ENTRY, prevValue); //lock the hash bucket - int prevValue = atomicExch(&d_hashBucketMutex[h], LOCK_ENTRY); - if (prevValue == LOCK_ENTRY) return false; - if (prevValue != LOCK_ENTRY) { - const uint linBlockSize = SDF_BLOCK_SIZE * SDF_BLOCK_SIZE * SDF_BLOCK_SIZE; - appendHeap(curr.ptr / linBlockSize); - //heapAppend.Append(curr.ptr / linBlockSize); - deleteHashEntry(i); - HashEntry prev = d_hash[prevIdx]; - prev.offset = curr.offset; - //setHashEntry(hash, prevIdx, prev); - d_hash[prevIdx] = prev; - return true; - } - } - - if (curr.offset == 0) { //we have found the end of the list - return false; //should actually never happen because we need to find that guy before - } - prevIdx = i; - i = idxLastEntryInBucket + curr.offset; //go to next element in the list - i %= (HASH_BUCKET_SIZE * c_hashParams.m_hashNumBuckets); //check for overflow - - maxIter++; - } -#endif // HANDLE_COLLSISION - return false; - } + bool deleteHashEntryElement(const int3& sdfBlock); #endif //CUDACC diff --git 
a/applications/reconstruct/include/ftl/voxel_hash_params.hpp b/applications/reconstruct/include/ftl/voxel_hash_params.hpp index 821631017..cb94404c3 100644 --- a/applications/reconstruct/include/ftl/voxel_hash_params.hpp +++ b/applications/reconstruct/include/ftl/voxel_hash_params.hpp @@ -21,8 +21,8 @@ struct __align__(16) HashParams { float4x4 m_rigidTransformInverse; unsigned int m_hashNumBuckets; - unsigned int m_hashBucketSize; - unsigned int m_hashMaxCollisionLinkedListSize; + unsigned int m_deprecated1; + unsigned int m_deprecated2; //m_hashMaxCollisionLinkedListSize; unsigned int m_numSDFBlocks; int m_SDFBlockSize; diff --git a/applications/reconstruct/include/ftl/voxel_scene.hpp b/applications/reconstruct/include/ftl/voxel_scene.hpp index 487cf4b95..e75a1b724 100644 --- a/applications/reconstruct/include/ftl/voxel_scene.hpp +++ b/applications/reconstruct/include/ftl/voxel_scene.hpp @@ -47,16 +47,8 @@ class SceneRep : public ftl::Configurable { // Mark voxels as surfaces // void isosurface(); - /** - * Note: lastRigidTransform appears to be the estimated camera pose. - * Note: bitMask can be nullptr if not streaming out voxels from GPU - */ - //void integrate(const Eigen::Matrix4f& lastRigidTransform, const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, unsigned int* d_bitMask); - void setLastRigidTransform(const Eigen::Matrix4f& lastRigidTransform); - //void setLastRigidTransformAndCompactify(const Eigen::Matrix4f& lastRigidTransform, const DepthCameraData& depthCameraData); - const Eigen::Matrix4f getLastRigidTransform() const; diff --git a/applications/reconstruct/src/compactors.cu b/applications/reconstruct/src/compactors.cu index b1eb1eab7..373a71fc2 100644 --- a/applications/reconstruct/src/compactors.cu +++ b/applications/reconstruct/src/compactors.cu @@ -68,7 +68,7 @@ __global__ void compactifyVisibleKernel(HashData hashData, HashParams hashParams //const HashParams& hashParams = c_hashParams; const unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x; #ifdef COMPACTIFY_HASH_SIMPLE - if (idx < hashParams.m_hashNumBuckets * HASH_BUCKET_SIZE) { + if (idx < hashParams.m_hashNumBuckets) { if (hashData.d_hash[idx].ptr != FREE_ENTRY) { if (hashData.isSDFBlockInCameraFrustumApprox(hashParams, camera, hashData.d_hash[idx].pos)) { @@ -83,7 +83,7 @@ __global__ void compactifyVisibleKernel(HashData hashData, HashParams hashParams __syncthreads(); int addrLocal = -1; - if (idx < hashParams.m_hashNumBuckets * HASH_BUCKET_SIZE) { + if (idx < hashParams.m_hashNumBuckets) { if (hashData.d_hash[idx].ptr != FREE_ENTRY) { if (hashData.isSDFBlockInCameraFrustumApprox(hashParams, camera, hashData.d_hash[idx].pos)) { @@ -109,7 +109,7 @@ __global__ void compactifyVisibleKernel(HashData hashData, HashParams hashParams void ftl::cuda::compactifyVisible(HashData& hashData, const HashParams& hashParams, const DepthCameraParams &camera, cudaStream_t stream) { const unsigned int threadsPerBlock = COMPACTIFY_HASH_THREADS_PER_BLOCK; - const dim3 gridSize((HASH_BUCKET_SIZE * hashParams.m_hashNumBuckets + threadsPerBlock - 1) / threadsPerBlock, 1); + const dim3 gridSize((hashParams.m_hashNumBuckets + threadsPerBlock - 1) / threadsPerBlock, 1); const dim3 blockSize(threadsPerBlock, 1); cudaSafeCall(cudaMemsetAsync(hashData.d_hashCompactifiedCounter, 0, sizeof(int),stream)); @@ -129,7 +129,7 @@ __global__ void compactifyAllocatedKernel(HashData hashData) const HashParams& hashParams = c_hashParams; const unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x; #ifdef 
COMPACTIFY_HASH_SIMPLE - if (idx < hashParams.m_hashNumBuckets * HASH_BUCKET_SIZE) { + if (idx < hashParams.m_hashNumBuckets) { if (hashData.d_hash[idx].ptr != FREE_ENTRY) { int addr = atomicAdd(hashData.d_hashCompactifiedCounter, 1); hashData.d_hashCompactified[addr] = hashData.d_hash[idx]; @@ -141,7 +141,7 @@ __global__ void compactifyAllocatedKernel(HashData hashData) __syncthreads(); int addrLocal = -1; - if (idx < hashParams.m_hashNumBuckets * HASH_BUCKET_SIZE) { + if (idx < hashParams.m_hashNumBuckets) { if (hashData.d_hash[idx].ptr != FREE_ENTRY) { addrLocal = atomicAdd(&localCounter, 1); } @@ -164,7 +164,7 @@ __global__ void compactifyAllocatedKernel(HashData hashData) void ftl::cuda::compactifyAllocated(HashData& hashData, const HashParams& hashParams, cudaStream_t stream) { const unsigned int threadsPerBlock = COMPACTIFY_HASH_THREADS_PER_BLOCK; - const dim3 gridSize((HASH_BUCKET_SIZE * hashParams.m_hashNumBuckets + threadsPerBlock - 1) / threadsPerBlock, 1); + const dim3 gridSize((hashParams.m_hashNumBuckets + threadsPerBlock - 1) / threadsPerBlock, 1); const dim3 blockSize(threadsPerBlock, 1); cudaSafeCall(cudaMemsetAsync(hashData.d_hashCompactifiedCounter, 0, sizeof(int), stream)); diff --git a/applications/reconstruct/src/scene_rep_hash_sdf.cu b/applications/reconstruct/src/scene_rep_hash_sdf.cu index 152722a34..4f57bf370 100644 --- a/applications/reconstruct/src/scene_rep_hash_sdf.cu +++ b/applications/reconstruct/src/scene_rep_hash_sdf.cu @@ -98,7 +98,7 @@ __global__ void resetHashKernel(HashData hashData) { const HashParams& hashParams = c_hashParams; const unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x; - if (idx < hashParams.m_hashNumBuckets * HASH_BUCKET_SIZE) { + if (idx < hashParams.m_hashNumBuckets) { hashData.deleteHashEntry(hashData.d_hash[idx]); hashData.deleteHashEntry(hashData.d_hashCompactified[idx]); } @@ -133,7 +133,7 @@ extern "C" void resetCUDA(HashData& hashData, const HashParams& hashParams) { //resetting the hash - const dim3 gridSize((HASH_BUCKET_SIZE * hashParams.m_hashNumBuckets + (T_PER_BLOCK*T_PER_BLOCK) - 1)/(T_PER_BLOCK*T_PER_BLOCK), 1); + const dim3 gridSize((hashParams.m_hashNumBuckets + (T_PER_BLOCK*T_PER_BLOCK) - 1)/(T_PER_BLOCK*T_PER_BLOCK), 1); const dim3 blockSize((T_PER_BLOCK*T_PER_BLOCK), 1); resetHashKernel<<<gridSize, blockSize>>>(hashData); diff --git a/applications/reconstruct/src/voxel_hash.cpp b/applications/reconstruct/src/voxel_hash.cpp new file mode 100644 index 000000000..f5e6b94b6 --- /dev/null +++ b/applications/reconstruct/src/voxel_hash.cpp @@ -0,0 +1,109 @@ +#include <ftl/voxel_hash.hpp> + +using ftl::voxhash::HashData; +using ftl::voxhash::HashParams; + +void HashData::allocate(const HashParams& params, bool dataOnGPU) { + m_bIsOnGPU = dataOnGPU; + if (m_bIsOnGPU) { + cudaSafeCall(cudaMalloc(&d_heap, sizeof(unsigned int) * params.m_numSDFBlocks)); + cudaSafeCall(cudaMalloc(&d_heapCounter, sizeof(unsigned int))); + cudaSafeCall(cudaMalloc(&d_hash, sizeof(HashEntry)* params.m_hashNumBuckets)); + cudaSafeCall(cudaMalloc(&d_hashDecision, sizeof(int)* params.m_hashNumBuckets)); + cudaSafeCall(cudaMalloc(&d_hashDecisionPrefix, sizeof(int)* params.m_hashNumBuckets)); + cudaSafeCall(cudaMalloc(&d_hashCompactified, sizeof(HashEntry)* params.m_hashNumBuckets)); + cudaSafeCall(cudaMalloc(&d_hashCompactifiedCounter, sizeof(int))); + cudaSafeCall(cudaMalloc(&d_SDFBlocks, sizeof(Voxel) * params.m_numSDFBlocks * params.m_SDFBlockSize*params.m_SDFBlockSize*params.m_SDFBlockSize)); + cudaSafeCall(cudaMalloc(&d_hashBucketMutex, 
sizeof(int)* params.m_hashNumBuckets));
+	} else {
+		d_heap = new unsigned int[params.m_numSDFBlocks];
+		d_heapCounter = new unsigned int[1];
+		d_hash = new HashEntry[params.m_hashNumBuckets];
+		d_hashDecision = new int[params.m_hashNumBuckets];
+		d_hashDecisionPrefix = new int[params.m_hashNumBuckets];
+		d_hashCompactified = new HashEntry[params.m_hashNumBuckets];
+		d_hashCompactifiedCounter = new int[1];
+		d_SDFBlocks = new Voxel[params.m_numSDFBlocks * params.m_SDFBlockSize*params.m_SDFBlockSize*params.m_SDFBlockSize];
+		d_hashBucketMutex = new int[params.m_hashNumBuckets];
+	}
+
+	updateParams(params);
+}
+
+void HashData::updateParams(const HashParams& params) {
+	if (m_bIsOnGPU) {
+		updateConstantHashParams(params);
+	}
+}
+
+void HashData::free() {
+	if (m_bIsOnGPU) {
+		cudaSafeCall(cudaFree(d_heap));
+		cudaSafeCall(cudaFree(d_heapCounter));
+		cudaSafeCall(cudaFree(d_hash));
+		cudaSafeCall(cudaFree(d_hashDecision));
+		cudaSafeCall(cudaFree(d_hashDecisionPrefix));
+		cudaSafeCall(cudaFree(d_hashCompactified));
+		cudaSafeCall(cudaFree(d_hashCompactifiedCounter));
+		cudaSafeCall(cudaFree(d_SDFBlocks));
+		cudaSafeCall(cudaFree(d_hashBucketMutex));
+	} else {
+		if (d_heap) delete[] d_heap;
+		if (d_heapCounter) delete[] d_heapCounter;
+		if (d_hash) delete[] d_hash;
+		if (d_hashDecision) delete[] d_hashDecision;
+		if (d_hashDecisionPrefix) delete[] d_hashDecisionPrefix;
+		if (d_hashCompactified) delete[] d_hashCompactified;
+		if (d_hashCompactifiedCounter) delete[] d_hashCompactifiedCounter;
+		if (d_SDFBlocks) delete[] d_SDFBlocks;
+		if (d_hashBucketMutex) delete[] d_hashBucketMutex;
+	}
+
+	d_hash = NULL;
+	d_heap = NULL;
+	d_heapCounter = NULL;
+	d_hashDecision = NULL;
+	d_hashDecisionPrefix = NULL;
+	d_hashCompactified = NULL;
+	d_hashCompactifiedCounter = NULL;
+	d_SDFBlocks = NULL;
+	d_hashBucketMutex = NULL;
+}
+
+HashData HashData::download() const {
+	if (!m_bIsOnGPU) return *this;
+	HashParams params;
+
+	HashData hashData;
+	hashData.allocate(params, false);	//allocate the data on the CPU
+	cudaSafeCall(cudaMemcpy(hashData.d_heap, d_heap, sizeof(unsigned int) * params.m_numSDFBlocks, cudaMemcpyDeviceToHost));
+	cudaSafeCall(cudaMemcpy(hashData.d_heapCounter, d_heapCounter, sizeof(unsigned int), cudaMemcpyDeviceToHost));
+	cudaSafeCall(cudaMemcpy(hashData.d_hash, d_hash, sizeof(HashEntry)* params.m_hashNumBuckets, cudaMemcpyDeviceToHost));
+	cudaSafeCall(cudaMemcpy(hashData.d_hashDecision, d_hashDecision, sizeof(int)*params.m_hashNumBuckets, cudaMemcpyDeviceToHost));
+	cudaSafeCall(cudaMemcpy(hashData.d_hashDecisionPrefix, d_hashDecisionPrefix, sizeof(int)*params.m_hashNumBuckets, cudaMemcpyDeviceToHost));
+	cudaSafeCall(cudaMemcpy(hashData.d_hashCompactified, d_hashCompactified, sizeof(HashEntry)* params.m_hashNumBuckets, cudaMemcpyDeviceToHost));
+	cudaSafeCall(cudaMemcpy(hashData.d_hashCompactifiedCounter, d_hashCompactifiedCounter, sizeof(unsigned int), cudaMemcpyDeviceToHost));
+	cudaSafeCall(cudaMemcpy(hashData.d_SDFBlocks, d_SDFBlocks, sizeof(Voxel) * params.m_numSDFBlocks * params.m_SDFBlockSize*params.m_SDFBlockSize*params.m_SDFBlockSize, cudaMemcpyDeviceToHost));
+	cudaSafeCall(cudaMemcpy(hashData.d_hashBucketMutex, d_hashBucketMutex, sizeof(int)* params.m_hashNumBuckets, cudaMemcpyDeviceToHost));
+
+	return hashData;
+}
+
+HashData HashData::upload() const {
+	if (m_bIsOnGPU) return *this;
+	HashParams params;
+
+	HashData hashData;
+	hashData.allocate(params, true);	//allocate the data on the GPU
+	cudaSafeCall(cudaMemcpy(hashData.d_heap, d_heap, sizeof(unsigned int) * params.m_numSDFBlocks, cudaMemcpyHostToDevice));
+	cudaSafeCall(cudaMemcpy(hashData.d_heapCounter, d_heapCounter, sizeof(unsigned int), cudaMemcpyHostToDevice));
+	cudaSafeCall(cudaMemcpy(hashData.d_hash, d_hash, sizeof(HashEntry)* params.m_hashNumBuckets, cudaMemcpyHostToDevice));
+	cudaSafeCall(cudaMemcpy(hashData.d_hashDecision, d_hashDecision, sizeof(int)*params.m_hashNumBuckets, cudaMemcpyHostToDevice));
+	cudaSafeCall(cudaMemcpy(hashData.d_hashDecisionPrefix, d_hashDecisionPrefix, sizeof(int)*params.m_hashNumBuckets, cudaMemcpyHostToDevice));
+	cudaSafeCall(cudaMemcpy(hashData.d_hashCompactified, d_hashCompactified, sizeof(HashEntry)* params.m_hashNumBuckets, cudaMemcpyHostToDevice));
+	cudaSafeCall(cudaMemcpy(hashData.d_hashCompactifiedCounter, d_hashCompactifiedCounter, sizeof(unsigned int), cudaMemcpyHostToDevice));
+	cudaSafeCall(cudaMemcpy(hashData.d_SDFBlocks, d_SDFBlocks, sizeof(Voxel) * params.m_numSDFBlocks * params.m_SDFBlockSize*params.m_SDFBlockSize*params.m_SDFBlockSize, cudaMemcpyHostToDevice));
+	cudaSafeCall(cudaMemcpy(hashData.d_hashBucketMutex, d_hashBucketMutex, sizeof(int)* params.m_hashNumBuckets, cudaMemcpyHostToDevice));
+
+	return hashData;
+}
diff --git a/applications/reconstruct/src/voxel_hash.cu b/applications/reconstruct/src/voxel_hash.cu
new file mode 100644
index 000000000..7447a1c54
--- /dev/null
+++ b/applications/reconstruct/src/voxel_hash.cu
@@ -0,0 +1,240 @@
+#include <ftl/voxel_hash.hpp>
+
+using namespace ftl::voxhash;
+
+#define COLLISION_LIST_SIZE 5
+
+//! returns the hash entry for a given sdf block id; if there was no hash entry the returned entry will have a ptr with FREE_ENTRY set
+__device__
+HashEntry HashData::getHashEntryForSDFBlockPos(const int3& sdfBlock) const {
+	uint h = computeHashPos(sdfBlock);	//hash
+	int3 pos = sdfBlock;
+
+	HashEntry curr;
+
+	int i = h;
+	unsigned int maxIter = 0;
+
+	#pragma unroll 2
+	while (maxIter < COLLISION_LIST_SIZE) {
+		curr = d_hash[i];
+
+		if (curr.pos == pos && curr.ptr != FREE_ENTRY) return curr;
+		if (curr.offset == 0) break;
+
+		i += curr.offset;	//go to next element in the list
+		i %= (params().m_hashNumBuckets);	//check for overflow
+		++maxIter;
+	}
+
+	// Could not find it, so return a dummy entry
+	curr.pos = pos;
+	curr.ptr = FREE_ENTRY;
+	return curr;
+}
+
+//for histogram (collisions traversal only)
+__device__
+unsigned int HashData::getNumHashLinkedList(unsigned int bucketID) {
+	unsigned int listLen = 0;
+
+	unsigned int i = bucketID;	//start at this bucket's own entry
+	HashEntry curr;	curr.offset = 0;
+
+	unsigned int maxIter = 0;
+
+	#pragma unroll 2
+	while (maxIter < COLLISION_LIST_SIZE) {
+		curr = d_hash[i];
+
+		if (curr.offset == 0) break;
+
+		i += curr.offset;	//go to next element in the list
+		i %= (params().m_hashNumBuckets);	//check for overflow
+		++listLen;
+		++maxIter;
+	}
+
+	return listLen;
+}
+
+//pos in SDF block coordinates
+__device__
+void HashData::allocBlock(const int3& pos, const uchar frame) {
+	uint h = computeHashPos(pos);	//hash bucket
+	uint i = h;
+	HashEntry curr;	curr.offset = 0;
+
+	unsigned int maxIter = 0;
+	#pragma unroll 2
+	while (maxIter < COLLISION_LIST_SIZE) {
+		//offset = curr.offset;
+		curr = d_hash[i];	//TODO MATTHIAS do by reference
+		if (curr.pos == pos && curr.ptr != FREE_ENTRY) return;
+		if (curr.offset == 0) break;
+
+		i += curr.offset;	//go to next element in the list
+		i %= (params().m_hashNumBuckets);	//check for overflow
+		++maxIter;
+	}
+
+	// Limit reached...
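+	// If the offset chain continues past the probe limit, give up: the
+	// block is simply not allocated this frame. Otherwise fall through and
+	// linearly probe from the chain's tail for a free slot to link in.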
+	if (curr.offset != 0) return;
+
+	int j = (i+1) % params().m_hashNumBuckets;	// start probing at the next slot, wrapping at the table end
+	while (maxIter < COLLISION_LIST_SIZE) {
+		//offset = curr.offset;
+		curr = d_hash[j];	//TODO MATTHIAS do by reference
+		if (curr.ptr == FREE_ENTRY) {
+			int prevValue = atomicExch(&d_hashBucketMutex[i], LOCK_ENTRY);
+			if (prevValue != LOCK_ENTRY) {
+				//InterlockedExchange(g_HashBucketMutex[h], LOCK_ENTRY, prevValue);	//lock the hash bucket where we have found a free entry
+				prevValue = atomicExch(&d_hashBucketMutex[j], LOCK_ENTRY);
+				if (prevValue != LOCK_ENTRY) {	//only proceed if the bucket has been locked
+					HashEntry& entry = d_hash[j];
+					entry.pos = pos;
+					entry.offset = 0;
+					entry.flags = 0;	// Flag block as valid in this frame (Nick)
+					entry.ptr = consumeHeap() * SDF_BLOCK_SIZE*SDF_BLOCK_SIZE*SDF_BLOCK_SIZE;	//memory alloc
+					d_hash[i].offset = (j + params().m_hashNumBuckets - i) % params().m_hashNumBuckets;	// keep the relative link positive so traversal wraps via the modulo
+					//setHashEntry(g_Hash, idxLastEntryInBucket, lastEntryInBucket);
+				}
+			}
+			return;	//bucket was already locked
+		}
+
+		++j;
+		j %= (params().m_hashNumBuckets);	//check for overflow
+		++maxIter;
+	}
+}
+
+
+//!inserts a hash entry without allocating any memory: used by streaming: TODO MATTHIAS check the atomics in this function
+/*__device__
+bool HashData::insertHashEntry(HashEntry entry)
+{
+	uint h = computeHashPos(entry.pos);
+	uint hp = h * HASH_BUCKET_SIZE;
+
+	for (uint j = 0; j < HASH_BUCKET_SIZE; j++) {
+		uint i = j + hp;
+		//const HashEntry& curr = d_hash[i];
+		int prevWeight = 0;
+		//InterlockedCompareExchange(hash[3*i+2], FREE_ENTRY, LOCK_ENTRY, prevWeight);
+		prevWeight = atomicCAS(&d_hash[i].ptr, FREE_ENTRY, LOCK_ENTRY);
+		if (prevWeight == FREE_ENTRY) {
+			d_hash[i] = entry;
+			//setHashEntry(hash, i, entry);
+			return true;
+		}
+	}
+
+#ifdef HANDLE_COLLISIONS
+	//updated variables as after the loop
+	const uint idxLastEntryInBucket = (h+1)*HASH_BUCKET_SIZE - 1;	//get last index of bucket
+
+	uint i = idxLastEntryInBucket;	//start with the last entry of the current bucket
+	HashEntry curr;
+
+	unsigned int maxIter = 0;
+	//[allow_uav_condition]
+	uint g_MaxLoopIterCount = params().m_hashMaxCollisionLinkedListSize;
+	#pragma unroll 1
+	while (maxIter < g_MaxLoopIterCount) {	//traverse list until end // why find the end, when you are inserting at the start?
+		//curr = getHashEntry(hash, i);
+		curr = d_hash[i];	//TODO MATTHIAS do by reference
+		if (curr.offset == 0) break;	//we have found the end of the list
+		i = idxLastEntryInBucket + curr.offset;	//go to next element in the list
+		i %= (HASH_BUCKET_SIZE * params().m_hashNumBuckets);	//check for overflow
+
+		maxIter++;
+	}
+
+	maxIter = 0;
+	int offset = 0;
+	#pragma unroll 1
+	while (maxIter < g_MaxLoopIterCount) {	//linear search for free entry
+		offset++;
+		uint i = (idxLastEntryInBucket + offset) % (HASH_BUCKET_SIZE * params().m_hashNumBuckets);	//go to next hash element
+		if ((offset % HASH_BUCKET_SIZE) == 0) continue;	//cannot insert into a last bucket element (would conflict with other linked lists)
+
+		int prevWeight = 0;
+		//InterlockedCompareExchange(hash[3*i+2], FREE_ENTRY, LOCK_ENTRY, prevWeight);	//check for a free entry
+		uint* d_hashUI = (uint*)d_hash;
+		prevWeight = atomicCAS(&d_hashUI[3*idxLastEntryInBucket+1], (uint)FREE_ENTRY, (uint)LOCK_ENTRY);
+		if (prevWeight == FREE_ENTRY) {	//if free entry found set prev->next = curr & curr->next = prev->next
+			//[allow_uav_condition]
+			//while(hash[3*idxLastEntryInBucket+2] == LOCK_ENTRY);	// expects setHashEntry to set the ptr last, required because pos.z is packed into the same value -> prev->next = curr -> might corrupt pos.z
+
+			HashEntry lastEntryInBucket = d_hash[idxLastEntryInBucket];	//get prev (= lastEntry in Bucket)
+
+			int newOffsetPrev = (offset << 16) | (lastEntryInBucket.pos.z & 0x0000ffff);	//prev->next = curr (maintain old z-pos)
+			int oldOffsetPrev = 0;
+			//InterlockedExchange(hash[3*idxLastEntryInBucket+1], newOffsetPrev, oldOffsetPrev);	//set prev offset atomically
+			oldOffsetPrev = prevWeight = atomicExch(&d_hashUI[3*idxLastEntryInBucket+1], newOffsetPrev);
+			entry.offset = oldOffsetPrev >> 16;	//remove prev z-pos from old offset
+
+			//setHashEntry(hash, i, entry);	//sets the current hashEntry with: curr->next = prev->next
+			d_hash[i] = entry;
+			return true;
+		}
+
+		maxIter++;
+	}
+#endif
+
+	return false;
+}*/
+
+
+
+//! deletes a hash entry position for a given sdfBlock index (returns true upon successful deletion; otherwise returns false)
+__device__
+bool HashData::deleteHashEntryElement(const int3& sdfBlock) {
+	uint h = computeHashPos(sdfBlock);	//hash bucket
+
+	int i = h;
+	int prev = -1;
+	HashEntry curr;
+	unsigned int maxIter = 0;
+
+	#pragma unroll 2
+	while (maxIter < COLLISION_LIST_SIZE) {
+		curr = d_hash[i];
+
+		//found the entry we want to delete
+		if (curr.pos == sdfBlock && curr.ptr != FREE_ENTRY) {
+			//int prevValue = 0;
+			//InterlockedExchange(bucketMutex[h], LOCK_ENTRY, prevValue);	//lock the hash bucket
+			int prevValue = atomicExch(&d_hashBucketMutex[i], LOCK_ENTRY);
+			if (prevValue == LOCK_ENTRY) return false;
+			if (prevValue != LOCK_ENTRY) {
+				prevValue = (prev >= 0) ? atomicExch(&d_hashBucketMutex[prev], LOCK_ENTRY) : 0;
+				if (prevValue == LOCK_ENTRY) return false;
+				if (prevValue != LOCK_ENTRY) {
+					const uint linBlockSize = SDF_BLOCK_SIZE * SDF_BLOCK_SIZE * SDF_BLOCK_SIZE;
+					appendHeap(curr.ptr / linBlockSize);
+					deleteHashEntry(i);
+
+					if (prev >= 0) {
+						// Re-link prev past the removed element: extend its link
+						// by curr's own link, or terminate the chain if curr was
+						// the tail.
+						d_hash[prev].offset = (curr.offset == 0) ? 0 : d_hash[prev].offset + curr.offset;
+					}
+					return true;
+				}
+			}
+		}
+
+		if (curr.offset == 0) {	//we have found the end of the list
+			return false;	//should actually never happen because we need to find that guy before
+		}
+		prev = i;
+		i += curr.offset;	//go to next element in the list
+		i %= (params().m_hashNumBuckets);	//check for overflow
+
+		++maxIter;
+	}
+
+	return false;
+}
\ No newline at end of file
diff --git a/applications/reconstruct/src/voxel_scene.cpp b/applications/reconstruct/src/voxel_scene.cpp
index 831780257..a81680490 100644
--- a/applications/reconstruct/src/voxel_scene.cpp
+++ b/applications/reconstruct/src/voxel_scene.cpp
@@ -263,7 +263,7 @@ unsigned int SceneRep::getHeapFreeCount() {
 
 //! debug only!
 void SceneRep::debugHash() {
-	HashEntry* hashCPU = new HashEntry[m_hashParams.m_hashBucketSize*m_hashParams.m_hashNumBuckets];
+	HashEntry* hashCPU = new HashEntry[m_hashParams.m_hashNumBuckets];
 	unsigned int* heapCPU = new unsigned int[m_hashParams.m_numSDFBlocks];
 	unsigned int heapCounterCPU;
 
@@ -271,7 +271,7 @@ void SceneRep::debugHash() {
 	heapCounterCPU++;	//points to the first free entry: number of blocks is one more
 
 	cudaSafeCall(cudaMemcpy(heapCPU, m_hashData.d_heap, sizeof(unsigned int)*m_hashParams.m_numSDFBlocks, cudaMemcpyDeviceToHost));
-	cudaSafeCall(cudaMemcpy(hashCPU, m_hashData.d_hash, sizeof(HashEntry)*m_hashParams.m_hashBucketSize*m_hashParams.m_hashNumBuckets, cudaMemcpyDeviceToHost));
+	cudaSafeCall(cudaMemcpy(hashCPU, m_hashData.d_hash, sizeof(HashEntry)*m_hashParams.m_hashNumBuckets, cudaMemcpyDeviceToHost));
 
 	//Check for duplicates
 	class myint3Voxel {
@@ -316,7 +316,7 @@ void SceneRep::debugHash() {
 
 	std::list<myint3Voxel> l;
 	//std::vector<myint3Voxel> v;
 
-	for (unsigned int i = 0; i < m_hashParams.m_hashBucketSize*m_hashParams.m_hashNumBuckets; i++) {
+	for (unsigned int i = 0; i < m_hashParams.m_hashNumBuckets; i++) {
 		if (hashCPU[i].ptr == -1) {
 			numMinusOne++;
 		}
@@ -376,8 +376,6 @@ HashParams SceneRep::_parametersFromConfig() {
 	params.m_rigidTransform.setIdentity();
 	params.m_rigidTransformInverse.setIdentity();
 	params.m_hashNumBuckets = value("hashNumBuckets", 100000);
-	params.m_hashBucketSize = HASH_BUCKET_SIZE;
-	params.m_hashMaxCollisionLinkedListSize = value("hashMaxCollisionLinkedListSize", 7);
 	params.m_SDFBlockSize = SDF_BLOCK_SIZE;
 	params.m_numSDFBlocks = value("hashNumSDFBlocks",500000);
 	params.m_virtualVoxelSize = value("SDFVoxelSize", 0.006f);
-- 
GitLab
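
The collision handling this patch introduces — one entry per hash slot, chained through small relative offsets and probed for at most a fixed number of steps — can be seen in miniature in the single-threaded C++ sketch below. Entry, TABLE_SIZE, MAX_PROBE, FREE, insert() and find() are illustrative stand-ins for HashEntry, m_hashNumBuckets, COLLISION_LIST_SIZE, FREE_ENTRY, allocBlock() and getHashEntryForSDFBlockPos(); the real device code additionally serializes writers through d_hashBucketMutex and atomic exchanges, which the sketch omits.

#include <cassert>
#include <cstdio>

struct Int3 {
	int x, y, z;
	bool operator==(const Int3 &o) const { return x == o.x && y == o.y && z == o.z; }
};

constexpr int TABLE_SIZE = 16;  // stands in for m_hashNumBuckets
constexpr int MAX_PROBE  = 5;   // stands in for COLLISION_LIST_SIZE
constexpr int FREE       = -1;  // stands in for FREE_ENTRY

struct Entry {
	Int3 pos{0, 0, 0};
	int offset = 0;  // relative link to the next entry of the chain (0 = end)
	int ptr = FREE;  // FREE, or an allocated voxel-block index
};

// Same prime-XOR mixing as computeHashPos, reduced to the sketch's table size.
unsigned hashPos(const Int3 &p) {
	const int p0 = 73856093, p1 = 19349669, p2 = 83492791;
	int r = ((p.x * p0) ^ (p.y * p1) ^ (p.z * p2)) % TABLE_SIZE;
	return unsigned(r < 0 ? r + TABLE_SIZE : r);
}

// Walk the offset chain from the home slot, probing at most MAX_PROBE entries.
const Entry *find(const Entry *table, const Int3 &pos) {
	unsigned i = hashPos(pos);
	for (int n = 0; n < MAX_PROBE; ++n) {
		const Entry &e = table[i];
		if (e.pos == pos && e.ptr != FREE) return &e;
		if (e.offset == 0) break;           // end of chain
		i = (i + e.offset) % TABLE_SIZE;    // follow the relative link
	}
	return nullptr;  // not found within the probe limit
}

// Single-threaded analogue of allocBlock: walk the chain, then linearly
// probe from its tail for a free slot and link it in. The probe budget is
// shared between the two phases, as in the patch.
bool insert(Entry *table, const Int3 &pos, int blockPtr) {
	unsigned i = hashPos(pos);
	int n = 0;
	for (; n < MAX_PROBE; ++n) {
		if (table[i].pos == pos && table[i].ptr != FREE) return true;  // already present
		if (table[i].offset == 0) break;
		i = (i + table[i].offset) % TABLE_SIZE;
	}
	if (n == MAX_PROBE) return false;   // chain longer than the probe budget: give up
	if (table[i].ptr == FREE) {         // the tail slot itself is free
		table[i] = {pos, 0, blockPtr};
		return true;
	}
	for (unsigned j = (i + 1) % TABLE_SIZE; n < MAX_PROBE; ++n, j = (j + 1) % TABLE_SIZE) {
		if (table[j].ptr == FREE) {
			table[j] = {pos, 0, blockPtr};
			// Keep the relative link positive so find()'s modulo wrap works.
			table[i].offset = int((j + TABLE_SIZE - i) % TABLE_SIZE);
			return true;
		}
	}
	return false;  // no free slot within the probe budget
}

int main() {
	Entry table[TABLE_SIZE];
	const Int3 keys[] = {{1, 2, 3}, {4, 5, 6}, {7, 8, 9}, {1, 2, 4}};
	for (int k = 0; k < 4; ++k) assert(insert(table, keys[k], 100 + k));
	for (int k = 0; k < 4; ++k) {
		const Entry *e = find(table, keys[k]);
		assert(e && e->ptr == 100 + k);
		std::printf("(%d,%d,%d) -> block %d\n", keys[k].x, keys[k].y, keys[k].z, e->ptr);
	}
	return 0;
}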