Commit fc934762 authored by Nicolas Pope

Resolves #63 bucket removal
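In short: the voxel hash drops its fixed-size buckets. The table becomes a flat array with one HashEntry per bucket (m_hashNumBuckets entries total), and collisions are handled purely through each entry's offset chain, so every allocation, loop bound, and kernel launch below loses its HASH_BUCKET_SIZE factor. A minimal CUDA sketch of the resulting one-thread-per-entry kernel pattern (stand-in types and constants, not code from this commit):

	struct HashEntry { int3 pos; int offset; int ptr; };	// simplified stand-in
	const int FREE_ENTRY = -2;	// stand-in value; the real constant lives in the project headers

	__global__ void visitEntriesKernel(HashEntry *d_hash, unsigned int numBuckets) {
		const unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x;
		if (idx < numBuckets) {	// previously: idx < numBuckets * HASH_BUCKET_SIZE
			if (d_hash[idx].ptr != FREE_ENTRY) {
				// ...per-entry work: reset, compactify, etc.
			}
		}
	}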

parent 7668ea66
@@ -11,6 +11,8 @@ set(REPSRC
 	src/integrators.cu
 	src/ray_cast_sdf.cu
 	src/camera_util.cu
+	src/voxel_hash.cu
+	src/voxel_hash.cpp
 	src/ray_cast_sdf.cpp
 	src/registration.cpp
 	src/virtual_source.cpp
...
@@ -662,6 +662,11 @@ inline __host__ __device__ void operator/=(int3 &a, int s)
 	a.x /= s; a.y /= s; a.z /= s;
 }
+
+// Comparison
+inline __host__ __device__ bool operator==(const int3 &a, const int3 &b) {
+	return a.x == b.x && a.y == b.y && a.z == b.z;
+}
 // clamp
 inline __device__ __host__ int clamp(int f, int a, int b)
 {
...
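The added operator lets block positions be compared directly; the probing loops in the new src/voxel_hash.cu below depend on it (e.g. curr.pos == pos). A trivial usage sketch:

	int3 a = make_int3(1, 2, 3);
	int3 b = make_int3(1, 2, 3);
	bool same = (a == b);	// true: compares x, y and z component-wise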
[One file's diff is collapsed and not shown.]
@@ -21,8 +21,8 @@ struct __align__(16) HashParams {
 	float4x4 m_rigidTransformInverse;
 	unsigned int m_hashNumBuckets;
-	unsigned int m_hashBucketSize;
-	unsigned int m_hashMaxCollisionLinkedListSize;
+	unsigned int m_deprecated1;
+	unsigned int m_deprecated2;	//m_hashMaxCollisionLinkedListSize;
 	unsigned int m_numSDFBlocks;
 	int m_SDFBlockSize;
...
@@ -47,16 +47,8 @@ class SceneRep : public ftl::Configurable {
 	// Mark voxels as surfaces
 	// void isosurface();
-	/**
-	 * Note: lastRigidTransform appears to be the estimated camera pose.
-	 * Note: bitMask can be nullptr if not streaming out voxels from GPU
-	 */
-	//void integrate(const Eigen::Matrix4f& lastRigidTransform, const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, unsigned int* d_bitMask);
 	void setLastRigidTransform(const Eigen::Matrix4f& lastRigidTransform);
-	//void setLastRigidTransformAndCompactify(const Eigen::Matrix4f& lastRigidTransform, const DepthCameraData& depthCameraData);
 	const Eigen::Matrix4f getLastRigidTransform() const;
...
@@ -68,7 +68,7 @@ __global__ void compactifyVisibleKernel(HashData hashData, HashParams hashParams
 	//const HashParams& hashParams = c_hashParams;
 	const unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x;
 #ifdef COMPACTIFY_HASH_SIMPLE
-	if (idx < hashParams.m_hashNumBuckets * HASH_BUCKET_SIZE) {
+	if (idx < hashParams.m_hashNumBuckets) {
 		if (hashData.d_hash[idx].ptr != FREE_ENTRY) {
 			if (hashData.isSDFBlockInCameraFrustumApprox(hashParams, camera, hashData.d_hash[idx].pos))
 			{
@@ -83,7 +83,7 @@ __global__ void compactifyVisibleKernel(HashData hashData, HashParams hashParams
 	__syncthreads();
 	int addrLocal = -1;
-	if (idx < hashParams.m_hashNumBuckets * HASH_BUCKET_SIZE) {
+	if (idx < hashParams.m_hashNumBuckets) {
 		if (hashData.d_hash[idx].ptr != FREE_ENTRY) {
 			if (hashData.isSDFBlockInCameraFrustumApprox(hashParams, camera, hashData.d_hash[idx].pos))
 			{
@@ -109,7 +109,7 @@ __global__ void compactifyVisibleKernel(HashData hashData, HashParams hashParams
 void ftl::cuda::compactifyVisible(HashData& hashData, const HashParams& hashParams, const DepthCameraParams &camera, cudaStream_t stream) {
 	const unsigned int threadsPerBlock = COMPACTIFY_HASH_THREADS_PER_BLOCK;
-	const dim3 gridSize((HASH_BUCKET_SIZE * hashParams.m_hashNumBuckets + threadsPerBlock - 1) / threadsPerBlock, 1);
+	const dim3 gridSize((hashParams.m_hashNumBuckets + threadsPerBlock - 1) / threadsPerBlock, 1);
 	const dim3 blockSize(threadsPerBlock, 1);
 	cudaSafeCall(cudaMemsetAsync(hashData.d_hashCompactifiedCounter, 0, sizeof(int),stream));
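Both compactify launch wrappers (and resetCUDA below) now size the grid for one thread per hash entry. A hypothetical helper, not in the codebase, that makes the ceiling division explicit:

	inline dim3 gridForEntries(unsigned int numEntries, unsigned int threadsPerBlock) {
		// round up so every entry gets a thread
		return dim3((numEntries + threadsPerBlock - 1) / threadsPerBlock, 1);
	}
	// usage: gridForEntries(hashParams.m_hashNumBuckets, COMPACTIFY_HASH_THREADS_PER_BLOCK)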
@@ -129,7 +129,7 @@ __global__ void compactifyAllocatedKernel(HashData hashData)
 	const HashParams& hashParams = c_hashParams;
 	const unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x;
 #ifdef COMPACTIFY_HASH_SIMPLE
-	if (idx < hashParams.m_hashNumBuckets * HASH_BUCKET_SIZE) {
+	if (idx < hashParams.m_hashNumBuckets) {
 		if (hashData.d_hash[idx].ptr != FREE_ENTRY) {
 			int addr = atomicAdd(hashData.d_hashCompactifiedCounter, 1);
 			hashData.d_hashCompactified[addr] = hashData.d_hash[idx];
@@ -141,7 +141,7 @@ __global__ void compactifyAllocatedKernel(HashData hashData)
 	__syncthreads();
 	int addrLocal = -1;
-	if (idx < hashParams.m_hashNumBuckets * HASH_BUCKET_SIZE) {
+	if (idx < hashParams.m_hashNumBuckets) {
 		if (hashData.d_hash[idx].ptr != FREE_ENTRY) {
 			addrLocal = atomicAdd(&localCounter, 1);
 		}
@@ -164,7 +164,7 @@ __global__ void compactifyAllocatedKernel(HashData hashData)
 void ftl::cuda::compactifyAllocated(HashData& hashData, const HashParams& hashParams, cudaStream_t stream) {
 	const unsigned int threadsPerBlock = COMPACTIFY_HASH_THREADS_PER_BLOCK;
-	const dim3 gridSize((HASH_BUCKET_SIZE * hashParams.m_hashNumBuckets + threadsPerBlock - 1) / threadsPerBlock, 1);
+	const dim3 gridSize((hashParams.m_hashNumBuckets + threadsPerBlock - 1) / threadsPerBlock, 1);
 	const dim3 blockSize(threadsPerBlock, 1);
 	cudaSafeCall(cudaMemsetAsync(hashData.d_hashCompactifiedCounter, 0, sizeof(int), stream));
...
@@ -98,7 +98,7 @@ __global__ void resetHashKernel(HashData hashData)
 {
 	const HashParams& hashParams = c_hashParams;
 	const unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x;
-	if (idx < hashParams.m_hashNumBuckets * HASH_BUCKET_SIZE) {
+	if (idx < hashParams.m_hashNumBuckets) {
 		hashData.deleteHashEntry(hashData.d_hash[idx]);
 		hashData.deleteHashEntry(hashData.d_hashCompactified[idx]);
 	}
@@ -133,7 +133,7 @@ extern "C" void resetCUDA(HashData& hashData, const HashParams& hashParams)
 {
 	//resetting the hash
-	const dim3 gridSize((HASH_BUCKET_SIZE * hashParams.m_hashNumBuckets + (T_PER_BLOCK*T_PER_BLOCK) - 1)/(T_PER_BLOCK*T_PER_BLOCK), 1);
+	const dim3 gridSize((hashParams.m_hashNumBuckets + (T_PER_BLOCK*T_PER_BLOCK) - 1)/(T_PER_BLOCK*T_PER_BLOCK), 1);
 	const dim3 blockSize((T_PER_BLOCK*T_PER_BLOCK), 1);
 	resetHashKernel<<<gridSize, blockSize>>>(hashData);
...
src/voxel_hash.cpp (new file):

#include <ftl/voxel_hash.hpp>

using ftl::voxhash::HashData;
using ftl::voxhash::HashParams;
void HashData::allocate(const HashParams& params, bool dataOnGPU) {
	m_bIsOnGPU = dataOnGPU;
	if (m_bIsOnGPU) {
		cudaSafeCall(cudaMalloc(&d_heap, sizeof(unsigned int) * params.m_numSDFBlocks));
		cudaSafeCall(cudaMalloc(&d_heapCounter, sizeof(unsigned int)));
		cudaSafeCall(cudaMalloc(&d_hash, sizeof(HashEntry) * params.m_hashNumBuckets));
		cudaSafeCall(cudaMalloc(&d_hashDecision, sizeof(int) * params.m_hashNumBuckets));
		cudaSafeCall(cudaMalloc(&d_hashDecisionPrefix, sizeof(int) * params.m_hashNumBuckets));
		cudaSafeCall(cudaMalloc(&d_hashCompactified, sizeof(HashEntry) * params.m_hashNumBuckets));
		cudaSafeCall(cudaMalloc(&d_hashCompactifiedCounter, sizeof(int)));
		cudaSafeCall(cudaMalloc(&d_SDFBlocks, sizeof(Voxel) * params.m_numSDFBlocks * params.m_SDFBlockSize*params.m_SDFBlockSize*params.m_SDFBlockSize));
		cudaSafeCall(cudaMalloc(&d_hashBucketMutex, sizeof(int) * params.m_hashNumBuckets));
	} else {
		d_heap = new unsigned int[params.m_numSDFBlocks];
		d_heapCounter = new unsigned int[1];
		d_hash = new HashEntry[params.m_hashNumBuckets];
		d_hashDecision = new int[params.m_hashNumBuckets];
		d_hashDecisionPrefix = new int[params.m_hashNumBuckets];
		d_hashCompactified = new HashEntry[params.m_hashNumBuckets];
		d_hashCompactifiedCounter = new int[1];
		d_SDFBlocks = new Voxel[params.m_numSDFBlocks * params.m_SDFBlockSize*params.m_SDFBlockSize*params.m_SDFBlockSize];
		d_hashBucketMutex = new int[params.m_hashNumBuckets];
	}

	updateParams(params);
}
void HashData::updateParams(const HashParams& params) {
	if (m_bIsOnGPU) {
		updateConstantHashParams(params);
	}
}
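updateConstantHashParams is declared elsewhere; presumably (an assumption, this commit does not show it) it copies the struct into the __constant__ symbol c_hashParams that device code reads back via params(). Such a helper typically looks like:

	__constant__ HashParams c_hashParams;	// assumption: matches the symbol used by the kernels

	void updateConstantHashParams(const HashParams& params) {
		// copy the host-side struct into device constant memory
		cudaSafeCall(cudaMemcpyToSymbol(c_hashParams, &params, sizeof(HashParams)));
	}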
void HashData::free() {
	if (m_bIsOnGPU) {
		cudaSafeCall(cudaFree(d_heap));
		cudaSafeCall(cudaFree(d_heapCounter));
		cudaSafeCall(cudaFree(d_hash));
		cudaSafeCall(cudaFree(d_hashDecision));
		cudaSafeCall(cudaFree(d_hashDecisionPrefix));
		cudaSafeCall(cudaFree(d_hashCompactified));
		cudaSafeCall(cudaFree(d_hashCompactifiedCounter));
		cudaSafeCall(cudaFree(d_SDFBlocks));
		cudaSafeCall(cudaFree(d_hashBucketMutex));
	} else {
		if (d_heap) delete[] d_heap;
		if (d_heapCounter) delete[] d_heapCounter;
		if (d_hash) delete[] d_hash;
		if (d_hashDecision) delete[] d_hashDecision;
		if (d_hashDecisionPrefix) delete[] d_hashDecisionPrefix;
		if (d_hashCompactified) delete[] d_hashCompactified;
		if (d_hashCompactifiedCounter) delete[] d_hashCompactifiedCounter;
		if (d_SDFBlocks) delete[] d_SDFBlocks;
		if (d_hashBucketMutex) delete[] d_hashBucketMutex;
	}

	d_hash = NULL;
	d_heap = NULL;
	d_heapCounter = NULL;
	d_hashDecision = NULL;
	d_hashDecisionPrefix = NULL;
	d_hashCompactified = NULL;
	d_hashCompactifiedCounter = NULL;
	d_SDFBlocks = NULL;
	d_hashBucketMutex = NULL;
}
HashData HashData::download() const {
	if (!m_bIsOnGPU) return *this;

	// NOTE: params is default-constructed here, so its counts must match the
	// sizes used for the original GPU allocation for these copies to be valid.
	HashParams params;
	HashData hashData;
	hashData.allocate(params, false);	//allocate the data on the CPU
	cudaSafeCall(cudaMemcpy(hashData.d_heap, d_heap, sizeof(unsigned int) * params.m_numSDFBlocks, cudaMemcpyDeviceToHost));
	cudaSafeCall(cudaMemcpy(hashData.d_heapCounter, d_heapCounter, sizeof(unsigned int), cudaMemcpyDeviceToHost));
	cudaSafeCall(cudaMemcpy(hashData.d_hash, d_hash, sizeof(HashEntry) * params.m_hashNumBuckets, cudaMemcpyDeviceToHost));
	cudaSafeCall(cudaMemcpy(hashData.d_hashDecision, d_hashDecision, sizeof(int) * params.m_hashNumBuckets, cudaMemcpyDeviceToHost));
	cudaSafeCall(cudaMemcpy(hashData.d_hashDecisionPrefix, d_hashDecisionPrefix, sizeof(int) * params.m_hashNumBuckets, cudaMemcpyDeviceToHost));
	cudaSafeCall(cudaMemcpy(hashData.d_hashCompactified, d_hashCompactified, sizeof(HashEntry) * params.m_hashNumBuckets, cudaMemcpyDeviceToHost));
	cudaSafeCall(cudaMemcpy(hashData.d_hashCompactifiedCounter, d_hashCompactifiedCounter, sizeof(unsigned int), cudaMemcpyDeviceToHost));
	cudaSafeCall(cudaMemcpy(hashData.d_SDFBlocks, d_SDFBlocks, sizeof(Voxel) * params.m_numSDFBlocks * params.m_SDFBlockSize*params.m_SDFBlockSize*params.m_SDFBlockSize, cudaMemcpyDeviceToHost));
	cudaSafeCall(cudaMemcpy(hashData.d_hashBucketMutex, d_hashBucketMutex, sizeof(int) * params.m_hashNumBuckets, cudaMemcpyDeviceToHost));

	return hashData;
}
HashData HashData::upload() const {
	if (m_bIsOnGPU) return *this;

	// NOTE: as in download(), params is default-constructed here and must
	// match the sizes of the source allocation.
	HashParams params;
	HashData hashData;
	hashData.allocate(params, true);	//allocate the data on the GPU
	cudaSafeCall(cudaMemcpy(hashData.d_heap, d_heap, sizeof(unsigned int) * params.m_numSDFBlocks, cudaMemcpyHostToDevice));
	cudaSafeCall(cudaMemcpy(hashData.d_heapCounter, d_heapCounter, sizeof(unsigned int), cudaMemcpyHostToDevice));
	cudaSafeCall(cudaMemcpy(hashData.d_hash, d_hash, sizeof(HashEntry) * params.m_hashNumBuckets, cudaMemcpyHostToDevice));
	cudaSafeCall(cudaMemcpy(hashData.d_hashDecision, d_hashDecision, sizeof(int) * params.m_hashNumBuckets, cudaMemcpyHostToDevice));
	cudaSafeCall(cudaMemcpy(hashData.d_hashDecisionPrefix, d_hashDecisionPrefix, sizeof(int) * params.m_hashNumBuckets, cudaMemcpyHostToDevice));
	cudaSafeCall(cudaMemcpy(hashData.d_hashCompactified, d_hashCompactified, sizeof(HashEntry) * params.m_hashNumBuckets, cudaMemcpyHostToDevice));
	cudaSafeCall(cudaMemcpy(hashData.d_hashCompactifiedCounter, d_hashCompactifiedCounter, sizeof(unsigned int), cudaMemcpyHostToDevice));
	cudaSafeCall(cudaMemcpy(hashData.d_SDFBlocks, d_SDFBlocks, sizeof(Voxel) * params.m_numSDFBlocks * params.m_SDFBlockSize*params.m_SDFBlockSize*params.m_SDFBlockSize, cudaMemcpyHostToDevice));
	cudaSafeCall(cudaMemcpy(hashData.d_hashBucketMutex, d_hashBucketMutex, sizeof(int) * params.m_hashNumBuckets, cudaMemcpyHostToDevice));

	return hashData;
}
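A hypothetical usage sketch of this host-side API. It assumes params is populated first; note that download() and upload() construct a fresh HashParams internally, so the sizes must match the original allocation for the copies to be safe:

	ftl::voxhash::HashParams params;	// fill in bucket/block counts before use
	ftl::voxhash::HashData gpuHash;
	gpuHash.allocate(params, true);		// true: buffers are allocated on the GPU
	// ... run integration/raycast kernels ...
	ftl::voxhash::HashData cpuCopy = gpuHash.download();	// device -> host copy
	// ... inspect cpuCopy on the CPU ...
	cpuCopy.free();
	gpuHash.free();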
src/voxel_hash.cu (new file):

#include <ftl/voxel_hash.hpp>

using namespace ftl::voxhash;

#define COLLISION_LIST_SIZE 5
//! returns the hash entry for a given sdf block id; if there was no hash entry the returned entry will have a ptr with FREE_ENTRY set
__device__
HashEntry HashData::getHashEntryForSDFBlockPos(const int3& sdfBlock) const {
	uint h = computeHashPos(sdfBlock);	//hash
	int3 pos = sdfBlock;

	HashEntry curr;

	int i = h;
	unsigned int maxIter = 0;

	#pragma unroll 2
	while (maxIter < COLLISION_LIST_SIZE) {
		curr = d_hash[i];
		if (curr.pos == pos && curr.ptr != FREE_ENTRY) return curr;
		if (curr.offset == 0) break;

		i += curr.offset;	//go to next element in the list
		i %= (params().m_hashNumBuckets);	//check for overflow
		++maxIter;
	}

	// Could not find it, so return a dummy entry
	curr.pos = pos;
	curr.ptr = FREE_ENTRY;
	return curr;
}
//for histogram (collisions traversal only)
__device__
unsigned int HashData::getNumHashLinkedList(unsigned int bucketID) {
	unsigned int listLen = 0;

	unsigned int i = bucketID;	//start with the last entry of the current bucket
	HashEntry curr;	curr.offset = 0;

	unsigned int maxIter = 0;

	#pragma unroll 2
	while (maxIter < COLLISION_LIST_SIZE) {
		curr = d_hash[i];
		if (curr.offset == 0) break;

		i += curr.offset;	//go to next element in the list
		i %= (params().m_hashNumBuckets);	//check for overflow
		++listLen;
		++maxIter;
	}

	return listLen;
}
//pos in SDF block coordinates
__device__
void HashData::allocBlock(const int3& pos, const uchar frame) {
	uint h = computeHashPos(pos);	//hash bucket
	uint i = h;
	HashEntry curr;	curr.offset = 0;

	// First pass: walk the collision chain to see if the block already exists.
	unsigned int maxIter = 0;
	#pragma unroll 2
	while (maxIter < COLLISION_LIST_SIZE) {
		//offset = curr.offset;
		curr = d_hash[i];	//TODO MATTHIAS do by reference
		if (curr.pos == pos && curr.ptr != FREE_ENTRY) return;	//block is already allocated
		if (curr.offset == 0) break;

		i += curr.offset;	//go to next element in the list
		i %= (params().m_hashNumBuckets);	//check for overflow
		++maxIter;
	}

	// Iteration limit reached without finding the chain end: give up this frame.
	if (curr.offset != 0) return;

	// Second pass: linear search from the chain end for a free entry to link in.
	int j = i+1;
	while (maxIter < COLLISION_LIST_SIZE) {
		//offset = curr.offset;
		curr = d_hash[j];	//TODO MATTHIAS do by reference
		if (curr.ptr == FREE_ENTRY) {
			int prevValue = atomicExch(&d_hashBucketMutex[i], LOCK_ENTRY);
			if (prevValue != LOCK_ENTRY) {
				//InterlockedExchange(g_HashBucketMutex[h], LOCK_ENTRY, prevValue);	//lock the hash bucket where we have found a free entry
				prevValue = atomicExch(&d_hashBucketMutex[j], LOCK_ENTRY);
				if (prevValue != LOCK_ENTRY) {	//only proceed if the bucket has been locked
					HashEntry& entry = d_hash[j];
					entry.pos = pos;
					entry.offset = 0;
					entry.flags = 0;	// Flag block as valid in this frame (Nick)
					entry.ptr = consumeHeap() * SDF_BLOCK_SIZE*SDF_BLOCK_SIZE*SDF_BLOCK_SIZE;	//memory alloc
					d_hash[i].offset = j-i;
					//setHashEntry(g_Hash, idxLastEntryInBucket, lastEntryInBucket);
				}
			}
			return;	//done, or the bucket was already locked
		}

		++j;
		j %= (params().m_hashNumBuckets);	//check for overflow
		++maxIter;
	}
}
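A worked example of the linking step above, with illustrative numbers: suppose bucket h = 5 is occupied and the linear search finds a free slot at j = 9. After both mutexes are acquired:

	d_hash[9].pos    = pos;		// the new block lives in slot 9
	d_hash[9].offset = 0;		// slot 9 becomes the end of the chain
	d_hash[5].offset = 9 - 5;	// the old chain end now points 4 slots ahead
	// a later lookup walks i = 5, then i = (5 + 4) % m_hashNumBuckets = 9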
//!inserts a hash entry without allocating any memory: used by streaming: TODO MATTHIAS check the atomics in this function
/*__device__
bool HashData::insertHashEntry(HashEntry entry)
{
	uint h = computeHashPos(entry.pos);
	uint hp = h * HASH_BUCKET_SIZE;

	for (uint j = 0; j < HASH_BUCKET_SIZE; j++) {
		uint i = j + hp;
		//const HashEntry& curr = d_hash[i];
		int prevWeight = 0;
		//InterlockedCompareExchange(hash[3*i+2], FREE_ENTRY, LOCK_ENTRY, prevWeight);
		prevWeight = atomicCAS(&d_hash[i].ptr, FREE_ENTRY, LOCK_ENTRY);
		if (prevWeight == FREE_ENTRY) {
			d_hash[i] = entry;
			//setHashEntry(hash, i, entry);
			return true;
		}
	}

#ifdef HANDLE_COLLISIONS
	//updated variables as after the loop
	const uint idxLastEntryInBucket = (h+1)*HASH_BUCKET_SIZE - 1;	//get last index of bucket

	uint i = idxLastEntryInBucket;	//start with the last entry of the current bucket
	HashEntry curr;

	unsigned int maxIter = 0;
	//[allow_uav_condition]
	uint g_MaxLoopIterCount = params().m_hashMaxCollisionLinkedListSize;
	#pragma unroll 1
	while (maxIter < g_MaxLoopIterCount) {	//traverse list until end // why find the end, if you are inserting at the start?!
		//curr = getHashEntry(hash, i);
		curr = d_hash[i];	//TODO MATTHIAS do by reference
		if (curr.offset == 0) break;	//we have found the end of the list
		i = idxLastEntryInBucket + curr.offset;	//go to next element in the list
		i %= (HASH_BUCKET_SIZE * params().m_hashNumBuckets);	//check for overflow
		maxIter++;
	}

	maxIter = 0;
	int offset = 0;
	#pragma unroll 1
	while (maxIter < g_MaxLoopIterCount) {	//linear search for free entry
		offset++;
		uint i = (idxLastEntryInBucket + offset) % (HASH_BUCKET_SIZE * params().m_hashNumBuckets);	//go to next hash element
		if ((offset % HASH_BUCKET_SIZE) == 0) continue;	//cannot insert into a last bucket element (would conflict with other linked lists)

		int prevWeight = 0;
		//InterlockedCompareExchange(hash[3*i+2], FREE_ENTRY, LOCK_ENTRY, prevWeight);	//check for a free entry
		uint* d_hashUI = (uint*)d_hash;
		prevWeight = atomicCAS(&d_hashUI[3*idxLastEntryInBucket+1], (uint)FREE_ENTRY, (uint)LOCK_ENTRY);
		if (prevWeight == FREE_ENTRY) {	//if free entry found set prev->next = curr & curr->next = prev->next
			//[allow_uav_condition]
			//while(hash[3*idxLastEntryInBucket+2] == LOCK_ENTRY);	// expects setHashEntry to set the ptr last, required because pos.z is packed into the same value -> prev->next = curr -> might corrupt pos.z

			HashEntry lastEntryInBucket = d_hash[idxLastEntryInBucket];	//get prev (= lastEntry in Bucket)

			int newOffsetPrev = (offset << 16) | (lastEntryInBucket.pos.z & 0x0000ffff);	//prev->next = curr (maintain old z-pos)
			int oldOffsetPrev = 0;
			//InterlockedExchange(hash[3*idxLastEntryInBucket+1], newOffsetPrev, oldOffsetPrev);	//set prev offset atomically
			oldOffsetPrev = prevWeight = atomicExch(&d_hashUI[3*idxLastEntryInBucket+1], newOffsetPrev);
			entry.offset = oldOffsetPrev >> 16;	//remove prev z-pos from old offset

			//setHashEntry(hash, i, entry);	//sets the current hashEntry with: curr->next = prev->next
			d_hash[i] = entry;
			return true;
		}

		maxIter++;
	}
#endif

	return false;
}*/
//! deletes a hash entry position for a given sdfBlock index (returns true upon successful deletion; otherwise returns false)
__device__
bool HashData::deleteHashEntryElement(const int3& sdfBlock) {
	uint h = computeHashPos(sdfBlock);	//hash bucket

	int i = h;
	int prev = -1;
	HashEntry curr;
	unsigned int maxIter = 0;

	#pragma unroll 2
	while (maxIter < COLLISION_LIST_SIZE) {
		curr = d_hash[i];

		//found the entry we need to delete
		if (curr.pos == sdfBlock && curr.ptr != FREE_ENTRY) {
			//int prevValue = 0;
			//InterlockedExchange(bucketMutex[h], LOCK_ENTRY, prevValue);	//lock the hash bucket
			int prevValue = atomicExch(&d_hashBucketMutex[i], LOCK_ENTRY);
			if (prevValue == LOCK_ENTRY) return false;
			if (prevValue != LOCK_ENTRY) {
				prevValue = (prev >= 0) ? atomicExch(&d_hashBucketMutex[prev], LOCK_ENTRY) : 0;
				if (prevValue == LOCK_ENTRY) return false;
				if (prevValue != LOCK_ENTRY) {
					const uint linBlockSize = SDF_BLOCK_SIZE * SDF_BLOCK_SIZE * SDF_BLOCK_SIZE;
					appendHeap(curr.ptr / linBlockSize);
					deleteHashEntry(i);

					if (prev >= 0) {
						d_hash[prev].offset = curr.offset;	//unlink by bypassing the deleted entry
					}
					return true;
				}
			}
		}

		if (curr.offset == 0) {	//we have found the end of the list
			return false;	//should actually never happen because we need to find the entry before
		}

		prev = i;
		i += curr.offset;	//go to next element in the list
		i %= (params().m_hashNumBuckets);	//check for overflow
		++maxIter;
	}

	return false;
}
\ No newline at end of file
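deleteHashEntryElement takes two mutexes, the entry's own slot and its predecessor's, and returns false rather than blocking if either is already held; a failed delete can simply be retried on a later frame. A compact sketch of that try-lock pattern, with illustrative names:

	__device__ bool tryLockPair(int *mutexes, int slot, int prevSlot) {
		if (atomicExch(&mutexes[slot], LOCK_ENTRY) == LOCK_ENTRY) return false;	// slot busy
		if (prevSlot >= 0 && atomicExch(&mutexes[prevSlot], LOCK_ENTRY) == LOCK_ENTRY) return false;	// predecessor busy
		return true;	// both locks held; caller may unlink safely
	}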
@@ -263,7 +263,7 @@ unsigned int SceneRep::getHeapFreeCount() {
 //! debug only!
 void SceneRep::debugHash() {
-	HashEntry* hashCPU = new HashEntry[m_hashParams.m_hashBucketSize*m_hashParams.m_hashNumBuckets];
+	HashEntry* hashCPU = new HashEntry[m_hashParams.m_hashNumBuckets];
 	unsigned int* heapCPU = new unsigned int[m_hashParams.m_numSDFBlocks];
 	unsigned int heapCounterCPU;
@@ -271,7 +271,7 @@ void SceneRep::debugHash() {
 	heapCounterCPU++;	//points to the first free entry: number of blocks is one more
 	cudaSafeCall(cudaMemcpy(heapCPU, m_hashData.d_heap, sizeof(unsigned int)*m_hashParams.m_numSDFBlocks, cudaMemcpyDeviceToHost));
-	cudaSafeCall(cudaMemcpy(hashCPU, m_hashData.d_hash, sizeof(HashEntry)*m_hashParams.m_hashBucketSize*m_hashParams.m_hashNumBuckets, cudaMemcpyDeviceToHost));
+	cudaSafeCall(cudaMemcpy(hashCPU, m_hashData.d_hash, sizeof(HashEntry)*m_hashParams.m_hashNumBuckets, cudaMemcpyDeviceToHost));
 	//Check for duplicates
 	class myint3Voxel {
@@ -316,7 +316,7 @@ void SceneRep::debugHash() {
 	std::list<myint3Voxel> l;
 	//std::vector<myint3Voxel> v;
-	for (unsigned int i = 0; i < m_hashParams.m_hashBucketSize*m_hashParams.m_hashNumBuckets; i++) {
+	for (unsigned int i = 0; i < m_hashParams.m_hashNumBuckets; i++) {
 		if (hashCPU[i].ptr == -1) {
 			numMinusOne++;
 		}
@@ -376,8 +376,6 @@ HashParams SceneRep::_parametersFromConfig() {
 	params.m_rigidTransform.setIdentity();
 	params.m_rigidTransformInverse.setIdentity();
 	params.m_hashNumBuckets = value("hashNumBuckets", 100000);
-	params.m_hashBucketSize = HASH_BUCKET_SIZE;
-	params.m_hashMaxCollisionLinkedListSize = value("hashMaxCollisionLinkedListSize", 7);
 	params.m_SDFBlockSize = SDF_BLOCK_SIZE;
 	params.m_numSDFBlocks = value("hashNumSDFBlocks",500000);
 	params.m_virtualVoxelSize = value("SDFVoxelSize", 0.006f);
...