diff --git a/applications/reconstruct/include/ftl/voxel_hash.hpp b/applications/reconstruct/include/ftl/voxel_hash.hpp deleted file mode 100644 index 98c2eca90530c309b5d2e46848493634aaaa053c..0000000000000000000000000000000000000000 --- a/applications/reconstruct/include/ftl/voxel_hash.hpp +++ /dev/null @@ -1,428 +0,0 @@ -// From: https://github.com/niessner/VoxelHashing/blob/master/DepthSensingCUDA/Source/VoxelUtilHashSDF.h - -#pragma once - -#ifndef sint -typedef signed int sint; -#endif - -#ifndef uint -typedef unsigned int uint; -#endif - -#ifndef slong -typedef signed long slong; -#endif - -#ifndef ulong -typedef unsigned long ulong; -#endif - -#ifndef uchar -typedef unsigned char uchar; -#endif - -#ifndef schar -typedef signed char schar; -#endif - - - - -#include <ftl/cuda_util.hpp> - -#include <ftl/cuda_matrix_util.hpp> -#include <ftl/voxel_hash_params.hpp> - -#include <ftl/depth_camera.hpp> - -#define SDF_BLOCK_SIZE 8 -#define SDF_BLOCK_SIZE_OLAP 8 - -#ifndef MINF -#define MINF __int_as_float(0xff800000) -#endif - -#ifndef PINF -#define PINF __int_as_float(0x7f800000) -#endif - -extern __constant__ ftl::voxhash::HashParams c_hashParams; -extern "C" void updateConstantHashParams(const ftl::voxhash::HashParams& hashParams); - -namespace ftl { -namespace voxhash { - -//status flags for hash entries -static const int LOCK_ENTRY = -1; -static const int FREE_ENTRY = -2147483648; -static const int NO_OFFSET = 0; - -static const uint kFlagSurface = 0x00000001; - -struct __align__(16) HashEntryHead { - union { - short4 posXYZ; // hash position (lower left corner of SDFBlock)) - uint64_t pos; - }; - int offset; // offset for collisions - uint flags; -}; - -struct __align__(16) HashEntry -{ - HashEntryHead head; - uint voxels[16]; // 512 bits, 1 bit per voxel - //uint validity[16]; // Is the voxel valid, 512 bit - - /*__device__ void operator=(const struct HashEntry& e) { - ((long long*)this)[0] = ((const long long*)&e)[0]; - ((long long*)this)[1] = ((const long long*)&e)[1]; - //((int*)this)[4] = ((const int*)&e)[4]; - ((long long*)this)[2] = ((const long long*)&e)[2]; - ((long long*)this)[2] = ((const long long*)&e)[3]; - ((long long*)this)[2] = ((const long long*)&e)[4]; - ((long long*)this)[2] = ((const long long*)&e)[5]; - ((long long*)this)[2] = ((const long long*)&e)[6]; - ((long long*)this)[2] = ((const long long*)&e)[7]; - ((long long*)this)[2] = ((const long long*)&e)[8]; - ((long long*)this)[2] = ((const long long*)&e)[9]; - ((long long*)this)[2] = ((const long long*)&e)[10]; - }*/ -}; - -struct __align__(8) Voxel { - float sdf; //signed distance function - uchar3 color; //color - uchar weight; //accumulated sdf weight - - __device__ void operator=(const struct Voxel& v) { - ((long long*)this)[0] = ((const long long*)&v)[0]; - } - -}; - -/** - * Voxel Hash Table structure and operations. Works on both CPU and GPU with - * host <-> device transfer included. - */ -struct HashData { - - /////////////// - // Host part // - /////////////// - - __device__ __host__ - HashData() { - //d_heap = NULL; - //d_heapCounter = NULL; - d_hash = NULL; - d_hashDecision = NULL; - d_hashDecisionPrefix = NULL; - d_hashCompactified = NULL; - d_hashCompactifiedCounter = NULL; - //d_SDFBlocks = NULL; - d_hashBucketMutex = NULL; - m_bIsOnGPU = false; - } - - /** - * Create all the data structures, either on GPU or system memory. 
- */ - __host__ void allocate(const HashParams& params, bool dataOnGPU = true); - - __host__ void updateParams(const HashParams& params); - - __host__ void free(); - - /** - * Download entire hash table from GPU to CPU memory. - */ - __host__ HashData download() const; - - /** - * Upload entire hash table from CPU to GPU memory. - */ - __host__ HashData upload() const; - - __host__ size_t getAllocatedBlocks() const; - - __host__ size_t getFreeBlocks() const; - - __host__ size_t getCollisionCount() const; - - - - ///////////////// - // Device part // - ///////////////// -//#define __CUDACC__ -#ifdef __CUDACC__ - - __device__ - const HashParams& params() const { - return c_hashParams; - } - - //! see teschner et al. (but with correct prime values) - __device__ - uint computeHashPos(const int3& virtualVoxelPos) const { - const int p0 = 73856093; - const int p1 = 19349669; - const int p2 = 83492791; - - int res = ((virtualVoxelPos.x * p0) ^ (virtualVoxelPos.y * p1) ^ (virtualVoxelPos.z * p2)) % params().m_hashNumBuckets; - if (res < 0) res += params().m_hashNumBuckets; - return (uint)res; - } - - //merges two voxels (v0 the currently stored voxel, v1 is the input voxel) - __device__ - void combineVoxel(const Voxel &v0, const Voxel& v1, Voxel &out) const { - - //v.color = (10*v0.weight * v0.color + v1.weight * v1.color)/(10*v0.weight + v1.weight); //give the currently observed color more weight - //v.color = (v0.weight * v0.color + v1.weight * v1.color)/(v0.weight + v1.weight); - //out.color = 0.5f * (v0.color + v1.color); //exponential running average - - - float3 c0 = make_float3(v0.color.x, v0.color.y, v0.color.z); - float3 c1 = make_float3(v1.color.x, v1.color.y, v1.color.z); - - //float3 res = (c0.x+c0.y+c0.z == 0) ? c1 : 0.5f*c0 + 0.5f*c1; - //float3 res = (c0+c1)/2; - float3 res = (c0 * (float)v0.weight + c1 * (float)v1.weight) / ((float)v0.weight + (float)v1.weight); - //float3 res = c1; - - out.color.x = (uchar)(res.x+0.5f); out.color.y = (uchar)(res.y+0.5f); out.color.z = (uchar)(res.z+0.5f); - - // Nick: reduces colour flicker but not ideal.. - //out.color = v1.color; - - // Option 3 (Nick): Use colour with minimum SDF since it should be closest to surface. - // Results in stable but pixelated output - //out.color = (v0.weight > 0 && (fabs(v0.sdf) < fabs(v1.sdf))) ? v0.color : v1.color; - - // Option 4 (Nick): Merge colours based upon relative closeness - /*float3 c0 = make_float3(v0.color.x, v0.color.y, v0.color.z); - float3 c1 = make_float3(v1.color.x, v1.color.y, v1.color.z); - float factor = fabs(v0.sdf - v1.sdf) / 0.05f / 2.0f; - if (factor > 0.5f) factor = 0.5f; - float factor0 = (fabs(v0.sdf) < fabs(v1.sdf)) ? 1.0f - factor : factor; - float factor1 = 1.0f - factor0; - out.color.x = (v0.weight > 0) ? (uchar)(c0.x * factor0 + c1.x * factor1) : c1.x; - out.color.y = (v0.weight > 0) ? (uchar)(c0.y * factor0 + c1.y * factor1) : c1.y; - out.color.z = (v0.weight > 0) ? (uchar)(c0.z * factor0 + c1.z * factor1) : c1.z;*/ - - out.sdf = (v0.sdf * (float)v0.weight + v1.sdf * (float)v1.weight) / ((float)v0.weight + (float)v1.weight); - out.weight = min(params().m_integrationWeightMax, (unsigned int)v0.weight + (unsigned int)v1.weight); - } - - - //! 
returns the truncation of the SDF for a given distance value
-	__device__
-	float getTruncation(float z) const {
-		return params().m_truncation + params().m_truncScale * z;
-	}
-
-
-	__device__
-	float3 worldToVirtualVoxelPosFloat(const float3& pos) const {
-		return pos / params().m_virtualVoxelSize;
-	}
-
-	__device__
-	int3 worldToVirtualVoxelPos(const float3& pos) const {
-		//const float3 p = pos*g_VirtualVoxelResolutionScalar;
-		const float3 p = pos / params().m_virtualVoxelSize;
-		return make_int3(p+make_float3(sign(p))*0.5f);
-	}
-
-	__device__
-	int3 virtualVoxelPosToSDFBlock(int3 virtualVoxelPos) const {
-		if (virtualVoxelPos.x < 0) virtualVoxelPos.x -= SDF_BLOCK_SIZE_OLAP-1;
-		if (virtualVoxelPos.y < 0) virtualVoxelPos.y -= SDF_BLOCK_SIZE_OLAP-1;
-		if (virtualVoxelPos.z < 0) virtualVoxelPos.z -= SDF_BLOCK_SIZE_OLAP-1;
-
-		return make_int3(
-			virtualVoxelPos.x/SDF_BLOCK_SIZE_OLAP,
-			virtualVoxelPos.y/SDF_BLOCK_SIZE_OLAP,
-			virtualVoxelPos.z/SDF_BLOCK_SIZE_OLAP);
-	}
-
-	// Computes virtual voxel position of corner sample position
-	__device__
-	int3 SDFBlockToVirtualVoxelPos(const int3& sdfBlock) const {
-		return sdfBlock*SDF_BLOCK_SIZE_OLAP;
-	}
-
-	__device__
-	float3 virtualVoxelPosToWorld(const int3& pos) const {
-		return make_float3(pos)*params().m_virtualVoxelSize;
-	}
-
-	__device__
-	float3 SDFBlockToWorld(const int3& sdfBlock) const {
-		return virtualVoxelPosToWorld(SDFBlockToVirtualVoxelPos(sdfBlock));
-	}
-
-	__device__
-	int3 worldToSDFBlock(const float3& worldPos) const {
-		return virtualVoxelPosToSDFBlock(worldToVirtualVoxelPos(worldPos));
-	}
-
-	__device__
-	bool isInBoundingBox(const HashParams &hashParams, const int3& sdfBlock) {
-		// NOTE (Nick): Changed, just assume all voxels are potentially in frustum
-		//float3 posWorld = virtualVoxelPosToWorld(SDFBlockToVirtualVoxelPos(sdfBlock)) + hashParams.m_virtualVoxelSize * 0.5f * (SDF_BLOCK_SIZE - 1.0f);
-		//return camera.isInCameraFrustumApprox(hashParams.m_rigidTransformInverse, posWorld);
-		return !(hashParams.m_flags & ftl::voxhash::kFlagClipping) ||
-			(sdfBlock.x > hashParams.m_minBounds.x && sdfBlock.x < hashParams.m_maxBounds.x &&
-			 sdfBlock.y > hashParams.m_minBounds.y && sdfBlock.y < hashParams.m_maxBounds.y &&
-			 sdfBlock.z > hashParams.m_minBounds.z && sdfBlock.z < hashParams.m_maxBounds.z);
-	}
-
-	//! computes the (local) virtual voxel pos of an index; idx in [0;511]
-	__device__
-	uint3 delinearizeVoxelIndex(uint idx) const {
-		uint x = idx % SDF_BLOCK_SIZE;
-		uint y = (idx % (SDF_BLOCK_SIZE * SDF_BLOCK_SIZE)) / SDF_BLOCK_SIZE;
-		uint z = idx / (SDF_BLOCK_SIZE * SDF_BLOCK_SIZE);
-		return make_uint3(x,y,z);
-	}
-
-	//! 
computes the linearized index of a local virtual voxel pos; pos in [0;7]^3 - __device__ - uint linearizeVoxelPos(const int3& virtualVoxelPos) const { - return - virtualVoxelPos.z * SDF_BLOCK_SIZE * SDF_BLOCK_SIZE + - virtualVoxelPos.y * SDF_BLOCK_SIZE + - virtualVoxelPos.x; - } - - __device__ - int virtualVoxelPosToLocalSDFBlockIndex(const int3& virtualVoxelPos) const { - int3 localVoxelPos = make_int3( - virtualVoxelPos.x % SDF_BLOCK_SIZE, - virtualVoxelPos.y % SDF_BLOCK_SIZE, - virtualVoxelPos.z % SDF_BLOCK_SIZE); - - if (localVoxelPos.x < 0) localVoxelPos.x += SDF_BLOCK_SIZE; - if (localVoxelPos.y < 0) localVoxelPos.y += SDF_BLOCK_SIZE; - if (localVoxelPos.z < 0) localVoxelPos.z += SDF_BLOCK_SIZE; - - return linearizeVoxelPos(localVoxelPos); - } - - __device__ - int worldToLocalSDFBlockIndex(const float3& world) const { - int3 virtualVoxelPos = worldToVirtualVoxelPos(world); - return virtualVoxelPosToLocalSDFBlockIndex(virtualVoxelPos); - } - - - //! returns the hash entry for a given worldPos; if there was no hash entry the returned entry will have a ptr with FREE_ENTRY set - __device__ - int getHashEntry(const float3& worldPos) const { - //int3 blockID = worldToSDFVirtualVoxelPos(worldPos)/SDF_BLOCK_SIZE; //position of sdf block - int3 blockID = worldToSDFBlock(worldPos); - return getHashEntryForSDFBlockPos(blockID); - } - - - __device__ - void deleteHashEntry(uint id) { - deleteHashEntry(d_hash[id]); - } - - __device__ - void deleteHashEntry(HashEntry& hashEntry) { - hashEntry.head.pos = 0; - hashEntry.head.offset = FREE_ENTRY; - for (int i=0; i<16; ++i) hashEntry.voxels[i] = 0; - } - - __device__ - bool voxelExists(const float3& worldPos) const { - int hashEntry = getHashEntry(worldPos); - return (hashEntry != -1); - } - - __device__ - void deleteVoxel(Voxel& v) const { - v.color = make_uchar3(0,0,0); - v.weight = 0; - v.sdf = 0.0f; - } - - - __device__ - bool getVoxel(const float3& worldPos) const { - int hashEntry = getHashEntry(worldPos); - if (hashEntry == -1) { - return false; - } else { - int3 virtualVoxelPos = worldToVirtualVoxelPos(worldPos); - int ix = virtualVoxelPosToLocalSDFBlockIndex(virtualVoxelPos); - return d_hash[hashEntry].voxels[ix/32] & (0x1 << (ix % 32)); - } - } - - __device__ - bool getVoxel(const int3& virtualVoxelPos) const { - int hashEntry = getHashEntryForSDFBlockPos(virtualVoxelPosToSDFBlock(virtualVoxelPos)); - if (hashEntry == -1) { - return false; - } else { - int ix = virtualVoxelPosToLocalSDFBlockIndex(virtualVoxelPos); - return d_hash[hashEntry].voxels[ix >> 5] & (0x1 << (ix & 0x1F)); - } - } - - /*__device__ - void setVoxel(const int3& virtualVoxelPos, bool voxelInput) const { - int hashEntry = getHashEntryForSDFBlockPos(virtualVoxelPosToSDFBlock(virtualVoxelPos)); - if (hashEntry == -1) { - d_SDFBlocks[hashEntry.ptr + virtualVoxelPosToLocalSDFBlockIndex(virtualVoxelPos)] = voxelInput; - int ix = virtualVoxelPosToLocalSDFBlockIndex(virtualVoxelPos); - d_hash[hashEntry].voxels[ix >> 5] |= (0x1 << (ix & 0x1F)); - } - }*/ - - //! 
returns the hash entry for a given sdf block id; if there was no hash entry the returned entry will have a ptr with FREE_ENTRY set
-	__device__
-	int getHashEntryForSDFBlockPos(const int3& sdfBlock) const;
-
-	//for histogram (no collision traversal)
-	__device__
-	unsigned int getNumHashEntriesPerBucket(unsigned int bucketID);
-
-	//for histogram (collisions traversal only)
-	__device__
-	unsigned int getNumHashLinkedList(unsigned int bucketID);
-
-
-	//pos in SDF block coordinates
-	__device__
-	void allocBlock(const int3& pos);
-
-	//!inserts a hash entry without allocating any memory: used by streaming: TODO MATTHIAS check the atomics in this function
-	__device__
-	bool insertHashEntry(HashEntry entry);
-
-	//! deletes a hash entry position for a given sdfBlock index (returns true upon successful deletion; otherwise returns false)
-	__device__
-	bool deleteHashEntryElement(const int3& sdfBlock);
-
-#endif // __CUDACC__
-
-	int* d_hashDecision; //
-	int* d_hashDecisionPrefix; //
-	HashEntry* d_hash; //hash that stores pointers to sdf blocks
-	HashEntry** d_hashCompactified; //same as before except that only valid pointers are there
-	int* d_hashCompactifiedCounter; //atomic counter to add compactified entries atomically
-	int* d_hashBucketMutex; //binary flag per hash bucket; used for allocation to atomically lock a bucket
-
-	bool m_bIsOnGPU; //the class can be used on both the CPU and GPU
-};
-
-} // namespace voxhash
-} // namespace ftl
diff --git a/applications/reconstruct/src/compactors.cu b/applications/reconstruct/src/compactors.cu
deleted file mode 100644
index b7cdd5028f0f5ec78de47d8bf6f9099c5448a494..0000000000000000000000000000000000000000
--- a/applications/reconstruct/src/compactors.cu
+++ /dev/null
@@ -1,236 +0,0 @@
-#include "compactors.hpp"
-
-using ftl::voxhash::HashData;
-using ftl::voxhash::HashParams;
-using ftl::voxhash::Voxel;
-using ftl::voxhash::HashEntry;
-using ftl::voxhash::FREE_ENTRY;
-
-#define COMPACTIFY_HASH_THREADS_PER_BLOCK 256
-//#define COMPACTIFY_HASH_SIMPLE
-
-
-/*__global__ void fillDecisionArrayKernel(HashData hashData, DepthCameraData depthCameraData)
-{
-	const HashParams& hashParams = c_hashParams;
-	const unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x;
-
-	if (idx < hashParams.m_hashNumBuckets * HASH_BUCKET_SIZE) {
-		hashData.d_hashDecision[idx] = 0;
-		if (hashData.d_hash[idx].ptr != FREE_ENTRY) {
-			if (hashData.isSDFBlockInCameraFrustumApprox(hashData.d_hash[idx].pos)) {
-				hashData.d_hashDecision[idx] = 1; //yes
-			}
-		}
-	}
-}*/
-
-/*extern "C" void fillDecisionArrayCUDA(HashData& hashData, const HashParams& hashParams, const DepthCameraData& depthCameraData)
-{
-	const dim3 gridSize((HASH_BUCKET_SIZE * hashParams.m_hashNumBuckets + (T_PER_BLOCK*T_PER_BLOCK) - 1)/(T_PER_BLOCK*T_PER_BLOCK), 1);
-	const dim3 blockSize((T_PER_BLOCK*T_PER_BLOCK), 1);
-
-	fillDecisionArrayKernel<<<gridSize, blockSize>>>(hashData, depthCameraData);
-
-#ifdef _DEBUG
-	cudaSafeCall(cudaDeviceSynchronize());
-	//cutilCheckMsg(__FUNCTION__);
-#endif
-
-}*/
-
-/*__global__ void compactifyHashKernel(HashData hashData)
-{
-	const HashParams& hashParams = c_hashParams;
-	const unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x;
-	if (idx < hashParams.m_hashNumBuckets * HASH_BUCKET_SIZE) {
-		if (hashData.d_hashDecision[idx] == 1) {
-			hashData.d_hashCompactified[hashData.d_hashDecisionPrefix[idx]-1] = hashData.d_hash[idx];
-		}
-	}
-}*/
-
-/*extern "C" void compactifyHashCUDA(HashData& hashData, const HashParams& hashParams)
-{
-	const dim3 gridSize((HASH_BUCKET_SIZE * 
hashParams.m_hashNumBuckets + (T_PER_BLOCK*T_PER_BLOCK) - 1)/(T_PER_BLOCK*T_PER_BLOCK), 1); - const dim3 blockSize((T_PER_BLOCK*T_PER_BLOCK), 1); - - compactifyHashKernel<<<gridSize, blockSize>>>(hashData); - -#ifdef _DEBUG - cudaSafeCall(cudaDeviceSynchronize()); - //cutilCheckMsg(__FUNCTION__); -#endif -}*/ - -/*__global__ void compactifyVisibleKernel(HashData hashData, HashParams hashParams, DepthCameraParams camera) -{ - //const HashParams& hashParams = c_hashParams; - const unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x; -#ifdef COMPACTIFY_HASH_SIMPLE - if (idx < hashParams.m_hashNumBuckets) { - if (hashData.d_hash[idx].ptr != FREE_ENTRY) { - if (hashData.isSDFBlockInCameraFrustumApprox(hashParams, camera, hashData.d_hash[idx].pos)) - { - int addr = atomicAdd(hashData.d_hashCompactifiedCounter, 1); - hashData.d_hashCompactified[addr] = hashData.d_hash[idx]; - } - } - } -#else - __shared__ int localCounter; - if (threadIdx.x == 0) localCounter = 0; - __syncthreads(); - - int addrLocal = -1; - if (idx < hashParams.m_hashNumBuckets) { - if (hashData.d_hash[idx].ptr != FREE_ENTRY) { - if (hashData.isSDFBlockInCameraFrustumApprox(hashParams, camera, hashData.d_hash[idx].pos)) - { - addrLocal = atomicAdd(&localCounter, 1); - } - } - } - - __syncthreads(); - - __shared__ int addrGlobal; - if (threadIdx.x == 0 && localCounter > 0) { - addrGlobal = atomicAdd(hashData.d_hashCompactifiedCounter, localCounter); - } - __syncthreads(); - - if (addrLocal != -1) { - const unsigned int addr = addrGlobal + addrLocal; - hashData.d_hashCompactified[addr] = hashData.d_hash[idx]; - } -#endif -} - -void ftl::cuda::compactifyVisible(HashData& hashData, const HashParams& hashParams, const DepthCameraParams &camera, cudaStream_t stream) { - const unsigned int threadsPerBlock = COMPACTIFY_HASH_THREADS_PER_BLOCK; - const dim3 gridSize((hashParams.m_hashNumBuckets + threadsPerBlock - 1) / threadsPerBlock, 1); - const dim3 blockSize(threadsPerBlock, 1); - - cudaSafeCall(cudaMemsetAsync(hashData.d_hashCompactifiedCounter, 0, sizeof(int),stream)); - compactifyVisibleKernel << <gridSize, blockSize, 0, stream >> >(hashData, hashParams, camera); - //unsigned int res = 0; - //cudaSafeCall(cudaMemcpyAsync(&res, hashData.d_hashCompactifiedCounter, sizeof(unsigned int), cudaMemcpyDeviceToHost, stream)); - -#ifdef _DEBUG - cudaSafeCall(cudaDeviceSynchronize()); - //cutilCheckMsg(__FUNCTION__); -#endif - //return res; -}*/ - -__global__ void compactifyAllocatedKernel(HashData hashData) -{ - const HashParams& hashParams = c_hashParams; - const unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x; -#ifdef COMPACTIFY_HASH_SIMPLE - if (idx < hashParams.m_hashNumBuckets) { - if (hashData.d_hash[idx].head.offset != FREE_ENTRY) { - int addr = atomicAdd(hashData.d_hashCompactifiedCounter, 1); - hashData.d_hashCompactified[addr] = &hashData.d_hash[idx]; - } - } -#else - __shared__ int localCounter; - if (threadIdx.x == 0) localCounter = 0; - __syncthreads(); - - int addrLocal = -1; - if (idx < hashParams.m_hashNumBuckets) { - if (hashData.d_hash[idx].head.offset != FREE_ENTRY) { - addrLocal = atomicAdd(&localCounter, 1); - } - } - - __syncthreads(); - - __shared__ int addrGlobal; - if (threadIdx.x == 0 && localCounter > 0) { - addrGlobal = atomicAdd(hashData.d_hashCompactifiedCounter, localCounter); - } - __syncthreads(); - - if (addrLocal != -1) { - const unsigned int addr = addrGlobal + addrLocal; - hashData.d_hashCompactified[addr] = &hashData.d_hash[idx]; - } -#endif -} - -void ftl::cuda::compactifyAllocated(HashData& 
hashData, const HashParams& hashParams, cudaStream_t stream) {
-	const unsigned int threadsPerBlock = COMPACTIFY_HASH_THREADS_PER_BLOCK;
-	const dim3 gridSize((hashParams.m_hashNumBuckets + threadsPerBlock - 1) / threadsPerBlock, 1);
-	const dim3 blockSize(threadsPerBlock, 1);
-
-	cudaSafeCall(cudaMemsetAsync(hashData.d_hashCompactifiedCounter, 0, sizeof(int), stream));
-	compactifyAllocatedKernel << <gridSize, blockSize, 0, stream >> >(hashData);
-	//unsigned int res = 0;
-	//cudaSafeCall(cudaMemcpyAsync(&res, hashData.d_hashCompactifiedCounter, sizeof(unsigned int), cudaMemcpyDeviceToHost, stream));
-
-#ifdef _DEBUG
-	cudaSafeCall(cudaDeviceSynchronize());
-	//cutilCheckMsg(__FUNCTION__);
-#endif
-	//return res;
-}
-
-
-__global__ void compactifyOccupiedKernel(HashData hashData)
-{
-	const HashParams& hashParams = c_hashParams;
-	const unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x;
-#ifdef COMPACTIFY_HASH_SIMPLE
-	if (idx < hashParams.m_hashNumBuckets) {
-		if (hashData.d_hash[idx].head.offset != FREE_ENTRY && hashData.d_hash[idx].head.flags & ftl::voxhash::kFlagSurface) {
-			int addr = atomicAdd(hashData.d_hashCompactifiedCounter, 1);
-			hashData.d_hashCompactified[addr] = &hashData.d_hash[idx];
-		}
-	}
-#else
-	__shared__ int localCounter;
-	if (threadIdx.x == 0) localCounter = 0;
-	__syncthreads();
-
-	int addrLocal = -1;
-	if (idx < hashParams.m_hashNumBuckets) {
-		if (hashData.d_hash[idx].head.offset != FREE_ENTRY && (hashData.d_hash[idx].head.flags & ftl::voxhash::kFlagSurface)) {  // TODO:(Nick) Check voxels for all 0 or all 1
-			addrLocal = atomicAdd(&localCounter, 1);
-		}
-	}
-
-	__syncthreads();
-
-	__shared__ int addrGlobal;
-	if (threadIdx.x == 0 && localCounter > 0) {
-		addrGlobal = atomicAdd(hashData.d_hashCompactifiedCounter, localCounter);
-	}
-	__syncthreads();
-
-	if (addrLocal != -1) {
-		const unsigned int addr = addrGlobal + addrLocal;
-		hashData.d_hashCompactified[addr] = &hashData.d_hash[idx];
-	}
-#endif
-}
-
-void ftl::cuda::compactifyOccupied(HashData& hashData, const HashParams& hashParams, cudaStream_t stream) {
-	const unsigned int threadsPerBlock = COMPACTIFY_HASH_THREADS_PER_BLOCK;
-	const dim3 gridSize((hashParams.m_hashNumBuckets + threadsPerBlock - 1) / threadsPerBlock, 1);
-	const dim3 blockSize(threadsPerBlock, 1);
-
-	cudaSafeCall(cudaMemsetAsync(hashData.d_hashCompactifiedCounter, 0, sizeof(int), stream));
-	compactifyOccupiedKernel << <gridSize, blockSize, 0, stream >> >(hashData);
-	//unsigned int res = 0;
-	//cudaSafeCall(cudaMemcpyAsync(&res, hashData.d_hashCompactifiedCounter, sizeof(unsigned int), cudaMemcpyDeviceToHost, stream));
-
-#ifdef _DEBUG
-	cudaSafeCall(cudaDeviceSynchronize());
-	//cutilCheckMsg(__FUNCTION__);
-#endif
-	//return res;
-}
diff --git a/applications/reconstruct/src/compactors.hpp b/applications/reconstruct/src/compactors.hpp
deleted file mode 100644
index 6c61985eea8448a078b8abe3e821992519c9425f..0000000000000000000000000000000000000000
--- a/applications/reconstruct/src/compactors.hpp
+++ /dev/null
@@ -1,21 +0,0 @@
-#ifndef _FTL_RECONSTRUCT_COMPACTORS_HPP_
-#define _FTL_RECONSTRUCT_COMPACTORS_HPP_
-
-#include <ftl/voxel_hash.hpp>
-
-namespace ftl {
-namespace cuda {
-
-// Compact visible
-//void compactifyVisible(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const DepthCameraParams &camera, cudaStream_t);
-
-// Compact allocated
-void compactifyAllocated(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, cudaStream_t);
-
-// Compact visible surfaces
-void 
compactifyOccupied(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, cudaStream_t stream); - -} -} - -#endif // _FTL_RECONSTRUCT_COMPACTORS_HPP_ diff --git a/applications/reconstruct/src/garbage.cu b/applications/reconstruct/src/garbage.cu deleted file mode 100644 index b685e9e6b7d94434ff425eff268699a715261522..0000000000000000000000000000000000000000 --- a/applications/reconstruct/src/garbage.cu +++ /dev/null @@ -1,135 +0,0 @@ -#include <ftl/voxel_hash.hpp> -#include "garbage.hpp" - -using namespace ftl::voxhash; - -#define T_PER_BLOCK 8 -#define NUM_CUDA_BLOCKS 10000 - -/*__global__ void starveVoxelsKernel(HashData hashData) { - int ptr; - - // Stride over all allocated blocks - for (int bi=blockIdx.x; bi<*hashData.d_hashCompactifiedCounter; bi+=NUM_CUDA_BLOCKS) { - - ptr = hashData.d_hashCompactified[bi].ptr; - int weight = hashData.d_SDFBlocks[ptr + threadIdx.x].weight; - weight = max(0, weight-2); - hashData.d_SDFBlocks[ptr + threadIdx.x].weight = weight; //CHECK Remove to totally clear previous frame (Nick) - - } -} - -void ftl::cuda::starveVoxels(HashData& hashData, const HashParams& hashParams, cudaStream_t stream) { - const unsigned int threadsPerBlock = SDF_BLOCK_SIZE*SDF_BLOCK_SIZE*SDF_BLOCK_SIZE; - const dim3 gridSize(NUM_CUDA_BLOCKS, 1); - const dim3 blockSize(threadsPerBlock, 1); - - //if (hashParams.m_numOccupiedBlocks > 0) { - starveVoxelsKernel << <gridSize, blockSize, 0, stream >> >(hashData); - //} -#ifdef _DEBUG - cudaSafeCall(cudaDeviceSynchronize()); - //cutilCheckMsg(__FUNCTION__); -#endif -}*/ - -#define ENTRIES_PER_BLOCK 4 - -__global__ void clearVoxelsKernel(HashData hashData) { - const int lane = threadIdx.x % 16; - const int halfWarp = threadIdx.x / 16; - - // Stride over all allocated blocks - for (int bi=blockIdx.x+halfWarp; bi<*hashData.d_hashCompactifiedCounter; bi+=NUM_CUDA_BLOCKS*ENTRIES_PER_BLOCK) { - - HashEntry *entry = hashData.d_hashCompactified[bi]; - //hashData.d_SDFBlocks[entry.ptr + threadIdx.x].weight = 0; - entry->voxels[lane] = 0; - - } -} - -void ftl::cuda::clearVoxels(HashData& hashData, const HashParams& hashParams) { - const unsigned int threadsPerBlock = 16 * ENTRIES_PER_BLOCK; - const dim3 gridSize(NUM_CUDA_BLOCKS, 1); - const dim3 blockSize(threadsPerBlock, 1); - - clearVoxelsKernel << <gridSize, blockSize >> >(hashData); -} - - -__global__ void garbageCollectIdentifyKernel(HashData hashData) { - const int lane = threadIdx.x % 16; - const int halfWarp = threadIdx.x / 16; - - // Stride over all allocated blocks - for (int bi=blockIdx.x+halfWarp; bi<*hashData.d_hashCompactifiedCounter; bi+=NUM_CUDA_BLOCKS * ENTRIES_PER_BLOCK) { - - const HashEntry *entry = hashData.d_hashCompactified[bi]; - - const uint v = entry->voxels[lane]; - const uint mask = (halfWarp & 0x1) ? 0xFFFF0000 : 0x0000FFFF; - uint ballot_result = __ballot_sync(mask, v == 0 || v == 0xFFFFFFFF); - - if (lane == 0) hashData.d_hashDecision[bi] = (ballot_result == mask) ? 
1 : 0; - - } -} - -void ftl::cuda::garbageCollectIdentify(HashData& hashData, const HashParams& hashParams, cudaStream_t stream) { - - const unsigned int threadsPerBlock = SDF_BLOCK_SIZE * SDF_BLOCK_SIZE * SDF_BLOCK_SIZE / 2; - const dim3 gridSize(NUM_CUDA_BLOCKS, 1); - const dim3 blockSize(threadsPerBlock, 1); - - //if (hashParams.m_numOccupiedBlocks > 0) { - garbageCollectIdentifyKernel << <gridSize, blockSize, 0, stream >> >(hashData); - //} -#ifdef _DEBUG - cudaSafeCall(cudaDeviceSynchronize()); - //cutilCheckMsg(__FUNCTION__); -#endif -} - - -__global__ void garbageCollectFreeKernel(HashData hashData) { - - // Stride over all allocated blocks - for (int bi=blockIdx.x*blockDim.x + threadIdx.x; bi<*hashData.d_hashCompactifiedCounter; bi+=NUM_CUDA_BLOCKS*blockDim.x) { - - HashEntry *entry = hashData.d_hashCompactified[bi]; - - if ((entry->head.flags & ftl::voxhash::kFlagSurface) == 0) { //decision to delete the hash entry - - - //if (entry->head.offset == FREE_ENTRY) return; //should never happen since we did compactify before - - int3 posI3 = make_int3(entry->head.posXYZ.x, entry->head.posXYZ.y, entry->head.posXYZ.z); - - if (hashData.deleteHashEntryElement(posI3)) { //delete hash entry from hash (and performs heap append) - //#pragma unroll - //for (uint i = 0; i < 16; i++) { //clear sdf block: CHECK TODO another kernel? - // entry->voxels[i] = 0; - //} - } - } - - } -} - - -void ftl::cuda::garbageCollectFree(HashData& hashData, const HashParams& hashParams, cudaStream_t stream) { - - const unsigned int threadsPerBlock = T_PER_BLOCK*T_PER_BLOCK; - const dim3 gridSize(NUM_CUDA_BLOCKS, 1); // (hashParams.m_numOccupiedBlocks + threadsPerBlock - 1) / threadsPerBlock - const dim3 blockSize(threadsPerBlock, 1); - - //if (hashParams.m_numOccupiedBlocks > 0) { - garbageCollectFreeKernel << <gridSize, blockSize, 0, stream >> >(hashData); - //} -#ifdef _DEBUG - cudaSafeCall(cudaDeviceSynchronize()); - //cutilCheckMsg(__FUNCTION__); -#endif -} diff --git a/applications/reconstruct/src/garbage.hpp b/applications/reconstruct/src/garbage.hpp deleted file mode 100644 index 5d1d7574d252b40da18008da39f1bf89a7d667fb..0000000000000000000000000000000000000000 --- a/applications/reconstruct/src/garbage.hpp +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef _FTL_RECONSTRUCTION_GARBAGE_HPP_ -#define _FTL_RECONSTRUCTION_GARBAGE_HPP_ - -namespace ftl { -namespace cuda { - -void clearVoxels(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams); -void starveVoxels(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, cudaStream_t stream); -void garbageCollectIdentify(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, cudaStream_t stream); -void garbageCollectFree(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, cudaStream_t stream); - -} -} - -#endif // _FTL_RECONSTRUCTION_GARBAGE_HPP_ diff --git a/applications/reconstruct/src/integrators.cu b/applications/reconstruct/src/integrators.cu deleted file mode 100644 index d23fada9982aed2ff49039aa91bfa8760f91fcd1..0000000000000000000000000000000000000000 --- a/applications/reconstruct/src/integrators.cu +++ /dev/null @@ -1,342 +0,0 @@ -#include "integrators.hpp" -//#include <ftl/ray_cast_params.hpp> -#include <vector_types.h> -#include <cuda_runtime.h> -#include <ftl/cuda_matrix_util.hpp> -#include <ftl/cuda_util.hpp> -#include <ftl/cuda_common.hpp> - -#define T_PER_BLOCK 8 -#define NUM_CUDA_BLOCKS 10000 -#define WARP_SIZE 32 - -using ftl::voxhash::HashData; -using 
ftl::voxhash::HashParams; -using ftl::voxhash::Voxel; -using ftl::voxhash::HashEntry; -using ftl::voxhash::HashEntryHead; -using ftl::voxhash::FREE_ENTRY; - -extern __constant__ ftl::voxhash::DepthCameraCUDA c_cameras[MAX_CAMERAS]; -extern __constant__ HashParams c_hashParams; - -__device__ float4 make_float4(uchar4 c) { - return make_float4(static_cast<float>(c.x), static_cast<float>(c.y), static_cast<float>(c.z), static_cast<float>(c.w)); -} - -__device__ float colourDistance(const uchar4 &c1, const uchar3 &c2) { - float x = c1.x-c2.x; - float y = c1.y-c2.y; - float z = c1.z-c2.z; - return x*x + y*y + z*z; -} - -/* - * Kim, K., Chalidabhongse, T. H., Harwood, D., & Davis, L. (2005). - * Real-time foreground-background segmentation using codebook model. - * Real-Time Imaging. https://doi.org/10.1016/j.rti.2004.12.004 - */ -__device__ bool colordiff(const uchar4 &pa, const uchar3 &pb, float epsilon) { - float x_2 = pb.x * pb.x + pb.y * pb.y + pb.z * pb.z; - float v_2 = pa.x * pa.x + pa.y * pa.y + pa.z * pa.z; - float xv_2 = powf(float(pb.x * pa.x + pb.y * pa.y + pb.z * pa.z), 2.0f); - float p_2 = xv_2 / v_2; - return sqrt(x_2 - p_2) < epsilon; -} - -/* - * Guennebaud, G.; Gross, M. Algebraic point set surfaces. ACMTransactions on Graphics Vol. 26, No. 3, Article No. 23, 2007. - * Used in: FusionMLS: Highly dynamic 3D reconstruction with consumer-grade RGB-D cameras - * r = distance between points - * h = smoothing parameter in meters (default 4cm) - */ -__device__ float spatialWeighting(float r) { - const float h = c_hashParams.m_spatialSmoothing; - if (r >= h) return 0.0f; - float rh = r / h; - rh = 1.0f - rh*rh; - return rh*rh*rh*rh; -} - -__device__ float spatialWeighting(float r, float h) { - //const float h = c_hashParams.m_spatialSmoothing; - if (r >= h) return 0.0f; - float rh = r / h; - rh = 1.0f - rh*rh; - return rh*rh*rh*rh; -} - - -__global__ void integrateDepthMapsKernel(HashData hashData, HashParams hashParams, int numcams) { - __shared__ uint all_warp_ballot; - __shared__ uint voxels[16]; - - const uint i = threadIdx.x; //inside of an SDF block - const int3 po = make_int3(hashData.delinearizeVoxelIndex(i)); - - // Stride over all allocated blocks - for (int bi=blockIdx.x; bi<*hashData.d_hashCompactifiedCounter; bi+=NUM_CUDA_BLOCKS) { - - //TODO check if we should load this in shared memory - //HashEntryHead entry = hashData.d_hashCompactified[bi]->head; - - int3 pi_base = hashData.SDFBlockToVirtualVoxelPos(make_int3(hashData.d_hashCompactified[bi]->head.posXYZ)); - - //uint idx = entry.offset + i; - int3 pi = pi_base + po; - float3 pfb = hashData.virtualVoxelPosToWorld(pi); - int count = 0; - //float camdepths[MAX_CAMERAS]; - - Voxel oldVoxel; // = hashData.d_SDFBlocks[idx]; - hashData.deleteVoxel(oldVoxel); - - for (uint cam=0; cam<numcams; ++cam) { - const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam]; - - float3 pf = camera.poseInverse * pfb; - uint2 screenPos = make_uint2(camera.params.cameraToKinectScreenInt(pf)); - - // For this voxel in hash, get its screen position and check it is on screen - if (screenPos.x < camera.params.m_imageWidth && screenPos.y < camera.params.m_imageHeight) { //on screen - - //float depth = g_InputDepth[screenPos]; - float depth = tex2D<float>(camera.depth, screenPos.x, screenPos.y); - //if (depth > 20.0f) return; - - //uchar4 color = make_uchar4(0, 0, 0, 0); - //if (cameraData.d_colorData) { - //color = (cam == 0) ? 
make_uchar4(255,0,0,255) : make_uchar4(0,0,255,255); - //color = tex2D<uchar4>(camera.colour, screenPos.x, screenPos.y); - //color = bilinearFilterColor(cameraData.cameraToKinectScreenFloat(pf)); - //} - - //printf("screen pos %d\n", color.x); - //return; - - // TODO:(Nick) Accumulate weighted positions - // TODO:(Nick) Accumulate weighted normals - // TODO:(Nick) Accumulate weights - - // Depth is within accepted max distance from camera - if (depth > 0.01f && depth < hashParams.m_maxIntegrationDistance) { // valid depth and color (Nick: removed colour check) - //camdepths[count] = depth; - ++count; - - // Calculate SDF of this voxel wrt the depth map value - float sdf = depth - pf.z; - float truncation = hashData.getTruncation(depth); - float depthZeroOne = camera.params.cameraToKinectProjZ(depth); - - // Is this voxel close enough to cam for depth map value - // CHECK Nick: If is too close then free space violation so remove? - if (sdf > -truncation) // && depthZeroOne >= 0.0f && depthZeroOne <= 1.0f) //check if in truncation range should already be made in depth map computation - { - float weightUpdate = max(hashParams.m_integrationWeightSample * 1.5f * (1.0f-depthZeroOne), 1.0f); - - Voxel curr; //construct current voxel - curr.sdf = sdf; - curr.weight = weightUpdate; - //curr.color = make_uchar3(color.x, color.y, color.z); - - - //if (entry.flags != cameraParams.flags & 0xFF) { - // entry.flags = cameraParams.flags & 0xFF; - //hashData.d_SDFBlocks[idx].color = make_uchar3(0,0,0); - //} - - Voxel newVoxel; - //if (color.x == MINF) hashData.combineVoxelDepthOnly(hashData.d_SDFBlocks[idx], curr, newVoxel); - //else hashData.combineVoxel(hashData.d_SDFBlocks[idx], curr, newVoxel); - hashData.combineVoxel(oldVoxel, curr, newVoxel); - - oldVoxel = newVoxel; - - //Voxel prev = getVoxel(g_SDFBlocksSDFUAV, g_SDFBlocksRGBWUAV, idx); - //Voxel newVoxel = combineVoxel(curr, prev); - //setVoxel(g_SDFBlocksSDFUAV, g_SDFBlocksRGBWUAV, idx, newVoxel); - } - } else { - // Depth is invalid so what to do here? - // TODO(Nick) Use past voxel if available (set weight from 0 to 1) - - // Naive: need to know if this is a foreground voxel - //bool coldist = colordiff(color, hashData.d_SDFBlocks[idx].color, 5.0f); - //if (!coldist) ++count; - - } - } - } - - // Calculate voxel sign values across a warp - int warpNum = i / WARP_SIZE; - //uint ballot_result = __ballot_sync(0xFFFFFFFF, (oldVoxel.sdf >= 0.0f) ? 0 : 1); - uint ballot_result = __ballot_sync(0xFFFFFFFF, (fabs(oldVoxel.sdf) <= hashParams.m_virtualVoxelSize && oldVoxel.weight > 0) ? 1 : 0); - - // Aggregate each warp result into voxel mask - if (i % WARP_SIZE == 0) { - voxels[warpNum] = ballot_result; - } - - __syncthreads(); - - // Work out if block is occupied or not and save voxel masks - // TODO:(Nick) Is it faster to do this in a separate garbage kernel? 
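-	// Each warp has just balloted which of its 32 voxels lie near the surface
-	// (|sdf| within one voxel size and non-zero weight). Threads 0..15 publish
-	// the 16 ballot words as the block's 512-bit occupancy mask; thread 0 then
-	// sets kFlagSurface only when that mask is mixed (neither all zeros nor
-	// all ones), matching the criterion the garbage-collect pass applies.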
- if (i < 16) { - const uint v = voxels[i]; - hashData.d_hashCompactified[bi]->voxels[i] = v; - const uint mask = 0x0000FFFF; - uint b1 = __ballot_sync(mask, v == 0xFFFFFFFF); - uint b2 = __ballot_sync(mask, v == 0); - if (i == 0) { - if (b1 != mask && b2 != mask) hashData.d_hashCompactified[bi]->head.flags |= ftl::voxhash::kFlagSurface; - else hashData.d_hashCompactified[bi]->head.flags &= ~ftl::voxhash::kFlagSurface; - } - } - - } -} - -#define WINDOW_RADIUS 1 -#define PATCH_SIZE 32 - -__global__ void integrateMLSKernel(HashData hashData, HashParams hashParams, int numcams) { - __shared__ uint voxels[16]; - - const uint i = threadIdx.x; //inside of an SDF block - const int3 po = make_int3(hashData.delinearizeVoxelIndex(i)); - const int warpNum = i / WARP_SIZE; - const int lane = i % WARP_SIZE; - - // Stride over all allocated blocks - for (int bi=blockIdx.x; bi<*hashData.d_hashCompactifiedCounter; bi+=NUM_CUDA_BLOCKS) { - - //TODO check if we should load this in shared memory - //HashEntryHead entry = hashData.d_hashCompactified[bi]->head; - - const int3 pi_base = hashData.SDFBlockToVirtualVoxelPos(make_int3(hashData.d_hashCompactified[bi]->head.posXYZ)); - - //uint idx = entry.offset + i; - const int3 pi = pi_base + po; - const float3 pfb = hashData.virtualVoxelPosToWorld(pi); - //int count = 0; - //float camdepths[MAX_CAMERAS]; - - //Voxel oldVoxel; // = hashData.d_SDFBlocks[idx]; - //hashData.deleteVoxel(oldVoxel); - - //float3 awpos = make_float3(0.0f); - //float3 awnorm = make_float3(0.0f); - //float aweights = 0.0f; - float sdf = 0.0f; - float weights = 0.0f; - - // Preload depth values - // 1. Find min and max screen positions - // 2. Subtract/Add WINDOW_RADIUS to min/max - // ... check that the buffer is not too small to cover this - // ... if buffer not big enough then don't buffer at all. - // 3. Populate shared mem depth map buffer using all threads - // 4. Adjust window lookups to use shared mem buffer - - //uint cam=0; - for (uint cam=0; cam<numcams; ++cam) { - const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam]; - const uint height = camera.params.m_imageHeight; - const uint width = camera.params.m_imageWidth; - - const float3 pf = camera.poseInverse * pfb; - const uint2 screenPos = make_uint2(camera.params.cameraToKinectScreenInt(pf)); - - //float3 wpos = make_float3(0.0f); - float3 wnorm = make_float3(0.0f); - - - #pragma unroll - for (int v=-WINDOW_RADIUS; v<=WINDOW_RADIUS; ++v) { - for (int u=-WINDOW_RADIUS; u<=WINDOW_RADIUS; ++u) { - if (screenPos.x+u < width && screenPos.y+v < height) { //on screen - float4 depth = tex2D<float4>(camera.points, screenPos.x+u, screenPos.y+v); - if (depth.z == MINF) continue; - - //float4 normal = tex2D<float4>(camera.normal, screenPos.x+u, screenPos.y+v); - const float3 camPos = camera.poseInverse * make_float3(depth); //camera.pose * camera.params.kinectDepthToSkeleton(screenPos.x+u, screenPos.y+v, depth); - const float weight = spatialWeighting(length(pf - camPos)); - - //wpos += weight*worldPos; - sdf += weight*(camPos.z - pf.z); - //sdf += camPos.z - pf.z; - //wnorm += weight*make_float3(normal); - //weights += 1.0f; - weights += weight; - } - } - } - - //awpos += wpos; - //aweights += weights; - } - - //awpos /= aweights; - //wnorm /= weights; - - sdf /= weights; - - //float sdf = (aweights == 0.0f) ? 
MINF : length(pfb - awpos); - //float sdf = wnorm.x * (pfb.x - wpos.x) + wnorm.y * (pfb.y - wpos.y) + wnorm.z * (pfb.z - wpos.z); - - //printf("WEIGHTS: %f\n", weights); - - //if (weights < 0.00001f) sdf = 0.0f; - - // Calculate voxel sign values across a warp - int warpNum = i / WARP_SIZE; - - //uint solid_ballot = __ballot_sync(0xFFFFFFFF, (fabs(sdf) < hashParams.m_virtualVoxelSize && aweights >= 0.5f) ? 1 : 0); - //uint solid_ballot = __ballot_sync(0xFFFFFFFF, (fabs(sdf) <= hashParams.m_virtualVoxelSize) ? 1 : 0); - //uint solid_ballot = __ballot_sync(0xFFFFFFFF, (aweights >= 0.0f) ? 1 : 0); - uint solid_ballot = __ballot_sync(0xFFFFFFFF, (sdf < 0.0f ) ? 1 : 0); - - // Aggregate each warp result into voxel mask - if (i % WARP_SIZE == 0) { - voxels[warpNum] = solid_ballot; - //valid[warpNum] = valid_ballot; - } - - __syncthreads(); - - // Work out if block is occupied or not and save voxel masks - // TODO:(Nick) Is it faster to do this in a separate garbage kernel? - if (i < 16) { - const uint v = voxels[i]; - hashData.d_hashCompactified[bi]->voxels[i] = v; - //hashData.d_hashCompactified[bi]->validity[i] = valid[i]; - const uint mask = 0x0000FFFF; - uint b1 = __ballot_sync(mask, v == 0xFFFFFFFF); - uint b2 = __ballot_sync(mask, v == 0); - if (i == 0) { - if (b1 != mask && b2 != mask) hashData.d_hashCompactified[bi]->head.flags |= ftl::voxhash::kFlagSurface; - else hashData.d_hashCompactified[bi]->head.flags &= ~ftl::voxhash::kFlagSurface; - } - } - - } -} - - - -void ftl::cuda::integrateDepthMaps(HashData& hashData, const HashParams& hashParams, int numcams, cudaStream_t stream) { -const unsigned int threadsPerBlock = SDF_BLOCK_SIZE*SDF_BLOCK_SIZE*SDF_BLOCK_SIZE; -const dim3 gridSize(NUM_CUDA_BLOCKS, 1); -const dim3 blockSize(threadsPerBlock, 1); - -//if (hashParams.m_numOccupiedBlocks > 0) { //this guard is important if there is no depth in the current frame (i.e., no blocks were allocated) - integrateMLSKernel << <gridSize, blockSize, 0, stream >> >(hashData, hashParams, numcams); -//} - -//cudaSafeCall( cudaGetLastError() ); -#ifdef _DEBUG -cudaSafeCall(cudaDeviceSynchronize()); -//cutilCheckMsg(__FUNCTION__); -#endif -} diff --git a/applications/reconstruct/src/integrators.hpp b/applications/reconstruct/src/integrators.hpp deleted file mode 100644 index 789551dd1fa7347bf02c518c8c5a73f6ae4269b4..0000000000000000000000000000000000000000 --- a/applications/reconstruct/src/integrators.hpp +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef _FTL_RECONSTRUCTION_INTEGRATORS_HPP_ -#define _FTL_RECONSTRUCTION_INTEGRATORS_HPP_ - -#include <ftl/voxel_hash.hpp> -#include <ftl/depth_camera.hpp> - -namespace ftl { -namespace cuda { - -/*void integrateDepthMap(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, - const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, cudaStream_t stream); - -void integrateRegistration(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, - const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, cudaStream_t stream); -*/ - -void integrateDepthMaps(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, int numcams, cudaStream_t stream); - -} -} - -#endif // _FTL_RECONSTRUCTION_INTEGRATORS_HPP_ diff --git a/applications/reconstruct/src/voxel_hash.cpp b/applications/reconstruct/src/voxel_hash.cpp deleted file mode 100644 index 6f929c746d66cae5c382bf15b2d25e26f6b46702..0000000000000000000000000000000000000000 --- 
a/applications/reconstruct/src/voxel_hash.cpp
+++ /dev/null
@@ -1,95 +0,0 @@
-#include <ftl/voxel_hash.hpp>
-#include <loguru.hpp>
-
-using ftl::voxhash::HashData;
-using ftl::voxhash::HashParams;
-
-void HashData::allocate(const HashParams& params, bool dataOnGPU) {
-	m_bIsOnGPU = dataOnGPU;
-	if (m_bIsOnGPU) {
-		cudaSafeCall(cudaMalloc(&d_hash, sizeof(HashEntry)* params.m_hashNumBuckets));
-		cudaSafeCall(cudaMalloc(&d_hashDecision, sizeof(int)* params.m_hashNumBuckets));
-		cudaSafeCall(cudaMalloc(&d_hashDecisionPrefix, sizeof(int)* params.m_hashNumBuckets));
-		cudaSafeCall(cudaMalloc(&d_hashCompactified, sizeof(HashEntry*)* params.m_hashNumBuckets));
-		cudaSafeCall(cudaMalloc(&d_hashCompactifiedCounter, sizeof(int)));
-		cudaSafeCall(cudaMalloc(&d_hashBucketMutex, sizeof(int)* params.m_hashNumBuckets));
-	} else {
-		d_hash = new HashEntry[params.m_hashNumBuckets];
-		d_hashDecision = new int[params.m_hashNumBuckets];
-		d_hashDecisionPrefix = new int[params.m_hashNumBuckets];
-		d_hashCompactified = new HashEntry*[params.m_hashNumBuckets];
-		d_hashCompactifiedCounter = new int[1];
-		d_hashBucketMutex = new int[params.m_hashNumBuckets];
-	}
-
-	updateParams(params);
-}
-
-void HashData::updateParams(const HashParams& params) {
-	if (m_bIsOnGPU) {
-		updateConstantHashParams(params);
-	}
-}
-
-void HashData::free() {
-	if (m_bIsOnGPU) {
-		cudaSafeCall(cudaFree(d_hash));
-		cudaSafeCall(cudaFree(d_hashDecision));
-		cudaSafeCall(cudaFree(d_hashDecisionPrefix));
-		cudaSafeCall(cudaFree(d_hashCompactified));
-		cudaSafeCall(cudaFree(d_hashCompactifiedCounter));
-		cudaSafeCall(cudaFree(d_hashBucketMutex));
-	} else {
-		if (d_hash) delete[] d_hash;
-		if (d_hashDecision) delete[] d_hashDecision;
-		if (d_hashDecisionPrefix) delete[] d_hashDecisionPrefix;
-		if (d_hashCompactified) delete[] d_hashCompactified;
-		if (d_hashCompactifiedCounter) delete[] d_hashCompactifiedCounter;
-		if (d_hashBucketMutex) delete[] d_hashBucketMutex;
-	}
-
-	d_hash = NULL;
-	d_hashDecision = NULL;
-	d_hashDecisionPrefix = NULL;
-	d_hashCompactified = NULL;
-	d_hashCompactifiedCounter = NULL;
-	d_hashBucketMutex = NULL;
-}
-
-HashData HashData::download() const {
-	if (!m_bIsOnGPU) return *this;
-	HashParams params;
-
-	HashData hashData;
-	hashData.allocate(params, false); //allocate the data on the CPU
-	cudaSafeCall(cudaMemcpy(hashData.d_hash, d_hash, sizeof(HashEntry)* params.m_hashNumBuckets, cudaMemcpyDeviceToHost));
-	cudaSafeCall(cudaMemcpy(hashData.d_hashDecision, d_hashDecision, sizeof(int)*params.m_hashNumBuckets, cudaMemcpyDeviceToHost));
-	cudaSafeCall(cudaMemcpy(hashData.d_hashDecisionPrefix, d_hashDecisionPrefix, sizeof(int)*params.m_hashNumBuckets, cudaMemcpyDeviceToHost));
-	cudaSafeCall(cudaMemcpy(hashData.d_hashCompactified, d_hashCompactified, sizeof(HashEntry*)* params.m_hashNumBuckets, cudaMemcpyDeviceToHost));
-	cudaSafeCall(cudaMemcpy(hashData.d_hashCompactifiedCounter, d_hashCompactifiedCounter, sizeof(unsigned int), cudaMemcpyDeviceToHost));
-	cudaSafeCall(cudaMemcpy(hashData.d_hashBucketMutex, d_hashBucketMutex, sizeof(int)* params.m_hashNumBuckets, cudaMemcpyDeviceToHost));
-
-	return hashData;
-}
-
-HashData HashData::upload() const {
-	if (m_bIsOnGPU) return *this;
-	HashParams params;
-
-	HashData hashData;
-	hashData.allocate(params, true); //allocate the data on the GPU
-	cudaSafeCall(cudaMemcpy(hashData.d_hash, d_hash, sizeof(HashEntry)* params.m_hashNumBuckets, cudaMemcpyHostToDevice));
-	cudaSafeCall(cudaMemcpy(hashData.d_hashDecision, d_hashDecision, sizeof(int)*params.m_hashNumBuckets, cudaMemcpyHostToDevice));
-	cudaSafeCall(cudaMemcpy(hashData.d_hashDecisionPrefix, d_hashDecisionPrefix, sizeof(int)*params.m_hashNumBuckets, cudaMemcpyHostToDevice));
-	cudaSafeCall(cudaMemcpy(hashData.d_hashCompactified, d_hashCompactified, sizeof(HashEntry*)* params.m_hashNumBuckets, cudaMemcpyHostToDevice));
-	cudaSafeCall(cudaMemcpy(hashData.d_hashCompactifiedCounter, d_hashCompactifiedCounter, sizeof(unsigned int), cudaMemcpyHostToDevice));
-	cudaSafeCall(cudaMemcpy(hashData.d_hashBucketMutex, d_hashBucketMutex, sizeof(int)* params.m_hashNumBuckets, cudaMemcpyHostToDevice));
-
-	return hashData;
-}
-
-/*size_t HashData::getAllocatedBlocks() const {
-	unsigned int count;
-	cudaSafeCall(cudaMemcpy(d_heapCounter, &count, sizeof(unsigned int), cudaMemcpyDeviceToHost));
-	return count;
-}*/
diff --git a/applications/reconstruct/src/voxel_hash.cu b/applications/reconstruct/src/voxel_hash.cu
deleted file mode 100644
index c2d07c391a6e48d2b45cc23dbf32b00878ffd5c9..0000000000000000000000000000000000000000
--- a/applications/reconstruct/src/voxel_hash.cu
+++ /dev/null
@@ -1,257 +0,0 @@
-#include <ftl/voxel_hash.hpp>
-
-using namespace ftl::voxhash;
-
-#define COLLISION_LIST_SIZE 6
-
-__device__ inline uint64_t compactPosition(const int3 &pos) {
-	union __align__(8) {
-	short4 posXYZ;
-	uint64_t pos64;
-	};
-	posXYZ.x = pos.x; posXYZ.y = pos.y; posXYZ.z = pos.z; posXYZ.w = 0;
-	return pos64;
-}
-
-//! returns the hash entry for a given sdf block id; if there was no hash entry the returned entry will have a ptr with FREE_ENTRY set
-__device__
-int HashData::getHashEntryForSDFBlockPos(const int3& sdfBlock) const {
-	uint h = computeHashPos(sdfBlock); //hash
-	uint64_t pos = compactPosition(sdfBlock);
-
-	HashEntryHead curr;
-
-	int i = h;
-	unsigned int maxIter = 0;
-
-	#pragma unroll 2
-	while (maxIter < COLLISION_LIST_SIZE) {
-		curr = d_hash[i].head;
-
-		if (curr.pos == pos && curr.offset != FREE_ENTRY) return i;
-		if (curr.offset == 0 || curr.offset == FREE_ENTRY) break;
-
-		i += curr.offset;  //go to next element in the list
-		i %= (params().m_hashNumBuckets);  //check for overflow
-		++maxIter;
-	}
-
-	// Could not find
-	return -1;
-}
-
-//for histogram (collisions traversal only)
-__device__
-unsigned int HashData::getNumHashLinkedList(unsigned int bucketID) {
-	unsigned int listLen = 0;
-
-	unsigned int i = bucketID;	//start with the last entry of the current bucket
-	HashEntryHead curr;	curr.offset = 0;
-
-	unsigned int maxIter = 0;
-
-	#pragma unroll 2
-	while (maxIter < COLLISION_LIST_SIZE) {
-		curr = d_hash[i].head;
-
-		if (curr.offset == 0 || curr.offset == FREE_ENTRY) break;
-
-		i += curr.offset;		//go to next element in the list
-		i %= (params().m_hashNumBuckets);	//check for overflow
-		++listLen;
-		++maxIter;
-	}
-
-	return listLen;
-}
-
-//pos in SDF block coordinates
-__device__
-void HashData::allocBlock(const int3& pos) {
-	uint h = computeHashPos(pos);	//hash bucket
-	uint i = h;
-	HashEntryHead curr;	//curr.offset = 0;
-	const uint64_t pos64 = compactPosition(pos);
-
-	unsigned int maxIter = 0;
-	#pragma unroll 2
-	while (maxIter < COLLISION_LIST_SIZE) {
-		//offset = curr.offset;
-		curr = d_hash[i].head;	//TODO MATTHIAS do by reference
-		if (curr.pos == pos64 && curr.offset != FREE_ENTRY) return;
-		if (curr.offset == 0 || curr.offset == FREE_ENTRY) break;
-
-		i += curr.offset;		//go to next element in the list
-		i %= (params().m_hashNumBuckets);	//check for overflow
-		++maxIter;
-	}
-
-	// Limit reached... 
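-	// No existing entry for this block and the collision list ended here, so
-	// probe linearly from the list tail i for a FREE_ENTRY slot j. Both
-	// buckets are locked first via atomicExch on d_hashBucketMutex; the new
-	// entry is written into slot j and linked in by storing the relative
-	// offset j - i in the tail entry's head.offset.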
-	//if (maxIter == COLLISION_LIST_SIZE) return;
-
-	int j = i;
-	while (maxIter < COLLISION_LIST_SIZE) {
-		//offset = curr.offset;
-
-		if (curr.offset == FREE_ENTRY) {
-			int prevValue = atomicExch(&d_hashBucketMutex[i], LOCK_ENTRY);
-			if (prevValue != LOCK_ENTRY) {
-				if (i == j) {
-					HashEntryHead& entry = d_hash[j].head;
-					entry.pos = pos64;
-					entry.offset = 0;
-					entry.flags = 0;
-				} else {
-					//InterlockedExchange(g_HashBucketMutex[h], LOCK_ENTRY, prevValue);	//lock the hash bucket where we have found a free entry
-					prevValue = atomicExch(&d_hashBucketMutex[j], LOCK_ENTRY);
-					if (prevValue != LOCK_ENTRY) {	//only proceed if the bucket has been locked
-						HashEntryHead& entry = d_hash[j].head;
-						entry.pos = pos64;
-						entry.offset = 0;
-						entry.flags = 0;  // Flag block as valid in this frame (Nick)
-						//entry.ptr = consumeHeap() * SDF_BLOCK_SIZE*SDF_BLOCK_SIZE*SDF_BLOCK_SIZE;	//memory alloc
-						d_hash[i].head.offset = j-i;
-						//setHashEntry(g_Hash, idxLastEntryInBucket, lastEntryInBucket);
-					}
-				}
-			}
-			return;	//bucket was already locked
-		}
-
-		++j;
-		j %= (params().m_hashNumBuckets);	//check for overflow
-		curr = d_hash[j].head;	//TODO MATTHIAS do by reference
-		++maxIter;
-	}
-}
-
-
-//!inserts a hash entry without allocating any memory: used by streaming: TODO MATTHIAS check the atomics in this function
-/*__device__
-bool HashData::insertHashEntry(HashEntry entry)
-{
-	uint h = computeHashPos(entry.pos);
-	uint hp = h * HASH_BUCKET_SIZE;
-
-	for (uint j = 0; j < HASH_BUCKET_SIZE; j++) {
-		uint i = j + hp;
-		//const HashEntry& curr = d_hash[i];
-		int prevWeight = 0;
-		//InterlockedCompareExchange(hash[3*i+2], FREE_ENTRY, LOCK_ENTRY, prevWeight);
-		prevWeight = atomicCAS(&d_hash[i].ptr, FREE_ENTRY, LOCK_ENTRY);
-		if (prevWeight == FREE_ENTRY) {
-			d_hash[i] = entry;
-			//setHashEntry(hash, i, entry);
-			return true;
-		}
-	}
-
-#ifdef HANDLE_COLLISIONS
-	//updated variables as after the loop
-	const uint idxLastEntryInBucket = (h+1)*HASH_BUCKET_SIZE - 1;	//get last index of bucket
-
-	uint i = idxLastEntryInBucket;	//start with the last entry of the current bucket
-	HashEntry curr;
-
-	unsigned int maxIter = 0;
-	//[allow_uav_condition]
-	uint g_MaxLoopIterCount = params().m_hashMaxCollisionLinkedListSize;
-	#pragma unroll 1
-	while (maxIter < g_MaxLoopIterCount) {	//traverse list until end // why find the end, when you are inserting at the start?!
-		//curr = getHashEntry(hash, i);
-		curr = d_hash[i];	//TODO MATTHIAS do by reference
-		if (curr.offset == 0) break;	//we have found the end of the list
-		i = idxLastEntryInBucket + curr.offset;	//go to next element in the list
-		i %= (HASH_BUCKET_SIZE * params().m_hashNumBuckets);	//check for overflow
-
-		maxIter++;
-	}
-
-	maxIter = 0;
-	int offset = 0;
-	#pragma unroll 1
-	while (maxIter < g_MaxLoopIterCount) {	//linear search for free entry
-		offset++;
-		uint i = (idxLastEntryInBucket + offset) % (HASH_BUCKET_SIZE * params().m_hashNumBuckets);	//go to next hash element
-		if ((offset % HASH_BUCKET_SIZE) == 0) continue;	//cannot insert into a last bucket element (would conflict with other linked lists)
-
-		int prevWeight = 0;
-		//InterlockedCompareExchange(hash[3*i+2], FREE_ENTRY, LOCK_ENTRY, prevWeight);	//check for a free entry
-		uint* d_hashUI = (uint*)d_hash;
-		prevWeight = atomicCAS(&d_hashUI[3*idxLastEntryInBucket+1], (uint)FREE_ENTRY, (uint)LOCK_ENTRY);
-		if (prevWeight == FREE_ENTRY) {	//if free entry found set prev->next = curr & curr->next = prev->next
-			//[allow_uav_condition]
-			//while(hash[3*idxLastEntryInBucket+2] == LOCK_ENTRY);	// expects setHashEntry to set the ptr last, required because pos.z is packed into the same value -> prev->next = curr -> might corrupt pos.z
-
-			HashEntry lastEntryInBucket = d_hash[idxLastEntryInBucket];	//get prev (= lastEntry in Bucket)
-
-			int newOffsetPrev = (offset << 16) | (lastEntryInBucket.pos.z & 0x0000ffff);	//prev->next = curr (maintain old z-pos)
-			int oldOffsetPrev = 0;
-			//InterlockedExchange(hash[3*idxLastEntryInBucket+1], newOffsetPrev, oldOffsetPrev);	//set prev offset atomically
-			uint* d_hashUI = (uint*)d_hash;
-			oldOffsetPrev = prevWeight = atomicExch(&d_hashUI[3*idxLastEntryInBucket+1], newOffsetPrev);
-			entry.offset = oldOffsetPrev >> 16;	//remove prev z-pos from old offset
-
-			//setHashEntry(hash, i, entry);	//sets the current hashEntry with: curr->next = prev->next
-			d_hash[i] = entry;
-			return true;
-		}
-
-		maxIter++;
-	}
-#endif
-
-	return false;
}*/
-
-
-
-//! deletes a hash entry position for a given sdfBlock index (returns true upon successful deletion; otherwise returns false)
-__device__
-bool HashData::deleteHashEntryElement(const int3& sdfBlock) {
-	uint h = computeHashPos(sdfBlock);	//hash bucket
-	const uint64_t pos = compactPosition(sdfBlock);
-
-	int i = h;
-	int prev = -1;
-	HashEntryHead curr;
-	unsigned int maxIter = 0;
-
-	#pragma unroll 2
-	while (maxIter < COLLISION_LIST_SIZE) {
-		curr = d_hash[i].head;
-
-		//found that dude that we need/want to delete
-		if (curr.pos == pos && curr.offset != FREE_ENTRY) {
-			//int prevValue = 0;
-			//InterlockedExchange(bucketMutex[h], LOCK_ENTRY, prevValue);	//lock the hash bucket
-			int prevValue = atomicExch(&d_hashBucketMutex[i], LOCK_ENTRY);
-			if (prevValue == LOCK_ENTRY)	return false;
-			if (prevValue != LOCK_ENTRY) {
-				prevValue = (prev >= 0) ? 
atomicExch(&d_hashBucketMutex[prev], LOCK_ENTRY) : 0; - if (prevValue == LOCK_ENTRY) return false; - if (prevValue != LOCK_ENTRY) { - //const uint linBlockSize = SDF_BLOCK_SIZE * SDF_BLOCK_SIZE * SDF_BLOCK_SIZE; - //appendHeap(curr.ptr / linBlockSize); - deleteHashEntry(i); - - if (prev >= 0) { - d_hash[prev].head.offset = curr.offset; - } - return true; - } - } - } - - if (curr.offset == 0 || curr.offset == FREE_ENTRY) { //we have found the end of the list - return false; //should actually never happen because we need to find that guy before - } - prev = i; - i += curr.offset; //go to next element in the list - i %= (params().m_hashNumBuckets); //check for overflow - - ++maxIter; - } - - return false; -} \ No newline at end of file
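
For reference, a minimal sketch of how the deleted API was driven from the host once per frame. The wrapper name integrateVoxelFrame is hypothetical; it assumes the hash table has already been allocated, updateConstantHashParams has been called, and the per-camera constants in c_cameras are populated, and it omits error handling:

#include <ftl/voxel_hash.hpp>
#include "compactors.hpp"
#include "integrators.hpp"
#include "garbage.hpp"

// Hypothetical per-frame driver for the deleted voxel-hash pipeline.
void integrateVoxelFrame(ftl::voxhash::HashData &hashData,
                         const ftl::voxhash::HashParams &hashParams,
                         int numcams, cudaStream_t stream) {
	// Gather pointers to every allocated hash entry into d_hashCompactified.
	ftl::cuda::compactifyAllocated(hashData, hashParams, stream);
	// Fuse all cameras' depth maps into the compactified blocks; this also
	// rebuilds each block's voxel mask and its kFlagSurface flag.
	ftl::cuda::integrateDepthMaps(hashData, hashParams, numcams, stream);
	// Record which blocks have uniformly empty/full masks, then unlink every
	// entry that did not receive the surface flag.
	ftl::cuda::garbageCollectIdentify(hashData, hashParams, stream);
	ftl::cuda::garbageCollectFree(hashData, hashParams, stream);
}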