diff --git a/applications/reconstruct/CMakeLists.txt b/applications/reconstruct/CMakeLists.txt index 6518c7bb51b8bb3491c9577c146cd6772c04d217..0b4b0384b668c254d17c3513f2262c8036b4af49 100644 --- a/applications/reconstruct/CMakeLists.txt +++ b/applications/reconstruct/CMakeLists.txt @@ -6,6 +6,7 @@ set(REPSRC src/main.cpp src/voxel_scene.cpp src/scene_rep_hash_sdf.cu + src/compactors.cu src/ray_cast_sdf.cu src/camera_util.cu src/ray_cast_sdf.cpp diff --git a/applications/reconstruct/include/ftl/voxel_scene.hpp b/applications/reconstruct/include/ftl/voxel_scene.hpp index 6b7a6080e1662af27705e3c422de0a16708910f0..913c38e62193d5af2e15bfec5a5c461df29aedfc 100644 --- a/applications/reconstruct/include/ftl/voxel_scene.hpp +++ b/applications/reconstruct/include/ftl/voxel_scene.hpp @@ -49,11 +49,11 @@ class SceneRep : public ftl::Configurable { * Note: lastRigidTransform appears to be the estimated camera pose. * Note: bitMask can be nullptr if not streaming out voxels from GPU */ - void integrate(const Eigen::Matrix4f& lastRigidTransform, const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, unsigned int* d_bitMask); + //void integrate(const Eigen::Matrix4f& lastRigidTransform, const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, unsigned int* d_bitMask); void setLastRigidTransform(const Eigen::Matrix4f& lastRigidTransform); - void setLastRigidTransformAndCompactify(const Eigen::Matrix4f& lastRigidTransform, const DepthCameraData& depthCameraData); + //void setLastRigidTransformAndCompactify(const Eigen::Matrix4f& lastRigidTransform, const DepthCameraData& depthCameraData); const Eigen::Matrix4f getLastRigidTransform() const; @@ -81,9 +81,10 @@ class SceneRep : public ftl::Configurable { void _create(const HashParams& params); void _destroy(); void _alloc(const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, const unsigned int* d_bitMask); - void _compactifyHashEntries(); + void _compactifyVisible(); + void _compactifyAllocated(); void _integrateDepthMap(const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams); - void _garbageCollect(const DepthCameraData& depthCameraData); + void _garbageCollect(); diff --git a/applications/reconstruct/src/compactors.cu b/applications/reconstruct/src/compactors.cu new file mode 100644 index 0000000000000000000000000000000000000000..3f350ca6be27e080e2ea99f3a6a1225a5ed84521 --- /dev/null +++ b/applications/reconstruct/src/compactors.cu @@ -0,0 +1,180 @@ +#include "compactors.hpp" + +using ftl::voxhash::HashData; +using ftl::voxhash::HashParams; +using ftl::voxhash::Voxel; +using ftl::voxhash::HashEntry; +using ftl::voxhash::FREE_ENTRY; + +#define COMPACTIFY_HASH_THREADS_PER_BLOCK 256 +//#define COMPACTIFY_HASH_SIMPLE + + +/*__global__ void fillDecisionArrayKernel(HashData hashData, DepthCameraData depthCameraData) +{ + const HashParams& hashParams = c_hashParams; + const unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x; + + if (idx < hashParams.m_hashNumBuckets * HASH_BUCKET_SIZE) { + hashData.d_hashDecision[idx] = 0; + if (hashData.d_hash[idx].ptr != FREE_ENTRY) { + if (hashData.isSDFBlockInCameraFrustumApprox(hashData.d_hash[idx].pos)) { + hashData.d_hashDecision[idx] = 1; //yes + } + } + } +}*/ + +/*extern "C" void fillDecisionArrayCUDA(HashData& hashData, const HashParams& hashParams, const DepthCameraData& depthCameraData) +{ + const dim3 gridSize((HASH_BUCKET_SIZE * hashParams.m_hashNumBuckets + (T_PER_BLOCK*T_PER_BLOCK) - 
1)/(T_PER_BLOCK*T_PER_BLOCK), 1); + const dim3 blockSize((T_PER_BLOCK*T_PER_BLOCK), 1); + + fillDecisionArrayKernel<<<gridSize, blockSize>>>(hashData, depthCameraData); + +#ifdef _DEBUG + cudaSafeCall(cudaDeviceSynchronize()); + //cutilCheckMsg(__FUNCTION__); +#endif + +}*/ + +/*__global__ void compactifyHashKernel(HashData hashData) +{ + const HashParams& hashParams = c_hashParams; + const unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < hashParams.m_hashNumBuckets * HASH_BUCKET_SIZE) { + if (hashData.d_hashDecision[idx] == 1) { + hashData.d_hashCompactified[hashData.d_hashDecisionPrefix[idx]-1] = hashData.d_hash[idx]; + } + } +}*/ + +/*extern "C" void compactifyHashCUDA(HashData& hashData, const HashParams& hashParams) +{ + const dim3 gridSize((HASH_BUCKET_SIZE * hashParams.m_hashNumBuckets + (T_PER_BLOCK*T_PER_BLOCK) - 1)/(T_PER_BLOCK*T_PER_BLOCK), 1); + const dim3 blockSize((T_PER_BLOCK*T_PER_BLOCK), 1); + + compactifyHashKernel<<<gridSize, blockSize>>>(hashData); + +#ifdef _DEBUG + cudaSafeCall(cudaDeviceSynchronize()); + //cutilCheckMsg(__FUNCTION__); +#endif +}*/ + +__global__ void compactifyVisibleKernel(HashData hashData) +{ + const HashParams& hashParams = c_hashParams; + const unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x; +#ifdef COMPACTIFY_HASH_SIMPLE + if (idx < hashParams.m_hashNumBuckets * HASH_BUCKET_SIZE) { + if (hashData.d_hash[idx].ptr != FREE_ENTRY) { + if (hashData.isSDFBlockInCameraFrustumApprox(hashData.d_hash[idx].pos)) + { + int addr = atomicAdd(hashData.d_hashCompactifiedCounter, 1); + hashData.d_hashCompactified[addr] = hashData.d_hash[idx]; + } + } + } +#else + __shared__ int localCounter; + if (threadIdx.x == 0) localCounter = 0; + __syncthreads(); + + int addrLocal = -1; + if (idx < hashParams.m_hashNumBuckets * HASH_BUCKET_SIZE) { + if (hashData.d_hash[idx].ptr != FREE_ENTRY) { + if (hashData.isSDFBlockInCameraFrustumApprox(hashData.d_hash[idx].pos)) + { + addrLocal = atomicAdd(&localCounter, 1); + } + } + } + + __syncthreads(); + + __shared__ int addrGlobal; + if (threadIdx.x == 0 && localCounter > 0) { + addrGlobal = atomicAdd(hashData.d_hashCompactifiedCounter, localCounter); + } + __syncthreads(); + + if (addrLocal != -1) { + const unsigned int addr = addrGlobal + addrLocal; + hashData.d_hashCompactified[addr] = hashData.d_hash[idx]; + } +#endif +} + +unsigned int ftl::cuda::compactifyVisible(HashData& hashData, const HashParams& hashParams) { + const unsigned int threadsPerBlock = COMPACTIFY_HASH_THREADS_PER_BLOCK; + const dim3 gridSize((HASH_BUCKET_SIZE * hashParams.m_hashNumBuckets + threadsPerBlock - 1) / threadsPerBlock, 1); + const dim3 blockSize(threadsPerBlock, 1); + + cudaSafeCall(cudaMemset(hashData.d_hashCompactifiedCounter, 0, sizeof(int))); + compactifyVisibleKernel << <gridSize, blockSize >> >(hashData); + unsigned int res = 0; + cudaSafeCall(cudaMemcpy(&res, hashData.d_hashCompactifiedCounter, sizeof(unsigned int), cudaMemcpyDeviceToHost)); + +#ifdef _DEBUG + cudaSafeCall(cudaDeviceSynchronize()); + //cutilCheckMsg(__FUNCTION__); +#endif + return res; +} + +__global__ void compactifyAllocatedKernel(HashData hashData) +{ + const HashParams& hashParams = c_hashParams; + const unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x; +#ifdef COMPACTIFY_HASH_SIMPLE + if (idx < hashParams.m_hashNumBuckets * HASH_BUCKET_SIZE) { + if (hashData.d_hash[idx].ptr != FREE_ENTRY) { + int addr = atomicAdd(hashData.d_hashCompactifiedCounter, 1); + hashData.d_hashCompactified[addr] = hashData.d_hash[idx]; + } + } +#else + 
__shared__ int localCounter; + if (threadIdx.x == 0) localCounter = 0; + __syncthreads(); + + int addrLocal = -1; + if (idx < hashParams.m_hashNumBuckets * HASH_BUCKET_SIZE) { + if (hashData.d_hash[idx].ptr != FREE_ENTRY) { + addrLocal = atomicAdd(&localCounter, 1); + } + } + + __syncthreads(); + + __shared__ int addrGlobal; + if (threadIdx.x == 0 && localCounter > 0) { + addrGlobal = atomicAdd(hashData.d_hashCompactifiedCounter, localCounter); + } + __syncthreads(); + + if (addrLocal != -1) { + const unsigned int addr = addrGlobal + addrLocal; + hashData.d_hashCompactified[addr] = hashData.d_hash[idx]; + } +#endif +} + +unsigned int ftl::cuda::compactifyAllocated(HashData& hashData, const HashParams& hashParams) { + const unsigned int threadsPerBlock = COMPACTIFY_HASH_THREADS_PER_BLOCK; + const dim3 gridSize((HASH_BUCKET_SIZE * hashParams.m_hashNumBuckets + threadsPerBlock - 1) / threadsPerBlock, 1); + const dim3 blockSize(threadsPerBlock, 1); + + cudaSafeCall(cudaMemset(hashData.d_hashCompactifiedCounter, 0, sizeof(int))); + compactifyAllocatedKernel << <gridSize, blockSize >> >(hashData); + unsigned int res = 0; + cudaSafeCall(cudaMemcpy(&res, hashData.d_hashCompactifiedCounter, sizeof(unsigned int), cudaMemcpyDeviceToHost)); + +#ifdef _DEBUG + cudaSafeCall(cudaDeviceSynchronize()); + //cutilCheckMsg(__FUNCTION__); +#endif + return res; +} diff --git a/applications/reconstruct/src/compactors.hpp b/applications/reconstruct/src/compactors.hpp new file mode 100644 index 0000000000000000000000000000000000000000..9fb961d6c809b21d570175fc9b2e6fc506463fb8 --- /dev/null +++ b/applications/reconstruct/src/compactors.hpp @@ -0,0 +1,20 @@ +#ifndef _FTL_RECONSTRUCT_COMPACTORS_HPP_ +#define _FTL_RECONSTRUCT_COMPACTORS_HPP_ + +#include <ftl/voxel_hash.hpp> + +namespace ftl { +namespace cuda { + +// Compact visible +unsigned int compactifyVisible(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams); + +// Compact allocated +unsigned int compactifyAllocated(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams); + +// Compact visible surfaces + +} +} + +#endif // _FTL_RECONSTRUCT_COMPACTORS_HPP_ diff --git a/applications/reconstruct/src/main.cpp b/applications/reconstruct/src/main.cpp index de5cc68756bd3b9bff75fb6f591e665d8a621aa7..cfd8be7823e840e4ae66ecc9bf9eeda7c7ea7b33 100644 --- a/applications/reconstruct/src/main.cpp +++ b/applications/reconstruct/src/main.cpp @@ -195,8 +195,8 @@ static void run(ftl::Configurable *root) { stream->wait(); scene->integrate(); + scene->garbage(); - // scene->garbage(); } else { active = 1; } diff --git a/applications/reconstruct/src/ray_cast_sdf.cpp b/applications/reconstruct/src/ray_cast_sdf.cpp index 0f792b4c3608b1c113631bdb9b791595bd50e007..97d75dee7ccd3ec50ce6fef4dac469799f57e60d 100644 --- a/applications/reconstruct/src/ray_cast_sdf.cpp +++ b/applications/reconstruct/src/ray_cast_sdf.cpp @@ -1,6 +1,7 @@ //#include <stdafx.h> #include <ftl/voxel_hash.hpp> +#include "compactors.hpp" //#include "Util.h" @@ -36,12 +37,12 @@ void CUDARayCastSDF::destroy(void) //m_rayIntervalSplatting.OnD3D11DestroyDevice(); } -extern "C" unsigned int compactifyHashAllInOneCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams); +//extern "C" unsigned int compactifyHashAllInOneCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams); void CUDARayCastSDF::compactifyHashEntries(ftl::voxhash::HashData& hashData, ftl::voxhash::HashParams& hashParams) { //const DepthCameraData& 
depthCameraData) { - hashParams.m_numOccupiedBlocks = compactifyHashAllInOneCUDA(hashData, hashParams); //this version uses atomics over prefix sums, which has a much better performance + hashParams.m_numOccupiedBlocks = ftl::cuda::compactifyVisible(hashData, hashParams); //this version uses atomics over prefix sums, which has a much better performance std::cout << "Ray blocks = " << hashParams.m_numOccupiedBlocks << std::endl; hashData.updateParams(hashParams); //make sure numOccupiedBlocks is updated on the GPU } diff --git a/applications/reconstruct/src/scene_rep_hash_sdf.cu b/applications/reconstruct/src/scene_rep_hash_sdf.cu index c29e66b16e856857f6a3136869df742851dc0617..8a7c0b25a62e089a84121c809fa07e202ea74559 100644 --- a/applications/reconstruct/src/scene_rep_hash_sdf.cu +++ b/applications/reconstruct/src/scene_rep_hash_sdf.cu @@ -322,126 +322,6 @@ extern "C" void allocCUDA(HashData& hashData, const HashParams& hashParams, cons #endif } - - -__global__ void fillDecisionArrayKernel(HashData hashData, DepthCameraData depthCameraData) -{ - const HashParams& hashParams = c_hashParams; - const unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x; - - if (idx < hashParams.m_hashNumBuckets * HASH_BUCKET_SIZE) { - hashData.d_hashDecision[idx] = 0; - if (hashData.d_hash[idx].ptr != FREE_ENTRY) { - if (hashData.isSDFBlockInCameraFrustumApprox(hashData.d_hash[idx].pos)) { - hashData.d_hashDecision[idx] = 1; //yes - } - } - } -} - -extern "C" void fillDecisionArrayCUDA(HashData& hashData, const HashParams& hashParams, const DepthCameraData& depthCameraData) -{ - const dim3 gridSize((HASH_BUCKET_SIZE * hashParams.m_hashNumBuckets + (T_PER_BLOCK*T_PER_BLOCK) - 1)/(T_PER_BLOCK*T_PER_BLOCK), 1); - const dim3 blockSize((T_PER_BLOCK*T_PER_BLOCK), 1); - - fillDecisionArrayKernel<<<gridSize, blockSize>>>(hashData, depthCameraData); - -#ifdef _DEBUG - cudaSafeCall(cudaDeviceSynchronize()); - //cutilCheckMsg(__FUNCTION__); -#endif - -} - -__global__ void compactifyHashKernel(HashData hashData) -{ - const HashParams& hashParams = c_hashParams; - const unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x; - if (idx < hashParams.m_hashNumBuckets * HASH_BUCKET_SIZE) { - if (hashData.d_hashDecision[idx] == 1) { - hashData.d_hashCompactified[hashData.d_hashDecisionPrefix[idx]-1] = hashData.d_hash[idx]; - } - } -} - -extern "C" void compactifyHashCUDA(HashData& hashData, const HashParams& hashParams) -{ - const dim3 gridSize((HASH_BUCKET_SIZE * hashParams.m_hashNumBuckets + (T_PER_BLOCK*T_PER_BLOCK) - 1)/(T_PER_BLOCK*T_PER_BLOCK), 1); - const dim3 blockSize((T_PER_BLOCK*T_PER_BLOCK), 1); - - compactifyHashKernel<<<gridSize, blockSize>>>(hashData); - -#ifdef _DEBUG - cudaSafeCall(cudaDeviceSynchronize()); - //cutilCheckMsg(__FUNCTION__); -#endif -} - - -#define COMPACTIFY_HASH_THREADS_PER_BLOCK 256 -//#define COMPACTIFY_HASH_SIMPLE -__global__ void compactifyHashAllInOneKernel(HashData hashData) -{ - const HashParams& hashParams = c_hashParams; - const unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x; -#ifdef COMPACTIFY_HASH_SIMPLE - if (idx < hashParams.m_hashNumBuckets * HASH_BUCKET_SIZE) { - if (hashData.d_hash[idx].ptr != FREE_ENTRY) { - if (hashData.isSDFBlockInCameraFrustumApprox(hashData.d_hash[idx].pos)) - { - int addr = atomicAdd(hashData.d_hashCompactifiedCounter, 1); - hashData.d_hashCompactified[addr] = hashData.d_hash[idx]; - } - } - } -#else - __shared__ int localCounter; - if (threadIdx.x == 0) localCounter = 0; - __syncthreads(); - - int addrLocal = -1; - if (idx < 
hashParams.m_hashNumBuckets * HASH_BUCKET_SIZE) { - if (hashData.d_hash[idx].ptr != FREE_ENTRY) { - if (hashData.isSDFBlockInCameraFrustumApprox(hashData.d_hash[idx].pos)) - { - addrLocal = atomicAdd(&localCounter, 1); - } - } - } - - __syncthreads(); - - __shared__ int addrGlobal; - if (threadIdx.x == 0 && localCounter > 0) { - addrGlobal = atomicAdd(hashData.d_hashCompactifiedCounter, localCounter); - } - __syncthreads(); - - if (addrLocal != -1) { - const unsigned int addr = addrGlobal + addrLocal; - hashData.d_hashCompactified[addr] = hashData.d_hash[idx]; - } -#endif -} - -extern "C" unsigned int compactifyHashAllInOneCUDA(HashData& hashData, const HashParams& hashParams) -{ - const unsigned int threadsPerBlock = COMPACTIFY_HASH_THREADS_PER_BLOCK; - const dim3 gridSize((HASH_BUCKET_SIZE * hashParams.m_hashNumBuckets + threadsPerBlock - 1) / threadsPerBlock, 1); - const dim3 blockSize(threadsPerBlock, 1); - - cudaSafeCall(cudaMemset(hashData.d_hashCompactifiedCounter, 0, sizeof(int))); - compactifyHashAllInOneKernel << <gridSize, blockSize >> >(hashData); - unsigned int res = 0; - cudaSafeCall(cudaMemcpy(&res, hashData.d_hashCompactifiedCounter, sizeof(unsigned int), cudaMemcpyDeviceToHost)); - -#ifdef _DEBUG - cudaSafeCall(cudaDeviceSynchronize()); - //cutilCheckMsg(__FUNCTION__); -#endif - return res; -} - __device__ float4 make_float4(uchar4 c) { return make_float4(static_cast<float>(c.x), static_cast<float>(c.y), static_cast<float>(c.z), static_cast<float>(c.w)); } diff --git a/applications/reconstruct/src/voxel_scene.cpp b/applications/reconstruct/src/voxel_scene.cpp index 78b981c6e3f04995bab30b6f74750172c2c56a5a..445c6b9f637e815726edec4d4b3d550bcdb02260 100644 --- a/applications/reconstruct/src/voxel_scene.cpp +++ b/applications/reconstruct/src/voxel_scene.cpp @@ -1,4 +1,5 @@ #include <ftl/voxel_scene.hpp> +#include "compactors.hpp" using namespace ftl::voxhash; using ftl::rgbd::Source; @@ -10,11 +11,11 @@ using cv::Mat; extern "C" void resetCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams); extern "C" void resetHashBucketMutexCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams); extern "C" void allocCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, const unsigned int* d_bitMask); -extern "C" void fillDecisionArrayCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const DepthCameraData& depthCameraData); -extern "C" void compactifyHashCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams); -extern "C" unsigned int compactifyHashAllInOneCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams); +//extern "C" void fillDecisionArrayCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const DepthCameraData& depthCameraData); +//extern "C" void compactifyHashCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams); +//extern "C" unsigned int compactifyHashAllInOneCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams); extern "C" void integrateDepthMapCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams); -extern "C" void bindInputDepthColorTextures(const DepthCameraData& depthCameraData); +//extern "C" void 
bindInputDepthColorTextures(const DepthCameraData& depthCameraData); extern "C" void starveVoxelsKernelCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams); extern "C" void garbageCollectIdentifyCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams); @@ -128,7 +129,7 @@ void SceneRep::integrate() { m_hashData.updateParams(m_hashParams); //generate a linear hash array with only occupied entries - _compactifyHashEntries(); + _compactifyVisible(); //volumetrically integrate the depth data into the depth SDFBlocks _integrateDepthMap(cam.gpu, cam.params); @@ -139,7 +140,13 @@ void SceneRep::integrate() { } } -void SceneRep::integrate(const Eigen::Matrix4f& lastRigidTransform, const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, unsigned int* d_bitMask) { +void SceneRep::garbage() { + _compactifyAllocated(); + _garbageCollect(); + +} + +/*void SceneRep::integrate(const Eigen::Matrix4f& lastRigidTransform, const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, unsigned int* d_bitMask) { setLastRigidTransform(lastRigidTransform); @@ -158,17 +165,17 @@ void SceneRep::integrate(const Eigen::Matrix4f& lastRigidTransform, const DepthC _garbageCollect(depthCameraData); m_numIntegratedFrames++; -} +}*/ void SceneRep::setLastRigidTransform(const Eigen::Matrix4f& lastRigidTransform) { m_hashParams.m_rigidTransform = MatrixConversion::toCUDA(lastRigidTransform); m_hashParams.m_rigidTransformInverse = m_hashParams.m_rigidTransform.getInverse(); } -void SceneRep::setLastRigidTransformAndCompactify(const Eigen::Matrix4f& lastRigidTransform, const DepthCameraData& depthCameraData) { +/*void SceneRep::setLastRigidTransformAndCompactify(const Eigen::Matrix4f& lastRigidTransform, const DepthCameraData& depthCameraData) { setLastRigidTransform(lastRigidTransform); _compactifyHashEntries(); -} +}*/ const Eigen::Matrix4f SceneRep::getLastRigidTransform() const { @@ -376,8 +383,13 @@ void SceneRep::_alloc(const DepthCameraData& depthCameraData, const DepthCameraP } -void SceneRep::_compactifyHashEntries() { //const DepthCameraData& depthCameraData) { - m_hashParams.m_numOccupiedBlocks = compactifyHashAllInOneCUDA(m_hashData, m_hashParams); //this version uses atomics over prefix sums, which has a much better performance +void SceneRep::_compactifyVisible() { //const DepthCameraData& depthCameraData) { + m_hashParams.m_numOccupiedBlocks = ftl::cuda::compactifyVisible(m_hashData, m_hashParams); //this version uses atomics over prefix sums, which has a much better performance + m_hashData.updateParams(m_hashParams); //make sure numOccupiedBlocks is updated on the GPU +} + +void SceneRep::_compactifyAllocated() { + m_hashParams.m_numOccupiedBlocks = ftl::cuda::compactifyAllocated(m_hashData, m_hashParams); //this version uses atomics over prefix sums, which has a much better performance std::cout << "Occ blocks = " << m_hashParams.m_numOccupiedBlocks << std::endl; m_hashData.updateParams(m_hashParams); //make sure numOccupiedBlocks is updated on the GPU } @@ -386,7 +398,7 @@ void SceneRep::_integrateDepthMap(const DepthCameraData& depthCameraData, const integrateDepthMapCUDA(m_hashData, m_hashParams, depthCameraData, depthCameraParams); } -void SceneRep::_garbageCollect(const DepthCameraData& depthCameraData) { +void SceneRep::_garbageCollect() { garbageCollectIdentifyCUDA(m_hashData, m_hashParams); resetHashBucketMutexCUDA(m_hashData, m_hashParams); //needed if linked lists are enabled -> for memeory 
deletion garbageCollectFreeCUDA(m_hashData, m_hashParams);
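
Note on the compaction change (not part of the patch itself): the old all-in-one compactifyHashAllInOneCUDA is split into two passes, ftl::cuda::compactifyVisible (frustum-culled entries, used before depth integration in SceneRep::integrate() and before ray casting in CUDARayCastSDF::compactifyHashEntries) and ftl::cuda::compactifyAllocated (every allocated entry, used by the new SceneRep::garbage() step, presumably so garbage collection also reaches blocks outside the current view frustum). Both kernels use the same two-level atomic compaction pattern. The sketch below is a minimal standalone illustration of that pattern under assumed names: Entry, keep() and compactKernel are illustrative stand-ins for HashEntry, the FREE_ENTRY / frustum test and the real kernels in compactors.cu.

    #include <cuda_runtime.h>

    struct Entry { int ptr; };                       // stand-in for ftl::voxhash::HashEntry

    __device__ bool keep(const Entry &e) {
        return e.ptr != -1;                          // stand-in for the FREE_ENTRY / frustum check
    }

    __global__ void compactKernel(const Entry *in, Entry *out, int *outCounter, int n) {
        const int idx = blockIdx.x * blockDim.x + threadIdx.x;

        __shared__ int localCounter;                 // per-block count of surviving entries
        __shared__ int baseAddr;                     // output offset reserved for this block
        if (threadIdx.x == 0) localCounter = 0;
        __syncthreads();

        // Each surviving thread claims a slot with a cheap shared-memory atomic.
        int addrLocal = -1;
        if (idx < n && keep(in[idx])) {
            addrLocal = atomicAdd(&localCounter, 1);
        }
        __syncthreads();

        // One global atomic per block reserves a contiguous range in the output.
        if (threadIdx.x == 0 && localCounter > 0) {
            baseAddr = atomicAdd(outCounter, localCounter);
        }
        __syncthreads();

        if (addrLocal != -1) {
            out[baseAddr + addrLocal] = in[idx];
        }
    }

    // Host-side usage sketch (d_in, d_out, d_counter assumed allocated with cudaMalloc):
    //   cudaMemset(d_counter, 0, sizeof(int));
    //   compactKernel<<<(n + 255) / 256, 256>>>(d_in, d_out, d_counter, n);
    //   cudaMemcpy(&count, d_counter, sizeof(int), cudaMemcpyDeviceToHost);

Aggregating slot requests in shared memory keeps per-thread atomics off global memory and issues only one global atomicAdd per block, which is what the in-code comments mean by the atomic variant performing much better than the decision-array / prefix-sum path (fillDecisionArrayCUDA, compactifyHashCUDA) that this patch removes.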