diff --git a/applications/reconstruct/CMakeLists.txt b/applications/reconstruct/CMakeLists.txt index 409a425e425adadb20a340006d2e5f8d41550893..0b4b0384b668c254d17c3513f2262c8036b4af49 100644 --- a/applications/reconstruct/CMakeLists.txt +++ b/applications/reconstruct/CMakeLists.txt @@ -4,7 +4,9 @@ set(REPSRC src/main.cpp + src/voxel_scene.cpp src/scene_rep_hash_sdf.cu + src/compactors.cu src/ray_cast_sdf.cu src/camera_util.cu src/ray_cast_sdf.cpp diff --git a/applications/reconstruct/include/ftl/scene_rep_hash_sdf.hpp b/applications/reconstruct/include/ftl/scene_rep_hash_sdf.hpp deleted file mode 100644 index fa0d6f46ed5c344e113b63ec79e875d2bef5b40e..0000000000000000000000000000000000000000 --- a/applications/reconstruct/include/ftl/scene_rep_hash_sdf.hpp +++ /dev/null @@ -1,412 +0,0 @@ -// From: https://github.com/niessner/VoxelHashing/blob/master/DepthSensingCUDA/Source/CUDASceneRepHashSDF.h - -#pragma once - -#include <cuda_runtime.h> - -#include <ftl/configurable.hpp> -#include <ftl/matrix_conversion.hpp> -#include <ftl/voxel_hash.hpp> -#include <ftl/depth_camera.hpp> -#include <unordered_set> -//#include "CUDAScan.h" -// #include "CUDATimer.h" - -// #include "GlobalAppState.h" -// #include "TimingLog.h" - -#define SAFE_DELETE_ARRAY(a) { delete [] (a); (a) = NULL; } - -extern "C" void resetCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams); -extern "C" void resetHashBucketMutexCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams); -extern "C" void allocCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, const unsigned int* d_bitMask); -extern "C" void fillDecisionArrayCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const DepthCameraData& depthCameraData); -extern "C" void compactifyHashCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams); -extern "C" unsigned int compactifyHashAllInOneCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams); -extern "C" void integrateDepthMapCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams); -extern "C" void bindInputDepthColorTextures(const DepthCameraData& depthCameraData); - -extern "C" void starveVoxelsKernelCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams); -extern "C" void garbageCollectIdentifyCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams); -extern "C" void garbageCollectFreeCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams); - -namespace ftl { -namespace voxhash { - -class SceneRep : public ftl::Configurable { - public: - SceneRep(nlohmann::json &config) : Configurable(config), do_reset_(false) { - REQUIRED({ - {"hashNumBuckets", "Desired hash entries divide bucket size", "number"}, - {"hashMaxCollisionLinkedListSize", "", "number"}, - {"hashNumSDFBlocks", "", "number"}, - {"SDFVoxelSize", "Size in meters of one voxel", "number"}, - {"SDFMaxIntegrationDistance", "", "number"}, - {"SDFTruncation", "Base error size", "number"}, - {"SDFTruncationScale", "Error size scale with depth", "number"}, - {"SDFIntegrationWeightSample", "", "number"}, - {"SDFIntegrationWeightMax", "", "number"} - }); - create(parametersFromConfig()); - - on("SDFVoxelSize", [this](const ftl::config::Event &e) { - do_reset_ = true; - }); - on("hashNumSDFBlocks", [this](const ftl::config::Event &e) { - do_reset_ = true; - }); - on("hashNumBuckets", [this](const ftl::config::Event &e) { - do_reset_ = true; - }); - on("hashMaxCollisionLinkedListSize", [this](const ftl::config::Event &e) { - do_reset_ = true; - }); - on("SDFTruncation", [this](const ftl::config::Event &e) { - m_hashParams.m_truncation = value("SDFTruncation", 0.1f); - }); - on("SDFTruncationScale", [this](const ftl::config::Event &e) { - m_hashParams.m_truncScale = value("SDFTruncationScale", 0.01f); - }); - on("SDFMaxIntegrationDistance", [this](const ftl::config::Event &e) { - m_hashParams.m_maxIntegrationDistance = value("SDFMaxIntegrationDistance", 10.0f); - }); - } - ~SceneRep() { - destroy(); - } - - HashParams parametersFromConfig() { - //auto &cfg = ftl::config::resolve(config); - HashParams params; - // First camera view is set to identity pose to be at the centre of - // the virtual coordinate space. - params.m_rigidTransform.setIdentity(); - params.m_rigidTransformInverse.setIdentity(); - params.m_hashNumBuckets = value("hashNumBuckets", 100000); - params.m_hashBucketSize = HASH_BUCKET_SIZE; - params.m_hashMaxCollisionLinkedListSize = value("hashMaxCollisionLinkedListSize", 7); - params.m_SDFBlockSize = SDF_BLOCK_SIZE; - params.m_numSDFBlocks = value("hashNumSDFBlocks",500000); - params.m_virtualVoxelSize = value("SDFVoxelSize", 0.006f); - params.m_maxIntegrationDistance = value("SDFMaxIntegrationDistance", 10.0f); - params.m_truncation = value("SDFTruncation", 0.1f); - params.m_truncScale = value("SDFTruncationScale", 0.01f); - params.m_integrationWeightSample = value("SDFIntegrationWeightSample", 10); - params.m_integrationWeightMax = value("SDFIntegrationWeightMax", 255); - // Note (Nick): We are not streaming voxels in/out of GPU - //params.m_streamingVoxelExtents = MatrixConversion::toCUDA(gas.s_streamingVoxelExtents); - //params.m_streamingGridDimensions = MatrixConversion::toCUDA(gas.s_streamingGridDimensions); - //params.m_streamingMinGridPos = MatrixConversion::toCUDA(gas.s_streamingMinGridPos); - //params.m_streamingInitialChunkListSize = gas.s_streamingInitialChunkListSize; - return params; - } - - void bindDepthCameraTextures(const DepthCameraData& depthCameraData) { - //bindInputDepthColorTextures(depthCameraData); - } - - /** - * Note: lastRigidTransform appears to be the estimated camera pose. - * Note: bitMask can be nullptr if not streaming out voxels from GPU - */ - void integrate(const Eigen::Matrix4f& lastRigidTransform, const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, unsigned int* d_bitMask) { - - setLastRigidTransform(lastRigidTransform); - - //make the rigid transform available on the GPU - m_hashData.updateParams(m_hashParams); - - //allocate all hash blocks which are corresponding to depth map entries - alloc(depthCameraData, depthCameraParams, d_bitMask); - - //generate a linear hash array with only occupied entries - compactifyHashEntries(); - - //volumetrically integrate the depth data into the depth SDFBlocks - integrateDepthMap(depthCameraData, depthCameraParams); - - garbageCollect(depthCameraData); - - m_numIntegratedFrames++; - } - - void setLastRigidTransform(const Eigen::Matrix4f& lastRigidTransform) { - m_hashParams.m_rigidTransform = MatrixConversion::toCUDA(lastRigidTransform); - m_hashParams.m_rigidTransformInverse = m_hashParams.m_rigidTransform.getInverse(); - } - - void setLastRigidTransformAndCompactify(const Eigen::Matrix4f& lastRigidTransform, const DepthCameraData& depthCameraData) { - setLastRigidTransform(lastRigidTransform); - compactifyHashEntries(); - } - - - const Eigen::Matrix4f getLastRigidTransform() const { - return MatrixConversion::toEigen(m_hashParams.m_rigidTransform); - } - - /* Nick: To reduce weights between frames */ - void nextFrame() { - if (do_reset_) { - do_reset_ = false; - destroy(); - create(parametersFromConfig()); - } else { - starveVoxelsKernelCUDA(m_hashData, m_hashParams); - m_numIntegratedFrames = 0; - } - } - - //! resets the hash to the initial state (i.e., clears all data) - void reset() { - m_numIntegratedFrames = 0; - - m_hashParams.m_rigidTransform.setIdentity(); - m_hashParams.m_rigidTransformInverse.setIdentity(); - m_hashParams.m_numOccupiedBlocks = 0; - m_hashData.updateParams(m_hashParams); - resetCUDA(m_hashData, m_hashParams); - } - - - ftl::voxhash::HashData& getHashData() { - return m_hashData; - } - - HashParams& getHashParams() { - return m_hashParams; - } - - - //! debug only! - unsigned int getHeapFreeCount() { - unsigned int count; - cudaSafeCall(cudaMemcpy(&count, m_hashData.d_heapCounter, sizeof(unsigned int), cudaMemcpyDeviceToHost)); - return count+1; //there is one more free than the address suggests (0 would be also a valid address) - } - - //! debug only! - void debugHash() { - HashEntry* hashCPU = new HashEntry[m_hashParams.m_hashBucketSize*m_hashParams.m_hashNumBuckets]; - unsigned int* heapCPU = new unsigned int[m_hashParams.m_numSDFBlocks]; - unsigned int heapCounterCPU; - - cudaSafeCall(cudaMemcpy(&heapCounterCPU, m_hashData.d_heapCounter, sizeof(unsigned int), cudaMemcpyDeviceToHost)); - heapCounterCPU++; //points to the first free entry: number of blocks is one more - - cudaSafeCall(cudaMemcpy(heapCPU, m_hashData.d_heap, sizeof(unsigned int)*m_hashParams.m_numSDFBlocks, cudaMemcpyDeviceToHost)); - cudaSafeCall(cudaMemcpy(hashCPU, m_hashData.d_hash, sizeof(HashEntry)*m_hashParams.m_hashBucketSize*m_hashParams.m_hashNumBuckets, cudaMemcpyDeviceToHost)); - - //Check for duplicates - class myint3Voxel { - public: - myint3Voxel() {} - ~myint3Voxel() {} - bool operator<(const myint3Voxel& other) const { - if (x == other.x) { - if (y == other.y) { - return z < other.z; - } - return y < other.y; - } - return x < other.x; - } - - bool operator==(const myint3Voxel& other) const { - return x == other.x && y == other.y && z == other.z; - } - - int x,y,z, i; - int offset; - int ptr; - }; - - - std::unordered_set<unsigned int> pointersFreeHash; - std::vector<int> pointersFreeVec(m_hashParams.m_numSDFBlocks, 0); // CHECK Nick Changed to int from unsigned in - for (unsigned int i = 0; i < heapCounterCPU; i++) { - pointersFreeHash.insert(heapCPU[i]); - pointersFreeVec[heapCPU[i]] = FREE_ENTRY; - } - if (pointersFreeHash.size() != heapCounterCPU) { - throw std::runtime_error("ERROR: duplicate free pointers in heap array"); - } - - - unsigned int numOccupied = 0; - unsigned int numMinusOne = 0; - //unsigned int listOverallFound = 0; - - std::list<myint3Voxel> l; - //std::vector<myint3Voxel> v; - - for (unsigned int i = 0; i < m_hashParams.m_hashBucketSize*m_hashParams.m_hashNumBuckets; i++) { - if (hashCPU[i].ptr == -1) { - numMinusOne++; - } - - if (hashCPU[i].ptr != -2) { - numOccupied++; // != FREE_ENTRY - myint3Voxel a; - a.x = hashCPU[i].pos.x; - a.y = hashCPU[i].pos.y; - a.z = hashCPU[i].pos.z; - l.push_back(a); - //v.push_back(a); - - unsigned int linearBlockSize = m_hashParams.m_SDFBlockSize*m_hashParams.m_SDFBlockSize*m_hashParams.m_SDFBlockSize; - if (pointersFreeHash.find(hashCPU[i].ptr / linearBlockSize) != pointersFreeHash.end()) { - throw std::runtime_error("ERROR: ptr is on free heap, but also marked as an allocated entry"); - } - pointersFreeVec[hashCPU[i].ptr / linearBlockSize] = LOCK_ENTRY; - } - } - - unsigned int numHeapFree = 0; - unsigned int numHeapOccupied = 0; - for (unsigned int i = 0; i < m_hashParams.m_numSDFBlocks; i++) { - if (pointersFreeVec[i] == FREE_ENTRY) numHeapFree++; - else if (pointersFreeVec[i] == LOCK_ENTRY) numHeapOccupied++; - else { - throw std::runtime_error("memory leak detected: neither free nor allocated"); - } - } - if (numHeapFree + numHeapOccupied == m_hashParams.m_numSDFBlocks) std::cout << "HEAP OK!" << std::endl; - else throw std::runtime_error("HEAP CORRUPTED"); - - l.sort(); - size_t sizeBefore = l.size(); - l.unique(); - size_t sizeAfter = l.size(); - - - std::cout << "diff: " << sizeBefore - sizeAfter << std::endl; - std::cout << "minOne: " << numMinusOne << std::endl; - std::cout << "numOccupied: " << numOccupied << "\t numFree: " << getHeapFreeCount() << std::endl; - std::cout << "numOccupied + free: " << numOccupied + getHeapFreeCount() << std::endl; - std::cout << "numInFrustum: " << m_hashParams.m_numOccupiedBlocks << std::endl; - - SAFE_DELETE_ARRAY(heapCPU); - SAFE_DELETE_ARRAY(hashCPU); - - //getchar(); - } -private: - - void create(const HashParams& params) { - m_hashParams = params; - m_hashData.allocate(m_hashParams); - - reset(); - } - - void destroy() { - m_hashData.free(); - } - - void alloc(const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, const unsigned int* d_bitMask) { - //Start Timing - //if (GlobalAppState::get().s_timingsDetailledEnabled) { cutilSafeCall(cudaDeviceSynchronize()); m_timer.start(); } - - // NOTE (nick): We might want this later... - if (true) { - //allocate until all blocks are allocated - unsigned int prevFree = getHeapFreeCount(); - while (1) { - resetHashBucketMutexCUDA(m_hashData, m_hashParams); - allocCUDA(m_hashData, m_hashParams, depthCameraData, depthCameraParams, d_bitMask); - - unsigned int currFree = getHeapFreeCount(); - - if (prevFree != currFree) { - prevFree = currFree; - } - else { - break; - } - } - } - else { - //this version is faster, but it doesn't guarantee that all blocks are allocated (staggers alloc to the next frame) - resetHashBucketMutexCUDA(m_hashData, m_hashParams); - allocCUDA(m_hashData, m_hashParams, depthCameraData, depthCameraParams, d_bitMask); - } - - - - - // Stop Timing - //if(GlobalAppState::get().s_timingsDetailledEnabled) { cutilSafeCall(cudaDeviceSynchronize()); m_timer.stop(); TimingLog::totalTimeAlloc += m_timer.getElapsedTimeMS(); TimingLog::countTimeAlloc++; } - } - - - void compactifyHashEntries() { //const DepthCameraData& depthCameraData) { - //Start Timing - //if(GlobalAppState::get().s_timingsDetailledEnabled) { cutilSafeCall(cudaDeviceSynchronize()); m_timer.start(); } - - //CUDATimer t; - - //t.startEvent("fillDecisionArray"); - //fillDecisionArrayCUDA(m_hashData, m_hashParams, depthCameraData); - //t.endEvent(); - - //t.startEvent("prefixSum"); - //m_hashParams.m_numOccupiedBlocks = - // m_cudaScan.prefixSum( - // m_hashParams.m_hashNumBuckets*m_hashParams.m_hashBucketSize, - // m_hashData.d_hashDecision, - // m_hashData.d_hashDecisionPrefix); - //t.endEvent(); - - //t.startEvent("compactifyHash"); - //m_hashData.updateParams(m_hashParams); //make sure numOccupiedBlocks is updated on the GPU - //compactifyHashCUDA(m_hashData, m_hashParams); - //t.endEvent(); - - //t.startEvent("compactifyAllInOne"); - m_hashParams.m_numOccupiedBlocks = compactifyHashAllInOneCUDA(m_hashData, m_hashParams); //this version uses atomics over prefix sums, which has a much better performance - std::cout << "Occ blocks = " << m_hashParams.m_numOccupiedBlocks << std::endl; - m_hashData.updateParams(m_hashParams); //make sure numOccupiedBlocks is updated on the GPU - //t.endEvent(); - //t.evaluate(); - - // Stop Timing - //if(GlobalAppState::get().s_timingsDetailledEnabled) { cutilSafeCall(cudaDeviceSynchronize()); m_timer.stop(); TimingLog::totalTimeCompactifyHash += m_timer.getElapsedTimeMS(); TimingLog::countTimeCompactifyHash++; } - - //std::cout << "numOccupiedBlocks: " << m_hashParams.m_numOccupiedBlocks << std::endl; - } - - void integrateDepthMap(const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams) { - //Start Timing - //if(GlobalAppState::get().s_timingsDetailledEnabled) { cutilSafeCall(cudaDeviceSynchronize()); m_timer.start(); } - - integrateDepthMapCUDA(m_hashData, m_hashParams, depthCameraData, depthCameraParams); - - // Stop Timing - //if(GlobalAppState::get().s_timingsDetailledEnabled) { cutilSafeCall(cudaDeviceSynchronize()); m_timer.stop(); TimingLog::totalTimeIntegrate += m_timer.getElapsedTimeMS(); TimingLog::countTimeIntegrate++; } - } - - void garbageCollect(const DepthCameraData& depthCameraData) { - //only perform if enabled by global app state - //if (GlobalAppState::get().s_garbageCollectionEnabled) { - - garbageCollectIdentifyCUDA(m_hashData, m_hashParams); - resetHashBucketMutexCUDA(m_hashData, m_hashParams); //needed if linked lists are enabled -> for memeory deletion - garbageCollectFreeCUDA(m_hashData, m_hashParams); - //} - } - - - - HashParams m_hashParams; - ftl::voxhash::HashData m_hashData; - - //CUDAScan m_cudaScan; - unsigned int m_numIntegratedFrames; //used for garbage collect - bool do_reset_; - - // static Timer m_timer; -}; - -}; // namespace voxhash -}; // namespace ftl diff --git a/applications/reconstruct/include/ftl/voxel_scene.hpp b/applications/reconstruct/include/ftl/voxel_scene.hpp new file mode 100644 index 0000000000000000000000000000000000000000..913c38e62193d5af2e15bfec5a5c461df29aedfc --- /dev/null +++ b/applications/reconstruct/include/ftl/voxel_scene.hpp @@ -0,0 +1,102 @@ +// From: https://github.com/niessner/VoxelHashing/blob/master/DepthSensingCUDA/Source/CUDASceneRepHashSDF.h + +#pragma once + +#include <cuda_runtime.h> + +#include <ftl/rgbd/source.hpp> +#include <ftl/configurable.hpp> +#include <ftl/matrix_conversion.hpp> +#include <ftl/voxel_hash.hpp> +#include <ftl/depth_camera.hpp> +#include <unordered_set> + +namespace ftl { +namespace voxhash { + +struct Cameras { + ftl::rgbd::Source *source; + DepthCameraData gpu; + DepthCameraParams params; +}; + +class SceneRep : public ftl::Configurable { + public: + SceneRep(nlohmann::json &config); + ~SceneRep(); + + void addSource(ftl::rgbd::Source *); + + /** + * Send all camera frames to GPU and allocate required voxels. + */ + int upload(); + + /** + * Merge all camera frames into the voxel hash datastructure. + */ + void integrate(); + + /** + * Remove any voxel blocks that are no longer used. + */ + void garbage(); + + // Mark voxels as surfaces + // void isosurface(); + + /** + * Note: lastRigidTransform appears to be the estimated camera pose. + * Note: bitMask can be nullptr if not streaming out voxels from GPU + */ + //void integrate(const Eigen::Matrix4f& lastRigidTransform, const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, unsigned int* d_bitMask); + + void setLastRigidTransform(const Eigen::Matrix4f& lastRigidTransform); + + //void setLastRigidTransformAndCompactify(const Eigen::Matrix4f& lastRigidTransform, const DepthCameraData& depthCameraData); + + + const Eigen::Matrix4f getLastRigidTransform() const; + + /* Nick: To reduce weights between frames */ + void nextFrame(); + + //! resets the hash to the initial state (i.e., clears all data) + void reset(); + + + ftl::voxhash::HashData& getHashData() { return m_hashData; } + + HashParams& getHashParams() { return m_hashParams; } + + //! debug only! + unsigned int getHeapFreeCount(); + + //! debug only! + void debugHash(); + + private: + + HashParams _parametersFromConfig(); + void _create(const HashParams& params); + void _destroy(); + void _alloc(const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, const unsigned int* d_bitMask); + void _compactifyVisible(); + void _compactifyAllocated(); + void _integrateDepthMap(const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams); + void _garbageCollect(); + + + + HashParams m_hashParams; + ftl::voxhash::HashData m_hashData; + + //CUDAScan m_cudaScan; + unsigned int m_numIntegratedFrames; //used for garbage collect + unsigned int m_frameCount; + bool do_reset_; + std::vector<Cameras> cameras_; +}; + +}; // namespace voxhash +}; // namespace ftl diff --git a/applications/reconstruct/src/compactors.cu b/applications/reconstruct/src/compactors.cu new file mode 100644 index 0000000000000000000000000000000000000000..3f350ca6be27e080e2ea99f3a6a1225a5ed84521 --- /dev/null +++ b/applications/reconstruct/src/compactors.cu @@ -0,0 +1,180 @@ +#include "compactors.hpp" + +using ftl::voxhash::HashData; +using ftl::voxhash::HashParams; +using ftl::voxhash::Voxel; +using ftl::voxhash::HashEntry; +using ftl::voxhash::FREE_ENTRY; + +#define COMPACTIFY_HASH_THREADS_PER_BLOCK 256 +//#define COMPACTIFY_HASH_SIMPLE + + +/*__global__ void fillDecisionArrayKernel(HashData hashData, DepthCameraData depthCameraData) +{ + const HashParams& hashParams = c_hashParams; + const unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x; + + if (idx < hashParams.m_hashNumBuckets * HASH_BUCKET_SIZE) { + hashData.d_hashDecision[idx] = 0; + if (hashData.d_hash[idx].ptr != FREE_ENTRY) { + if (hashData.isSDFBlockInCameraFrustumApprox(hashData.d_hash[idx].pos)) { + hashData.d_hashDecision[idx] = 1; //yes + } + } + } +}*/ + +/*extern "C" void fillDecisionArrayCUDA(HashData& hashData, const HashParams& hashParams, const DepthCameraData& depthCameraData) +{ + const dim3 gridSize((HASH_BUCKET_SIZE * hashParams.m_hashNumBuckets + (T_PER_BLOCK*T_PER_BLOCK) - 1)/(T_PER_BLOCK*T_PER_BLOCK), 1); + const dim3 blockSize((T_PER_BLOCK*T_PER_BLOCK), 1); + + fillDecisionArrayKernel<<<gridSize, blockSize>>>(hashData, depthCameraData); + +#ifdef _DEBUG + cudaSafeCall(cudaDeviceSynchronize()); + //cutilCheckMsg(__FUNCTION__); +#endif + +}*/ + +/*__global__ void compactifyHashKernel(HashData hashData) +{ + const HashParams& hashParams = c_hashParams; + const unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < hashParams.m_hashNumBuckets * HASH_BUCKET_SIZE) { + if (hashData.d_hashDecision[idx] == 1) { + hashData.d_hashCompactified[hashData.d_hashDecisionPrefix[idx]-1] = hashData.d_hash[idx]; + } + } +}*/ + +/*extern "C" void compactifyHashCUDA(HashData& hashData, const HashParams& hashParams) +{ + const dim3 gridSize((HASH_BUCKET_SIZE * hashParams.m_hashNumBuckets + (T_PER_BLOCK*T_PER_BLOCK) - 1)/(T_PER_BLOCK*T_PER_BLOCK), 1); + const dim3 blockSize((T_PER_BLOCK*T_PER_BLOCK), 1); + + compactifyHashKernel<<<gridSize, blockSize>>>(hashData); + +#ifdef _DEBUG + cudaSafeCall(cudaDeviceSynchronize()); + //cutilCheckMsg(__FUNCTION__); +#endif +}*/ + +__global__ void compactifyVisibleKernel(HashData hashData) +{ + const HashParams& hashParams = c_hashParams; + const unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x; +#ifdef COMPACTIFY_HASH_SIMPLE + if (idx < hashParams.m_hashNumBuckets * HASH_BUCKET_SIZE) { + if (hashData.d_hash[idx].ptr != FREE_ENTRY) { + if (hashData.isSDFBlockInCameraFrustumApprox(hashData.d_hash[idx].pos)) + { + int addr = atomicAdd(hashData.d_hashCompactifiedCounter, 1); + hashData.d_hashCompactified[addr] = hashData.d_hash[idx]; + } + } + } +#else + __shared__ int localCounter; + if (threadIdx.x == 0) localCounter = 0; + __syncthreads(); + + int addrLocal = -1; + if (idx < hashParams.m_hashNumBuckets * HASH_BUCKET_SIZE) { + if (hashData.d_hash[idx].ptr != FREE_ENTRY) { + if (hashData.isSDFBlockInCameraFrustumApprox(hashData.d_hash[idx].pos)) + { + addrLocal = atomicAdd(&localCounter, 1); + } + } + } + + __syncthreads(); + + __shared__ int addrGlobal; + if (threadIdx.x == 0 && localCounter > 0) { + addrGlobal = atomicAdd(hashData.d_hashCompactifiedCounter, localCounter); + } + __syncthreads(); + + if (addrLocal != -1) { + const unsigned int addr = addrGlobal + addrLocal; + hashData.d_hashCompactified[addr] = hashData.d_hash[idx]; + } +#endif +} + +unsigned int ftl::cuda::compactifyVisible(HashData& hashData, const HashParams& hashParams) { + const unsigned int threadsPerBlock = COMPACTIFY_HASH_THREADS_PER_BLOCK; + const dim3 gridSize((HASH_BUCKET_SIZE * hashParams.m_hashNumBuckets + threadsPerBlock - 1) / threadsPerBlock, 1); + const dim3 blockSize(threadsPerBlock, 1); + + cudaSafeCall(cudaMemset(hashData.d_hashCompactifiedCounter, 0, sizeof(int))); + compactifyVisibleKernel << <gridSize, blockSize >> >(hashData); + unsigned int res = 0; + cudaSafeCall(cudaMemcpy(&res, hashData.d_hashCompactifiedCounter, sizeof(unsigned int), cudaMemcpyDeviceToHost)); + +#ifdef _DEBUG + cudaSafeCall(cudaDeviceSynchronize()); + //cutilCheckMsg(__FUNCTION__); +#endif + return res; +} + +__global__ void compactifyAllocatedKernel(HashData hashData) +{ + const HashParams& hashParams = c_hashParams; + const unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x; +#ifdef COMPACTIFY_HASH_SIMPLE + if (idx < hashParams.m_hashNumBuckets * HASH_BUCKET_SIZE) { + if (hashData.d_hash[idx].ptr != FREE_ENTRY) { + int addr = atomicAdd(hashData.d_hashCompactifiedCounter, 1); + hashData.d_hashCompactified[addr] = hashData.d_hash[idx]; + } + } +#else + __shared__ int localCounter; + if (threadIdx.x == 0) localCounter = 0; + __syncthreads(); + + int addrLocal = -1; + if (idx < hashParams.m_hashNumBuckets * HASH_BUCKET_SIZE) { + if (hashData.d_hash[idx].ptr != FREE_ENTRY) { + addrLocal = atomicAdd(&localCounter, 1); + } + } + + __syncthreads(); + + __shared__ int addrGlobal; + if (threadIdx.x == 0 && localCounter > 0) { + addrGlobal = atomicAdd(hashData.d_hashCompactifiedCounter, localCounter); + } + __syncthreads(); + + if (addrLocal != -1) { + const unsigned int addr = addrGlobal + addrLocal; + hashData.d_hashCompactified[addr] = hashData.d_hash[idx]; + } +#endif +} + +unsigned int ftl::cuda::compactifyAllocated(HashData& hashData, const HashParams& hashParams) { + const unsigned int threadsPerBlock = COMPACTIFY_HASH_THREADS_PER_BLOCK; + const dim3 gridSize((HASH_BUCKET_SIZE * hashParams.m_hashNumBuckets + threadsPerBlock - 1) / threadsPerBlock, 1); + const dim3 blockSize(threadsPerBlock, 1); + + cudaSafeCall(cudaMemset(hashData.d_hashCompactifiedCounter, 0, sizeof(int))); + compactifyAllocatedKernel << <gridSize, blockSize >> >(hashData); + unsigned int res = 0; + cudaSafeCall(cudaMemcpy(&res, hashData.d_hashCompactifiedCounter, sizeof(unsigned int), cudaMemcpyDeviceToHost)); + +#ifdef _DEBUG + cudaSafeCall(cudaDeviceSynchronize()); + //cutilCheckMsg(__FUNCTION__); +#endif + return res; +} diff --git a/applications/reconstruct/src/compactors.hpp b/applications/reconstruct/src/compactors.hpp new file mode 100644 index 0000000000000000000000000000000000000000..9fb961d6c809b21d570175fc9b2e6fc506463fb8 --- /dev/null +++ b/applications/reconstruct/src/compactors.hpp @@ -0,0 +1,20 @@ +#ifndef _FTL_RECONSTRUCT_COMPACTORS_HPP_ +#define _FTL_RECONSTRUCT_COMPACTORS_HPP_ + +#include <ftl/voxel_hash.hpp> + +namespace ftl { +namespace cuda { + +// Compact visible +unsigned int compactifyVisible(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams); + +// Compact allocated +unsigned int compactifyAllocated(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams); + +// Compact visible surfaces + +} +} + +#endif // _FTL_RECONSTRUCT_COMPACTORS_HPP_ diff --git a/applications/reconstruct/src/main.cpp b/applications/reconstruct/src/main.cpp index f3a29a9fd329275d822c8a547190c4762b7a855f..1f04714547306d25fa7e1597eebaeac5d0143ea8 100644 --- a/applications/reconstruct/src/main.cpp +++ b/applications/reconstruct/src/main.cpp @@ -9,46 +9,27 @@ #include <ftl/config.h> #include <ftl/configuration.hpp> #include <ftl/depth_camera.hpp> -#include <ftl/scene_rep_hash_sdf.hpp> +#include <ftl/voxel_scene.hpp> #include <ftl/rgbd.hpp> #include <ftl/virtual_source.hpp> #include <ftl/rgbd/streamer.hpp> #include <ftl/slave.hpp> -// #include <zlib.h> -// #include <lz4.h> - #include <string> #include <vector> #include <thread> #include <chrono> -#include <mutex> #include <opencv2/opencv.hpp> #include <ftl/net/universe.hpp> -#include <ftl/rgbd_display.hpp> -#include <nlohmann/json.hpp> - -#include <opencv2/imgproc.hpp> -#include <opencv2/imgcodecs.hpp> -#include <opencv2/highgui.hpp> -#include <opencv2/core/utility.hpp> -#include <ftl/utility/opencv_to_pcl.hpp> #include <ftl/registration.hpp> #ifdef WIN32 #pragma comment(lib, "Rpcrt4.lib") #endif -#ifdef HAVE_PCL -#include <pcl/point_cloud.h> -#include <pcl/common/transforms.h> -#include <pcl/filters/uniform_sampling.h> -#endif - using ftl::net::Universe; -using ftl::rgbd::Display; using std::string; using std::vector; using ftl::rgbd::Source; @@ -57,22 +38,14 @@ using ftl::config::json_t; using json = nlohmann::json; using std::this_thread::sleep_for; using std::chrono::milliseconds; -using std::mutex; -using std::unique_lock; - -using std::vector; +//using std::mutex; +//using std::unique_lock; -using cv::Mat; +//using cv::Mat; using ftl::registration::loadTransformations; using ftl::registration::saveTransformations; -struct Cameras { - Source *source; - DepthCameraData gpu; - DepthCameraParams params; -}; - static void run(ftl::Configurable *root) { Universe *net = ftl::create<Universe>(root, "net"); ftl::ctrl::Slave slave(net, root); @@ -80,110 +53,59 @@ static void run(ftl::Configurable *root) { net->start(); net->waitConnections(); - std::vector<Cameras> inputs; - auto sources = ftl::createArray<Source>(root, "sources", net); //root->get<vector<json_t>>("sources"); + // Create a vector of all input RGB-Depth sources + auto sources = ftl::createArray<Source>(root, "sources", net); if (sources.size() == 0) { LOG(ERROR) << "No sources configured!"; return; } - for (int i=0; i<sources.size(); i++) { - Source *in = sources[i]; - auto &cam = inputs.emplace_back(); - cam.source = in; - cam.params.m_imageWidth = 0; - } - - // TODO move this block in its own method and add automated tests - // for error cases - - std::optional<json_t> merge = root->get<json_t>("merge"); - if (!merge) { - LOG(WARNING) << "Input merging not configured, using only first input in configuration"; - inputs = { inputs[0] }; - inputs[0].source->setPose(Eigen::Matrix4d::Identity()); - } - - if (inputs.size() > 1) { + // Must find pose for each source... + if (sources.size() > 1) { std::map<std::string, Eigen::Matrix4d> transformations; - /*if ((*merge)["register"]) { - LOG(INFO) << "Registration requested"; - - ftl::registration::Registration *reg = ftl::registration::ChessboardRegistration::create(*merge); - for (auto &input : inputs) { - while(!input.source->isReady()) { std::this_thread::sleep_for(std::chrono::milliseconds(50)); } - reg->addSource(input.source); - } - - reg->run(); - if (reg->findTransformations(transformations)) { - if (!saveTransformations(string(FTL_LOCAL_CONFIG_ROOT) + "/registration.json", transformations)) { - LOG(ERROR) << "Error saving new registration"; - }; - } - else { - LOG(ERROR) << "Registration failed"; - } - - free(reg); + if (loadTransformations(string(FTL_LOCAL_CONFIG_ROOT) + "/registration.json", transformations)) { + LOG(INFO) << "Loaded camera trasformations from file"; + } + else { + LOG(ERROR) << "Error loading camera transformations from file"; } - else {*/ - if (loadTransformations(string(FTL_LOCAL_CONFIG_ROOT) + "/registration.json", transformations)) { - LOG(INFO) << "Loaded camera trasformations from file"; - } - else { - LOG(ERROR) << "Error loading camera transformations from file"; - } - //} - for (auto &input : inputs) { - string uri = input.source->getURI(); + for (auto &input : sources) { + string uri = input->getURI(); auto T = transformations.find(uri); if (T == transformations.end()) { LOG(ERROR) << "Camera pose for " + uri + " not found in transformations"; LOG(WARNING) << "Using only first configured source"; // TODO: use target source if configured and found - inputs = { inputs[0] }; - inputs[0].source->setPose(Eigen::Matrix4d::Identity()); + sources = { sources[0] }; + sources[0]->setPose(Eigen::Matrix4d::Identity()); break; } - input.source->setPose(T->second); + input->setPose(T->second); } } - // Displays and Inputs configured - - LOG(INFO) << "Using sources:"; - for (auto &input : inputs) { LOG(INFO) << " " + (string) input.source->getURI(); } - - //ftl::rgbd::Display *display = ftl::create<ftl::rgbd::Display>(root, "display"); + ftl::voxhash::SceneRep *scene = ftl::create<ftl::voxhash::SceneRep>(root, "voxelhash"); + ftl::rgbd::Streamer *stream = ftl::create<ftl::rgbd::Streamer>(root, "stream", net); ftl::rgbd::Source *virt = ftl::create<ftl::rgbd::Source>(root, "virtual", net); auto virtimpl = new ftl::rgbd::VirtualSource(virt); virt->customImplementation(virtimpl); - - ftl::voxhash::SceneRep *scene = ftl::create<ftl::voxhash::SceneRep>(root, "voxelhash"); virtimpl->setScene(scene); - //display->setSource(virt); - - ftl::rgbd::Streamer *stream = ftl::create<ftl::rgbd::Streamer>(root, "stream", net); stream->add(virt); - // Also proxy all inputs - for (auto &in : inputs) { - stream->add(in.source); + + for (int i=0; i<sources.size(); i++) { + Source *in = sources[i]; + stream->add(in); + scene->addSource(in); } unsigned char frameCount = 0; bool paused = false; - // Keyboard camera controls - //display->onKey([&paused](int key) { - // if (key == 32) paused = !paused; - //}); - - int active = inputs.size(); + int active = sources.size(); while (ftl::running) { if (active == 0) { LOG(INFO) << "Waiting for sources..."; @@ -193,69 +115,27 @@ static void run(ftl::Configurable *root) { active = 0; if (!slave.isPaused()) { - //net.broadcast("grab"); // To sync cameras + // Mark voxels as cleared scene->nextFrame(); - // TODO(Nick) Improve sync further... - for (size_t i = 0; i < inputs.size(); i++) { - inputs[i].source->grab(); - } + // Grab, upload frames and allocate voxel blocks + active = scene->upload(); + // Make sure previous virtual camera frame has finished rendering stream->wait(); - for (size_t i = 0; i < inputs.size(); i++) { - if (!inputs[i].source->isReady()) { - inputs[i].params.m_imageWidth = 0; - // TODO(Nick) : Free gpu allocs if was ready before - continue; - } else { - auto &cam = inputs[i]; - auto in = inputs[i].source; - if (cam.params.m_imageWidth == 0) { - cam.params.fx = in->parameters().fx; - cam.params.fy = in->parameters().fy; - cam.params.mx = -in->parameters().cx; - cam.params.my = -in->parameters().cy; - cam.params.m_imageWidth = in->parameters().width; - cam.params.m_imageHeight = in->parameters().height; - cam.params.m_sensorDepthWorldMax = in->parameters().maxDepth; - cam.params.m_sensorDepthWorldMin = in->parameters().minDepth; - cam.gpu.alloc(cam.params); - } - - //LOG(INFO) << in->getURI() << " loaded " << cam.params.fx; - } + // Merge new frames into the voxel structure + scene->integrate(); - // Get the RGB-Depth frame from input - Source *input = inputs[i].source; - Mat rgb, depth; - input->getFrames(rgb,depth); - - active += 1; + // Remove any redundant voxels + scene->garbage(); - if (depth.cols == 0) continue; - - // Must be in RGBA for GPU - Mat rgba; - cv::cvtColor(rgb,rgba, cv::COLOR_BGR2BGRA); - - inputs[i].params.flags = frameCount; - - // Send to GPU and merge view into scene - inputs[i].gpu.updateParams(inputs[i].params); - inputs[i].gpu.updateData(depth, rgba); - // TODO(Nick): Use double pose - scene->integrate(inputs[i].source->getPose().cast<float>(), inputs[i].gpu, inputs[i].params, nullptr); - } } else { active = 1; } - frameCount++; - + // Start virtual camera rendering and previous frame compression stream->poll(); - //display->update(); - //sleep_for(milliseconds(10)); } } diff --git a/applications/reconstruct/src/ray_cast_sdf.cpp b/applications/reconstruct/src/ray_cast_sdf.cpp index 0f792b4c3608b1c113631bdb9b791595bd50e007..97d75dee7ccd3ec50ce6fef4dac469799f57e60d 100644 --- a/applications/reconstruct/src/ray_cast_sdf.cpp +++ b/applications/reconstruct/src/ray_cast_sdf.cpp @@ -1,6 +1,7 @@ //#include <stdafx.h> #include <ftl/voxel_hash.hpp> +#include "compactors.hpp" //#include "Util.h" @@ -36,12 +37,12 @@ void CUDARayCastSDF::destroy(void) //m_rayIntervalSplatting.OnD3D11DestroyDevice(); } -extern "C" unsigned int compactifyHashAllInOneCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams); +//extern "C" unsigned int compactifyHashAllInOneCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams); void CUDARayCastSDF::compactifyHashEntries(ftl::voxhash::HashData& hashData, ftl::voxhash::HashParams& hashParams) { //const DepthCameraData& depthCameraData) { - hashParams.m_numOccupiedBlocks = compactifyHashAllInOneCUDA(hashData, hashParams); //this version uses atomics over prefix sums, which has a much better performance + hashParams.m_numOccupiedBlocks = ftl::cuda::compactifyVisible(hashData, hashParams); //this version uses atomics over prefix sums, which has a much better performance std::cout << "Ray blocks = " << hashParams.m_numOccupiedBlocks << std::endl; hashData.updateParams(hashParams); //make sure numOccupiedBlocks is updated on the GPU } diff --git a/applications/reconstruct/src/scene_rep_hash_sdf.cu b/applications/reconstruct/src/scene_rep_hash_sdf.cu index c29e66b16e856857f6a3136869df742851dc0617..8a7c0b25a62e089a84121c809fa07e202ea74559 100644 --- a/applications/reconstruct/src/scene_rep_hash_sdf.cu +++ b/applications/reconstruct/src/scene_rep_hash_sdf.cu @@ -322,126 +322,6 @@ extern "C" void allocCUDA(HashData& hashData, const HashParams& hashParams, cons #endif } - - -__global__ void fillDecisionArrayKernel(HashData hashData, DepthCameraData depthCameraData) -{ - const HashParams& hashParams = c_hashParams; - const unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x; - - if (idx < hashParams.m_hashNumBuckets * HASH_BUCKET_SIZE) { - hashData.d_hashDecision[idx] = 0; - if (hashData.d_hash[idx].ptr != FREE_ENTRY) { - if (hashData.isSDFBlockInCameraFrustumApprox(hashData.d_hash[idx].pos)) { - hashData.d_hashDecision[idx] = 1; //yes - } - } - } -} - -extern "C" void fillDecisionArrayCUDA(HashData& hashData, const HashParams& hashParams, const DepthCameraData& depthCameraData) -{ - const dim3 gridSize((HASH_BUCKET_SIZE * hashParams.m_hashNumBuckets + (T_PER_BLOCK*T_PER_BLOCK) - 1)/(T_PER_BLOCK*T_PER_BLOCK), 1); - const dim3 blockSize((T_PER_BLOCK*T_PER_BLOCK), 1); - - fillDecisionArrayKernel<<<gridSize, blockSize>>>(hashData, depthCameraData); - -#ifdef _DEBUG - cudaSafeCall(cudaDeviceSynchronize()); - //cutilCheckMsg(__FUNCTION__); -#endif - -} - -__global__ void compactifyHashKernel(HashData hashData) -{ - const HashParams& hashParams = c_hashParams; - const unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x; - if (idx < hashParams.m_hashNumBuckets * HASH_BUCKET_SIZE) { - if (hashData.d_hashDecision[idx] == 1) { - hashData.d_hashCompactified[hashData.d_hashDecisionPrefix[idx]-1] = hashData.d_hash[idx]; - } - } -} - -extern "C" void compactifyHashCUDA(HashData& hashData, const HashParams& hashParams) -{ - const dim3 gridSize((HASH_BUCKET_SIZE * hashParams.m_hashNumBuckets + (T_PER_BLOCK*T_PER_BLOCK) - 1)/(T_PER_BLOCK*T_PER_BLOCK), 1); - const dim3 blockSize((T_PER_BLOCK*T_PER_BLOCK), 1); - - compactifyHashKernel<<<gridSize, blockSize>>>(hashData); - -#ifdef _DEBUG - cudaSafeCall(cudaDeviceSynchronize()); - //cutilCheckMsg(__FUNCTION__); -#endif -} - - -#define COMPACTIFY_HASH_THREADS_PER_BLOCK 256 -//#define COMPACTIFY_HASH_SIMPLE -__global__ void compactifyHashAllInOneKernel(HashData hashData) -{ - const HashParams& hashParams = c_hashParams; - const unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x; -#ifdef COMPACTIFY_HASH_SIMPLE - if (idx < hashParams.m_hashNumBuckets * HASH_BUCKET_SIZE) { - if (hashData.d_hash[idx].ptr != FREE_ENTRY) { - if (hashData.isSDFBlockInCameraFrustumApprox(hashData.d_hash[idx].pos)) - { - int addr = atomicAdd(hashData.d_hashCompactifiedCounter, 1); - hashData.d_hashCompactified[addr] = hashData.d_hash[idx]; - } - } - } -#else - __shared__ int localCounter; - if (threadIdx.x == 0) localCounter = 0; - __syncthreads(); - - int addrLocal = -1; - if (idx < hashParams.m_hashNumBuckets * HASH_BUCKET_SIZE) { - if (hashData.d_hash[idx].ptr != FREE_ENTRY) { - if (hashData.isSDFBlockInCameraFrustumApprox(hashData.d_hash[idx].pos)) - { - addrLocal = atomicAdd(&localCounter, 1); - } - } - } - - __syncthreads(); - - __shared__ int addrGlobal; - if (threadIdx.x == 0 && localCounter > 0) { - addrGlobal = atomicAdd(hashData.d_hashCompactifiedCounter, localCounter); - } - __syncthreads(); - - if (addrLocal != -1) { - const unsigned int addr = addrGlobal + addrLocal; - hashData.d_hashCompactified[addr] = hashData.d_hash[idx]; - } -#endif -} - -extern "C" unsigned int compactifyHashAllInOneCUDA(HashData& hashData, const HashParams& hashParams) -{ - const unsigned int threadsPerBlock = COMPACTIFY_HASH_THREADS_PER_BLOCK; - const dim3 gridSize((HASH_BUCKET_SIZE * hashParams.m_hashNumBuckets + threadsPerBlock - 1) / threadsPerBlock, 1); - const dim3 blockSize(threadsPerBlock, 1); - - cudaSafeCall(cudaMemset(hashData.d_hashCompactifiedCounter, 0, sizeof(int))); - compactifyHashAllInOneKernel << <gridSize, blockSize >> >(hashData); - unsigned int res = 0; - cudaSafeCall(cudaMemcpy(&res, hashData.d_hashCompactifiedCounter, sizeof(unsigned int), cudaMemcpyDeviceToHost)); - -#ifdef _DEBUG - cudaSafeCall(cudaDeviceSynchronize()); - //cutilCheckMsg(__FUNCTION__); -#endif - return res; -} - __device__ float4 make_float4(uchar4 c) { return make_float4(static_cast<float>(c.x), static_cast<float>(c.y), static_cast<float>(c.z), static_cast<float>(c.w)); } diff --git a/applications/reconstruct/src/virtual_source.cpp b/applications/reconstruct/src/virtual_source.cpp index 80f08a0e35e9a6c3966da82923a775711a2b26c8..a64a857db224cd191899558d16873bf3c7267aff 100644 --- a/applications/reconstruct/src/virtual_source.cpp +++ b/applications/reconstruct/src/virtual_source.cpp @@ -1,6 +1,6 @@ #include <ftl/virtual_source.hpp> #include <ftl/depth_camera.hpp> -#include <ftl/scene_rep_hash_sdf.hpp> +#include <ftl/voxel_scene.hpp> #include <ftl/ray_cast_sdf.hpp> #define LOGURU_WITH_STREAMS 1 diff --git a/applications/reconstruct/src/voxel_scene.cpp b/applications/reconstruct/src/voxel_scene.cpp new file mode 100644 index 0000000000000000000000000000000000000000..445c6b9f637e815726edec4d4b3d550bcdb02260 --- /dev/null +++ b/applications/reconstruct/src/voxel_scene.cpp @@ -0,0 +1,405 @@ +#include <ftl/voxel_scene.hpp> +#include "compactors.hpp" + +using namespace ftl::voxhash; +using ftl::rgbd::Source; +using ftl::Configurable; +using cv::Mat; + +#define SAFE_DELETE_ARRAY(a) { delete [] (a); (a) = NULL; } + +extern "C" void resetCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams); +extern "C" void resetHashBucketMutexCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams); +extern "C" void allocCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, const unsigned int* d_bitMask); +//extern "C" void fillDecisionArrayCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const DepthCameraData& depthCameraData); +//extern "C" void compactifyHashCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams); +//extern "C" unsigned int compactifyHashAllInOneCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams); +extern "C" void integrateDepthMapCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams); +//extern "C" void bindInputDepthColorTextures(const DepthCameraData& depthCameraData); + +extern "C" void starveVoxelsKernelCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams); +extern "C" void garbageCollectIdentifyCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams); +extern "C" void garbageCollectFreeCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams); + +SceneRep::SceneRep(nlohmann::json &config) : Configurable(config), do_reset_(false), m_frameCount(0) { + // Allocates voxel structure on GPU + _create(_parametersFromConfig()); + + on("SDFVoxelSize", [this](const ftl::config::Event &e) { + do_reset_ = true; + }); + on("hashNumSDFBlocks", [this](const ftl::config::Event &e) { + do_reset_ = true; + }); + on("hashNumBuckets", [this](const ftl::config::Event &e) { + do_reset_ = true; + }); + on("hashMaxCollisionLinkedListSize", [this](const ftl::config::Event &e) { + do_reset_ = true; + }); + on("SDFTruncation", [this](const ftl::config::Event &e) { + m_hashParams.m_truncation = value("SDFTruncation", 0.1f); + }); + on("SDFTruncationScale", [this](const ftl::config::Event &e) { + m_hashParams.m_truncScale = value("SDFTruncationScale", 0.01f); + }); + on("SDFMaxIntegrationDistance", [this](const ftl::config::Event &e) { + m_hashParams.m_maxIntegrationDistance = value("SDFMaxIntegrationDistance", 10.0f); + }); +} + +SceneRep::~SceneRep() { + _destroy(); +} + +void SceneRep::addSource(ftl::rgbd::Source *src) { + auto &cam = cameras_.emplace_back(); + cam.source = src; + cam.params.m_imageWidth = 0; +} + +int SceneRep::upload() { + int active = 0; + + for (size_t i=0; i<cameras_.size(); ++i) { + cameras_[i].source->grab(); + } + + for (size_t i=0; i<cameras_.size(); ++i) { + auto &cam = cameras_[i]; + + if (!cam.source->isReady()) { + cam.params.m_imageWidth = 0; + // TODO(Nick) : Free gpu allocs if was ready before + continue; + } else { + auto in = cam.source; + // Only now do we have camera parameters for allocations... + if (cam.params.m_imageWidth == 0) { + cam.params.fx = in->parameters().fx; + cam.params.fy = in->parameters().fy; + cam.params.mx = -in->parameters().cx; + cam.params.my = -in->parameters().cy; + cam.params.m_imageWidth = in->parameters().width; + cam.params.m_imageHeight = in->parameters().height; + cam.params.m_sensorDepthWorldMax = in->parameters().maxDepth; + cam.params.m_sensorDepthWorldMin = in->parameters().minDepth; + cam.gpu.alloc(cam.params); + } + } + + // Get the RGB-Depth frame from input + Source *input = cam.source; + Mat rgb, depth; + + // TODO(Nick) Direct GPU upload to save copy + input->getFrames(rgb,depth); + + active += 1; + + if (depth.cols == 0) continue; + + // Must be in RGBA for GPU + Mat rgba; + cv::cvtColor(rgb,rgba, cv::COLOR_BGR2BGRA); + + cam.params.flags = m_frameCount; + + // Send to GPU and merge view into scene + cam.gpu.updateParams(cam.params); + cam.gpu.updateData(depth, rgba); + + setLastRigidTransform(input->getPose().cast<float>()); + + //make the rigid transform available on the GPU + m_hashData.updateParams(m_hashParams); + + //allocate all hash blocks which are corresponding to depth map entries + _alloc(cam.gpu, cam.params, nullptr); + } + + return active; +} + +void SceneRep::integrate() { + for (size_t i=0; i<cameras_.size(); ++i) { + auto &cam = cameras_[i]; + + setLastRigidTransform(cam.source->getPose().cast<float>()); + m_hashData.updateParams(m_hashParams); + + //generate a linear hash array with only occupied entries + _compactifyVisible(); + + //volumetrically integrate the depth data into the depth SDFBlocks + _integrateDepthMap(cam.gpu, cam.params); + + //_garbageCollect(cam.gpu); + + m_numIntegratedFrames++; + } +} + +void SceneRep::garbage() { + _compactifyAllocated(); + _garbageCollect(); + +} + +/*void SceneRep::integrate(const Eigen::Matrix4f& lastRigidTransform, const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, unsigned int* d_bitMask) { + + setLastRigidTransform(lastRigidTransform); + + //make the rigid transform available on the GPU + m_hashData.updateParams(m_hashParams); + + //allocate all hash blocks which are corresponding to depth map entries + _alloc(depthCameraData, depthCameraParams, d_bitMask); + + //generate a linear hash array with only occupied entries + _compactifyHashEntries(); + + //volumetrically integrate the depth data into the depth SDFBlocks + _integrateDepthMap(depthCameraData, depthCameraParams); + + _garbageCollect(depthCameraData); + + m_numIntegratedFrames++; +}*/ + +void SceneRep::setLastRigidTransform(const Eigen::Matrix4f& lastRigidTransform) { + m_hashParams.m_rigidTransform = MatrixConversion::toCUDA(lastRigidTransform); + m_hashParams.m_rigidTransformInverse = m_hashParams.m_rigidTransform.getInverse(); +} + +/*void SceneRep::setLastRigidTransformAndCompactify(const Eigen::Matrix4f& lastRigidTransform, const DepthCameraData& depthCameraData) { + setLastRigidTransform(lastRigidTransform); + _compactifyHashEntries(); +}*/ + + +const Eigen::Matrix4f SceneRep::getLastRigidTransform() const { + return MatrixConversion::toEigen(m_hashParams.m_rigidTransform); +} + +/* Nick: To reduce weights between frames */ +void SceneRep::nextFrame() { + if (do_reset_) { + do_reset_ = false; + _destroy(); + _create(_parametersFromConfig()); + } else { + starveVoxelsKernelCUDA(m_hashData, m_hashParams); + m_numIntegratedFrames = 0; + } +} + +//! resets the hash to the initial state (i.e., clears all data) +void SceneRep::reset() { + m_numIntegratedFrames = 0; + + m_hashParams.m_rigidTransform.setIdentity(); + m_hashParams.m_rigidTransformInverse.setIdentity(); + m_hashParams.m_numOccupiedBlocks = 0; + m_hashData.updateParams(m_hashParams); + resetCUDA(m_hashData, m_hashParams); +} + +//! debug only! +unsigned int SceneRep::getHeapFreeCount() { + unsigned int count; + cudaSafeCall(cudaMemcpy(&count, m_hashData.d_heapCounter, sizeof(unsigned int), cudaMemcpyDeviceToHost)); + return count+1; //there is one more free than the address suggests (0 would be also a valid address) +} + +//! debug only! +void SceneRep::debugHash() { + HashEntry* hashCPU = new HashEntry[m_hashParams.m_hashBucketSize*m_hashParams.m_hashNumBuckets]; + unsigned int* heapCPU = new unsigned int[m_hashParams.m_numSDFBlocks]; + unsigned int heapCounterCPU; + + cudaSafeCall(cudaMemcpy(&heapCounterCPU, m_hashData.d_heapCounter, sizeof(unsigned int), cudaMemcpyDeviceToHost)); + heapCounterCPU++; //points to the first free entry: number of blocks is one more + + cudaSafeCall(cudaMemcpy(heapCPU, m_hashData.d_heap, sizeof(unsigned int)*m_hashParams.m_numSDFBlocks, cudaMemcpyDeviceToHost)); + cudaSafeCall(cudaMemcpy(hashCPU, m_hashData.d_hash, sizeof(HashEntry)*m_hashParams.m_hashBucketSize*m_hashParams.m_hashNumBuckets, cudaMemcpyDeviceToHost)); + + //Check for duplicates + class myint3Voxel { + public: + myint3Voxel() {} + ~myint3Voxel() {} + bool operator<(const myint3Voxel& other) const { + if (x == other.x) { + if (y == other.y) { + return z < other.z; + } + return y < other.y; + } + return x < other.x; + } + + bool operator==(const myint3Voxel& other) const { + return x == other.x && y == other.y && z == other.z; + } + + int x,y,z, i; + int offset; + int ptr; + }; + + + std::unordered_set<unsigned int> pointersFreeHash; + std::vector<int> pointersFreeVec(m_hashParams.m_numSDFBlocks, 0); // CHECK Nick Changed to int from unsigned in + for (unsigned int i = 0; i < heapCounterCPU; i++) { + pointersFreeHash.insert(heapCPU[i]); + pointersFreeVec[heapCPU[i]] = FREE_ENTRY; + } + if (pointersFreeHash.size() != heapCounterCPU) { + throw std::runtime_error("ERROR: duplicate free pointers in heap array"); + } + + + unsigned int numOccupied = 0; + unsigned int numMinusOne = 0; + //unsigned int listOverallFound = 0; + + std::list<myint3Voxel> l; + //std::vector<myint3Voxel> v; + + for (unsigned int i = 0; i < m_hashParams.m_hashBucketSize*m_hashParams.m_hashNumBuckets; i++) { + if (hashCPU[i].ptr == -1) { + numMinusOne++; + } + + if (hashCPU[i].ptr != -2) { + numOccupied++; // != FREE_ENTRY + myint3Voxel a; + a.x = hashCPU[i].pos.x; + a.y = hashCPU[i].pos.y; + a.z = hashCPU[i].pos.z; + l.push_back(a); + //v.push_back(a); + + unsigned int linearBlockSize = m_hashParams.m_SDFBlockSize*m_hashParams.m_SDFBlockSize*m_hashParams.m_SDFBlockSize; + if (pointersFreeHash.find(hashCPU[i].ptr / linearBlockSize) != pointersFreeHash.end()) { + throw std::runtime_error("ERROR: ptr is on free heap, but also marked as an allocated entry"); + } + pointersFreeVec[hashCPU[i].ptr / linearBlockSize] = LOCK_ENTRY; + } + } + + unsigned int numHeapFree = 0; + unsigned int numHeapOccupied = 0; + for (unsigned int i = 0; i < m_hashParams.m_numSDFBlocks; i++) { + if (pointersFreeVec[i] == FREE_ENTRY) numHeapFree++; + else if (pointersFreeVec[i] == LOCK_ENTRY) numHeapOccupied++; + else { + throw std::runtime_error("memory leak detected: neither free nor allocated"); + } + } + if (numHeapFree + numHeapOccupied == m_hashParams.m_numSDFBlocks) std::cout << "HEAP OK!" << std::endl; + else throw std::runtime_error("HEAP CORRUPTED"); + + l.sort(); + size_t sizeBefore = l.size(); + l.unique(); + size_t sizeAfter = l.size(); + + + std::cout << "diff: " << sizeBefore - sizeAfter << std::endl; + std::cout << "minOne: " << numMinusOne << std::endl; + std::cout << "numOccupied: " << numOccupied << "\t numFree: " << getHeapFreeCount() << std::endl; + std::cout << "numOccupied + free: " << numOccupied + getHeapFreeCount() << std::endl; + std::cout << "numInFrustum: " << m_hashParams.m_numOccupiedBlocks << std::endl; + + SAFE_DELETE_ARRAY(heapCPU); + SAFE_DELETE_ARRAY(hashCPU); + + //getchar(); +} + +HashParams SceneRep::_parametersFromConfig() { + //auto &cfg = ftl::config::resolve(config); + HashParams params; + // First camera view is set to identity pose to be at the centre of + // the virtual coordinate space. + params.m_rigidTransform.setIdentity(); + params.m_rigidTransformInverse.setIdentity(); + params.m_hashNumBuckets = value("hashNumBuckets", 100000); + params.m_hashBucketSize = HASH_BUCKET_SIZE; + params.m_hashMaxCollisionLinkedListSize = value("hashMaxCollisionLinkedListSize", 7); + params.m_SDFBlockSize = SDF_BLOCK_SIZE; + params.m_numSDFBlocks = value("hashNumSDFBlocks",500000); + params.m_virtualVoxelSize = value("SDFVoxelSize", 0.006f); + params.m_maxIntegrationDistance = value("SDFMaxIntegrationDistance", 10.0f); + params.m_truncation = value("SDFTruncation", 0.1f); + params.m_truncScale = value("SDFTruncationScale", 0.01f); + params.m_integrationWeightSample = value("SDFIntegrationWeightSample", 10); + params.m_integrationWeightMax = value("SDFIntegrationWeightMax", 255); + // Note (Nick): We are not streaming voxels in/out of GPU + //params.m_streamingVoxelExtents = MatrixConversion::toCUDA(gas.s_streamingVoxelExtents); + //params.m_streamingGridDimensions = MatrixConversion::toCUDA(gas.s_streamingGridDimensions); + //params.m_streamingMinGridPos = MatrixConversion::toCUDA(gas.s_streamingMinGridPos); + //params.m_streamingInitialChunkListSize = gas.s_streamingInitialChunkListSize; + return params; +} + +void SceneRep::_create(const HashParams& params) { + m_hashParams = params; + m_hashData.allocate(m_hashParams); + + reset(); +} + +void SceneRep::_destroy() { + m_hashData.free(); +} + +void SceneRep::_alloc(const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, const unsigned int* d_bitMask) { + // NOTE (nick): We might want this later... + if (true) { + //allocate until all blocks are allocated + unsigned int prevFree = getHeapFreeCount(); + while (1) { + resetHashBucketMutexCUDA(m_hashData, m_hashParams); + allocCUDA(m_hashData, m_hashParams, depthCameraData, depthCameraParams, d_bitMask); + + unsigned int currFree = getHeapFreeCount(); + + if (prevFree != currFree) { + prevFree = currFree; + } + else { + break; + } + } + } + else { + //this version is faster, but it doesn't guarantee that all blocks are allocated (staggers alloc to the next frame) + resetHashBucketMutexCUDA(m_hashData, m_hashParams); + allocCUDA(m_hashData, m_hashParams, depthCameraData, depthCameraParams, d_bitMask); + } +} + + +void SceneRep::_compactifyVisible() { //const DepthCameraData& depthCameraData) { + m_hashParams.m_numOccupiedBlocks = ftl::cuda::compactifyVisible(m_hashData, m_hashParams); //this version uses atomics over prefix sums, which has a much better performance + m_hashData.updateParams(m_hashParams); //make sure numOccupiedBlocks is updated on the GPU +} + +void SceneRep::_compactifyAllocated() { + m_hashParams.m_numOccupiedBlocks = ftl::cuda::compactifyAllocated(m_hashData, m_hashParams); //this version uses atomics over prefix sums, which has a much better performance + std::cout << "Occ blocks = " << m_hashParams.m_numOccupiedBlocks << std::endl; + m_hashData.updateParams(m_hashParams); //make sure numOccupiedBlocks is updated on the GPU +} + +void SceneRep::_integrateDepthMap(const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams) { + integrateDepthMapCUDA(m_hashData, m_hashParams, depthCameraData, depthCameraParams); +} + +void SceneRep::_garbageCollect() { + garbageCollectIdentifyCUDA(m_hashData, m_hashParams); + resetHashBucketMutexCUDA(m_hashData, m_hashParams); //needed if linked lists are enabled -> for memeory deletion + garbageCollectFreeCUDA(m_hashData, m_hashParams); +}