// voxel_scene.cpp
// Voxel hashing scene representation (GPU SDF volume integration).
// Original author: Nicolas Pope.
#include <ftl/voxel_scene.hpp>

#include "compactors.hpp"
#include "garbage.hpp"
#include "integrators.hpp"

#include <iostream>
#include <list>
#include <stdexcept>
#include <unordered_set>
#include <vector>

#include <opencv2/core/cuda_stream_accessor.hpp>
using namespace ftl::voxhash;
using ftl::rgbd::Source;
using ftl::Configurable;
using cv::Mat;
#define SAFE_DELETE_ARRAY(a) { delete [] (a); (a) = NULL; }
extern "C" void resetCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams);
extern "C" void resetHashBucketMutexCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, cudaStream_t);
extern "C" void allocCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, cudaStream_t);
//extern "C" void fillDecisionArrayCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const DepthCameraData& depthCameraData);
//extern "C" void compactifyHashCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams);
//extern "C" unsigned int compactifyHashAllInOneCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams);
extern "C" void integrateDepthMapCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, cudaStream_t);
//extern "C" void bindInputDepthColorTextures(const DepthCameraData& depthCameraData);
/// Constructs the scene, allocating the voxel hash structure on the GPU from
/// the JSON configuration, and registers config-change callbacks.
SceneRep::SceneRep(nlohmann::json &config) : Configurable(config), do_reset_(false), m_frameCount(0) {
	// Allocates voxel structure on GPU
	_create(_parametersFromConfig());

	// Any parameter that affects the hash table / block pool layout cannot be
	// changed in place; flag a full destroy+recreate for the next frame.
	// (Previously four byte-identical lambdas — factored into one.)
	const auto trigger_reset = [this](const ftl::config::Event &) {
		do_reset_ = true;
	};
	on("SDFVoxelSize", trigger_reset);
	on("hashNumSDFBlocks", trigger_reset);
	on("hashNumBuckets", trigger_reset);
	on("hashMaxCollisionLinkedListSize", trigger_reset);

	// These integration parameters can be updated live without a reset.
	on("SDFTruncation", [this](const ftl::config::Event &) {
		m_hashParams.m_truncation = value("SDFTruncation", 0.1f);
	});
	on("SDFTruncationScale", [this](const ftl::config::Event &) {
		m_hashParams.m_truncScale = value("SDFTruncationScale", 0.01f);
	});
	on("SDFMaxIntegrationDistance", [this](const ftl::config::Event &) {
		m_hashParams.m_maxIntegrationDistance = value("SDFMaxIntegrationDistance", 10.0f);
	});
	on("showRegistration", [this](const ftl::config::Event &) {
		reg_mode_ = value("showRegistration", false);
	});

	reg_mode_ = value("showRegistration", false);

	// Dedicated stream for allocation/integration kernels.
	cudaSafeCall(cudaStreamCreate(&integ_stream_));
	//integ_stream_ = 0;
}
// Frees the GPU voxel structure, then the CUDA stream used for integration.
SceneRep::~SceneRep() {
_destroy();
cudaStreamDestroy(integ_stream_);
}
// Registers a new RGB-D source with the scene. An image width of zero marks
// the camera as having no GPU buffers allocated yet (see upload()).
void SceneRep::addSource(ftl::rgbd::Source *src) {
	cameras_.emplace_back();
	auto &entry = cameras_.back();
	entry.source = src;
	entry.params.m_imageWidth = 0;
}
// Grabs a frame from every source, uploads RGB-D data to the GPU and
// allocates the voxel blocks each depth map touches.
// Returns the number of sources that produced a frame this round.
int SceneRep::upload() {
int active = 0;
// First pass: trigger a grab on every source before processing any, so
// capture can proceed concurrently across sources.
for (size_t i=0; i<cameras_.size(); ++i) {
cameras_[i].source->grab();
}
for (size_t i=0; i<cameras_.size(); ++i) {
auto &cam = cameras_[i];
// Skip sources that are not ready; width 0 marks GPU buffers unallocated.
if (!cam.source->isReady()) {
cam.params.m_imageWidth = 0;
// TODO(Nick) : Free gpu allocs if was ready before
continue;
} else {
auto in = cam.source;
// Refresh intrinsics each frame. The principal point is negated —
// presumably the GPU camera model expects that sign; TODO confirm.
cam.params.fx = in->parameters().fx;
cam.params.fy = in->parameters().fy;
cam.params.mx = -in->parameters().cx;
cam.params.my = -in->parameters().cy;
// Only now do we have camera parameters for allocations...
if (cam.params.m_imageWidth == 0) {
cam.params.m_imageWidth = in->parameters().width;
cam.params.m_imageHeight = in->parameters().height;
cam.params.m_sensorDepthWorldMax = in->parameters().maxDepth;
cam.params.m_sensorDepthWorldMin = in->parameters().minDepth;
cam.gpu.alloc(cam.params);
}
}
// Get the RGB-Depth frame from input
Source *input = cam.source;
Mat rgb, depth;
// TODO(Nick) Direct GPU upload to save copy
input->getFrames(rgb,depth);
// NOTE(review): counted as active even when the depth frame below turns
// out to be empty — confirm that is intended.
active += 1;
if (depth.cols == 0) continue;
// Must be in RGBA for GPU
Mat rgba;
cv::cvtColor(rgb,rgba, cv::COLOR_BGR2BGRA);
// The frame counter is stashed in the camera flags field.
cam.params.flags = m_frameCount;
// Send to GPU and merge view into scene
//cam.gpu.updateParams(cam.params);
cam.gpu.updateData(depth, rgba, cam.stream);
// Use this camera's pose for the allocation pass below.
setLastRigidTransform(input->getPose().cast<float>());
//make the rigid transform available on the GPU
//m_hashData.updateParams(m_hashParams, cv::cuda::StreamAccessor::getStream(cam.stream));
//if (i > 0) cudaSafeCall(cudaStreamSynchronize(cv::cuda::StreamAccessor::getStream(cameras_[i-1].stream)));
//allocate all hash blocks which are corresponding to depth map entries
_alloc(cam.gpu, cam.params, cv::cuda::StreamAccessor::getStream(cam.stream));
}
// Must have finished all allocations and rendering before next integration
cudaSafeCall(cudaDeviceSynchronize());
return active;
}
// Fuses every source's current depth frame into the voxel volume.
void SceneRep::integrate() {
	for (auto &cam : cameras_) {
		// Integration happens relative to this camera's pose.
		setLastRigidTransform(cam.source->getPose().cast<float>());

		//m_hashData.updateParams(m_hashParams);

		// Compact to a linear array containing only the visible occupied
		// hash entries for this view.
		_compactifyVisible(cam.params);

		// Volumetrically integrate the depth data into the SDF blocks.
		_integrateDepthMap(cam.gpu, cam.params);

		//_garbageCollect();

		m_numIntegratedFrames++;
	}
}
// Compacts the allocated-block list and then frees stale voxel blocks
// (identify + free passes, see _garbageCollect()).
void SceneRep::garbage() {
_compactifyAllocated();
_garbageCollect();
//cudaSafeCall(cudaStreamSynchronize(integ_stream_));
}
/*void SceneRep::integrate(const Eigen::Matrix4f& lastRigidTransform, const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, unsigned int* d_bitMask) {
setLastRigidTransform(lastRigidTransform);
//make the rigid transform available on the GPU
m_hashData.updateParams(m_hashParams);
//allocate all hash blocks which are corresponding to depth map entries
_alloc(depthCameraData, depthCameraParams, d_bitMask);
//generate a linear hash array with only occupied entries
_compactifyHashEntries();
//volumetrically integrate the depth data into the depth SDFBlocks
_integrateDepthMap(depthCameraData, depthCameraParams);
_garbageCollect(depthCameraData);
m_numIntegratedFrames++;
}*/
// Stores the given rigid transform (and its inverse) in CUDA matrix form,
// making it the pose used by subsequent allocation/integration passes.
void SceneRep::setLastRigidTransform(const Eigen::Matrix4f& lastRigidTransform) {
	const Eigen::Matrix4f inverted = lastRigidTransform.inverse();
	m_hashParams.m_rigidTransform = MatrixConversion::toCUDA(lastRigidTransform);
	m_hashParams.m_rigidTransformInverse = MatrixConversion::toCUDA(inverted);
}
/*void SceneRep::setLastRigidTransformAndCompactify(const Eigen::Matrix4f& lastRigidTransform, const DepthCameraData& depthCameraData) {
setLastRigidTransform(lastRigidTransform);
_compactifyHashEntries();
}*/
// Returns the most recently set rigid transform, converted back to Eigen.
const Eigen::Matrix4f SceneRep::getLastRigidTransform() const {
return MatrixConversion::toEigen(m_hashParams.m_rigidTransform);
}
/* Nick: To reduce weights between frames */
// Per-frame housekeeping: either performs a deferred full reset (requested
// by a config change) or decays/clears voxel weights for the next frame.
void SceneRep::nextFrame() {
	if (do_reset_) {
		// A layout-affecting parameter changed: rebuild the whole volume.
		do_reset_ = false;
		_destroy();
		_create(_parametersFromConfig());
		return;
	}

	//ftl::cuda::compactifyAllocated(m_hashData, m_hashParams, integ_stream_);
	if (reg_mode_) {
		ftl::cuda::clearVoxels(m_hashData, m_hashParams);
	} else {
		ftl::cuda::starveVoxels(m_hashData, m_hashParams);
	}
	m_numIntegratedFrames = 0;
}
//! resets the hash to the initial state (i.e., clears all data)
void SceneRep::reset() {
// Clear host-side counters and pose, push the parameters to the GPU, then
// reset the device-side hash table and block heap.
m_numIntegratedFrames = 0;
m_hashParams.m_rigidTransform.setIdentity();
m_hashParams.m_rigidTransformInverse.setIdentity();
m_hashParams.m_numOccupiedBlocks = 0;
m_hashData.updateParams(m_hashParams);
resetCUDA(m_hashData, m_hashParams);
}
//! debug only!
// Synchronous device-to-host copy of the heap counter; returns the number
// of free SDF blocks. Debug use only (stalls the GPU).
unsigned int SceneRep::getHeapFreeCount() {
unsigned int count;
cudaSafeCall(cudaMemcpy(&count, m_hashData.d_heapCounter, sizeof(unsigned int), cudaMemcpyDeviceToHost));
return count+1; //there is one more free than the address suggests (0 would be also a valid address)
}
//! debug only!
void SceneRep::debugHash() {
HashEntry* hashCPU = new HashEntry[m_hashParams.m_hashBucketSize*m_hashParams.m_hashNumBuckets];
unsigned int* heapCPU = new unsigned int[m_hashParams.m_numSDFBlocks];
unsigned int heapCounterCPU;
cudaSafeCall(cudaMemcpy(&heapCounterCPU, m_hashData.d_heapCounter, sizeof(unsigned int), cudaMemcpyDeviceToHost));
heapCounterCPU++; //points to the first free entry: number of blocks is one more
cudaSafeCall(cudaMemcpy(heapCPU, m_hashData.d_heap, sizeof(unsigned int)*m_hashParams.m_numSDFBlocks, cudaMemcpyDeviceToHost));
cudaSafeCall(cudaMemcpy(hashCPU, m_hashData.d_hash, sizeof(HashEntry)*m_hashParams.m_hashBucketSize*m_hashParams.m_hashNumBuckets, cudaMemcpyDeviceToHost));
//Check for duplicates
class myint3Voxel {
public:
myint3Voxel() {}
~myint3Voxel() {}
bool operator<(const myint3Voxel& other) const {
if (x == other.x) {
if (y == other.y) {
return z < other.z;
}
return y < other.y;
}
return x < other.x;
}
bool operator==(const myint3Voxel& other) const {
return x == other.x && y == other.y && z == other.z;
}
int x,y,z, i;
int offset;
int ptr;
};
std::unordered_set<unsigned int> pointersFreeHash;
std::vector<int> pointersFreeVec(m_hashParams.m_numSDFBlocks, 0); // CHECK Nick Changed to int from unsigned in
for (unsigned int i = 0; i < heapCounterCPU; i++) {
pointersFreeHash.insert(heapCPU[i]);
pointersFreeVec[heapCPU[i]] = FREE_ENTRY;
}
if (pointersFreeHash.size() != heapCounterCPU) {
throw std::runtime_error("ERROR: duplicate free pointers in heap array");
}
unsigned int numOccupied = 0;
unsigned int numMinusOne = 0;
//unsigned int listOverallFound = 0;
std::list<myint3Voxel> l;
//std::vector<myint3Voxel> v;
for (unsigned int i = 0; i < m_hashParams.m_hashBucketSize*m_hashParams.m_hashNumBuckets; i++) {
if (hashCPU[i].ptr == -1) {
numMinusOne++;
}
if (hashCPU[i].ptr != -2) {
numOccupied++; // != FREE_ENTRY
myint3Voxel a;
a.x = hashCPU[i].pos.x;
a.y = hashCPU[i].pos.y;
a.z = hashCPU[i].pos.z;
l.push_back(a);
//v.push_back(a);
unsigned int linearBlockSize = m_hashParams.m_SDFBlockSize*m_hashParams.m_SDFBlockSize*m_hashParams.m_SDFBlockSize;
if (pointersFreeHash.find(hashCPU[i].ptr / linearBlockSize) != pointersFreeHash.end()) {
throw std::runtime_error("ERROR: ptr is on free heap, but also marked as an allocated entry");
}
pointersFreeVec[hashCPU[i].ptr / linearBlockSize] = LOCK_ENTRY;
}
}
unsigned int numHeapFree = 0;
unsigned int numHeapOccupied = 0;
for (unsigned int i = 0; i < m_hashParams.m_numSDFBlocks; i++) {
if (pointersFreeVec[i] == FREE_ENTRY) numHeapFree++;
else if (pointersFreeVec[i] == LOCK_ENTRY) numHeapOccupied++;
else {
throw std::runtime_error("memory leak detected: neither free nor allocated");
}
}
if (numHeapFree + numHeapOccupied == m_hashParams.m_numSDFBlocks) std::cout << "HEAP OK!" << std::endl;
else throw std::runtime_error("HEAP CORRUPTED");
l.sort();
size_t sizeBefore = l.size();
l.unique();
size_t sizeAfter = l.size();
std::cout << "diff: " << sizeBefore - sizeAfter << std::endl;
std::cout << "minOne: " << numMinusOne << std::endl;
std::cout << "numOccupied: " << numOccupied << "\t numFree: " << getHeapFreeCount() << std::endl;
std::cout << "numOccupied + free: " << numOccupied + getHeapFreeCount() << std::endl;
std::cout << "numInFrustum: " << m_hashParams.m_numOccupiedBlocks << std::endl;
SAFE_DELETE_ARRAY(heapCPU);
SAFE_DELETE_ARRAY(hashCPU);
//getchar();
}
// Builds the voxel hash parameter set from the JSON configuration, using the
// defaults given below for any missing entry.
HashParams SceneRep::_parametersFromConfig() {
	//auto &cfg = ftl::config::resolve(config);
	HashParams p;

	// First camera view is set to identity pose to be at the centre of
	// the virtual coordinate space.
	p.m_rigidTransform.setIdentity();
	p.m_rigidTransformInverse.setIdentity();

	// Hash table layout.
	p.m_hashNumBuckets = value("hashNumBuckets", 100000);
	p.m_hashBucketSize = HASH_BUCKET_SIZE;
	p.m_hashMaxCollisionLinkedListSize = value("hashMaxCollisionLinkedListSize", 7);

	// Voxel block pool.
	p.m_SDFBlockSize = SDF_BLOCK_SIZE;
	p.m_numSDFBlocks = value("hashNumSDFBlocks",500000);
	p.m_virtualVoxelSize = value("SDFVoxelSize", 0.006f);

	// SDF integration settings.
	p.m_maxIntegrationDistance = value("SDFMaxIntegrationDistance", 10.0f);
	p.m_truncation = value("SDFTruncation", 0.1f);
	p.m_truncScale = value("SDFTruncationScale", 0.01f);
	p.m_integrationWeightSample = value("SDFIntegrationWeightSample", 10);
	p.m_integrationWeightMax = value("SDFIntegrationWeightMax", 255);

	// Note (Nick): We are not streaming voxels in/out of GPU
	//p.m_streamingVoxelExtents = MatrixConversion::toCUDA(gas.s_streamingVoxelExtents);
	//p.m_streamingGridDimensions = MatrixConversion::toCUDA(gas.s_streamingGridDimensions);
	//p.m_streamingMinGridPos = MatrixConversion::toCUDA(gas.s_streamingMinGridPos);
	//p.m_streamingInitialChunkListSize = gas.s_streamingInitialChunkListSize;

	return p;
}
// Stores the parameters, allocates the GPU hash structure, and clears it.
void SceneRep::_create(const HashParams& params) {
m_hashParams = params;
m_hashData.allocate(m_hashParams);
reset();
}
// Releases all GPU memory owned by the voxel hash structure.
void SceneRep::_destroy() {
m_hashData.free();
}
// Allocates the hash blocks touched by the given depth map, on `stream`.
// The exhaustive mode (disabled) loops until the free-block count stabilizes,
// guaranteeing every touched block is allocated; the fast mode runs a single
// pass and may defer some allocations to the next frame.
void SceneRep::_alloc(const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, cudaStream_t stream) {
	// NOTE (nick): We might want this later...
	// Replaced the opaque `if (false)` with a named compile-time flag so the
	// disabled-but-kept exhaustive path is explicit.
	constexpr bool exhaustive_alloc = false;

	if (exhaustive_alloc) {
		// TODO(Nick) Make this work without memcpy to host first
		//allocate until all blocks are allocated
		unsigned int prevFree = getHeapFreeCount();
		while (1) {
			resetHashBucketMutexCUDA(m_hashData, m_hashParams, stream);
			allocCUDA(m_hashData, m_hashParams, depthCameraData, depthCameraParams, stream);

			// Stop once an allocation pass consumes no further blocks.
			unsigned int currFree = getHeapFreeCount();
			if (prevFree != currFree) {
				prevFree = currFree;
			}
			else {
				break;
			}
		}
	}
	else {
		//this version is faster, but it doesn't guarantee that all blocks are allocated (staggers alloc to the next frame)
		resetHashBucketMutexCUDA(m_hashData, m_hashParams, stream);
		allocCUDA(m_hashData, m_hashParams, depthCameraData, depthCameraParams, stream);
	}
}
// Compacts the hash into a linear array of entries visible to `camera`.
void SceneRep::_compactifyVisible(const DepthCameraParams &camera) { //const DepthCameraData& depthCameraData) {
ftl::cuda::compactifyVisible(m_hashData, m_hashParams, camera, integ_stream_);		//this version uses atomics over prefix sums, which has a much better performance
//m_hashData.updateParams(m_hashParams);	//make sure numOccupiedBlocks is updated on the GPU
}
// Compacts the hash into a linear array of all allocated entries.
void SceneRep::_compactifyAllocated() {
ftl::cuda::compactifyAllocated(m_hashData, m_hashParams, integ_stream_);		//this version uses atomics over prefix sums, which has a much better performance
//std::cout << "Occ blocks = " << m_hashParams.m_numOccupiedBlocks << std::endl;
//m_hashData.updateParams(m_hashParams);	//make sure numOccupiedBlocks is updated on the GPU
}
// Integrates one depth map into the SDF volume on the integration stream.
// In registration display mode the registration integrator is used instead
// of the normal depth integrator.
void SceneRep::_integrateDepthMap(const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams) {
	if (reg_mode_) {
		ftl::cuda::integrateRegistration(m_hashData, m_hashParams, depthCameraData, depthCameraParams, integ_stream_);
	} else {
		ftl::cuda::integrateDepthMap(m_hashData, m_hashParams, depthCameraData, depthCameraParams, integ_stream_);
	}
}
// Two-pass garbage collection on the integration stream: identify stale
// blocks, reset bucket mutexes, then free the identified blocks.
void SceneRep::_garbageCollect() {
ftl::cuda::garbageCollectIdentify(m_hashData, m_hashParams, integ_stream_);
resetHashBucketMutexCUDA(m_hashData, m_hashParams, integ_stream_);	//needed if linked lists are enabled -> for memeory deletion
ftl::cuda::garbageCollectFree(m_hashData, m_hashParams, integ_stream_);
}