// NOTE(review): stray "Newer"/"Older" diff-viewer labels were pasted here; not part of the source.
#include <ftl/voxel_scene.hpp>

#include <opencv2/core/cuda_stream_accessor.hpp>

#include <vector>

#include "compactors.hpp"
#include "garbage.hpp"
#include "integrators.hpp"
#include "depth_camera_cuda.hpp"
using namespace ftl::voxhash;
using ftl::rgbd::Source;
using ftl::Configurable;
using cv::Mat;
#define SAFE_DELETE_ARRAY(a) { delete [] (a); (a) = NULL; }
extern "C" void resetCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams);
extern "C" void resetHashBucketMutexCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, cudaStream_t);
extern "C" void allocCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, int camid, const DepthCameraParams &depthCameraParams, cudaStream_t);
//extern "C" void fillDecisionArrayCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const DepthCameraData& depthCameraData);
//extern "C" void compactifyHashCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams);
//extern "C" unsigned int compactifyHashAllInOneCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams);
//extern "C" void integrateDepthMapCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, cudaStream_t);
//extern "C" void bindInputDepthColorTextures(const DepthCameraData& depthCameraData);
// Constructs the voxel scene: allocates the GPU voxel hash structure from
// the initial configuration and registers configuration-change handlers.
SceneRep::SceneRep(nlohmann::json &config) : Configurable(config), m_frameCount(0), do_reset_(false) {
	// Allocates voxel structure on GPU
	_create(_parametersFromConfig());

	// Any of these options changes the hash-table geometry, so the whole
	// structure must be rebuilt (deferred to nextFrame()).
	for (const char *opt : {"SDFVoxelSize", "hashNumSDFBlocks", "hashNumBuckets", "hashMaxCollisionLinkedListSize"}) {
		on(opt, [this](const ftl::config::Event &e) {
			do_reset_ = true;
		});
	}

	// These parameters can be updated in place, no rebuild required.
	on("SDFTruncation", [this](const ftl::config::Event &e) {
		m_hashParams.m_truncation = value("SDFTruncation", 0.1f);
	});
	on("SDFTruncationScale", [this](const ftl::config::Event &e) {
		m_hashParams.m_truncScale = value("SDFTruncationScale", 0.01f);
	});
	on("SDFMaxIntegrationDistance", [this](const ftl::config::Event &e) {
		m_hashParams.m_maxIntegrationDistance = value("SDFMaxIntegrationDistance", 10.0f);
	});
	on("showRegistration", [this](const ftl::config::Event &e) {
		reg_mode_ = value("showRegistration", false);
	});

	// Initialise from config now as well (the handler only fires on change).
	reg_mode_ = value("showRegistration", false);

	// Dedicated stream for integration work.
	cudaSafeCall(cudaStreamCreate(&integ_stream_));
	//integ_stream_ = 0;
}
// Destructor: frees the GPU voxel hash and the integration stream.
// FIX: the closing brace of the destructor was missing, which swallowed
// the following function definition.
SceneRep::~SceneRep() {
	_destroy();
	cudaStreamDestroy(integ_stream_);
}
bool SceneRep::_initCUDA() {
// Do an initial CUDA check
int cuda_device_count = 0;
cudaSafeCall(cudaGetDeviceCount(&cuda_device_count));
CHECK_GE(cuda_device_count, 1) << "No CUDA devices found";
LOG(INFO) << "CUDA Devices (" << cuda_device_count << "):";
vector<cudaDeviceProp> properties(cuda_device_count);
for (int i=0; i<cuda_device_count; i++) {
cudaSafeCall(cudaGetDeviceProperties(&properties[i], i));
LOG(INFO) << " - " << properties[i].name;
}
int desired_device = value("cudaDevice", 0);
cuda_device_ = (desired_device < cuda_device_count) ? desired_device : cuda_device_count-1;
cudaSafeCall(cudaSetDevice(cuda_device_));
// TODO:(Nick) Check memory is sufficient
// TODO:(Nick) Find out what our compute capability should be.
LOG(INFO) << "CUDA Compute: " << properties[cuda_device_].major << "." << properties[cuda_device_].minor;
// Registers a new RGB-D source with the scene. A zero image width marks the
// camera's GPU buffers as not yet allocated (done lazily in upload()).
void SceneRep::addSource(ftl::rgbd::Source *src) {
	auto &entry = cameras_.emplace_back();
	entry.params.m_imageWidth = 0;
	entry.source = src;
}
extern "C" void updateCUDACameraConstant(ftl::voxhash::DepthCameraCUDA *data, int count);

// Pushes the current per-camera GPU parameters into CUDA constant memory,
// refreshing each camera's pose and inverse pose from its source first
// (sources may move between frames).
void SceneRep::_updateCameraConstant() {
	std::vector<ftl::voxhash::DepthCameraCUDA> cams(cameras_.size());
	for (size_t i=0; i<cameras_.size(); ++i) {
		cams[i] = cameras_[i].gpu.data;
		cams[i].pose = MatrixConversion::toCUDA(cameras_[i].source->getPose().cast<float>());
		cams[i].poseInverse = MatrixConversion::toCUDA(cameras_[i].source->getPose().cast<float>().inverse());
	}
	// Explicit cast avoids an implicit size_t -> int narrowing conversion.
	updateCUDACameraConstant(cams.data(), static_cast<int>(cams.size()));
}
// Grabs a new frame from every source, lazily allocates per-camera GPU
// buffers, uploads depth + colour to the GPU and (optionally, when "voxels"
// is enabled) allocates voxel hash blocks covering the new depth data.
// Returns the number of sources that produced a frame this call.
// FIX: three closing braces were missing (else branch, both camera loops),
// leaving the function — and everything after it — syntactically broken.
int SceneRep::upload() {
	int active = 0;

	for (size_t i=0; i<cameras_.size(); ++i) {
		cameras_[i].source->grab();
	}

	for (size_t i=0; i<cameras_.size(); ++i) {
		auto &cam = cameras_[i];

		if (!cam.source->isReady()) {
			cam.params.m_imageWidth = 0;
			// TODO(Nick) : Free gpu allocs if was ready before
			LOG(INFO) << "Source not ready: " << cam.source->getURI();
			continue;
		} else {
			auto in = cam.source;

			cam.params.fx = in->parameters().fx;
			cam.params.fy = in->parameters().fy;
			cam.params.mx = -in->parameters().cx;
			cam.params.my = -in->parameters().cy;

			// Only now do we have camera parameters for allocations...
			if (cam.params.m_imageWidth == 0) {
				cam.params.m_imageWidth = in->parameters().width;
				cam.params.m_imageHeight = in->parameters().height;
				cam.params.m_sensorDepthWorldMax = in->parameters().maxDepth;
				cam.params.m_sensorDepthWorldMin = in->parameters().minDepth;
				cam.gpu.alloc(cam.params, true);
				LOG(INFO) << "GPU Allocated camera " << i;
				cam.params.flags = m_frameCount;
			}
		}
	}

	_updateCameraConstant();
	//cudaSafeCall(cudaDeviceSynchronize());

	for (size_t i=0; i<cameras_.size(); ++i) {
		auto &cam = cameras_[i];

		// Get the RGB-Depth frame from input
		Source *input = cam.source;
		Mat rgb, depth;

		// TODO(Nick) Direct GPU upload to save copy
		input->getFrames(rgb,depth);

		active += 1;

		if (depth.cols == 0) continue;

		// Must be in RGBA for GPU
		// NOTE(review): the Lab result is relabelled BGR->BGRA below, so the
		// GPU actually receives Lab channels in a 4-channel image — confirm
		// this is intentional (used for colour smoothing?).
		Mat rgbt, rgba;
		cv::cvtColor(rgb,rgbt, cv::COLOR_BGR2Lab);
		cv::cvtColor(rgbt,rgba, cv::COLOR_BGR2BGRA);

		// Send to GPU and merge view into scene
		//cam.gpu.updateParams(cam.params);
		cam.gpu.updateData(depth, rgba, cam.stream);

		//setLastRigidTransform(input->getPose().cast<float>());

		//make the rigid transform available on the GPU
		//m_hashData.updateParams(m_hashParams, cv::cuda::StreamAccessor::getStream(cam.stream));
		//if (i > 0) cudaSafeCall(cudaStreamSynchronize(cv::cuda::StreamAccessor::getStream(cameras_[i-1].stream)));

		//allocate all hash blocks which are corresponding to depth map entries
		if (value("voxels", false)) _alloc(i, cv::cuda::StreamAccessor::getStream(cam.stream));

		// Calculate normals
	}

	// Must have finished all allocations and rendering before next integration
	cudaSafeCall(cudaDeviceSynchronize());

	return active;
}
// Integrates the most recently uploaded depth maps into the voxel volume:
// first compacts the allocated hash entries into a linear array, then runs
// the per-camera integration kernels (both on integ_stream_).
void SceneRep::integrate() {
	/*for (size_t i=0; i<cameras_.size(); ++i) {
		auto &cam = cameras_[i];

		setLastRigidTransform(cam.source->getPose().cast<float>());
		//m_hashData.updateParams(m_hashParams);

		//generate a linear hash array with only occupied entries

		//volumetrically integrate the depth data into the depth SDFBlocks

		//_integrateDepthMap(cam.gpu, cam.params);
	}*/

	// Generate a linear hash array with only occupied entries.
	_compactifyAllocated();

	// Volumetrically integrate the depth data into the SDF blocks.
	_integrateDepthMaps();
}
// Runs voxel garbage collection when voxel integration is enabled.
// FIX: a run of pasted editor line numbers (224..258) had been inserted into
// the function body; removed — it was never code.
void SceneRep::garbage() {
	if (value("voxels", false)) _garbageCollect();
	//cudaSafeCall(cudaStreamSynchronize(integ_stream_));
}
/*void SceneRep::integrate(const Eigen::Matrix4f& lastRigidTransform, const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, unsigned int* d_bitMask) {
setLastRigidTransform(lastRigidTransform);
//make the rigid transform available on the GPU
m_hashData.updateParams(m_hashParams);
//allocate all hash blocks which are corresponding to depth map entries
_alloc(depthCameraData, depthCameraParams, d_bitMask);
//generate a linear hash array with only occupied entries
_compactifyHashEntries();
//volumetrically integrate the depth data into the depth SDFBlocks
_integrateDepthMap(depthCameraData, depthCameraParams);
_garbageCollect(depthCameraData);
m_numIntegratedFrames++;
}*/
/*void SceneRep::setLastRigidTransformAndCompactify(const Eigen::Matrix4f& lastRigidTransform, const DepthCameraData& depthCameraData) {
setLastRigidTransform(lastRigidTransform);
_compactifyHashEntries();
}*/
/* Nick: To reduce weights between frames */
// Per-frame housekeeping: either performs a deferred rebuild of the voxel
// hash (requested by a config change) or resets the integrated-frame count.
void SceneRep::nextFrame() {
	if (!do_reset_) {
		//ftl::cuda::compactifyAllocated(m_hashData, m_hashParams, integ_stream_);
		//if (reg_mode_) ftl::cuda::clearVoxels(m_hashData, m_hashParams);
		//else ftl::cuda::starveVoxels(m_hashData, m_hashParams, integ_stream_);
		m_numIntegratedFrames = 0;
		return;
	}

	// A structural parameter changed: tear down and rebuild the hash.
	do_reset_ = false;
	_destroy();
	_create(_parametersFromConfig());
}
//! resets the hash to the initial state (i.e., clears all data)
void SceneRep::reset() {
	m_numIntegratedFrames = 0;
	// Push current parameters to the GPU before clearing the structure.
	m_hashData.updateParams(m_hashParams);
	resetCUDA(m_hashData, m_hashParams);
}
// Reads back the number of compactified (occupied) hash entries from the
// GPU. NOTE: this is a synchronous device-to-host copy.
// FIX: 'count' was used without being declared.
unsigned int SceneRep::getOccupiedCount() {
	unsigned int count = 0;
	cudaSafeCall(cudaMemcpy(&count, m_hashData.d_hashCompactifiedCounter, sizeof(unsigned int), cudaMemcpyDeviceToHost));
	return count+1;	//there is one more free than the address suggests (0 would be also a valid address)
}
// Builds a HashParams structure from the current configuration, using the
// documented defaults for any missing key.
HashParams SceneRep::_parametersFromConfig() {
	//auto &cfg = ftl::config::resolve(config);
	HashParams params;

	// First camera view is set to identity pose to be at the centre of
	// the virtual coordinate space.

	params.m_hashNumBuckets = value("hashNumBuckets", 100000);
	params.m_virtualVoxelSize = value("SDFVoxelSize", 0.006f);
	params.m_maxIntegrationDistance = value("SDFMaxIntegrationDistance", 10.0f);
	params.m_truncation = value("SDFTruncation", 0.1f);
	params.m_truncScale = value("SDFTruncationScale", 0.01f);
	params.m_integrationWeightSample = value("SDFIntegrationWeightSample", 10);
	params.m_integrationWeightMax = value("SDFIntegrationWeightMax", 255);
	params.m_spatialSmoothing = value("spatialSmoothing", 0.04f); // 4cm
	params.m_colourSmoothing = value("colourSmoothing", 20.0f);
	params.m_confidenceThresh = value("confidenceThreshold", 20.0f);

	params.m_flags = 0;
	if (value("clipping", false)) params.m_flags |= ftl::voxhash::kFlagClipping;
	if (value("mls", false)) params.m_flags |= ftl::voxhash::kFlagMLS;

	// Bounding box is configured in metres; convert to SDF block units
	// (truncating toward zero, as the original implicit conversion did).
	const float blockSize = params.m_virtualVoxelSize * SDF_BLOCK_SIZE;
	auto inBlocks = [&](const char *key, float def) {
		return static_cast<int>(value(key, def) / blockSize);
	};
	params.m_maxBounds = make_int3(
		inBlocks("bbox_x_max", 2.0f),
		inBlocks("bbox_y_max", 2.0f),
		inBlocks("bbox_z_max", 2.0f));
	params.m_minBounds = make_int3(
		inBlocks("bbox_x_min", -2.0f),
		inBlocks("bbox_y_min", -2.0f),
		inBlocks("bbox_z_min", -2.0f));

	return params;
}
// Stores the given parameters, allocates the GPU voxel hash structure and
// clears it to the initial (empty) state via reset().
void SceneRep::_create(const HashParams& params) {
	m_hashParams = params;
	m_hashData.allocate(m_hashParams);
	reset();
}
// Frees the GPU voxel hash structure allocated by _create().
void SceneRep::_destroy() {
	m_hashData.free();
}
// Allocates voxel hash blocks corresponding to the given camera's depth map.
// Two strategies exist: an exhaustive loop that re-runs allocation until the
// free-block count stops changing (disabled, kept for reference), and a
// single-pass version that may stagger allocation to the next frame.
// FIX: the disabled slow-path had lost its enclosing `if (false) { while(...)`
// wrapper (a dangling loop body with a bare `break;`), the "allocate until"
// comment had lost its `//`, and a stray `//}` terminated the function.
void SceneRep::_alloc(int camid, cudaStream_t stream) {
	// NOTE (nick): We might want this later...
	if (false) {
		// TODO(Nick) Make this work without memcpy to host first
		// Allocate until all blocks are allocated.
		unsigned int prevFree = 0; //getHeapFreeCount();
		while (true) {
			resetHashBucketMutexCUDA(m_hashData, m_hashParams, stream);
			allocCUDA(m_hashData, m_hashParams, camid, cameras_[camid].params, stream);

			const unsigned int currFree = getHeapFreeCount();
			if (prevFree != currFree) {
				prevFree = currFree;
			} else {
				break;  // no new blocks were allocated this pass
			}
		}
	} else {
		// This version is faster, but it doesn't guarantee that all blocks
		// are allocated (staggers alloc to the next frame).
		resetHashBucketMutexCUDA(m_hashData, m_hashParams, stream);
		allocCUDA(m_hashData, m_hashParams, camid, cameras_[camid].params, stream);
	}
}
// Compacts occupied hash entries into a linear array on the GPU.
// NOTE(review): the camera parameter is unused in this body — presumably
// intended for per-camera visibility culling; confirm against the header.
void SceneRep::_compactifyVisible(const DepthCameraParams &camera) { //const DepthCameraData& depthCameraData) {
	ftl::cuda::compactifyOccupied(m_hashData, m_hashParams, integ_stream_);		//this version uses atomics over prefix sums, which has a much better performance
	//m_hashData.updateParams(m_hashParams); //make sure numOccupiedBlocks is updated on the GPU
}
// Compacts all allocated hash entries into a linear array on the GPU, in
// preparation for integration.
// FIX: the function's closing brace was missing.
void SceneRep::_compactifyAllocated() {
	ftl::cuda::compactifyAllocated(m_hashData, m_hashParams, integ_stream_);		//this version uses atomics over prefix sums, which has a much better performance
	//std::cout << "Occ blocks = " << m_hashParams.m_numOccupiedBlocks << std::endl;
	//m_hashData.updateParams(m_hashParams); //make sure numOccupiedBlocks is updated on the GPU
}
/*void SceneRep::_integrateDepthMap(const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams) {
if (!reg_mode_) ftl::cuda::integrateDepthMap(m_hashData, m_hashParams, depthCameraData, depthCameraParams, integ_stream_);
else ftl::cuda::integrateRegistration(m_hashData, m_hashParams, depthCameraData, depthCameraParams, integ_stream_);
extern "C" void bilateralFilterFloatMap(float* d_output, float* d_input, float sigmaD, float sigmaR, unsigned int width, unsigned int height);
//cudaSafeCall(cudaDeviceSynchronize());
for (size_t i=0; i<cameras_.size(); ++i) {
if (!cameras_[i].source->isReady()) continue;
//ftl::cuda::clear_depth(*(cameras_[i].gpu.depth2_tex_), integ_stream_);
ftl::cuda::clear_points(*(cameras_[i].gpu.points_tex_), integ_stream_);
ftl::cuda::mls_smooth(*(cameras_[i].gpu.points_tex_), m_hashParams, cameras_.size(), i, integ_stream_);
//ftl::cuda::int_to_float(*(cameras_[i].gpu.depth2_tex_), *(cameras_[i].gpu.depth_tex_), 1.0f / 1000.0f, integ_stream_);
//ftl::cuda::hole_fill(*(cameras_[i].gpu.depth2_tex_), *(cameras_[i].gpu.depth_tex_), cameras_[i].params, integ_stream_);
//bilateralFilterFloatMap(cameras_[i].gpu.depth_tex_->devicePtr(), cameras_[i].gpu.depth3_tex_->devicePtr(), 3, 7, cameras_[i].gpu.depth_tex_->width(), cameras_[i].gpu.depth_tex_->height());
}
if (value("voxels", false)) ftl::cuda::integrateDepthMaps(m_hashData, m_hashParams, cameras_.size(), integ_stream_);
}
void SceneRep::_garbageCollect() {
//ftl::cuda::garbageCollectIdentify(m_hashData, m_hashParams, integ_stream_);
resetHashBucketMutexCUDA(m_hashData, m_hashParams, integ_stream_); //needed if linked lists are enabled -> for memeory deletion
ftl::cuda::garbageCollectFree(m_hashData, m_hashParams, integ_stream_);