diff --git a/applications/reconstruct/CMakeLists.txt b/applications/reconstruct/CMakeLists.txt index 0b4b0384b668c254d17c3513f2262c8036b4af49..4fa1cba34d8cbb0aa62c74f68cece41432f9e297 100644 --- a/applications/reconstruct/CMakeLists.txt +++ b/applications/reconstruct/CMakeLists.txt @@ -7,6 +7,8 @@ set(REPSRC src/voxel_scene.cpp src/scene_rep_hash_sdf.cu src/compactors.cu + src/garbage.cu + src/integrators.cu src/ray_cast_sdf.cu src/camera_util.cu src/ray_cast_sdf.cpp diff --git a/applications/reconstruct/include/ftl/depth_camera.hpp b/applications/reconstruct/include/ftl/depth_camera.hpp index e521af34fae2c5cf93aba8ebe3d5bd726d14d8c1..641a9c251c7518f8b8bbe486ca2af32e008557d8 100644 --- a/applications/reconstruct/include/ftl/depth_camera.hpp +++ b/applications/reconstruct/include/ftl/depth_camera.hpp @@ -13,8 +13,8 @@ #include <ftl/depth_camera_params.hpp> -extern "C" void updateConstantDepthCameraParams(const DepthCameraParams& params); -extern __constant__ DepthCameraParams c_depthCameraParams; +//extern "C" void updateConstantDepthCameraParams(const DepthCameraParams& params); +//extern __constant__ DepthCameraParams c_depthCameraParams; struct DepthCameraData { @@ -38,15 +38,6 @@ struct DepthCameraData { __host__ void alloc(const DepthCameraParams& params) { //! todo resizing??? - /*cudaSafeCall(cudaMalloc(&d_depthData, sizeof(float) * params.m_imageWidth * params.m_imageHeight)); - cudaSafeCall(cudaMalloc(&d_colorData, sizeof(float4) * params.m_imageWidth * params.m_imageHeight)); - - h_depthChannelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat); - cudaSafeCall(cudaMallocArray(&d_depthArray, &h_depthChannelDesc, params.m_imageWidth, params.m_imageHeight)); - h_colorChannelDesc = cudaCreateChannelDesc(32, 32, 32, 32, cudaChannelFormatKindFloat); - cudaSafeCall(cudaMallocArray(&d_colorArray, &h_colorChannelDesc, params.m_imageWidth, params.m_imageHeight));*/ - - std::cout << "Create texture objects: " << params.m_imageWidth << "," << params.m_imageHeight << std::endl; depth_mat_ = new cv::cuda::GpuMat(params.m_imageHeight, params.m_imageWidth, CV_32FC1); colour_mat_ = new cv::cuda::GpuMat(params.m_imageHeight, params.m_imageWidth, CV_8UC4); depth_tex_ = new ftl::cuda::TextureObject<float>((cv::cuda::PtrStepSz<float>)*depth_mat_); @@ -55,29 +46,19 @@ struct DepthCameraData { colour_obj_ = colour_tex_->cudaTexture(); } - __host__ - void updateParams(const DepthCameraParams& params) { - updateConstantDepthCameraParams(params); - } + //__host__ + //void updateParams(const DepthCameraParams& params) { + // updateConstantDepthCameraParams(params); + //} __host__ - void updateData(const cv::Mat &depth, const cv::Mat &rgb) { - depth_mat_->upload(depth); - colour_mat_->upload(rgb); + void updateData(const cv::Mat &depth, const cv::Mat &rgb, cv::cuda::Stream &stream) { + depth_mat_->upload(depth, stream); + colour_mat_->upload(rgb, stream); } __host__ void free() { - /*if (d_depthData) cudaSafeCall(cudaFree(d_depthData)); - if (d_colorData) cudaSafeCall(cudaFree(d_colorData)); - if (d_depthArray) cudaSafeCall(cudaFreeArray(d_depthArray)); - if (d_colorArray) cudaSafeCall(cudaFreeArray(d_colorArray));*/ - - /*d_depthData = NULL; - d_colorData = NULL; - d_depthArray = NULL; - d_colorArray = NULL;*/ - if (depth_mat_) delete depth_mat_; if (colour_mat_) delete colour_mat_; delete depth_tex_; @@ -85,109 +66,11 @@ struct DepthCameraData { } - ///////////////// - // Device part // - ///////////////// - - static inline const DepthCameraParams& params() { - return c_depthCameraParams; - } - - /////////////////////////////////////////////////////////////// - // Camera to Screen - /////////////////////////////////////////////////////////////// - - __device__ - static inline float2 cameraToKinectScreenFloat(const float3& pos) { - //return make_float2(pos.x*c_depthCameraParams.fx/pos.z + c_depthCameraParams.mx, c_depthCameraParams.my - pos.y*c_depthCameraParams.fy/pos.z); - return make_float2( - pos.x*c_depthCameraParams.fx/pos.z + c_depthCameraParams.mx, - pos.y*c_depthCameraParams.fy/pos.z + c_depthCameraParams.my); - } - - __device__ - static inline int2 cameraToKinectScreenInt(const float3& pos) { - float2 pImage = cameraToKinectScreenFloat(pos); - return make_int2(pImage + make_float2(0.5f, 0.5f)); - } - - __device__ - static inline uint2 cameraToKinectScreen(const float3& pos) { - int2 p = cameraToKinectScreenInt(pos); - return make_uint2(p.x, p.y); - } - - __device__ - static inline float cameraToKinectProjZ(float z) { - return (z - c_depthCameraParams.m_sensorDepthWorldMin)/(c_depthCameraParams.m_sensorDepthWorldMax - c_depthCameraParams.m_sensorDepthWorldMin); - } - - __device__ - static inline float3 cameraToKinectProj(const float3& pos) { - float2 proj = cameraToKinectScreenFloat(pos); - - float3 pImage = make_float3(proj.x, proj.y, pos.z); - - pImage.x = (2.0f*pImage.x - (c_depthCameraParams.m_imageWidth- 1.0f))/(c_depthCameraParams.m_imageWidth- 1.0f); - //pImage.y = (2.0f*pImage.y - (c_depthCameraParams.m_imageHeight-1.0f))/(c_depthCameraParams.m_imageHeight-1.0f); - pImage.y = ((c_depthCameraParams.m_imageHeight-1.0f) - 2.0f*pImage.y)/(c_depthCameraParams.m_imageHeight-1.0f); - pImage.z = cameraToKinectProjZ(pImage.z); - - return pImage; - } - - /////////////////////////////////////////////////////////////// - // Screen to Camera (depth in meters) - /////////////////////////////////////////////////////////////// - - __device__ - static inline float3 kinectDepthToSkeleton(uint ux, uint uy, float depth) { - const float x = ((float)ux-c_depthCameraParams.mx) / c_depthCameraParams.fx; - const float y = ((float)uy-c_depthCameraParams.my) / c_depthCameraParams.fy; - //const float y = (c_depthCameraParams.my-(float)uy) / c_depthCameraParams.fy; - return make_float3(depth*x, depth*y, depth); - } - - /////////////////////////////////////////////////////////////// - // RenderScreen to Camera -- ATTENTION ASSUMES [1,0]-Z range!!!! - /////////////////////////////////////////////////////////////// - - __device__ - static inline float kinectProjToCameraZ(float z) { - return z * (c_depthCameraParams.m_sensorDepthWorldMax - c_depthCameraParams.m_sensorDepthWorldMin) + c_depthCameraParams.m_sensorDepthWorldMin; - } - - // z has to be in [0, 1] - __device__ - static inline float3 kinectProjToCamera(uint ux, uint uy, float z) { - float fSkeletonZ = kinectProjToCameraZ(z); - return kinectDepthToSkeleton(ux, uy, fSkeletonZ); - } - - __device__ - static inline bool isInCameraFrustumApprox(const float4x4& viewMatrixInverse, const float3& pos) { - float3 pCamera = viewMatrixInverse * pos; - float3 pProj = cameraToKinectProj(pCamera); - //pProj *= 1.5f; //TODO THIS IS A HACK FIX IT :) - pProj *= 0.95; - return !(pProj.x < -1.0f || pProj.x > 1.0f || pProj.y < -1.0f || pProj.y > 1.0f || pProj.z < 0.0f || pProj.z > 1.0f); - } - - //float* d_depthData; //depth data of the current frame (in screen space):: TODO data allocation lives in RGBD Sensor - //float4* d_colorData; - - //uchar4* d_colorData; //color data of the current frame (in screen space):: TODO data allocation lives in RGBD Sensor - + // TODO(Nick) Should not need to pass all these pointers to device cv::cuda::GpuMat *depth_mat_; cv::cuda::GpuMat *colour_mat_; ftl::cuda::TextureObject<float> *depth_tex_; ftl::cuda::TextureObject<uchar4> *colour_tex_; cudaTextureObject_t depth_obj_; cudaTextureObject_t colour_obj_; - - // cuda arrays for texture access - /*cudaArray* d_depthArray; - cudaArray* d_colorArray; - cudaChannelFormatDesc h_depthChannelDesc; - cudaChannelFormatDesc h_colorChannelDesc;*/ }; diff --git a/applications/reconstruct/include/ftl/depth_camera_params.hpp b/applications/reconstruct/include/ftl/depth_camera_params.hpp index bf4263b9b74e05713ba22009c07da261b60dbe6b..4864fccbdc9fa52647687e885f955a12132b0c04 100644 --- a/applications/reconstruct/include/ftl/depth_camera_params.hpp +++ b/applications/reconstruct/include/ftl/depth_camera_params.hpp @@ -1,5 +1,7 @@ // From: https://github.com/niessner/VoxelHashing/blob/master/DepthSensingCUDA/Source/CUDADepthCameraParams.h +#pragma once + //#include <cutil_inline.h> //#include <cutil_math.h> #include <vector_types.h> @@ -20,4 +22,84 @@ struct __align__(16) DepthCameraParams { float m_sensorDepthWorldMin; float m_sensorDepthWorldMax; + + /////////////////////////////////////////////////////////////// + // Camera to Screen + /////////////////////////////////////////////////////////////// + + __device__ + inline float2 cameraToKinectScreenFloat(const float3& pos) const { + //return make_float2(pos.x*c_depthCameraParams.fx/pos.z + c_depthCameraParams.mx, c_depthCameraParams.my - pos.y*c_depthCameraParams.fy/pos.z); + return make_float2( + pos.x*fx/pos.z + mx, + pos.y*fy/pos.z + my); + } + + __device__ + inline int2 cameraToKinectScreenInt(const float3& pos) const { + float2 pImage = cameraToKinectScreenFloat(pos); + return make_int2(pImage + make_float2(0.5f, 0.5f)); + } + + __device__ + inline uint2 cameraToKinectScreen(const float3& pos) const { + int2 p = cameraToKinectScreenInt(pos); + return make_uint2(p.x, p.y); + } + + __device__ + inline float cameraToKinectProjZ(float z) const { + return (z - m_sensorDepthWorldMin)/(m_sensorDepthWorldMax - m_sensorDepthWorldMin); + } + + __device__ + inline float3 cameraToKinectProj(const float3& pos) const { + float2 proj = cameraToKinectScreenFloat(pos); + + float3 pImage = make_float3(proj.x, proj.y, pos.z); + + pImage.x = (2.0f*pImage.x - (m_imageWidth- 1.0f))/(m_imageWidth- 1.0f); + //pImage.y = (2.0f*pImage.y - (c_depthCameraParams.m_imageHeight-1.0f))/(c_depthCameraParams.m_imageHeight-1.0f); + pImage.y = ((m_imageHeight-1.0f) - 2.0f*pImage.y)/(m_imageHeight-1.0f); + pImage.z = cameraToKinectProjZ(pImage.z); + + return pImage; + } + + /////////////////////////////////////////////////////////////// + // Screen to Camera (depth in meters) + /////////////////////////////////////////////////////////////// + + __device__ + inline float3 kinectDepthToSkeleton(uint ux, uint uy, float depth) const { + const float x = ((float)ux-mx) / fx; + const float y = ((float)uy-my) / fy; + //const float y = (c_depthCameraParams.my-(float)uy) / c_depthCameraParams.fy; + return make_float3(depth*x, depth*y, depth); + } + + /////////////////////////////////////////////////////////////// + // RenderScreen to Camera -- ATTENTION ASSUMES [1,0]-Z range!!!! + /////////////////////////////////////////////////////////////// + + __device__ + inline float kinectProjToCameraZ(float z) const { + return z * (m_sensorDepthWorldMax - m_sensorDepthWorldMin) + m_sensorDepthWorldMin; + } + + // z has to be in [0, 1] + __device__ + inline float3 kinectProjToCamera(uint ux, uint uy, float z) const { + float fSkeletonZ = kinectProjToCameraZ(z); + return kinectDepthToSkeleton(ux, uy, fSkeletonZ); + } + + __device__ + inline bool isInCameraFrustumApprox(const float4x4& viewMatrixInverse, const float3& pos) const { + float3 pCamera = viewMatrixInverse * pos; + float3 pProj = cameraToKinectProj(pCamera); + //pProj *= 1.5f; //TODO THIS IS A HACK FIX IT :) + pProj *= 0.95; + return !(pProj.x < -1.0f || pProj.x > 1.0f || pProj.y < -1.0f || pProj.y > 1.0f || pProj.z < 0.0f || pProj.z > 1.0f); + } }; diff --git a/applications/reconstruct/include/ftl/ray_cast_params.hpp b/applications/reconstruct/include/ftl/ray_cast_params.hpp index 86ebfac586442861d48b7ed429774abcf03de32a..78242cf1fda6abae20cd82dc6cb00b9426ec86a6 100644 --- a/applications/reconstruct/include/ftl/ray_cast_params.hpp +++ b/applications/reconstruct/include/ftl/ray_cast_params.hpp @@ -1,6 +1,9 @@ +#pragma once + #include <ftl/cuda_util.hpp> #include <ftl/cuda_matrix_util.hpp> +#include <ftl/depth_camera_params.hpp> static const uint kShowBlockBorders = 0x0001; @@ -25,4 +28,6 @@ struct __align__(16) RayCastParams { bool m_useGradients; uint m_flags; + + DepthCameraParams camera; }; diff --git a/applications/reconstruct/include/ftl/ray_cast_sdf.hpp b/applications/reconstruct/include/ftl/ray_cast_sdf.hpp index 06e5f2e59f4365ae422b6517ad2d406d35d5c086..1fb9cc074a39262d25df69998e0a68e6500508e7 100644 --- a/applications/reconstruct/include/ftl/ray_cast_sdf.hpp +++ b/applications/reconstruct/include/ftl/ray_cast_sdf.hpp @@ -74,7 +74,7 @@ public: return params; } - void render(ftl::voxhash::HashData& hashData, ftl::voxhash::HashParams& hashParams, const DepthCameraParams& cameraParams, const Eigen::Matrix4f& lastRigidTransform); + void render(ftl::voxhash::HashData& hashData, ftl::voxhash::HashParams& hashParams, const DepthCameraParams& cameraParams, const Eigen::Matrix4f& lastRigidTransform, cudaStream_t); const RayCastData& getRayCastData(void) { return m_data; @@ -92,7 +92,7 @@ private: void create(const RayCastParams& params); void destroy(void); - void compactifyHashEntries(ftl::voxhash::HashData& hashData, ftl::voxhash::HashParams& hashParams); + void compactifyHashEntries(ftl::voxhash::HashData& hashData, ftl::voxhash::HashParams& hashParams, cudaStream_t); void rayIntervalSplatting(const ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const Eigen::Matrix4f& lastRigidTransform); // rasterize diff --git a/applications/reconstruct/include/ftl/ray_cast_util.hpp b/applications/reconstruct/include/ftl/ray_cast_util.hpp index f3b4a5ec8659073778e667b3ad5edc4366197447..fdf932a5c6d9342768f7f45e2609e2df3c7539ce 100644 --- a/applications/reconstruct/include/ftl/ray_cast_util.hpp +++ b/applications/reconstruct/include/ftl/ray_cast_util.hpp @@ -54,15 +54,15 @@ struct RayCastData { //printf("Allocate ray cast data: %lld \n", (unsigned long long)point_cloud_); } - __host__ - void updateParams(const RayCastParams& params) { - updateConstantRayCastParams(params); - } + //__host__ + //void updateParams(const RayCastParams& params) { + // updateConstantRayCastParams(params); + //} - __host__ void download(int *depth, uchar3 *colours, const RayCastParams& params) const { + __host__ void download(int *depth, uchar3 *colours, const RayCastParams& params, cudaStream_t stream) const { //printf("Download: %d,%d\n", params.m_width, params.m_height); - if (depth) cudaSafeCall(cudaMemcpy(depth, d_depth_i, sizeof(int) * params.m_width * params.m_height, cudaMemcpyDeviceToHost)); - if (colours) cudaSafeCall(cudaMemcpy(colours, d_colors, sizeof(uchar3) * params.m_width * params.m_height, cudaMemcpyDeviceToHost)); + if (depth) cudaSafeCall(cudaMemcpyAsync(depth, d_depth_i, sizeof(int) * params.m_width * params.m_height, cudaMemcpyDeviceToHost, stream)); + if (colours) cudaSafeCall(cudaMemcpyAsync(colours, d_colors, sizeof(uchar3) * params.m_width * params.m_height, cudaMemcpyDeviceToHost, stream)); } __host__ @@ -196,9 +196,9 @@ struct RayCastData { } __device__ - void traverseCoarseGridSimpleSampleAll(const ftl::voxhash::HashData& hash, const float3& worldCamPos, const float3& worldDir, const float3& camDir, const int3& dTid, float minInterval, float maxInterval) const + void traverseCoarseGridSimpleSampleAll(const ftl::voxhash::HashData& hash, const RayCastParams& rayCastParams, const float3& worldCamPos, const float3& worldDir, const float3& camDir, const int3& dTid, float minInterval, float maxInterval) const { - const RayCastParams& rayCastParams = c_rayCastParams; + //const RayCastParams& rayCastParams = c_rayCastParams; // Last Sample RayCastSample lastSample; lastSample.sdf = 0.0f; lastSample.alpha = 0.0f; lastSample.weight = 0; // lastSample.color = int3(0, 0, 0); @@ -236,7 +236,7 @@ struct RayCastData { float depth = alpha / depthToRayLength; // Convert ray length to depth depthToRayLength d_depth[dTid.y*rayCastParams.m_width+dTid.x] = depth; - d_depth3[dTid.y*rayCastParams.m_width+dTid.x] = DepthCameraData::kinectDepthToSkeleton(dTid.x, dTid.y, depth); + d_depth3[dTid.y*rayCastParams.m_width+dTid.x] = rayCastParams.camera.kinectDepthToSkeleton(dTid.x, dTid.y, depth); d_colors[dTid.y*rayCastParams.m_width+dTid.x] = make_uchar3(color2.x, color2.y, color2.z); if(rayCastParams.m_useGradients) diff --git a/applications/reconstruct/include/ftl/voxel_hash.hpp b/applications/reconstruct/include/ftl/voxel_hash.hpp index 77bc4a0a2b8349c574a60f5e8d17123022c80301..de8d0636ff0f3e7e149e9b4b6f5b1f7ea294d02f 100644 --- a/applications/reconstruct/include/ftl/voxel_hash.hpp +++ b/applications/reconstruct/include/ftl/voxel_hash.hpp @@ -318,10 +318,10 @@ struct HashData { } __device__ - bool isSDFBlockInCameraFrustumApprox(const int3& sdfBlock) { + bool isSDFBlockInCameraFrustumApprox(const HashParams &hashParams, const DepthCameraParams &camera, const int3& sdfBlock) { // NOTE (Nick): Changed, just assume all voxels are potentially in frustrum - //float3 posWorld = virtualVoxelPosToWorld(SDFBlockToVirtualVoxelPos(sdfBlock)) + c_hashParams.m_virtualVoxelSize * 0.5f * (SDF_BLOCK_SIZE - 1.0f); - //return DepthCameraData::isInCameraFrustumApprox(c_hashParams.m_rigidTransformInverse, posWorld); + //float3 posWorld = virtualVoxelPosToWorld(SDFBlockToVirtualVoxelPos(sdfBlock)) + hashParams.m_virtualVoxelSize * 0.5f * (SDF_BLOCK_SIZE - 1.0f); + //return camera.isInCameraFrustumApprox(hashParams.m_rigidTransformInverse, posWorld); return true; } diff --git a/applications/reconstruct/include/ftl/voxel_scene.hpp b/applications/reconstruct/include/ftl/voxel_scene.hpp index 913c38e62193d5af2e15bfec5a5c461df29aedfc..44cdf9750e3e1ffc2eb010720488fb63cc22ff00 100644 --- a/applications/reconstruct/include/ftl/voxel_scene.hpp +++ b/applications/reconstruct/include/ftl/voxel_scene.hpp @@ -4,6 +4,7 @@ #include <cuda_runtime.h> +#include <ftl/cuda_common.hpp> #include <ftl/rgbd/source.hpp> #include <ftl/configurable.hpp> #include <ftl/matrix_conversion.hpp> @@ -18,6 +19,7 @@ struct Cameras { ftl::rgbd::Source *source; DepthCameraData gpu; DepthCameraParams params; + cv::cuda::Stream stream; }; class SceneRep : public ftl::Configurable { @@ -75,13 +77,15 @@ class SceneRep : public ftl::Configurable { //! debug only! void debugHash(); + cudaStream_t getIntegrationStream() const { return integ_stream_; } + private: HashParams _parametersFromConfig(); void _create(const HashParams& params); void _destroy(); - void _alloc(const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, const unsigned int* d_bitMask); - void _compactifyVisible(); + void _alloc(const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, cudaStream_t); + void _compactifyVisible(const DepthCameraParams &camera); void _compactifyAllocated(); void _integrateDepthMap(const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams); void _garbageCollect(); @@ -96,6 +100,7 @@ class SceneRep : public ftl::Configurable { unsigned int m_frameCount; bool do_reset_; std::vector<Cameras> cameras_; + cudaStream_t integ_stream_; }; }; // namespace voxhash diff --git a/applications/reconstruct/src/camera_util.cu b/applications/reconstruct/src/camera_util.cu index a80d083e6320269e7f627e6b3d0bfea39a8f32fc..e4b434a05792bfa6fe62114118d7b2d40c69f845 100644 --- a/applications/reconstruct/src/camera_util.cu +++ b/applications/reconstruct/src/camera_util.cu @@ -383,7 +383,7 @@ extern "C" void setInvalidFloat4Map(float4* d_output, unsigned int width, unsign // Convert Depth to Camera Space Positions //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -__global__ void convertDepthFloatToCameraSpaceFloat3Device(float3* d_output, float* d_input, float4x4 intrinsicsInv, unsigned int width, unsigned int height, DepthCameraData depthCameraData) +/*__global__ void convertDepthFloatToCameraSpaceFloat3Device(float3* d_output, float* d_input, float4x4 intrinsicsInv, unsigned int width, unsigned int height, DepthCameraData depthCameraData) { const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; @@ -413,7 +413,7 @@ extern "C" void convertDepthFloatToCameraSpaceFloat3(float3* d_output, float* d_ cudaSafeCall(cudaDeviceSynchronize()); //cutilCheckMsg(__FUNCTION__); #endif -} +}*/ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Bilateral Filter Float Map diff --git a/applications/reconstruct/src/compactors.cu b/applications/reconstruct/src/compactors.cu index 3f350ca6be27e080e2ea99f3a6a1225a5ed84521..b1eb1eab784fd555769b6a89146d2614202b99ac 100644 --- a/applications/reconstruct/src/compactors.cu +++ b/applications/reconstruct/src/compactors.cu @@ -63,14 +63,14 @@ using ftl::voxhash::FREE_ENTRY; #endif }*/ -__global__ void compactifyVisibleKernel(HashData hashData) +__global__ void compactifyVisibleKernel(HashData hashData, HashParams hashParams, DepthCameraParams camera) { - const HashParams& hashParams = c_hashParams; + //const HashParams& hashParams = c_hashParams; const unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x; #ifdef COMPACTIFY_HASH_SIMPLE if (idx < hashParams.m_hashNumBuckets * HASH_BUCKET_SIZE) { if (hashData.d_hash[idx].ptr != FREE_ENTRY) { - if (hashData.isSDFBlockInCameraFrustumApprox(hashData.d_hash[idx].pos)) + if (hashData.isSDFBlockInCameraFrustumApprox(hashParams, camera, hashData.d_hash[idx].pos)) { int addr = atomicAdd(hashData.d_hashCompactifiedCounter, 1); hashData.d_hashCompactified[addr] = hashData.d_hash[idx]; @@ -85,7 +85,7 @@ __global__ void compactifyVisibleKernel(HashData hashData) int addrLocal = -1; if (idx < hashParams.m_hashNumBuckets * HASH_BUCKET_SIZE) { if (hashData.d_hash[idx].ptr != FREE_ENTRY) { - if (hashData.isSDFBlockInCameraFrustumApprox(hashData.d_hash[idx].pos)) + if (hashData.isSDFBlockInCameraFrustumApprox(hashParams, camera, hashData.d_hash[idx].pos)) { addrLocal = atomicAdd(&localCounter, 1); } @@ -107,21 +107,21 @@ __global__ void compactifyVisibleKernel(HashData hashData) #endif } -unsigned int ftl::cuda::compactifyVisible(HashData& hashData, const HashParams& hashParams) { +void ftl::cuda::compactifyVisible(HashData& hashData, const HashParams& hashParams, const DepthCameraParams &camera, cudaStream_t stream) { const unsigned int threadsPerBlock = COMPACTIFY_HASH_THREADS_PER_BLOCK; const dim3 gridSize((HASH_BUCKET_SIZE * hashParams.m_hashNumBuckets + threadsPerBlock - 1) / threadsPerBlock, 1); const dim3 blockSize(threadsPerBlock, 1); - cudaSafeCall(cudaMemset(hashData.d_hashCompactifiedCounter, 0, sizeof(int))); - compactifyVisibleKernel << <gridSize, blockSize >> >(hashData); - unsigned int res = 0; - cudaSafeCall(cudaMemcpy(&res, hashData.d_hashCompactifiedCounter, sizeof(unsigned int), cudaMemcpyDeviceToHost)); + cudaSafeCall(cudaMemsetAsync(hashData.d_hashCompactifiedCounter, 0, sizeof(int),stream)); + compactifyVisibleKernel << <gridSize, blockSize, 0, stream >> >(hashData, hashParams, camera); + //unsigned int res = 0; + //cudaSafeCall(cudaMemcpyAsync(&res, hashData.d_hashCompactifiedCounter, sizeof(unsigned int), cudaMemcpyDeviceToHost, stream)); #ifdef _DEBUG cudaSafeCall(cudaDeviceSynchronize()); //cutilCheckMsg(__FUNCTION__); #endif - return res; + //return res; } __global__ void compactifyAllocatedKernel(HashData hashData) @@ -162,19 +162,19 @@ __global__ void compactifyAllocatedKernel(HashData hashData) #endif } -unsigned int ftl::cuda::compactifyAllocated(HashData& hashData, const HashParams& hashParams) { +void ftl::cuda::compactifyAllocated(HashData& hashData, const HashParams& hashParams, cudaStream_t stream) { const unsigned int threadsPerBlock = COMPACTIFY_HASH_THREADS_PER_BLOCK; const dim3 gridSize((HASH_BUCKET_SIZE * hashParams.m_hashNumBuckets + threadsPerBlock - 1) / threadsPerBlock, 1); const dim3 blockSize(threadsPerBlock, 1); - cudaSafeCall(cudaMemset(hashData.d_hashCompactifiedCounter, 0, sizeof(int))); - compactifyAllocatedKernel << <gridSize, blockSize >> >(hashData); - unsigned int res = 0; - cudaSafeCall(cudaMemcpy(&res, hashData.d_hashCompactifiedCounter, sizeof(unsigned int), cudaMemcpyDeviceToHost)); + cudaSafeCall(cudaMemsetAsync(hashData.d_hashCompactifiedCounter, 0, sizeof(int), stream)); + compactifyAllocatedKernel << <gridSize, blockSize, 0, stream >> >(hashData); + //unsigned int res = 0; + //cudaSafeCall(cudaMemcpyAsync(&res, hashData.d_hashCompactifiedCounter, sizeof(unsigned int), cudaMemcpyDeviceToHost, stream)); #ifdef _DEBUG cudaSafeCall(cudaDeviceSynchronize()); //cutilCheckMsg(__FUNCTION__); #endif - return res; + //return res; } diff --git a/applications/reconstruct/src/compactors.hpp b/applications/reconstruct/src/compactors.hpp index 9fb961d6c809b21d570175fc9b2e6fc506463fb8..a633cfd22d3ec593c6fb9e3be06977d514eacd84 100644 --- a/applications/reconstruct/src/compactors.hpp +++ b/applications/reconstruct/src/compactors.hpp @@ -7,10 +7,10 @@ namespace ftl { namespace cuda { // Compact visible -unsigned int compactifyVisible(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams); +void compactifyVisible(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const DepthCameraParams &camera, cudaStream_t); // Compact allocated -unsigned int compactifyAllocated(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams); +void compactifyAllocated(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, cudaStream_t); // Compact visible surfaces diff --git a/applications/reconstruct/src/garbage.cu b/applications/reconstruct/src/garbage.cu new file mode 100644 index 0000000000000000000000000000000000000000..f0ba686e438bf2eb748a35d822bdf4d09b4811bb --- /dev/null +++ b/applications/reconstruct/src/garbage.cu @@ -0,0 +1,155 @@ +#include <ftl/voxel_hash.hpp> +#include "garbage.hpp" + +using namespace ftl::voxhash; + +#define T_PER_BLOCK 8 +#define NUM_CUDA_BLOCKS 10000 + +__global__ void starveVoxelsKernel(HashData hashData) { + + // Stride over all allocated blocks + for (int bi=blockIdx.x; bi<*hashData.d_hashCompactifiedCounter; bi+=NUM_CUDA_BLOCKS) { + + const HashEntry& entry = hashData.d_hashCompactified[bi]; + + //is typically exectued only every n'th frame + int weight = hashData.d_SDFBlocks[entry.ptr + threadIdx.x].weight; + weight = max(0, weight-2); + hashData.d_SDFBlocks[entry.ptr + threadIdx.x].weight = weight; //CHECK Remove to totally clear previous frame (Nick) + + } +} + +void ftl::cuda::starveVoxels(HashData& hashData, const HashParams& hashParams) { + const unsigned int threadsPerBlock = SDF_BLOCK_SIZE*SDF_BLOCK_SIZE*SDF_BLOCK_SIZE; + const dim3 gridSize(NUM_CUDA_BLOCKS, 1); + const dim3 blockSize(threadsPerBlock, 1); + + //if (hashParams.m_numOccupiedBlocks > 0) { + starveVoxelsKernel << <gridSize, blockSize >> >(hashData); + //} +#ifdef _DEBUG + cudaSafeCall(cudaDeviceSynchronize()); + //cutilCheckMsg(__FUNCTION__); +#endif +} + + +__shared__ float shared_MinSDF[SDF_BLOCK_SIZE * SDF_BLOCK_SIZE * SDF_BLOCK_SIZE / 2]; +__shared__ uint shared_MaxWeight[SDF_BLOCK_SIZE * SDF_BLOCK_SIZE * SDF_BLOCK_SIZE / 2]; + + +__global__ void garbageCollectIdentifyKernel(HashData hashData) { + + // Stride over all allocated blocks + for (int bi=blockIdx.x; bi<*hashData.d_hashCompactifiedCounter; bi+=NUM_CUDA_BLOCKS) { + + const HashEntry& entry = hashData.d_hashCompactified[bi]; + + // Entire block was not touched in this frame, so remove (Nick) + /*if (entry.flags != cameraParams.flags & 0xFF) { + hashData.d_hashDecision[hashIdx] = 1; + return; + }*/ + + //uint h = hashData.computeHashPos(entry.pos); + //hashData.d_hashDecision[hashIdx] = 1; + //if (hashData.d_hashBucketMutex[h] == LOCK_ENTRY) return; + + //if (entry.ptr == FREE_ENTRY) return; //should never happen since we did compactify before + //const uint linBlockSize = SDF_BLOCK_SIZE * SDF_BLOCK_SIZE * SDF_BLOCK_SIZE; + + const unsigned int idx0 = entry.ptr + 2*threadIdx.x+0; + const unsigned int idx1 = entry.ptr + 2*threadIdx.x+1; + + Voxel v0 = hashData.d_SDFBlocks[idx0]; + Voxel v1 = hashData.d_SDFBlocks[idx1]; + + if (v0.weight == 0) v0.sdf = PINF; + if (v1.weight == 0) v1.sdf = PINF; + + shared_MinSDF[threadIdx.x] = min(fabsf(v0.sdf), fabsf(v1.sdf)); //init shared memory + shared_MaxWeight[threadIdx.x] = max(v0.weight, v1.weight); + +#pragma unroll 1 + for (uint stride = 2; stride <= blockDim.x; stride <<= 1) { + __syncthreads(); + if ((threadIdx.x & (stride-1)) == (stride-1)) { + shared_MinSDF[threadIdx.x] = min(shared_MinSDF[threadIdx.x-stride/2], shared_MinSDF[threadIdx.x]); + shared_MaxWeight[threadIdx.x] = max(shared_MaxWeight[threadIdx.x-stride/2], shared_MaxWeight[threadIdx.x]); + } + } + + __syncthreads(); + + if (threadIdx.x == blockDim.x - 1) { + float minSDF = shared_MinSDF[threadIdx.x]; + uint maxWeight = shared_MaxWeight[threadIdx.x]; + + float t = hashData.getTruncation(5.0f); // NICK should not be hardcoded //MATTHIAS TODO check whether this is a reasonable metric + + if (minSDF >= t || maxWeight == 0) { + hashData.d_hashDecision[bi] = 1; + } else { + hashData.d_hashDecision[bi] = 0; + } + } + + } +} + +void ftl::cuda::garbageCollectIdentify(HashData& hashData, const HashParams& hashParams, cudaStream_t stream) { + + const unsigned int threadsPerBlock = SDF_BLOCK_SIZE * SDF_BLOCK_SIZE * SDF_BLOCK_SIZE / 2; + const dim3 gridSize(NUM_CUDA_BLOCKS, 1); + const dim3 blockSize(threadsPerBlock, 1); + + //if (hashParams.m_numOccupiedBlocks > 0) { + garbageCollectIdentifyKernel << <gridSize, blockSize, 0, stream >> >(hashData); + //} +#ifdef _DEBUG + cudaSafeCall(cudaDeviceSynchronize()); + //cutilCheckMsg(__FUNCTION__); +#endif +} + + +__global__ void garbageCollectFreeKernel(HashData hashData) { + + // Stride over all allocated blocks + for (int bi=blockIdx.x*blockDim.x + threadIdx.x; bi<*hashData.d_hashCompactifiedCounter; bi+=NUM_CUDA_BLOCKS) { + + if (hashData.d_hashDecision[bi] != 0) { //decision to delete the hash entry + + const HashEntry& entry = hashData.d_hashCompactified[bi]; + //if (entry.ptr == FREE_ENTRY) return; //should never happen since we did compactify before + + if (hashData.deleteHashEntryElement(entry.pos)) { //delete hash entry from hash (and performs heap append) + const uint linBlockSize = SDF_BLOCK_SIZE * SDF_BLOCK_SIZE * SDF_BLOCK_SIZE; + + #pragma unroll 1 + for (uint i = 0; i < linBlockSize; i++) { //clear sdf block: CHECK TODO another kernel? + hashData.deleteVoxel(entry.ptr + i); + } + } + } + + } +} + + +void ftl::cuda::garbageCollectFree(HashData& hashData, const HashParams& hashParams, cudaStream_t stream) { + + const unsigned int threadsPerBlock = T_PER_BLOCK*T_PER_BLOCK; + const dim3 gridSize(NUM_CUDA_BLOCKS, 1); // (hashParams.m_numOccupiedBlocks + threadsPerBlock - 1) / threadsPerBlock + const dim3 blockSize(threadsPerBlock, 1); + + //if (hashParams.m_numOccupiedBlocks > 0) { + garbageCollectFreeKernel << <gridSize, blockSize, 0, stream >> >(hashData); + //} +#ifdef _DEBUG + cudaSafeCall(cudaDeviceSynchronize()); + //cutilCheckMsg(__FUNCTION__); +#endif +} diff --git a/applications/reconstruct/src/garbage.hpp b/applications/reconstruct/src/garbage.hpp new file mode 100644 index 0000000000000000000000000000000000000000..a7bdbc245e04a9b65bdf68c9a5cd0af2a39bb0db --- /dev/null +++ b/applications/reconstruct/src/garbage.hpp @@ -0,0 +1,14 @@ +#ifndef _FTL_RECONSTRUCTION_GARBAGE_HPP_ +#define _FTL_RECONSTRUCTION_GARBAGE_HPP_ + +namespace ftl { +namespace cuda { + +void starveVoxels(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams); +void garbageCollectIdentify(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, cudaStream_t stream); +void garbageCollectFree(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, cudaStream_t stream); + +} +} + +#endif // _FTL_RECONSTRUCTION_GARBAGE_HPP_ diff --git a/applications/reconstruct/src/integrators.cu b/applications/reconstruct/src/integrators.cu new file mode 100644 index 0000000000000000000000000000000000000000..07f0834d43ee8ac6e44cc8e1b22cf4c99052ba2f --- /dev/null +++ b/applications/reconstruct/src/integrators.cu @@ -0,0 +1,191 @@ +#include "integrators.hpp" +//#include <ftl/ray_cast_params.hpp> +#include <vector_types.h> +#include <cuda_runtime.h> +#include <ftl/cuda_matrix_util.hpp> + +#define T_PER_BLOCK 8 + +using ftl::voxhash::HashData; +using ftl::voxhash::HashParams; +using ftl::voxhash::Voxel; +using ftl::voxhash::HashEntry; +using ftl::voxhash::FREE_ENTRY; + +__device__ float4 make_float4(uchar4 c) { + return make_float4(static_cast<float>(c.x), static_cast<float>(c.y), static_cast<float>(c.z), static_cast<float>(c.w)); +} + +inline __device__ uchar4 bilinearFilterColor(const DepthCameraParams& cameraParams, const float2& screenPos, cudaTextureObject_t colorTextureRef) { + //const DepthCameraParams& cameraParams = c_depthCameraParams; + const int imageWidth = cameraParams.m_imageWidth; + const int imageHeight = cameraParams.m_imageHeight; + const int2 p00 = make_int2(screenPos.x+0.5f, screenPos.y+0.5f); + const int2 dir = sign(make_float2(screenPos.x - p00.x, screenPos.y - p00.y)); + + const int2 p01 = p00 + make_int2(0.0f, dir.y); + const int2 p10 = p00 + make_int2(dir.x, 0.0f); + const int2 p11 = p00 + make_int2(dir.x, dir.y); + + const float alpha = (screenPos.x - p00.x)*dir.x; + const float beta = (screenPos.y - p00.y)*dir.y; + + float4 s0 = make_float4(0.0f, 0.0f, 0.0f, 0.0f); float w0 = 0.0f; + if(p00.x >= 0 && p00.x < imageWidth && p00.y >= 0 && p00.y < imageHeight) { uchar4 v00 = tex2D<uchar4>(colorTextureRef, p00.x, p00.y); if(v00.x != 0) { s0 += (1.0f-alpha)*make_float4(v00); w0 += (1.0f-alpha); } } + if(p10.x >= 0 && p10.x < imageWidth && p10.y >= 0 && p10.y < imageHeight) { uchar4 v10 = tex2D<uchar4>(colorTextureRef, p10.x, p10.y); if(v10.x != 0) { s0 += alpha *make_float4(v10); w0 += alpha ; } } + + float4 s1 = make_float4(0.0f, 0.0f, 0.0f, 0.0f); float w1 = 0.0f; + if(p01.x >= 0 && p01.x < imageWidth && p01.y >= 0 && p01.y < imageHeight) { uchar4 v01 = tex2D<uchar4>(colorTextureRef, p01.x, p01.y); if(v01.x != 0) { s1 += (1.0f-alpha)*make_float4(v01); w1 += (1.0f-alpha);} } + if(p11.x >= 0 && p11.x < imageWidth && p11.y >= 0 && p11.y < imageHeight) { uchar4 v11 = tex2D<uchar4>(colorTextureRef, p11.x, p11.y); if(v11.x != 0) { s1 += alpha *make_float4(v11); w1 += alpha ;} } + + const float4 p0 = s0/w0; + const float4 p1 = s1/w1; + + float4 ss = make_float4(0.0f, 0.0f, 0.0f, 0.0f); float ww = 0.0f; + if(w0 > 0.0f) { ss += (1.0f-beta)*p0; ww += (1.0f-beta); } + if(w1 > 0.0f) { ss += beta *p1; ww += beta ; } + + if(ww > 0.0f) { + ss /= ww; + return make_uchar4(ss.x,ss.y,ss.z,ss.w); + } else return make_uchar4(0, 0, 0, 0); +} + +__device__ float colourDistance(const uchar4 &c1, const uchar3 &c2) { + float x = c1.x-c2.x; + float y = c1.y-c2.y; + float z = c1.z-c2.z; + return x*x + y*y + z*z; +} + +#define NUM_CUDA_BLOCKS 10000 + +__global__ void integrateDepthMapKernel(HashData hashData, HashParams hashParams, DepthCameraParams cameraParams, cudaTextureObject_t depthT, cudaTextureObject_t colourT) { + //const HashParams& hashParams = c_hashParams; + //const DepthCameraParams& cameraParams = c_depthCameraParams; + + // Stride over all allocated blocks + for (int bi=blockIdx.x; bi<*hashData.d_hashCompactifiedCounter; bi+=NUM_CUDA_BLOCKS) { + + //TODO check if we should load this in shared memory + HashEntry& entry = hashData.d_hashCompactified[bi]; + //if (entry.ptr == FREE_ENTRY) { + // printf("invliad integrate"); + // return; //should never happen since we did the compactification before + //} + + int3 pi_base = hashData.SDFBlockToVirtualVoxelPos(entry.pos); + + uint i = threadIdx.x; //inside of an SDF block + int3 pi = pi_base + make_int3(hashData.delinearizeVoxelIndex(i)); + float3 pf = hashData.virtualVoxelPosToWorld(pi); + + pf = hashParams.m_rigidTransformInverse * pf; + uint2 screenPos = make_uint2(cameraParams.cameraToKinectScreenInt(pf)); + + // For this voxel in hash, get its screen position and check it is on screen + if (screenPos.x < cameraParams.m_imageWidth && screenPos.y < cameraParams.m_imageHeight) { //on screen + + //float depth = g_InputDepth[screenPos]; + float depth = tex2D<float>(depthT, screenPos.x, screenPos.y); + //if (depth > 20.0f) return; + + uchar4 color = make_uchar4(0, 0, 0, 0); + //if (cameraData.d_colorData) { + color = tex2D<uchar4>(colourT, screenPos.x, screenPos.y); + //color = bilinearFilterColor(cameraData.cameraToKinectScreenFloat(pf)); + //} + + //printf("screen pos %d\n", color.x); + //return; + + // Depth is within accepted max distance from camera + if (depth > 0.01f && depth < hashParams.m_maxIntegrationDistance) { // valid depth and color (Nick: removed colour check) + float depthZeroOne = cameraParams.cameraToKinectProjZ(depth); + + // Calculate SDF of this voxel wrt the depth map value + float sdf = depth - pf.z; + float truncation = hashData.getTruncation(depth); + + // Is this voxel close enough to cam for depth map value + // CHECK Nick: If is too close then free space violation so remove? + // This isn't enough if the disparity has occlusions that don't cause violations + // Could RGB changes also cause removals if depth can't be confirmed? + /*if (sdf > truncation) { + uint idx = entry.ptr + i; + hashData.d_SDFBlocks[idx].weight = 0; + //hashData.d_SDFBlocks[idx].sdf = PINF; + hashData.d_SDFBlocks[idx].color = make_uchar3(0,0,0); + }*/ + if (sdf > -truncation) // && depthZeroOne >= 0.0f && depthZeroOne <= 1.0f) //check if in truncation range should already be made in depth map computation + { + /*if (sdf >= 0.0f) { + sdf = fminf(truncation, sdf); + } else { + sdf = fmaxf(-truncation, sdf); + }*/ + + + //printf("SDF: %f\n", sdf); + //float weightUpdate = g_WeightSample; + //weightUpdate = (1-depthZeroOne)*5.0f + depthZeroOne*0.05f; + //weightUpdate *= g_WeightSample; + float weightUpdate = max(hashParams.m_integrationWeightSample * 1.5f * (1.0f-depthZeroOne), 1.0f); + + Voxel curr; //construct current voxel + curr.sdf = sdf; + curr.weight = weightUpdate; + curr.color = make_uchar3(color.x, color.y, color.z); + + + uint idx = entry.ptr + i; + + //if (entry.flags != cameraParams.flags & 0xFF) { + // entry.flags = cameraParams.flags & 0xFF; + //hashData.d_SDFBlocks[idx].color = make_uchar3(0,0,0); + //} + + Voxel newVoxel; + //if (color.x == MINF) hashData.combineVoxelDepthOnly(hashData.d_SDFBlocks[idx], curr, newVoxel); + //else hashData.combineVoxel(hashData.d_SDFBlocks[idx], curr, newVoxel); + hashData.combineVoxel(hashData.d_SDFBlocks[idx], curr, newVoxel); + + hashData.d_SDFBlocks[idx] = newVoxel; + + //Voxel prev = getVoxel(g_SDFBlocksSDFUAV, g_SDFBlocksRGBWUAV, idx); + //Voxel newVoxel = combineVoxel(curr, prev); + //setVoxel(g_SDFBlocksSDFUAV, g_SDFBlocksRGBWUAV, idx, newVoxel); + } + } else { + // Depth is invalid so what to do here? + // TODO(Nick) Use past voxel if available (set weight from 0 to 1) + + //uint idx = entry.ptr + i; + //float coldist = colourDistance(color, hashData.d_SDFBlocks[idx].color); + //if ((depth > 39.99f || depth < 0.01f) && coldist > 100.0f) { + //hashData.d_SDFBlocks[idx].color = make_uchar3(0,0,(uchar)(coldist)); + // hashData.d_SDFBlocks[idx].weight = hashData.d_SDFBlocks[idx].weight >> 1; + //} + } + } + + } +} + + +void ftl::cuda::integrateDepthMap(HashData& hashData, const HashParams& hashParams, + const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, cudaStream_t stream) { + const unsigned int threadsPerBlock = SDF_BLOCK_SIZE*SDF_BLOCK_SIZE*SDF_BLOCK_SIZE; + const dim3 gridSize(NUM_CUDA_BLOCKS, 1); + const dim3 blockSize(threadsPerBlock, 1); + + //if (hashParams.m_numOccupiedBlocks > 0) { //this guard is important if there is no depth in the current frame (i.e., no blocks were allocated) + integrateDepthMapKernel << <gridSize, blockSize, 0, stream >> >(hashData, hashParams, depthCameraParams, depthCameraData.depth_obj_, depthCameraData.colour_obj_); + //} + + //cudaSafeCall( cudaGetLastError() ); +#ifdef _DEBUG + cudaSafeCall(cudaDeviceSynchronize()); + //cutilCheckMsg(__FUNCTION__); +#endif +} diff --git a/applications/reconstruct/src/integrators.hpp b/applications/reconstruct/src/integrators.hpp new file mode 100644 index 0000000000000000000000000000000000000000..df9f425c97e26b3551baa2ae7062c51a1f6bef88 --- /dev/null +++ b/applications/reconstruct/src/integrators.hpp @@ -0,0 +1,16 @@ +#ifndef _FTL_RECONSTRUCTION_INTEGRATORS_HPP_ +#define _FTL_RECONSTRUCTION_INTEGRATORS_HPP_ + +#include <ftl/voxel_hash.hpp> +#include <ftl/depth_camera.hpp> + +namespace ftl { +namespace cuda { + +void integrateDepthMap(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, + const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, cudaStream_t stream); + +} +} + +#endif // _FTL_RECONSTRUCTION_INTEGRATORS_HPP_ diff --git a/applications/reconstruct/src/ray_cast_sdf.cpp b/applications/reconstruct/src/ray_cast_sdf.cpp index 97d75dee7ccd3ec50ce6fef4dac469799f57e60d..c7c04a7293a1e92fac3de0f46c14a4eb30098b3f 100644 --- a/applications/reconstruct/src/ray_cast_sdf.cpp +++ b/applications/reconstruct/src/ray_cast_sdf.cpp @@ -11,7 +11,7 @@ extern "C" void renderCS( const ftl::voxhash::HashData& hashData, const RayCastData &rayCastData, - const RayCastParams &rayCastParams); + const RayCastParams &rayCastParams, cudaStream_t stream); extern "C" void computeNormals(float4* d_output, float3* d_input, unsigned int width, unsigned int height); extern "C" void convertDepthFloatToCameraSpaceFloat3(float3* d_output, float* d_input, float4x4 intrinsicsInv, unsigned int width, unsigned int height, const DepthCameraData& depthCameraData); @@ -20,7 +20,7 @@ extern "C" void resetRayIntervalSplatCUDA(RayCastData& data, const RayCastParams extern "C" void rayIntervalSplatCUDA(const ftl::voxhash::HashData& hashData, const RayCastData &rayCastData, const RayCastParams &rayCastParams); -extern "C" void nickRenderCUDA(const ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const RayCastData &rayCastData, const RayCastParams ¶ms); +extern "C" void nickRenderCUDA(const ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const RayCastData &rayCastData, const RayCastParams ¶ms, cudaStream_t stream); @@ -40,51 +40,52 @@ void CUDARayCastSDF::destroy(void) //extern "C" unsigned int compactifyHashAllInOneCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams); -void CUDARayCastSDF::compactifyHashEntries(ftl::voxhash::HashData& hashData, ftl::voxhash::HashParams& hashParams) { //const DepthCameraData& depthCameraData) { +void CUDARayCastSDF::compactifyHashEntries(ftl::voxhash::HashData& hashData, ftl::voxhash::HashParams& hashParams, cudaStream_t stream) { //const DepthCameraData& depthCameraData) { - hashParams.m_numOccupiedBlocks = ftl::cuda::compactifyVisible(hashData, hashParams); //this version uses atomics over prefix sums, which has a much better performance - std::cout << "Ray blocks = " << hashParams.m_numOccupiedBlocks << std::endl; - hashData.updateParams(hashParams); //make sure numOccupiedBlocks is updated on the GPU + ftl::cuda::compactifyVisible(hashData, hashParams, m_params.camera, stream); //this version uses atomics over prefix sums, which has a much better performance + //std::cout << "Ray blocks = " << hashParams.m_numOccupiedBlocks << std::endl; + //hashData.updateParams(hashParams); //make sure numOccupiedBlocks is updated on the GPU } -void CUDARayCastSDF::render(ftl::voxhash::HashData& hashData, ftl::voxhash::HashParams& hashParams, const DepthCameraParams& cameraParams, const Eigen::Matrix4f& lastRigidTransform) +void CUDARayCastSDF::render(ftl::voxhash::HashData& hashData, ftl::voxhash::HashParams& hashParams, const DepthCameraParams& cameraParams, const Eigen::Matrix4f& lastRigidTransform, cudaStream_t stream) { - updateConstantDepthCameraParams(cameraParams); + //updateConstantDepthCameraParams(cameraParams); //rayIntervalSplatting(hashData, hashParams, lastRigidTransform); //m_data.d_rayIntervalSplatMinArray = m_rayIntervalSplatting.mapMinToCuda(); //m_data.d_rayIntervalSplatMaxArray = m_rayIntervalSplatting.mapMaxToCuda(); - m_params.m_numOccupiedSDFBlocks = hashParams.m_numOccupiedBlocks; + m_params.camera = cameraParams; + //m_params.m_numOccupiedSDFBlocks = hashParams.m_numOccupiedBlocks; m_params.m_viewMatrix = MatrixConversion::toCUDA(lastRigidTransform.inverse()); m_params.m_viewMatrixInverse = MatrixConversion::toCUDA(lastRigidTransform); - m_data.updateParams(m_params); + //m_data.updateParams(m_params); - compactifyHashEntries(hashData, hashParams); + compactifyHashEntries(hashData, hashParams, stream); - if (hash_render_) nickRenderCUDA(hashData, hashParams, m_data, m_params); - else renderCS(hashData, m_data, m_params); + if (hash_render_) nickRenderCUDA(hashData, hashParams, m_data, m_params, stream); + else renderCS(hashData, m_data, m_params, stream); //convertToCameraSpace(cameraData); - if (!m_params.m_useGradients) - { - computeNormals(m_data.d_normals, m_data.d_depth3, m_params.m_width, m_params.m_height); - } + //if (!m_params.m_useGradients) + //{ + // computeNormals(m_data.d_normals, m_data.d_depth3, m_params.m_width, m_params.m_height); + //} //m_rayIntervalSplatting.unmapCuda(); } -void CUDARayCastSDF::convertToCameraSpace(const DepthCameraData& cameraData) +/*void CUDARayCastSDF::convertToCameraSpace(const DepthCameraData& cameraData) { convertDepthFloatToCameraSpaceFloat3(m_data.d_depth3, m_data.d_depth, m_params.m_intrinsicsInverse, m_params.m_width, m_params.m_height, cameraData); if(!m_params.m_useGradients) { computeNormals(m_data.d_normals, m_data.d_depth3, m_params.m_width, m_params.m_height); } -} +}*/ -void CUDARayCastSDF::rayIntervalSplatting(const ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const Eigen::Matrix4f& lastRigidTransform) +/*void CUDARayCastSDF::rayIntervalSplatting(const ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const Eigen::Matrix4f& lastRigidTransform) { if (hashParams.m_numOccupiedBlocks == 0) return; @@ -100,4 +101,4 @@ void CUDARayCastSDF::rayIntervalSplatting(const ftl::voxhash::HashData& hashData //don't use ray interval splatting (cf CUDARayCastSDF.cu -> line 40 //m_rayIntervalSplatting.rayIntervalSplatting(DXUTGetD3D11DeviceContext(), hashData, cameraData, m_data, m_params, m_params.m_numOccupiedSDFBlocks*6); -} \ No newline at end of file +}*/ \ No newline at end of file diff --git a/applications/reconstruct/src/ray_cast_sdf.cu b/applications/reconstruct/src/ray_cast_sdf.cu index bf305af94a530cc982ea61cdc8d235711cb7a752..09795894869cdf6138b265020d97a5cd25a443dd 100644 --- a/applications/reconstruct/src/ray_cast_sdf.cu +++ b/applications/reconstruct/src/ray_cast_sdf.cu @@ -10,15 +10,17 @@ #define T_PER_BLOCK 8 #define NUM_GROUPS_X 1024 +#define NUM_CUDA_BLOCKS 10000 + //texture<float, cudaTextureType2D, cudaReadModeElementType> rayMinTextureRef; //texture<float, cudaTextureType2D, cudaReadModeElementType> rayMaxTextureRef; -__global__ void renderKernel(ftl::voxhash::HashData hashData, RayCastData rayCastData) +__global__ void renderKernel(ftl::voxhash::HashData hashData, RayCastData rayCastData, RayCastParams rayCastParams) { const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; - const RayCastParams& rayCastParams = c_rayCastParams; + //const RayCastParams& rayCastParams = c_rayCastParams; if (x < rayCastParams.m_width && y < rayCastParams.m_height) { rayCastData.d_depth[y*rayCastParams.m_width+x] = MINF; @@ -26,7 +28,7 @@ __global__ void renderKernel(ftl::voxhash::HashData hashData, RayCastData rayCas rayCastData.d_normals[y*rayCastParams.m_width+x] = make_float4(MINF,MINF,MINF,MINF); rayCastData.d_colors[y*rayCastParams.m_width+x] = make_uchar3(0,0,0); - float3 camDir = normalize(DepthCameraData::kinectProjToCamera(x, y, 1.0f)); + float3 camDir = normalize(rayCastParams.camera.kinectProjToCamera(x, y, 1.0f)); float3 worldCamPos = rayCastParams.m_viewMatrixInverse * make_float3(0.0f, 0.0f, 0.0f); float4 w = rayCastParams.m_viewMatrixInverse * make_float4(camDir, 0.0f); float3 worldDir = normalize(make_float3(w.x, w.y, w.z)); @@ -50,11 +52,11 @@ __global__ void renderKernel(ftl::voxhash::HashData hashData, RayCastData rayCas // printf("ERROR (%d,%d): [ %f, %f ]\n", x, y, minInterval, maxInterval); //} - rayCastData.traverseCoarseGridSimpleSampleAll(hashData, worldCamPos, worldDir, camDir, make_int3(x,y,1), minInterval, maxInterval); + rayCastData.traverseCoarseGridSimpleSampleAll(hashData, rayCastParams, worldCamPos, worldDir, camDir, make_int3(x,y,1), minInterval, maxInterval); } } -extern "C" void renderCS(const ftl::voxhash::HashData& hashData, const RayCastData &rayCastData, const RayCastParams &rayCastParams) +extern "C" void renderCS(const ftl::voxhash::HashData& hashData, const RayCastData &rayCastData, const RayCastParams &rayCastParams, cudaStream_t stream) { const dim3 gridSize((rayCastParams.m_width + T_PER_BLOCK - 1)/T_PER_BLOCK, (rayCastParams.m_height + T_PER_BLOCK - 1)/T_PER_BLOCK); @@ -66,7 +68,7 @@ extern "C" void renderCS(const ftl::voxhash::HashData& hashData, const RayCastDa //printf("Ray casting render...\n"); - renderKernel<<<gridSize, blockSize>>>(hashData, rayCastData); + renderKernel<<<gridSize, blockSize, 0, stream>>>(hashData, rayCastData, rayCastParams); #ifdef _DEBUG cudaSafeCall(cudaDeviceSynchronize()); @@ -78,12 +80,12 @@ extern "C" void renderCS(const ftl::voxhash::HashData& hashData, const RayCastDa // Nicks render approach //////////////////////////////////////////////////////////////////////////////// -__global__ void clearDepthKernel(ftl::voxhash::HashData hashData, RayCastData rayCastData) +__global__ void clearDepthKernel(ftl::voxhash::HashData hashData, RayCastData rayCastData, RayCastParams rayCastParams) { const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; - const RayCastParams& rayCastParams = c_rayCastParams; + //const RayCastParams& rayCastParams = c_rayCastParams; if (x < rayCastParams.m_width && y < rayCastParams.m_height) { rayCastData.d_depth_i[y*rayCastParams.m_width+x] = 0x7FFFFFFF; //PINF; @@ -174,10 +176,14 @@ __global__ void nickRenderKernel(ftl::voxhash::HashData hashData, RayCastData ra __shared__ ftl::voxhash::Voxel voxels[SDF_BLOCK_BUFFER]; __shared__ ftl::voxhash::HashEntry blocks[8]; + // Stride over all allocated blocks + for (int bi=blockIdx.x; bi<*hashData.d_hashCompactifiedCounter; bi+=NUM_CUDA_BLOCKS) { + __syncthreads(); + const uint i = threadIdx.x; //inside of an SDF block //TODO (Nick) Either don't use compactified or re-run compacitification using render cam frustrum - if (i == 0) blocks[0] = hashData.d_hashCompactified[blockIdx.x]; + if (i == 0) blocks[0] = hashData.d_hashCompactified[bi]; //else if (i <= 7) blocks[i] = hashData.getHashEntryForSDFBlockPos(blockDelinear(blocks[0].pos, i)); // Make sure all hash entries are cached @@ -253,7 +259,7 @@ __global__ void nickRenderKernel(ftl::voxhash::HashData hashData, RayCastData ra // voxels[ix[3]].weight == 0 || voxels[ix[4]].weight == 0 || voxels[ix[5]].weight == 0 || // voxels[ix[6]].weight == 0; // || voxels[ix[7]].weight == 0; //if (missweight) return; - if (voxels[j].weight == 0) return; + if (voxels[j].weight == 0) continue; // Trilinear Interpolation (simple and fast) /*float3 colorFloat = make_float3(0.0f, 0.0f, 0.0f); @@ -286,7 +292,7 @@ __global__ void nickRenderKernel(ftl::voxhash::HashData hashData, RayCastData ra bool is_surface = ((params.m_flags & kShowBlockBorders) && edgeX + edgeY + edgeZ >= 2); if (is_surface) voxels[j].color = make_uchar3(255,(vp.x == 0 && vp.y == 0 && vp.z == 0) ? 255 : 0,0); - if (!is_surface && voxels[j].sdf >= 0.0f) return; + if (!is_surface && voxels[j].sdf >= 0.0f) continue; //if (vp.z == 7) voxels[j].color = make_uchar3(0,255,(voxels[j].sdf < 0.0f) ? 255 : 0); @@ -313,13 +319,13 @@ __global__ void nickRenderKernel(ftl::voxhash::HashData hashData, RayCastData ra // Only for surface voxels, work out screen coordinates // TODO Could adjust weights, strengthen on surface, weaken otherwise?? - if (!is_surface) return; + if (!is_surface) continue; const float3 camPos = params.m_viewMatrix * worldPos; - const float2 screenPosf = DepthCameraData::cameraToKinectScreenFloat(camPos); + const float2 screenPosf = params.camera.cameraToKinectScreenFloat(camPos); const uint2 screenPos = make_uint2(make_int2(screenPosf)); // + make_float2(0.5f, 0.5f) - if (camPos.z < params.m_minDepth) return; + if (camPos.z < params.m_minDepth) continue; /*if (screenPos.x < params.m_width && screenPos.y < params.m_height && rayCastData.d_depth[(screenPos.y)*params.m_width+screenPos.x] > camPos.z) { @@ -331,7 +337,7 @@ __global__ void nickRenderKernel(ftl::voxhash::HashData hashData, RayCastData ra // For this voxel in hash, get its screen position and check it is on screen // Convert depth map to int by x1000 and use atomicMin - const int pixsize = static_cast<int>((c_hashParams.m_virtualVoxelSize*c_depthCameraParams.fx/(camPos.z*0.8f)))+1; // Magic number increase voxel to ensure coverage + const int pixsize = static_cast<int>((c_hashParams.m_virtualVoxelSize*params.camera.fx/(camPos.z*0.8f)))+1; // Magic number increase voxel to ensure coverage int pixsizeX = pixsize; // Max voxel pixels int pixsizeY = pixsize; @@ -339,7 +345,7 @@ __global__ void nickRenderKernel(ftl::voxhash::HashData hashData, RayCastData ra for (int x=0; x<pixsizeX; x++) { // TODO(Nick) Within a window, check pixels that have same voxel id // Then trilinear interpolate between current voxel and neighbors. - const float3 pixelWorldPos = params.m_viewMatrixInverse * DepthCameraData::kinectDepthToSkeleton(screenPos.x+x,screenPos.y+y, camPos.z); + const float3 pixelWorldPos = params.m_viewMatrixInverse * params.camera.kinectDepthToSkeleton(screenPos.x+x,screenPos.y+y, camPos.z); const float3 posInVoxel = (pixelWorldPos - worldPos) / make_float3(c_hashParams.m_virtualVoxelSize,c_hashParams.m_virtualVoxelSize,c_hashParams.m_virtualVoxelSize); //if (posInVoxel.x >= 1.0f || posInVoxel.y >= 1.0f || posInVoxel.z >= 1.0f) { @@ -364,22 +370,24 @@ __global__ void nickRenderKernel(ftl::voxhash::HashData hashData, RayCastData ra } if (pixsizeX == 0) break; } + + } } -extern "C" void nickRenderCUDA(const ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const RayCastData &rayCastData, const RayCastParams ¶ms) +extern "C" void nickRenderCUDA(const ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const RayCastData &rayCastData, const RayCastParams ¶ms, cudaStream_t stream) { const dim3 clear_gridSize((params.m_width + T_PER_BLOCK - 1)/T_PER_BLOCK, (params.m_height + T_PER_BLOCK - 1)/T_PER_BLOCK); const dim3 clear_blockSize(T_PER_BLOCK, T_PER_BLOCK); - clearDepthKernel<<<clear_gridSize, clear_blockSize>>>(hashData, rayCastData); + clearDepthKernel<<<clear_gridSize, clear_blockSize, 0, stream>>>(hashData, rayCastData, params); const unsigned int threadsPerBlock = SDF_BLOCK_SIZE*SDF_BLOCK_SIZE*SDF_BLOCK_SIZE; - const dim3 gridSize(hashParams.m_numOccupiedBlocks, 1); + const dim3 gridSize(NUM_CUDA_BLOCKS, 1); const dim3 blockSize(threadsPerBlock, 1); - if (hashParams.m_numOccupiedBlocks > 0) { //this guard is important if there is no depth in the current frame (i.e., no blocks were allocated) - nickRenderKernel << <gridSize, blockSize >> >(hashData, rayCastData, params); - } + //if (hashParams.m_numOccupiedBlocks > 0) { //this guard is important if there is no depth in the current frame (i.e., no blocks were allocated) + nickRenderKernel << <gridSize, blockSize, 0, stream >> >(hashData, rayCastData, params); + //} cudaSafeCall( cudaGetLastError() ); #ifdef _DEBUG @@ -393,7 +401,7 @@ extern "C" void nickRenderCUDA(const ftl::voxhash::HashData& hashData, const ftl // ray interval splatting ///////////////////////////////////////////////////////////////////////// -__global__ void resetRayIntervalSplatKernel(RayCastData data) +/*__global__ void resetRayIntervalSplatKernel(RayCastData data) { uint idx = blockIdx.x + blockIdx.y * NUM_GROUPS_X; data.point_cloud_[idx] = make_float3(MINF); @@ -410,9 +418,9 @@ extern "C" void resetRayIntervalSplatCUDA(RayCastData& data, const RayCastParams cudaSafeCall(cudaDeviceSynchronize()); //cutilCheckMsg(__FUNCTION__); #endif -} +}*/ -__global__ void rayIntervalSplatKernel(ftl::voxhash::HashData hashData, DepthCameraData depthCameraData, RayCastData rayCastData, DepthCameraData cameraData) +/*__global__ void rayIntervalSplatKernel(ftl::voxhash::HashData hashData, DepthCameraData depthCameraData, RayCastData rayCastData, DepthCameraData cameraData) { uint idx = blockIdx.x + blockIdx.y * NUM_GROUPS_X; @@ -476,13 +484,13 @@ __global__ void rayIntervalSplatKernel(ftl::voxhash::HashData hashData, DepthCam rayCastData.point_cloud_[addr] = make_float3(maxFinal.x, maxFinal.y, depth); //printf("Ray: %f\n", depth); - /*uint addr = idx*6; + uint addr = idx*6; rayCastData.d_vertexBuffer[addr] = make_float4(maxFinal.x, minFinal.y, depth, depthWorld); rayCastData.d_vertexBuffer[addr+1] = make_float4(minFinal.x, minFinal.y, depth, depthWorld); rayCastData.d_vertexBuffer[addr+2] = make_float4(maxFinal.x, maxFinal.y, depth, depthWorld); rayCastData.d_vertexBuffer[addr+3] = make_float4(minFinal.x, minFinal.y, depth, depthWorld); rayCastData.d_vertexBuffer[addr+4] = make_float4(maxFinal.x, maxFinal.y, depth, depthWorld); - rayCastData.d_vertexBuffer[addr+5] = make_float4(minFinal.x, maxFinal.y, depth, depthWorld);*/ + rayCastData.d_vertexBuffer[addr+5] = make_float4(minFinal.x, maxFinal.y, depth, depthWorld); } } @@ -498,4 +506,4 @@ extern "C" void rayIntervalSplatCUDA(const ftl::voxhash::HashData& hashData, con cudaSafeCall(cudaDeviceSynchronize()); //cutilCheckMsg(__FUNCTION__); #endif -} +} */ diff --git a/applications/reconstruct/src/scene_rep_hash_sdf.cu b/applications/reconstruct/src/scene_rep_hash_sdf.cu index 8a7c0b25a62e089a84121c809fa07e202ea74559..152722a34de63909b099d5988db25330793b2dff 100644 --- a/applications/reconstruct/src/scene_rep_hash_sdf.cu +++ b/applications/reconstruct/src/scene_rep_hash_sdf.cu @@ -25,7 +25,7 @@ using ftl::voxhash::FREE_ENTRY; __device__ __constant__ HashParams c_hashParams; __device__ __constant__ RayCastParams c_rayCastParams; -__device__ __constant__ DepthCameraParams c_depthCameraParams; +//__device__ __constant__ DepthCameraParams c_depthCameraParams; extern "C" void updateConstantHashParams(const HashParams& params) { @@ -53,7 +53,7 @@ extern "C" void updateConstantRayCastParams(const RayCastParams& params) { } -extern "C" void updateConstantDepthCameraParams(const DepthCameraParams& params) { +/*extern "C" void updateConstantDepthCameraParams(const DepthCameraParams& params) { //printf("Update depth camera params\n"); size_t size; cudaSafeCall(cudaGetSymbolSize(&size, c_depthCameraParams)); @@ -64,7 +64,7 @@ extern "C" void updateConstantDepthCameraParams(const DepthCameraParams& params) //cutilCheckMsg(__FUNCTION__); #endif -} +}*/ extern "C" void bindInputDepthColorTextures(const DepthCameraData& depthCameraData) { @@ -160,12 +160,12 @@ extern "C" void resetCUDA(HashData& hashData, const HashParams& hashParams) } -extern "C" void resetHashBucketMutexCUDA(HashData& hashData, const HashParams& hashParams) +extern "C" void resetHashBucketMutexCUDA(HashData& hashData, const HashParams& hashParams, cudaStream_t stream) { const dim3 gridSize((hashParams.m_hashNumBuckets + (T_PER_BLOCK*T_PER_BLOCK) - 1)/(T_PER_BLOCK*T_PER_BLOCK), 1); const dim3 blockSize((T_PER_BLOCK*T_PER_BLOCK), 1); - resetHashBucketMutexKernel<<<gridSize, blockSize>>>(hashData); + resetHashBucketMutexKernel<<<gridSize, blockSize, 0, stream>>>(hashData); #ifdef _DEBUG cudaSafeCall(cudaDeviceSynchronize()); @@ -212,10 +212,10 @@ bool isSDFBlockStreamedOut(const int3& sdfBlock, const HashData& hashData, const // Note: bitMask used for Streaming out code... could be set to nullptr if not streaming out // Note: Allocations might need to be around fat rays since multiple voxels could correspond // to same depth map pixel at larger distances. -__global__ void allocKernel(HashData hashData, DepthCameraData cameraData, const unsigned int* d_bitMask) +__global__ void allocKernel(HashData hashData, DepthCameraData cameraData, HashParams hashParams, DepthCameraParams cameraParams) { - const HashParams& hashParams = c_hashParams; - const DepthCameraParams& cameraParams = c_depthCameraParams; + //const HashParams& hashParams = c_hashParams; + //const DepthCameraParams& cameraParams = c_depthCameraParams; const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; @@ -237,11 +237,11 @@ __global__ void allocKernel(HashData hashData, DepthCameraData cameraData, const // Convert ray from image coords to world // Does kinectDepthToSkeleton convert pixel values to coordinates using // camera intrinsics? Same as what reprojectTo3D does in OpenCV? - float3 rayMin = cameraData.kinectDepthToSkeleton(x, y, minDepth); + float3 rayMin = cameraParams.kinectDepthToSkeleton(x, y, minDepth); // Is the rigid transform then the estimated camera pose? rayMin = hashParams.m_rigidTransform * rayMin; //printf("Ray min: %f,%f,%f\n", rayMin.x, rayMin.y, rayMin.z); - float3 rayMax = cameraData.kinectDepthToSkeleton(x, y, maxDepth); + float3 rayMax = cameraParams.kinectDepthToSkeleton(x, y, maxDepth); rayMax = hashParams.m_rigidTransform * rayMax; float3 rayDir = normalize(rayMax - rayMin); @@ -277,7 +277,7 @@ __global__ void allocKernel(HashData hashData, DepthCameraData cameraData, const while(iter < g_MaxLoopIterCount) { //check if it's in the frustum and not checked out - if (hashData.isSDFBlockInCameraFrustumApprox(idCurrentVoxel)) { //} && !isSDFBlockStreamedOut(idCurrentVoxel, hashData, d_bitMask)) { + if (hashData.isSDFBlockInCameraFrustumApprox(hashParams, cameraParams, idCurrentVoxel)) { //} && !isSDFBlockStreamedOut(idCurrentVoxel, hashData, d_bitMask)) { hashData.allocBlock(idCurrentVoxel, cameraParams.flags & 0xFF); //printf("Allocate block: %d\n",idCurrentVoxel.x); } @@ -304,7 +304,7 @@ __global__ void allocKernel(HashData hashData, DepthCameraData cameraData, const } } -extern "C" void allocCUDA(HashData& hashData, const HashParams& hashParams, const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, const unsigned int* d_bitMask) +extern "C" void allocCUDA(HashData& hashData, const HashParams& hashParams, const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, cudaStream_t stream) { //printf("Allocating: %d\n",depthCameraParams.m_imageWidth); @@ -312,327 +312,11 @@ extern "C" void allocCUDA(HashData& hashData, const HashParams& hashParams, cons const dim3 gridSize((depthCameraParams.m_imageWidth + T_PER_BLOCK - 1)/T_PER_BLOCK, (depthCameraParams.m_imageHeight + T_PER_BLOCK - 1)/T_PER_BLOCK); const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); - allocKernel<<<gridSize, blockSize>>>(hashData, depthCameraData, d_bitMask); + allocKernel<<<gridSize, blockSize, 0, stream>>>(hashData, depthCameraData, hashParams, depthCameraParams); - //cudaSafeCall(cudaDeviceSynchronize()); #ifdef _DEBUG cudaSafeCall(cudaDeviceSynchronize()); //cutilCheckMsg(__FUNCTION__); #endif } - -__device__ float4 make_float4(uchar4 c) { - return make_float4(static_cast<float>(c.x), static_cast<float>(c.y), static_cast<float>(c.z), static_cast<float>(c.w)); -} - -inline __device__ uchar4 bilinearFilterColor(const float2& screenPos, cudaTextureObject_t colorTextureRef) { - const DepthCameraParams& cameraParams = c_depthCameraParams; - const int imageWidth = cameraParams.m_imageWidth; - const int imageHeight = cameraParams.m_imageHeight; - const int2 p00 = make_int2(screenPos.x+0.5f, screenPos.y+0.5f); - const int2 dir = sign(make_float2(screenPos.x - p00.x, screenPos.y - p00.y)); - - const int2 p01 = p00 + make_int2(0.0f, dir.y); - const int2 p10 = p00 + make_int2(dir.x, 0.0f); - const int2 p11 = p00 + make_int2(dir.x, dir.y); - - const float alpha = (screenPos.x - p00.x)*dir.x; - const float beta = (screenPos.y - p00.y)*dir.y; - - float4 s0 = make_float4(0.0f, 0.0f, 0.0f, 0.0f); float w0 = 0.0f; - if(p00.x >= 0 && p00.x < imageWidth && p00.y >= 0 && p00.y < imageHeight) { uchar4 v00 = tex2D<uchar4>(colorTextureRef, p00.x, p00.y); if(v00.x != 0) { s0 += (1.0f-alpha)*make_float4(v00); w0 += (1.0f-alpha); } } - if(p10.x >= 0 && p10.x < imageWidth && p10.y >= 0 && p10.y < imageHeight) { uchar4 v10 = tex2D<uchar4>(colorTextureRef, p10.x, p10.y); if(v10.x != 0) { s0 += alpha *make_float4(v10); w0 += alpha ; } } - - float4 s1 = make_float4(0.0f, 0.0f, 0.0f, 0.0f); float w1 = 0.0f; - if(p01.x >= 0 && p01.x < imageWidth && p01.y >= 0 && p01.y < imageHeight) { uchar4 v01 = tex2D<uchar4>(colorTextureRef, p01.x, p01.y); if(v01.x != 0) { s1 += (1.0f-alpha)*make_float4(v01); w1 += (1.0f-alpha);} } - if(p11.x >= 0 && p11.x < imageWidth && p11.y >= 0 && p11.y < imageHeight) { uchar4 v11 = tex2D<uchar4>(colorTextureRef, p11.x, p11.y); if(v11.x != 0) { s1 += alpha *make_float4(v11); w1 += alpha ;} } - - const float4 p0 = s0/w0; - const float4 p1 = s1/w1; - - float4 ss = make_float4(0.0f, 0.0f, 0.0f, 0.0f); float ww = 0.0f; - if(w0 > 0.0f) { ss += (1.0f-beta)*p0; ww += (1.0f-beta); } - if(w1 > 0.0f) { ss += beta *p1; ww += beta ; } - - if(ww > 0.0f) { - ss /= ww; - return make_uchar4(ss.x,ss.y,ss.z,ss.w); - } else return make_uchar4(0, 0, 0, 0); -} - -__device__ float colourDistance(const uchar4 &c1, const uchar3 &c2) { - float x = c1.x-c2.x; - float y = c1.y-c2.y; - float z = c1.z-c2.z; - return x*x + y*y + z*z; -} - -__global__ void integrateDepthMapKernel(HashData hashData, DepthCameraData cameraData, cudaTextureObject_t depthT, cudaTextureObject_t colourT) { - const HashParams& hashParams = c_hashParams; - const DepthCameraParams& cameraParams = c_depthCameraParams; - - //TODO check if we should load this in shared memory - HashEntry& entry = hashData.d_hashCompactified[blockIdx.x]; - //if (entry.ptr == FREE_ENTRY) { - // printf("invliad integrate"); - // return; //should never happen since we did the compactification before - //} - - int3 pi_base = hashData.SDFBlockToVirtualVoxelPos(entry.pos); - - uint i = threadIdx.x; //inside of an SDF block - int3 pi = pi_base + make_int3(hashData.delinearizeVoxelIndex(i)); - float3 pf = hashData.virtualVoxelPosToWorld(pi); - - pf = hashParams.m_rigidTransformInverse * pf; - uint2 screenPos = make_uint2(cameraData.cameraToKinectScreenInt(pf)); - - // For this voxel in hash, get its screen position and check it is on screen - if (screenPos.x < cameraParams.m_imageWidth && screenPos.y < cameraParams.m_imageHeight) { //on screen - - //float depth = g_InputDepth[screenPos]; - float depth = tex2D<float>(depthT, screenPos.x, screenPos.y); - //if (depth > 20.0f) return; - - uchar4 color = make_uchar4(0, 0, 0, 0); - //if (cameraData.d_colorData) { - color = tex2D<uchar4>(colourT, screenPos.x, screenPos.y); - //color = bilinearFilterColor(cameraData.cameraToKinectScreenFloat(pf)); - //} - - //printf("screen pos %d\n", color.x); - //return; - - // Depth is within accepted max distance from camera - if (depth > 0.01f && depth < hashParams.m_maxIntegrationDistance) { // valid depth and color (Nick: removed colour check) - float depthZeroOne = cameraData.cameraToKinectProjZ(depth); - - // Calculate SDF of this voxel wrt the depth map value - float sdf = depth - pf.z; - float truncation = hashData.getTruncation(depth); - - // Is this voxel close enough to cam for depth map value - // CHECK Nick: If is too close then free space violation so remove? - // This isn't enough if the disparity has occlusions that don't cause violations - // Could RGB changes also cause removals if depth can't be confirmed? - /*if (sdf > truncation) { - uint idx = entry.ptr + i; - hashData.d_SDFBlocks[idx].weight = 0; - //hashData.d_SDFBlocks[idx].sdf = PINF; - hashData.d_SDFBlocks[idx].color = make_uchar3(0,0,0); - }*/ - if (sdf > -truncation) // && depthZeroOne >= 0.0f && depthZeroOne <= 1.0f) //check if in truncation range should already be made in depth map computation - { - /*if (sdf >= 0.0f) { - sdf = fminf(truncation, sdf); - } else { - sdf = fmaxf(-truncation, sdf); - }*/ - - - //printf("SDF: %f\n", sdf); - //float weightUpdate = g_WeightSample; - //weightUpdate = (1-depthZeroOne)*5.0f + depthZeroOne*0.05f; - //weightUpdate *= g_WeightSample; - float weightUpdate = max(hashParams.m_integrationWeightSample * 1.5f * (1.0f-depthZeroOne), 1.0f); - - Voxel curr; //construct current voxel - curr.sdf = sdf; - curr.weight = weightUpdate; - curr.color = make_uchar3(color.x, color.y, color.z); - - - uint idx = entry.ptr + i; - - //if (entry.flags != cameraParams.flags & 0xFF) { - // entry.flags = cameraParams.flags & 0xFF; - //hashData.d_SDFBlocks[idx].color = make_uchar3(0,0,0); - //} - - Voxel newVoxel; - //if (color.x == MINF) hashData.combineVoxelDepthOnly(hashData.d_SDFBlocks[idx], curr, newVoxel); - //else hashData.combineVoxel(hashData.d_SDFBlocks[idx], curr, newVoxel); - hashData.combineVoxel(hashData.d_SDFBlocks[idx], curr, newVoxel); - - hashData.d_SDFBlocks[idx] = newVoxel; - - //Voxel prev = getVoxel(g_SDFBlocksSDFUAV, g_SDFBlocksRGBWUAV, idx); - //Voxel newVoxel = combineVoxel(curr, prev); - //setVoxel(g_SDFBlocksSDFUAV, g_SDFBlocksRGBWUAV, idx, newVoxel); - } - } else { - // Depth is invalid so what to do here? - // TODO(Nick) Use past voxel if available (set weight from 0 to 1) - - //uint idx = entry.ptr + i; - //float coldist = colourDistance(color, hashData.d_SDFBlocks[idx].color); - //if ((depth > 39.99f || depth < 0.01f) && coldist > 100.0f) { - //hashData.d_SDFBlocks[idx].color = make_uchar3(0,0,(uchar)(coldist)); - // hashData.d_SDFBlocks[idx].weight = hashData.d_SDFBlocks[idx].weight >> 1; - //} - } - } -} - - -extern "C" void integrateDepthMapCUDA(HashData& hashData, const HashParams& hashParams, - const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams) -{ - const unsigned int threadsPerBlock = SDF_BLOCK_SIZE*SDF_BLOCK_SIZE*SDF_BLOCK_SIZE; - const dim3 gridSize(hashParams.m_numOccupiedBlocks, 1); - const dim3 blockSize(threadsPerBlock, 1); - - if (hashParams.m_numOccupiedBlocks > 0) { //this guard is important if there is no depth in the current frame (i.e., no blocks were allocated) - integrateDepthMapKernel << <gridSize, blockSize >> >(hashData, depthCameraData, depthCameraData.depth_obj_, depthCameraData.colour_obj_); - } - - cudaSafeCall( cudaGetLastError() ); -#ifdef _DEBUG - cudaSafeCall(cudaDeviceSynchronize()); - //cutilCheckMsg(__FUNCTION__); -#endif -} - - - -__global__ void starveVoxelsKernel(HashData hashData) { - - const uint idx = blockIdx.x; - const HashEntry& entry = hashData.d_hashCompactified[idx]; - - //is typically exectued only every n'th frame - int weight = hashData.d_SDFBlocks[entry.ptr + threadIdx.x].weight; - weight = max(0, weight-2); - hashData.d_SDFBlocks[entry.ptr + threadIdx.x].weight = weight; //CHECK Remove to totally clear previous frame (Nick) -} - -extern "C" void starveVoxelsKernelCUDA(HashData& hashData, const HashParams& hashParams) -{ - const unsigned int threadsPerBlock = SDF_BLOCK_SIZE*SDF_BLOCK_SIZE*SDF_BLOCK_SIZE; - const dim3 gridSize(hashParams.m_numOccupiedBlocks, 1); - const dim3 blockSize(threadsPerBlock, 1); - - if (hashParams.m_numOccupiedBlocks > 0) { - starveVoxelsKernel << <gridSize, blockSize >> >(hashData); - } -#ifdef _DEBUG - cudaSafeCall(cudaDeviceSynchronize()); - //cutilCheckMsg(__FUNCTION__); -#endif -} - - -__shared__ float shared_MinSDF[SDF_BLOCK_SIZE * SDF_BLOCK_SIZE * SDF_BLOCK_SIZE / 2]; -__shared__ uint shared_MaxWeight[SDF_BLOCK_SIZE * SDF_BLOCK_SIZE * SDF_BLOCK_SIZE / 2]; - - -__global__ void garbageCollectIdentifyKernel(HashData hashData) { - - const DepthCameraParams& cameraParams = c_depthCameraParams; - const unsigned int hashIdx = blockIdx.x; - const HashEntry& entry = hashData.d_hashCompactified[hashIdx]; - - // Entire block was not touched in this frame, so remove (Nick) - /*if (entry.flags != cameraParams.flags & 0xFF) { - hashData.d_hashDecision[hashIdx] = 1; - return; - }*/ - - //uint h = hashData.computeHashPos(entry.pos); - //hashData.d_hashDecision[hashIdx] = 1; - //if (hashData.d_hashBucketMutex[h] == LOCK_ENTRY) return; - - //if (entry.ptr == FREE_ENTRY) return; //should never happen since we did compactify before - //const uint linBlockSize = SDF_BLOCK_SIZE * SDF_BLOCK_SIZE * SDF_BLOCK_SIZE; - - const unsigned int idx0 = entry.ptr + 2*threadIdx.x+0; - const unsigned int idx1 = entry.ptr + 2*threadIdx.x+1; - - Voxel v0 = hashData.d_SDFBlocks[idx0]; - Voxel v1 = hashData.d_SDFBlocks[idx1]; - - if (v0.weight == 0) v0.sdf = PINF; - if (v1.weight == 0) v1.sdf = PINF; - - shared_MinSDF[threadIdx.x] = min(fabsf(v0.sdf), fabsf(v1.sdf)); //init shared memory - shared_MaxWeight[threadIdx.x] = max(v0.weight, v1.weight); - -#pragma unroll 1 - for (uint stride = 2; stride <= blockDim.x; stride <<= 1) { - __syncthreads(); - if ((threadIdx.x & (stride-1)) == (stride-1)) { - shared_MinSDF[threadIdx.x] = min(shared_MinSDF[threadIdx.x-stride/2], shared_MinSDF[threadIdx.x]); - shared_MaxWeight[threadIdx.x] = max(shared_MaxWeight[threadIdx.x-stride/2], shared_MaxWeight[threadIdx.x]); - } - } - - __syncthreads(); - - if (threadIdx.x == blockDim.x - 1) { - float minSDF = shared_MinSDF[threadIdx.x]; - uint maxWeight = shared_MaxWeight[threadIdx.x]; - - float t = hashData.getTruncation(c_depthCameraParams.m_sensorDepthWorldMax); //MATTHIAS TODO check whether this is a reasonable metric - - if (minSDF >= t || maxWeight == 0) { - hashData.d_hashDecision[hashIdx] = 1; - } else { - hashData.d_hashDecision[hashIdx] = 0; - } - } -} - -extern "C" void garbageCollectIdentifyCUDA(HashData& hashData, const HashParams& hashParams) { - - const unsigned int threadsPerBlock = SDF_BLOCK_SIZE * SDF_BLOCK_SIZE * SDF_BLOCK_SIZE / 2; - const dim3 gridSize(hashParams.m_numOccupiedBlocks, 1); - const dim3 blockSize(threadsPerBlock, 1); - - if (hashParams.m_numOccupiedBlocks > 0) { - garbageCollectIdentifyKernel << <gridSize, blockSize >> >(hashData); - } -#ifdef _DEBUG - cudaSafeCall(cudaDeviceSynchronize()); - //cutilCheckMsg(__FUNCTION__); -#endif -} - - -__global__ void garbageCollectFreeKernel(HashData hashData) { - - //const uint hashIdx = blockIdx.x; - const uint hashIdx = blockIdx.x*blockDim.x + threadIdx.x; - - - if (hashIdx < c_hashParams.m_numOccupiedBlocks && hashData.d_hashDecision[hashIdx] != 0) { //decision to delete the hash entry - - const HashEntry& entry = hashData.d_hashCompactified[hashIdx]; - //if (entry.ptr == FREE_ENTRY) return; //should never happen since we did compactify before - - if (hashData.deleteHashEntryElement(entry.pos)) { //delete hash entry from hash (and performs heap append) - const uint linBlockSize = SDF_BLOCK_SIZE * SDF_BLOCK_SIZE * SDF_BLOCK_SIZE; - - #pragma unroll 1 - for (uint i = 0; i < linBlockSize; i++) { //clear sdf block: CHECK TODO another kernel? - hashData.deleteVoxel(entry.ptr + i); - } - } - } -} - - -extern "C" void garbageCollectFreeCUDA(HashData& hashData, const HashParams& hashParams) { - - const unsigned int threadsPerBlock = T_PER_BLOCK*T_PER_BLOCK; - const dim3 gridSize((hashParams.m_numOccupiedBlocks + threadsPerBlock - 1) / threadsPerBlock, 1); - const dim3 blockSize(threadsPerBlock, 1); - - if (hashParams.m_numOccupiedBlocks > 0) { - garbageCollectFreeKernel << <gridSize, blockSize >> >(hashData); - } -#ifdef _DEBUG - cudaSafeCall(cudaDeviceSynchronize()); - //cutilCheckMsg(__FUNCTION__); -#endif -} diff --git a/applications/reconstruct/src/virtual_source.cpp b/applications/reconstruct/src/virtual_source.cpp index a64a857db224cd191899558d16873bf3c7267aff..73c80323b9a702964a18bbc7ce68e9c407873b06 100644 --- a/applications/reconstruct/src/virtual_source.cpp +++ b/applications/reconstruct/src/virtual_source.cpp @@ -64,16 +64,20 @@ bool VirtualSource::grab() { params.m_sensorDepthWorldMax = params_.maxDepth; // TODO(Nick) Use double precision pose here - rays_->render(scene_->getHashData(), scene_->getHashParams(), params, host_->getPose().cast<float>()); + rays_->render(scene_->getHashData(), scene_->getHashParams(), params, host_->getPose().cast<float>(), scene_->getIntegrationStream()); //unique_lock<mutex> lk(mutex_); if (rays_->isIntegerDepth()) { - rays_->getRayCastData().download((int*)idepth_.data, (uchar3*)rgb_.data, rays_->getRayCastParams()); + rays_->getRayCastData().download((int*)idepth_.data, (uchar3*)rgb_.data, rays_->getRayCastParams(), scene_->getIntegrationStream()); + + cudaSafeCall(cudaStreamSynchronize(scene_->getIntegrationStream())); idepth_.convertTo(depth_, CV_32FC1, 1.0f / 100.0f); } else { - rays_->getRayCastData().download((int*)depth_.data, (uchar3*)rgb_.data, rays_->getRayCastParams()); + rays_->getRayCastData().download((int*)depth_.data, (uchar3*)rgb_.data, rays_->getRayCastParams(), scene_->getIntegrationStream()); + cudaSafeCall(cudaStreamSynchronize(scene_->getIntegrationStream())); } } + return true; } diff --git a/applications/reconstruct/src/voxel_scene.cpp b/applications/reconstruct/src/voxel_scene.cpp index 445c6b9f637e815726edec4d4b3d550bcdb02260..eb46288581799d13f1a99859d357a0eed1bc7d66 100644 --- a/applications/reconstruct/src/voxel_scene.cpp +++ b/applications/reconstruct/src/voxel_scene.cpp @@ -1,5 +1,9 @@ #include <ftl/voxel_scene.hpp> #include "compactors.hpp" +#include "garbage.hpp" +#include "integrators.hpp" + +#include <opencv2/core/cuda_stream_accessor.hpp> using namespace ftl::voxhash; using ftl::rgbd::Source; @@ -9,17 +13,14 @@ using cv::Mat; #define SAFE_DELETE_ARRAY(a) { delete [] (a); (a) = NULL; } extern "C" void resetCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams); -extern "C" void resetHashBucketMutexCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams); -extern "C" void allocCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, const unsigned int* d_bitMask); +extern "C" void resetHashBucketMutexCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, cudaStream_t); +extern "C" void allocCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, cudaStream_t); //extern "C" void fillDecisionArrayCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const DepthCameraData& depthCameraData); //extern "C" void compactifyHashCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams); //extern "C" unsigned int compactifyHashAllInOneCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams); -extern "C" void integrateDepthMapCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams); +extern "C" void integrateDepthMapCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, cudaStream_t); //extern "C" void bindInputDepthColorTextures(const DepthCameraData& depthCameraData); -extern "C" void starveVoxelsKernelCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams); -extern "C" void garbageCollectIdentifyCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams); -extern "C" void garbageCollectFreeCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams); SceneRep::SceneRep(nlohmann::json &config) : Configurable(config), do_reset_(false), m_frameCount(0) { // Allocates voxel structure on GPU @@ -46,10 +47,14 @@ SceneRep::SceneRep(nlohmann::json &config) : Configurable(config), do_reset_(fal on("SDFMaxIntegrationDistance", [this](const ftl::config::Event &e) { m_hashParams.m_maxIntegrationDistance = value("SDFMaxIntegrationDistance", 10.0f); }); + + cudaSafeCall(cudaStreamCreate(&integ_stream_)); + //integ_stream_ = 0; } SceneRep::~SceneRep() { _destroy(); + cudaStreamDestroy(integ_stream_); } void SceneRep::addSource(ftl::rgbd::Source *src) { @@ -106,18 +111,23 @@ int SceneRep::upload() { cam.params.flags = m_frameCount; // Send to GPU and merge view into scene - cam.gpu.updateParams(cam.params); - cam.gpu.updateData(depth, rgba); + //cam.gpu.updateParams(cam.params); + cam.gpu.updateData(depth, rgba, cam.stream); setLastRigidTransform(input->getPose().cast<float>()); //make the rigid transform available on the GPU - m_hashData.updateParams(m_hashParams); + //m_hashData.updateParams(m_hashParams, cv::cuda::StreamAccessor::getStream(cam.stream)); + + //if (i > 0) cudaSafeCall(cudaStreamSynchronize(cv::cuda::StreamAccessor::getStream(cameras_[i-1].stream))); //allocate all hash blocks which are corresponding to depth map entries - _alloc(cam.gpu, cam.params, nullptr); + _alloc(cam.gpu, cam.params, cv::cuda::StreamAccessor::getStream(cam.stream)); } + // Must have finished all allocations and rendering before next integration + cudaSafeCall(cudaDeviceSynchronize()); + return active; } @@ -126,10 +136,10 @@ void SceneRep::integrate() { auto &cam = cameras_[i]; setLastRigidTransform(cam.source->getPose().cast<float>()); - m_hashData.updateParams(m_hashParams); + //m_hashData.updateParams(m_hashParams); //generate a linear hash array with only occupied entries - _compactifyVisible(); + _compactifyVisible(cam.params); //volumetrically integrate the depth data into the depth SDFBlocks _integrateDepthMap(cam.gpu, cam.params); @@ -144,6 +154,7 @@ void SceneRep::garbage() { _compactifyAllocated(); _garbageCollect(); + //cudaSafeCall(cudaStreamSynchronize(integ_stream_)); } /*void SceneRep::integrate(const Eigen::Matrix4f& lastRigidTransform, const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, unsigned int* d_bitMask) { @@ -189,7 +200,7 @@ void SceneRep::nextFrame() { _destroy(); _create(_parametersFromConfig()); } else { - starveVoxelsKernelCUDA(m_hashData, m_hashParams); + ftl::cuda::starveVoxels(m_hashData, m_hashParams); m_numIntegratedFrames = 0; } } @@ -356,14 +367,15 @@ void SceneRep::_destroy() { m_hashData.free(); } -void SceneRep::_alloc(const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, const unsigned int* d_bitMask) { +void SceneRep::_alloc(const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, cudaStream_t stream) { // NOTE (nick): We might want this later... - if (true) { + if (false) { + // TODO(Nick) Make this work without memcpy to host first //allocate until all blocks are allocated unsigned int prevFree = getHeapFreeCount(); while (1) { - resetHashBucketMutexCUDA(m_hashData, m_hashParams); - allocCUDA(m_hashData, m_hashParams, depthCameraData, depthCameraParams, d_bitMask); + resetHashBucketMutexCUDA(m_hashData, m_hashParams, stream); + allocCUDA(m_hashData, m_hashParams, depthCameraData, depthCameraParams, stream); unsigned int currFree = getHeapFreeCount(); @@ -377,29 +389,29 @@ void SceneRep::_alloc(const DepthCameraData& depthCameraData, const DepthCameraP } else { //this version is faster, but it doesn't guarantee that all blocks are allocated (staggers alloc to the next frame) - resetHashBucketMutexCUDA(m_hashData, m_hashParams); - allocCUDA(m_hashData, m_hashParams, depthCameraData, depthCameraParams, d_bitMask); + resetHashBucketMutexCUDA(m_hashData, m_hashParams, stream); + allocCUDA(m_hashData, m_hashParams, depthCameraData, depthCameraParams, stream); } } -void SceneRep::_compactifyVisible() { //const DepthCameraData& depthCameraData) { - m_hashParams.m_numOccupiedBlocks = ftl::cuda::compactifyVisible(m_hashData, m_hashParams); //this version uses atomics over prefix sums, which has a much better performance - m_hashData.updateParams(m_hashParams); //make sure numOccupiedBlocks is updated on the GPU +void SceneRep::_compactifyVisible(const DepthCameraParams &camera) { //const DepthCameraData& depthCameraData) { + ftl::cuda::compactifyVisible(m_hashData, m_hashParams, camera, integ_stream_); //this version uses atomics over prefix sums, which has a much better performance + //m_hashData.updateParams(m_hashParams); //make sure numOccupiedBlocks is updated on the GPU } void SceneRep::_compactifyAllocated() { - m_hashParams.m_numOccupiedBlocks = ftl::cuda::compactifyAllocated(m_hashData, m_hashParams); //this version uses atomics over prefix sums, which has a much better performance - std::cout << "Occ blocks = " << m_hashParams.m_numOccupiedBlocks << std::endl; - m_hashData.updateParams(m_hashParams); //make sure numOccupiedBlocks is updated on the GPU + ftl::cuda::compactifyAllocated(m_hashData, m_hashParams, integ_stream_); //this version uses atomics over prefix sums, which has a much better performance + //std::cout << "Occ blocks = " << m_hashParams.m_numOccupiedBlocks << std::endl; + //m_hashData.updateParams(m_hashParams); //make sure numOccupiedBlocks is updated on the GPU } void SceneRep::_integrateDepthMap(const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams) { - integrateDepthMapCUDA(m_hashData, m_hashParams, depthCameraData, depthCameraParams); + ftl::cuda::integrateDepthMap(m_hashData, m_hashParams, depthCameraData, depthCameraParams, integ_stream_); } void SceneRep::_garbageCollect() { - garbageCollectIdentifyCUDA(m_hashData, m_hashParams); - resetHashBucketMutexCUDA(m_hashData, m_hashParams); //needed if linked lists are enabled -> for memeory deletion - garbageCollectFreeCUDA(m_hashData, m_hashParams); + ftl::cuda::garbageCollectIdentify(m_hashData, m_hashParams, integ_stream_); + resetHashBucketMutexCUDA(m_hashData, m_hashParams, integ_stream_); //needed if linked lists are enabled -> for memeory deletion + ftl::cuda::garbageCollectFree(m_hashData, m_hashParams, integ_stream_); }