diff --git a/applications/reconstruct/CMakeLists.txt b/applications/reconstruct/CMakeLists.txt index 00e44a444ba45f6cb7b4f68ed2441a1c3ac4fd4c..ae06aab6d142ba8a2ad3a1e0970905e3a70468e9 100644 --- a/applications/reconstruct/CMakeLists.txt +++ b/applications/reconstruct/CMakeLists.txt @@ -19,6 +19,8 @@ set(REPSRC #src/virtual_source.cpp src/splat_render.cpp src/dibr.cu + src/depth_camera.cu + src/depth_camera.cpp ) add_executable(ftl-reconstruct ${REPSRC}) diff --git a/applications/reconstruct/src/depth_camera.cpp b/applications/reconstruct/src/depth_camera.cpp index dd04da7738c60aaf44b208d03bf76272cc9a4c87..93d6ab2b44592fbfc859428443922cfcaa304454 100644 --- a/applications/reconstruct/src/depth_camera.cpp +++ b/applications/reconstruct/src/depth_camera.cpp @@ -1,9 +1,9 @@ #include <ftl/depth_camera.hpp> +#include "depth_camera_cuda.hpp" +#include <opencv2/core/cuda_stream_accessor.hpp> using ftl::voxhash::DepthCamera; - -extern "C" void computeNormals(float4* d_output, float3* d_input, unsigned int width, unsigned int height); -extern "C" void convertDepthFloatToCameraSpaceFloat3(float3* d_output, float* d_input, float4x4 intrinsicsInv, unsigned int width, unsigned int height, const DepthCameraData& depthCameraData); +using ftl::voxhash::DepthCameraCUDA; DepthCamera::DepthCamera() { depth_mat_ = nullptr; @@ -15,7 +15,7 @@ DepthCamera::DepthCamera() { normal_tex_ = nullptr; } -void DepthCamera::alloc(const DepthCameraParams& params, bool withNormals=false) { //! todo resizing??? +void DepthCamera::alloc(const DepthCameraParams& params, bool withNormals) { //! todo resizing??? depth_mat_ = new cv::cuda::GpuMat(params.m_imageHeight, params.m_imageWidth, CV_32FC1); colour_mat_ = new cv::cuda::GpuMat(params.m_imageHeight, params.m_imageWidth, CV_8UC4); depth_tex_ = new ftl::cuda::TextureObject<float>((cv::cuda::PtrStepSz<float>)*depth_mat_); @@ -53,6 +53,6 @@ void DepthCamera::updateData(const cv::Mat &depth, const cv::Mat &rgb, cv::cuda: } void DepthCamera::_computeNormals(cudaStream_t stream) { - convertDepthFloatToCameraSpaceFloat3(m_data.d_depth3, m_data.d_depth, m_params.m_intrinsicsInverse, m_params.m_width, m_params.m_height, cameraData); - computeNormals(m_data.d_normals, m_data.d_depth3, m_params.m_width, m_params.m_height); + ftl::cuda::point_cloud((float3*)point_mat_->data, data, stream); + ftl::cuda::compute_normals((float3*)point_mat_->data, normal_tex_, stream); } diff --git a/applications/reconstruct/src/depth_camera.cu b/applications/reconstruct/src/depth_camera.cu new file mode 100644 index 0000000000000000000000000000000000000000..9234504e2d78b1cbf75563693b119b7975b2481e --- /dev/null +++ b/applications/reconstruct/src/depth_camera.cu @@ -0,0 +1,84 @@ +#include <ftl/cuda_common.hpp> +#include <ftl/cuda_util.hpp> +#include <ftl/depth_camera.hpp> +#include "depth_camera_cuda.hpp" + +#define T_PER_BLOCK 16 +#define MINF __int_as_float(0xff800000) + +using ftl::voxhash::DepthCameraCUDA; + +__global__ void point_cloud_kernel(float3* output, DepthCameraCUDA depthCameraData) +{ + const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; + const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; + + const int width = depthCameraData.params.m_imageWidth; + const int height = depthCameraData.params.m_imageHeight; + + if (x < width && y < height) { + float depth = tex2D<float>(depthCameraData.depth, x, y); + + output[y*width+x] = (depth >= depthCameraData.params.m_sensorDepthWorldMin && depth <= depthCameraData.params.m_sensorDepthWorldMax) ? + depthCameraData.params.kinectDepthToSkeleton(x, y, depth) : + make_float3(MINF, MINF, MINF); + } +} + +void ftl::cuda::point_cloud(float3* output, const DepthCameraCUDA &depthCameraData, cudaStream_t stream) { + const dim3 gridSize((depthCameraData.params.m_imageWidth + T_PER_BLOCK - 1)/T_PER_BLOCK, (depthCameraData.params.m_imageHeight + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + point_cloud_kernel<<<gridSize, blockSize, 0, stream>>>(output, depthCameraData); + +#ifdef _DEBUG + cudaSafeCall(cudaDeviceSynchronize()); +#endif +} + +/// ===== NORMALS ===== + +__global__ void compute_normals_kernel(const float3 *input, ftl::cuda::TextureObject<float4> output) +{ + const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; + const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; + + const int width = output.width(); + + if(x >= output.width() || y >= output.height()) return; + + output(x,y) = make_float4(MINF, MINF, MINF, MINF); + + if(x > 0 && x < output.width()-1 && y > 0 && y < output.height()-1) + { + // TODO:(Nick) Should use a 7x7 window + const float3 CC = input[(y+0)*width+(x+0)]; + const float3 PC = input[(y+1)*width+(x+0)]; + const float3 CP = input[(y+0)*width+(x+1)]; + const float3 MC = input[(y-1)*width+(x+0)]; + const float3 CM = input[(y+0)*width+(x-1)]; + + if(CC.x != MINF && PC.x != MINF && CP.x != MINF && MC.x != MINF && CM.x != MINF) + { + const float3 n = cross(PC-MC, CP-CM); + const float l = length(n); + + if(l > 0.0f) + { + output(x,y) = make_float4(n/-l, 1.0f); + } + } + } +} + +void ftl::cuda::compute_normals(const float3 *input, ftl::cuda::TextureObject<float4> *output, cudaStream_t stream) { + const dim3 gridSize((output->width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (output->height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + compute_normals_kernel<<<gridSize, blockSize, 0, stream>>>(input, *output); + +#ifdef _DEBUG + cudaSafeCall(cudaDeviceSynchronize()); + //cutilCheckMsg(__FUNCTION__); +#endif +} \ No newline at end of file diff --git a/applications/reconstruct/src/depth_camera_cuda.hpp b/applications/reconstruct/src/depth_camera_cuda.hpp new file mode 100644 index 0000000000000000000000000000000000000000..05a83a473a7a36ccf6a16fc8c699d63046eea3b1 --- /dev/null +++ b/applications/reconstruct/src/depth_camera_cuda.hpp @@ -0,0 +1,17 @@ +#ifndef _FTL_RECONSTRUCTION_CAMERA_CUDA_HPP_ +#define _FTL_RECONSTRUCTION_CAMERA_CUDA_HPP_ + +#include <ftl/depth_camera.hpp> +#include <ftl/voxel_hash.hpp> + +namespace ftl { +namespace cuda { + +void point_cloud(float3* output, const ftl::voxhash::DepthCameraCUDA &depthCameraData, cudaStream_t stream); + +void compute_normals(const float3 *points, ftl::cuda::TextureObject<float4> *normals, cudaStream_t stream); + +} +} + +#endif // _FTL_RECONSTRUCTION_CAMERA_CUDA_HPP_ diff --git a/applications/reconstruct/src/integrators.cu b/applications/reconstruct/src/integrators.cu index ce51c185dc9a6ff105a69fc6818f8db421e95433..984fd29dce023bd1491c2637b73b98accaf303d9 100644 --- a/applications/reconstruct/src/integrators.cu +++ b/applications/reconstruct/src/integrators.cu @@ -42,6 +42,8 @@ __device__ bool colordiff(const uchar4 &pa, const uchar3 &pb, float epsilon) { /* * Guennebaud, G.; Gross, M. Algebraic point set surfaces. ACMTransactions on Graphics Vol. 26, No. 3, Article No. 23, 2007. * Used in: FusionMLS: Highly dynamic 3D reconstruction with consumer-grade RGB-D cameras + * r = distance between points + * h = smoothing parameter in meters (default 4cm) */ __device__ float spatialWeighting(float r) { const float h = c_hashParams.m_spatialSmoothing; @@ -53,73 +55,6 @@ __device__ float spatialWeighting(float r) { -/*__global__ void integrateRegistrationKernel(HashData hashData, HashParams hashParams, DepthCameraParams cameraParams, cudaTextureObject_t depthT, cudaTextureObject_t colourT) { - - // Stride over all allocated blocks - for (int bi=blockIdx.x; bi<*hashData.d_hashCompactifiedCounter; bi+=NUM_CUDA_BLOCKS) { - - //TODO check if we should load this in shared memory - HashEntry& entry = hashData.d_hashCompactified[bi]; - - - int3 pi_base = hashData.SDFBlockToVirtualVoxelPos(entry.pos); - - uint i = threadIdx.x; //inside of an SDF block - int3 pi = pi_base + make_int3(hashData.delinearizeVoxelIndex(i)); - float3 pf = hashData.virtualVoxelPosToWorld(pi); - - pf = hashParams.m_rigidTransformInverse * pf; - uint2 screenPos = make_uint2(cameraParams.cameraToKinectScreenInt(pf)); - - // For this voxel in hash, get its screen position and check it is on screen - if (screenPos.x < cameraParams.m_imageWidth && screenPos.y < cameraParams.m_imageHeight) { //on screen - - //float depth = g_InputDepth[screenPos]; - float depth = tex2D<float>(depthT, screenPos.x, screenPos.y); - //if (depth > 20.0f) return; - - uchar4 color = make_uchar4(0, 0, 0, 0); - color = tex2D<uchar4>(colourT, screenPos.x, screenPos.y); - - // Depth is within accepted max distance from camera - if (depth > 0.01f && depth < hashParams.m_maxIntegrationDistance) { // valid depth and color (Nick: removed colour check) - float depthZeroOne = cameraParams.cameraToKinectProjZ(depth); - - // Calculate SDF of this voxel wrt the depth map value - float sdf = depth - pf.z; - float truncation = hashData.getTruncation(depth); - - if (sdf > -truncation) { - float weightUpdate = max(hashParams.m_integrationWeightSample * 1.5f * (1.0f-depthZeroOne), 1.0f); - - Voxel curr; //construct current voxel - curr.sdf = sdf; - curr.weight = weightUpdate; - curr.color = make_uchar3(color.x, color.y, color.z); - - uint idx = entry.ptr + i; - - Voxel out; - const Voxel &v1 = curr; - const Voxel &v0 = hashData.d_SDFBlocks[idx]; - - float redshift = (v0.weight > 0) ? 1.0f - ((v1.sdf - v0.sdf) / hashParams.m_truncation)*0.5f : 1.0f; - - out.color.x = min(max(v1.color.x*redshift,0.0f),255.0f); - out.color.y = min(max(v1.color.y*redshift,0.0f),255.0f); - out.color.z = min(max(v1.color.z*(1.0f / redshift),0.0f),255.0f); - - out.sdf = (v0.sdf * (float)v0.weight + v1.sdf * (float)v1.weight) / ((float)v0.weight + (float)v1.weight); - out.weight = min(c_hashParams.m_integrationWeightMax, (unsigned int)v0.weight + (unsigned int)v1.weight); - - hashData.d_SDFBlocks[idx] = out; - - } - } - } - - } // Stride loop -}*/ __global__ void integrateDepthMapsKernel(HashData hashData, HashParams hashParams, int numcams) { __shared__ uint all_warp_ballot;