diff --git a/applications/reconstruct/CMakeLists.txt b/applications/reconstruct/CMakeLists.txt index dcee7afa0147378ffaabd91263e200f8fe284c98..a2d3b43e33e3798532f494d576c694e8a409a99b 100644 --- a/applications/reconstruct/CMakeLists.txt +++ b/applications/reconstruct/CMakeLists.txt @@ -15,8 +15,8 @@ set(REPSRC #src/mls.cu #src/depth_camera.cu #src/depth_camera.cpp - src/ilw.cpp - src/ilw.cu + src/ilw/ilw.cpp + src/ilw/ilw.cu ) add_executable(ftl-reconstruct ${REPSRC}) diff --git a/applications/reconstruct/src/depth_camera.cpp b/applications/reconstruct/src/depth_camera.cpp deleted file mode 100644 index de310f592b303dbcf7d041087863bf6c76db9305..0000000000000000000000000000000000000000 --- a/applications/reconstruct/src/depth_camera.cpp +++ /dev/null @@ -1,55 +0,0 @@ -#include <loguru.hpp> -#include <ftl/depth_camera.hpp> -#include "depth_camera_cuda.hpp" -#include <opencv2/core/cuda_stream_accessor.hpp> - -using ftl::voxhash::DepthCamera; -using ftl::voxhash::DepthCameraCUDA; - -DepthCamera::DepthCamera() { - depth_tex_ = nullptr; - depth2_tex_ = nullptr; - points_tex_ = nullptr; - colour_tex_ = nullptr; - normal_tex_ = nullptr; -} - -void DepthCamera::alloc(const DepthCameraParams& params, bool withNormals) { //! todo resizing??? - depth_tex_ = new ftl::cuda::TextureObject<float>(params.m_imageWidth, params.m_imageHeight); - depth2_tex_ = new ftl::cuda::TextureObject<int>(params.m_imageWidth, params.m_imageHeight); - points_tex_ = new ftl::cuda::TextureObject<float4>(params.m_imageWidth, params.m_imageHeight); - colour_tex_ = new ftl::cuda::TextureObject<uchar4>(params.m_imageWidth, params.m_imageHeight); - data.depth = depth_tex_->cudaTexture(); - data.depth2 = depth2_tex_->cudaTexture(); - data.points = points_tex_->cudaTexture(); - data.colour = colour_tex_->cudaTexture(); - data.params = params; - - //if (withNormals) { - normal_tex_ = new ftl::cuda::TextureObject<float4>(params.m_imageWidth, params.m_imageHeight); - data.normal = normal_tex_->cudaTexture(); - //} else { - // data.normal = 0; - //} -} - -void DepthCamera::free() { - delete depth_tex_; - delete colour_tex_; - delete depth2_tex_; - delete points_tex_; - if (normal_tex_) delete normal_tex_; -} - -void DepthCamera::updateData(const cv::Mat &depth, const cv::Mat &rgb, cv::cuda::Stream &stream) { - depth_tex_->upload(depth, cv::cuda::StreamAccessor::getStream(stream)); - colour_tex_->upload(rgb, cv::cuda::StreamAccessor::getStream(stream)); - //if (normal_mat_) { - //computeNormals(cv::cuda::StreamAccessor::getStream(stream)); - //} -} - -void DepthCamera::computeNormals(cudaStream_t stream) { - //ftl::cuda::point_cloud(*points_tex_, data, stream); - ftl::cuda::compute_normals(*depth_tex_, *normal_tex_, data, stream); -} diff --git a/applications/reconstruct/src/depth_camera.cu b/applications/reconstruct/src/depth_camera.cu deleted file mode 100644 index 7a322eaf733230772e72c50722e849335bf7e891..0000000000000000000000000000000000000000 --- a/applications/reconstruct/src/depth_camera.cu +++ /dev/null @@ -1,355 +0,0 @@ -#include <ftl/cuda_common.hpp> -#include <ftl/cuda_util.hpp> -#include <ftl/depth_camera.hpp> -#include "depth_camera_cuda.hpp" - -#define T_PER_BLOCK 16 -#define MINF __int_as_float(0xff800000) - -using ftl::voxhash::DepthCameraCUDA; -using ftl::voxhash::HashData; -using ftl::voxhash::HashParams; -using ftl::cuda::TextureObject; -using ftl::render::SplatParams; - -extern __constant__ ftl::voxhash::DepthCameraCUDA c_cameras[MAX_CAMERAS]; -extern __constant__ HashParams c_hashParams; - -__global__ void clear_depth_kernel(ftl::cuda::TextureObject<float> depth) { - const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; - const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; - - if (x < depth.width() && y < depth.height()) { - depth(x,y) = 1000.0f; //PINF; - //colour(x,y) = make_uchar4(76,76,82,0); - } -} - -void ftl::cuda::clear_depth(const ftl::cuda::TextureObject<float> &depth, cudaStream_t stream) { - const dim3 clear_gridSize((depth.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); - const dim3 clear_blockSize(T_PER_BLOCK, T_PER_BLOCK); - clear_depth_kernel<<<clear_gridSize, clear_blockSize, 0, stream>>>(depth); -} - -__global__ void clear_depth_kernel(ftl::cuda::TextureObject<int> depth) { - const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; - const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; - - if (x < depth.width() && y < depth.height()) { - depth(x,y) = 0x7FFFFFFF; //PINF; - //colour(x,y) = make_uchar4(76,76,82,0); - } -} - -void ftl::cuda::clear_depth(const ftl::cuda::TextureObject<int> &depth, cudaStream_t stream) { - const dim3 clear_gridSize((depth.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); - const dim3 clear_blockSize(T_PER_BLOCK, T_PER_BLOCK); - clear_depth_kernel<<<clear_gridSize, clear_blockSize, 0, stream>>>(depth); -} - -__global__ void clear_to_zero_kernel(ftl::cuda::TextureObject<float> depth) { - const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; - const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; - - if (x < depth.width() && y < depth.height()) { - depth(x,y) = 0.0f; //PINF; - } -} - -void ftl::cuda::clear_to_zero(const ftl::cuda::TextureObject<float> &depth, cudaStream_t stream) { - const dim3 clear_gridSize((depth.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); - const dim3 clear_blockSize(T_PER_BLOCK, T_PER_BLOCK); - clear_to_zero_kernel<<<clear_gridSize, clear_blockSize, 0, stream>>>(depth); -} - -__global__ void clear_points_kernel(ftl::cuda::TextureObject<float4> depth) { - const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; - const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; - - if (x < depth.width() && y < depth.height()) { - depth(x,y) = make_float4(MINF,MINF,MINF,MINF); - //colour(x,y) = make_uchar4(76,76,82,0); - } -} - -void ftl::cuda::clear_points(const ftl::cuda::TextureObject<float4> &depth, cudaStream_t stream) { - const dim3 clear_gridSize((depth.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); - const dim3 clear_blockSize(T_PER_BLOCK, T_PER_BLOCK); - clear_points_kernel<<<clear_gridSize, clear_blockSize, 0, stream>>>(depth); -} - -__global__ void clear_colour_kernel(ftl::cuda::TextureObject<uchar4> depth) { - const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; - const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; - - if (x < depth.width() && y < depth.height()) { - depth(x,y) = make_uchar4(76,76,76,76); - //colour(x,y) = make_uchar4(76,76,82,0); - } -} - -void ftl::cuda::clear_colour(const ftl::cuda::TextureObject<uchar4> &depth, cudaStream_t stream) { - const dim3 clear_gridSize((depth.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); - const dim3 clear_blockSize(T_PER_BLOCK, T_PER_BLOCK); - clear_colour_kernel<<<clear_gridSize, clear_blockSize, 0, stream>>>(depth); -} - -__global__ void clear_colour_kernel(ftl::cuda::TextureObject<float4> depth) { - const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; - const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; - - if (x < depth.width() && y < depth.height()) { - depth(x,y) = make_float4(0.0f); - } -} - -void ftl::cuda::clear_colour(const ftl::cuda::TextureObject<float4> &depth, cudaStream_t stream) { - const dim3 clear_gridSize((depth.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); - const dim3 clear_blockSize(T_PER_BLOCK, T_PER_BLOCK); - clear_colour_kernel<<<clear_gridSize, clear_blockSize, 0, stream>>>(depth); -} - -// ===== Type convert ===== - -template <typename A, typename B> -__global__ void convert_kernel(const ftl::cuda::TextureObject<A> in, ftl::cuda::TextureObject<B> out, float scale) { - const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; - const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; - - if (x < in.width() && y < in.height()) { - out(x,y) = ((float)in.tex2D((int)x,(int)y)) * scale; - } -} - -void ftl::cuda::float_to_int(const ftl::cuda::TextureObject<float> &in, ftl::cuda::TextureObject<int> &out, float scale, cudaStream_t stream) { - const dim3 gridSize((in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); - const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); - convert_kernel<float,int><<<gridSize, blockSize, 0, stream>>>(in, out, scale); -} - -void ftl::cuda::int_to_float(const ftl::cuda::TextureObject<int> &in, ftl::cuda::TextureObject<float> &out, float scale, cudaStream_t stream) { - const dim3 gridSize((in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); - const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); - convert_kernel<int,float><<<gridSize, blockSize, 0, stream>>>(in, out, scale); -} - -/// ===== Median Filter ====== - -#define WINDOW_SIZE 3 -#define MEDIAN_RADIUS 3 -#define MEDIAN_SIZE (((MEDIAN_RADIUS*2)+1)*((MEDIAN_RADIUS*2)+1)) - -__global__ void medianFilterKernel(TextureObject<int> inputImageKernel, TextureObject<float> outputImagekernel) -{ - // Set row and colum for thread. - int row = blockIdx.y * blockDim.y + threadIdx.y; - int col = blockIdx.x * blockDim.x + threadIdx.x; - int filterVector[MEDIAN_SIZE] = {0}; //Take fiter window - if((row<=MEDIAN_RADIUS) || (col<=MEDIAN_RADIUS) || (row>=inputImageKernel.height()-MEDIAN_RADIUS) || (col>=inputImageKernel.width()-MEDIAN_RADIUS)) - outputImagekernel(col, row) = 0.0f; //Deal with boundry conditions - else { - for (int v = -MEDIAN_RADIUS; v <= MEDIAN_RADIUS; v++) { - for (int u = -MEDIAN_RADIUS; u <= MEDIAN_RADIUS; u++){ - filterVector[(v+MEDIAN_RADIUS)*(2*MEDIAN_RADIUS+1)+u+MEDIAN_RADIUS] = inputImageKernel((col+u), (row+v)); // setup the filterign window. - } - } - for (int i = 0; i < MEDIAN_SIZE; i++) { - for (int j = i + 1; j < MEDIAN_SIZE; j++) { - if (filterVector[i] > filterVector[j]) { - //Swap the variables. - char tmp = filterVector[i]; - filterVector[i] = filterVector[j]; - filterVector[j] = tmp; - } - } - } - outputImagekernel(col, row) = (float)filterVector[MEDIAN_SIZE/2+1] / 1000.0f; //Set the output variables. - } -} - -void ftl::cuda::median_filter(const ftl::cuda::TextureObject<int> &in, ftl::cuda::TextureObject<float> &out, cudaStream_t stream) { - const dim3 gridSize((in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); - const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); - medianFilterKernel<<<gridSize, blockSize, 0, stream>>>(in, out); -} - - -/// ===== Hole Fill ===== - -__device__ inline float distance2(float3 a, float3 b) { - const float x = a.x-b.x; - const float y = a.y-b.y; - const float z = a.z-b.z; - return x*x+y*y+z*z; -} - -#define SPLAT_RADIUS 7 -#define SPLAT_BOUNDS (2*SPLAT_RADIUS+T_PER_BLOCK+1) -#define SPLAT_BUFFER_SIZE (SPLAT_BOUNDS*SPLAT_BOUNDS) - -__global__ void hole_fill_kernel( - TextureObject<int> depth_in, - TextureObject<float> depth_out, DepthCameraParams params) { - // Read an NxN region and - // - interpolate a depth value for this pixel - // - interpolate an rgb value for this pixel - // Must respect depth discontinuities. - // How much influence a given neighbour has depends on its depth value - - __shared__ float3 positions[SPLAT_BUFFER_SIZE]; - - const float voxelSize = c_hashParams.m_virtualVoxelSize; - - const int i = threadIdx.y*blockDim.y + threadIdx.x; - const int bx = blockIdx.x*blockDim.x; - const int by = blockIdx.y*blockDim.y; - const int x = bx + threadIdx.x; - const int y = by + threadIdx.y; - - // const float camMinDepth = params.camera.m_sensorDepthWorldMin; - // const float camMaxDepth = params.camera.m_sensorDepthWorldMax; - - for (int j=i; j<SPLAT_BUFFER_SIZE; j+=T_PER_BLOCK) { - const unsigned int sx = (j % SPLAT_BOUNDS)+bx-SPLAT_RADIUS; - const unsigned int sy = (j / SPLAT_BOUNDS)+by-SPLAT_RADIUS; - if (sx >= depth_in.width() || sy >= depth_in.height()) { - positions[j] = make_float3(1000.0f,1000.0f, 1000.0f); - } else { - positions[j] = params.kinectDepthToSkeleton(sx, sy, (float)depth_in.tex2D((int)sx,(int)sy) / 1000.0f); - } - } - - __syncthreads(); - - if (x >= depth_in.width() || y >= depth_in.height()) return; - - const float voxelSquared = voxelSize*voxelSize; - float mindepth = 1000.0f; - //int minidx = -1; - // float3 minpos; - - //float3 validPos[MAX_VALID]; - //int validIndices[MAX_VALID]; - //int validix = 0; - - for (int v=-SPLAT_RADIUS; v<=SPLAT_RADIUS; ++v) { - for (int u=-SPLAT_RADIUS; u<=SPLAT_RADIUS; ++u) { - //const int idx = (threadIdx.y+v)*SPLAT_BOUNDS+threadIdx.x+u; - const int idx = (threadIdx.y+v+SPLAT_RADIUS)*SPLAT_BOUNDS+threadIdx.x+u+SPLAT_RADIUS; - - float3 posp = positions[idx]; - const float d = posp.z; - //if (d < camMinDepth || d > camMaxDepth) continue; - - float3 pos = params.kinectDepthToSkeleton(x, y, d); - float dist = distance2(pos, posp); - - if (dist < voxelSquared) { - // Valid so check for minimum - //validPos[validix] = pos; - //validIndices[validix++] = idx; - if (d < mindepth) { - mindepth = d; - //minidx = idx; - // minpos = pos; - } - } - } - } - - depth_out(x,y) = mindepth; -} - -void ftl::cuda::hole_fill(const TextureObject<int> &depth_in, - const TextureObject<float> &depth_out, const DepthCameraParams ¶ms, cudaStream_t stream) -{ - - const dim3 gridSize((depth_in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); - const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); - - hole_fill_kernel<<<gridSize, blockSize, 0, stream>>>(depth_in, depth_out, params); - cudaSafeCall( cudaGetLastError() ); - - #ifdef _DEBUG - cudaSafeCall(cudaDeviceSynchronize()); - #endif -} - - -/// ===== Point cloud from depth ===== - -__global__ void point_cloud_kernel(ftl::cuda::TextureObject<float4> output, DepthCameraCUDA depthCameraData) -{ - const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; - const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; - - const int width = depthCameraData.params.m_imageWidth; - const int height = depthCameraData.params.m_imageHeight; - - if (x < width && y < height) { - float depth = tex2D<float>(depthCameraData.depth, x, y); - - output(x,y) = (depth >= depthCameraData.params.m_sensorDepthWorldMin && depth <= depthCameraData.params.m_sensorDepthWorldMax) ? - make_float4(depthCameraData.pose * depthCameraData.params.kinectDepthToSkeleton(x, y, depth), 0.0f) : - make_float4(MINF, MINF, MINF, MINF); - } -} - -void ftl::cuda::point_cloud(ftl::cuda::TextureObject<float4> &output, const DepthCameraCUDA &depthCameraData, cudaStream_t stream) { - const dim3 gridSize((depthCameraData.params.m_imageWidth + T_PER_BLOCK - 1)/T_PER_BLOCK, (depthCameraData.params.m_imageHeight + T_PER_BLOCK - 1)/T_PER_BLOCK); - const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); - - point_cloud_kernel<<<gridSize, blockSize, 0, stream>>>(output, depthCameraData); - -#ifdef _DEBUG - cudaSafeCall(cudaDeviceSynchronize()); -#endif -} - -/// ===== NORMALS ===== - - -__global__ void compute_normals_kernel(const ftl::cuda::TextureObject<float> input, ftl::cuda::TextureObject<float4> output, DepthCameraCUDA camera) -{ - const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; - const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; - - const int width = output.width(); - - if(x >= output.width() || y >= output.height()) return; - - output(x,y) = make_float4(MINF, MINF, MINF, MINF); - - if(x > 0 && x < output.width()-1 && y > 0 && y < output.height()-1) - { - const float3 CC = camera.pose * camera.params.kinectDepthToSkeleton(x,y,input(x,y)); //input[(y+0)*width+(x+0)]; - const float3 PC = camera.pose * camera.params.kinectDepthToSkeleton(x,y,input(x,y+1)); //input[(y+1)*width+(x+0)]; - const float3 CP = camera.pose * camera.params.kinectDepthToSkeleton(x,y,input(x+1,y)); //input[(y+0)*width+(x+1)]; - const float3 MC = camera.pose * camera.params.kinectDepthToSkeleton(x,y,input(x,y-1)); //input[(y-1)*width+(x+0)]; - const float3 CM = camera.pose * camera.params.kinectDepthToSkeleton(x,y,input(x-1,y)); //input[(y+0)*width+(x-1)]; - - if(CC.x != MINF && PC.x != MINF && CP.x != MINF && MC.x != MINF && CM.x != MINF) - { - const float3 n = cross(PC-MC, CP-CM); - const float l = length(n); - - if(l > 0.0f) - { - //if (x == 400 && y == 200) printf("Cam NORMX: %f\n", (n/-l).x); - output(x,y) = make_float4(n.x/-l, n.y/-l, n.z/-l, 0.0f); //make_float4(n/-l, 1.0f); - } - } - } -} - -void ftl::cuda::compute_normals(const ftl::cuda::TextureObject<float> &input, ftl::cuda::TextureObject<float4> &output, const DepthCameraCUDA &camera, cudaStream_t stream) { - const dim3 gridSize((output.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (output.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); - const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); - - compute_normals_kernel<<<gridSize, blockSize, 0, stream>>>(input, output, camera); - -#ifdef _DEBUG - cudaSafeCall(cudaDeviceSynchronize()); - //cutilCheckMsg(__FUNCTION__); -#endif -} \ No newline at end of file diff --git a/applications/reconstruct/src/depth_camera_cuda.hpp b/applications/reconstruct/src/depth_camera_cuda.hpp deleted file mode 100644 index 26bfcad73f5ae72f20cfe2dd29d0e91c62fca62b..0000000000000000000000000000000000000000 --- a/applications/reconstruct/src/depth_camera_cuda.hpp +++ /dev/null @@ -1,37 +0,0 @@ -#ifndef _FTL_RECONSTRUCTION_CAMERA_CUDA_HPP_ -#define _FTL_RECONSTRUCTION_CAMERA_CUDA_HPP_ - -#include <ftl/depth_camera.hpp> -#include <ftl/voxel_hash.hpp> -#include "splat_params.hpp" - -namespace ftl { -namespace cuda { - -void clear_depth(const TextureObject<float> &depth, cudaStream_t stream); -void clear_depth(const TextureObject<int> &depth, cudaStream_t stream); -void clear_to_zero(const ftl::cuda::TextureObject<float> &depth, cudaStream_t stream); -void clear_points(const ftl::cuda::TextureObject<float4> &depth, cudaStream_t stream); -void clear_colour(const ftl::cuda::TextureObject<uchar4> &depth, cudaStream_t stream); -void clear_colour(const ftl::cuda::TextureObject<float4> &depth, cudaStream_t stream); - -void median_filter(const ftl::cuda::TextureObject<int> &in, ftl::cuda::TextureObject<float> &out, cudaStream_t stream); - -void int_to_float(const ftl::cuda::TextureObject<int> &in, ftl::cuda::TextureObject<float> &out, float scale, cudaStream_t stream); - -void float_to_int(const ftl::cuda::TextureObject<float> &in, ftl::cuda::TextureObject<int> &out, float scale, cudaStream_t stream); - -void mls_smooth(TextureObject<float4> &output, const ftl::voxhash::HashParams &hashParams, int numcams, int cam, cudaStream_t stream); - -void mls_render_depth(const TextureObject<int> &input, TextureObject<int> &output, const ftl::render::SplatParams ¶ms, int numcams, cudaStream_t stream); - -void hole_fill(const TextureObject<int> &depth_in, const TextureObject<float> &depth_out, const DepthCameraParams ¶ms, cudaStream_t stream); - -void point_cloud(ftl::cuda::TextureObject<float4> &output, const ftl::voxhash::DepthCameraCUDA &depthCameraData, cudaStream_t stream); - -void compute_normals(const ftl::cuda::TextureObject<float> &depth, ftl::cuda::TextureObject<float4> &normals, const ftl::voxhash::DepthCameraCUDA &camera, cudaStream_t stream); - -} -} - -#endif // _FTL_RECONSTRUCTION_CAMERA_CUDA_HPP_ diff --git a/applications/reconstruct/src/ilw.cpp b/applications/reconstruct/src/ilw.cpp index 86a4cca5e4f82047ed6591a137b694788da879eb..c1bf8499aea122e58723bad6de434ebe4a37cd26 100644 --- a/applications/reconstruct/src/ilw.cpp +++ b/applications/reconstruct/src/ilw.cpp @@ -66,6 +66,11 @@ bool ILW::_phase0(ftl::rgbd::FrameSet &fs, cudaStream_t stream) { f.createTexture<float4>(Channel::EnergyVector, Format<float4>(f.get<GpuMat>(Channel::Colour).size())); f.createTexture<float>(Channel::Energy, Format<float>(f.get<GpuMat>(Channel::Colour).size())); f.createTexture<uchar4>(Channel::Colour); + + cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream); + + f.get<GpuMat>(Channel::EnergyVector).setTo(cv::Scalar(0.0f,0.0f,0.0f,0.0f), cvstream); + f.get<GpuMat>(Channel::Energy).setTo(cv::Scalar(0.0f), cvstream); } return true; diff --git a/applications/reconstruct/src/ilw.cu b/applications/reconstruct/src/ilw.cu index 90133a3a57800ee87a91fd50902deea5f701258a..999b5ec9031eed08fc4bc527471961c3236d7445 100644 --- a/applications/reconstruct/src/ilw.cu +++ b/applications/reconstruct/src/ilw.cu @@ -30,9 +30,14 @@ __global__ void correspondence_energy_vector_kernel( const int x = (blockIdx.x*blockDim.x + threadIdx.x) / WARP_SIZE; const int y = blockIdx.y*blockDim.y + threadIdx.y; - const float3 world1 = make_float3(p1.tex2D(x, y)); + const float3 world1 = make_float3(p1.tex2D(x, y)); + if (world1.x == MINF) { + vout(x,y) = make_float4(0.0f); + eout(x,y) = 0.0f; + return; + } const float3 camPos2 = pose2 * world1; - const uint2 screen2 = cam2.camToScreen<uint2>(camPos2); + const uint2 screen2 = cam2.camToScreen<uint2>(camPos2); const int upsample = 8; @@ -43,12 +48,11 @@ __global__ void correspondence_energy_vector_kernel( const float u = (i % upsample) - (upsample / 2); const float v = (i / upsample) - (upsample / 2); - const float3 world2 = make_float3(p2.tex2D(screen2.x+u, screen2.y+v)); + const float3 world2 = make_float3(p2.tex2D(screen2.x+u, screen2.y+v)); + if (world2.x == MINF) continue; // Determine degree of correspondence const float confidence = 1.0f / length(world1 - world2); - - printf("conf %f\n", confidence); const float maxconf = warpMax(confidence); // This thread has best confidence value diff --git a/applications/reconstruct/src/ilw/ilw.cpp b/applications/reconstruct/src/ilw/ilw.cpp new file mode 100644 index 0000000000000000000000000000000000000000..65e1a19dfe35e89019f5b3aa9dfbe85310b03f33 --- /dev/null +++ b/applications/reconstruct/src/ilw/ilw.cpp @@ -0,0 +1,223 @@ +#include "ilw.hpp" +#include <ftl/utility/matrix_conversion.hpp> +#include <ftl/rgbd/source.hpp> +#include <ftl/cuda/points.hpp> +#include <loguru.hpp> + +#include "ilw_cuda.hpp" + +using ftl::ILW; +using ftl::detail::ILWData; +using ftl::rgbd::Channel; +using ftl::rgbd::Channels; +using ftl::rgbd::Format; +using cv::cuda::GpuMat; + +ILW::ILW(nlohmann::json &config) : ftl::Configurable(config) { + enabled_ = value("ilw_align", true); + iterations_ = value("iterations", 1); + motion_rate_ = value("motion_rate", 0.4f); + motion_window_ = value("motion_window", 3); + use_lab_ = value("use_Lab", false); + params_.colour_smooth = value("colour_smooth", 50.0f); + params_.spatial_smooth = value("spatial_smooth", 0.04f); + params_.cost_ratio = value("cost_ratio", 0.75f); + + on("ilw_align", [this](const ftl::config::Event &e) { + enabled_ = value("ilw_align", true); + }); + + on("iterations", [this](const ftl::config::Event &e) { + iterations_ = value("iterations", 1); + }); + + on("motion_rate", [this](const ftl::config::Event &e) { + motion_rate_ = value("motion_rate", 0.4f); + }); + + on("motion_window", [this](const ftl::config::Event &e) { + motion_window_ = value("motion_window", 3); + }); + + on("use_Lab", [this](const ftl::config::Event &e) { + use_lab_ = value("use_Lab", false); + }); + + on("colour_smooth", [this](const ftl::config::Event &e) { + params_.colour_smooth = value("colour_smooth", 50.0f); + }); + + on("spatial_smooth", [this](const ftl::config::Event &e) { + params_.spatial_smooth = value("spatial_smooth", 0.04f); + }); + + on("cost_ratio", [this](const ftl::config::Event &e) { + params_.cost_ratio = value("cost_ratio", 75.0f); + }); + + params_.flags = 0; + if (value("ignore_bad", false)) params_.flags |= ftl::cuda::kILWFlag_IgnoreBad; + if (value("ignore_bad_colour", false)) params_.flags |= ftl::cuda::kILWFlag_SkipBadColour; + if (value("restrict_z", true)) params_.flags |= ftl::cuda::kILWFlag_RestrictZ; + + on("ignore_bad", [this](const ftl::config::Event &e) { + if (value("ignore_bad", false)) params_.flags |= ftl::cuda::kILWFlag_IgnoreBad; + else params_.flags &= ~ftl::cuda::kILWFlag_IgnoreBad; + }); + + on("ignore_bad_colour", [this](const ftl::config::Event &e) { + if (value("ignore_bad_colour", false)) params_.flags |= ftl::cuda::kILWFlag_SkipBadColour; + else params_.flags &= ~ftl::cuda::kILWFlag_SkipBadColour; + }); + + on("restrict_z", [this](const ftl::config::Event &e) { + if (value("restrict_z", false)) params_.flags |= ftl::cuda::kILWFlag_RestrictZ; + else params_.flags &= ~ftl::cuda::kILWFlag_RestrictZ; + }); +} + +ILW::~ILW() { + +} + +bool ILW::process(ftl::rgbd::FrameSet &fs, cudaStream_t stream) { + if (!enabled_) return false; + + _phase0(fs, stream); + + for (int i=0; i<iterations_; ++i) { + int win; + switch (i) { + case 0: win = 17; break; + case 1: win = 9; break; + default: win = 5; break; + } + _phase1(fs, win, stream); + //for (int j=0; j<3; ++j) { + _phase2(fs, motion_rate_, stream); + //} + + // TODO: Break if no time left + } + + return true; +} + +bool ILW::_phase0(ftl::rgbd::FrameSet &fs, cudaStream_t stream) { + // Make points channel... + for (size_t i=0; i<fs.frames.size(); ++i) { + auto &f = fs.frames[i]; + auto *s = fs.sources[i]; + + if (f.empty(Channel::Depth + Channel::Colour)) { + LOG(ERROR) << "Missing required channel"; + continue; + } + + auto &t = f.createTexture<float4>(Channel::Points, Format<float4>(f.get<GpuMat>(Channel::Colour).size())); + auto pose = MatrixConversion::toCUDA(s->getPose().cast<float>()); //.inverse()); + ftl::cuda::point_cloud(t, f.createTexture<float>(Channel::Depth), s->parameters(), pose, stream); + + // TODO: Create energy vector texture and clear it + // Create energy and clear it + + // Convert colour from BGR to BGRA if needed + if (f.get<GpuMat>(Channel::Colour).type() == CV_8UC3) { + cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream); + // Convert to 4 channel colour + auto &col = f.get<GpuMat>(Channel::Colour); + GpuMat tmp(col.size(), CV_8UC4); + cv::cuda::swap(col, tmp); + if (use_lab_) cv::cuda::cvtColor(tmp,tmp, cv::COLOR_BGR2Lab, 0, cvstream); + cv::cuda::cvtColor(tmp,col, cv::COLOR_BGR2BGRA, 0, cvstream); + } + + f.createTexture<float4>(Channel::EnergyVector, Format<float4>(f.get<GpuMat>(Channel::Colour).size())); + f.createTexture<float>(Channel::Energy, Format<float>(f.get<GpuMat>(Channel::Colour).size())); + f.createTexture<uchar4>(Channel::Colour); + } + + return true; +} + +bool ILW::_phase1(ftl::rgbd::FrameSet &fs, int win, cudaStream_t stream) { + // Run correspondence kernel to create an energy vector + cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream); + + // For each camera combination + for (size_t i=0; i<fs.frames.size(); ++i) { + auto &f1 = fs.frames[i]; + f1.get<GpuMat>(Channel::EnergyVector).setTo(cv::Scalar(0.0f,0.0f,0.0f,0.0f), cvstream); + f1.get<GpuMat>(Channel::Energy).setTo(cv::Scalar(0.0f), cvstream); + + Eigen::Vector4d d1(0.0, 0.0, 1.0, 0.0); + d1 = fs.sources[i]->getPose() * d1; + + for (size_t j=0; j<fs.frames.size(); ++j) { + if (i == j) continue; + + //LOG(INFO) << "Running phase1"; + + auto &f2 = fs.frames[j]; + auto s1 = fs.sources[i]; + auto s2 = fs.sources[j]; + + // Are cameras facing similar enough direction? + Eigen::Vector4d d2(0.0, 0.0, 1.0, 0.0); + d2 = fs.sources[j]->getPose() * d2; + // No, so skip this combination + if (d1.dot(d2) <= 0.0) continue; + + auto pose1 = MatrixConversion::toCUDA(s1->getPose().cast<float>().inverse()); + auto pose2 = MatrixConversion::toCUDA(s2->getPose().cast<float>().inverse()); + + try { + //Calculate energy vector to best correspondence + ftl::cuda::correspondence_energy_vector( + f1.getTexture<float4>(Channel::Points), + f2.getTexture<float4>(Channel::Points), + f1.getTexture<uchar4>(Channel::Colour), + f2.getTexture<uchar4>(Channel::Colour), + // TODO: Add normals and other things... + f1.getTexture<float4>(Channel::EnergyVector), + f1.getTexture<float>(Channel::Energy), + pose1, + pose2, + s2->parameters(), + params_, + win, + stream + ); + } catch (ftl::exception &e) { + LOG(ERROR) << "Exception in correspondence: " << e.what(); + } + + //LOG(INFO) << "Correspondences done... " << i; + } + } + + return true; +} + +bool ILW::_phase2(ftl::rgbd::FrameSet &fs, float rate, cudaStream_t stream) { + // Run energies and motion kernel + + // Smooth vectors across a window and iteratively + // strongly disagreeing vectors should cancel out + // A weak vector is overriden by a stronger one. + + for (size_t i=0; i<fs.frames.size(); ++i) { + auto &f = fs.frames[i]; + + ftl::cuda::move_points( + f.getTexture<float4>(Channel::Points), + f.getTexture<float4>(Channel::EnergyVector), + fs.sources[i]->parameters(), + rate, + motion_window_, + stream + ); + } + + return true; +} diff --git a/applications/reconstruct/src/ilw/ilw.cu b/applications/reconstruct/src/ilw/ilw.cu new file mode 100644 index 0000000000000000000000000000000000000000..67c1c9d8ded3be60209139b858059063d6ca696b --- /dev/null +++ b/applications/reconstruct/src/ilw/ilw.cu @@ -0,0 +1,205 @@ +#include "ilw_cuda.hpp" +#include <ftl/cuda/weighting.hpp> + +using ftl::cuda::TextureObject; +using ftl::rgbd::Camera; + +#define WARP_SIZE 32 +#define T_PER_BLOCK 8 +#define FULL_MASK 0xffffffff + +__device__ inline float warpMin(float e) { + for (int i = WARP_SIZE/2; i > 0; i /= 2) { + const float other = __shfl_xor_sync(FULL_MASK, e, i, WARP_SIZE); + e = min(e, other); + } + return e; +} + +__device__ inline float warpSum(float e) { + for (int i = WARP_SIZE/2; i > 0; i /= 2) { + const float other = __shfl_xor_sync(FULL_MASK, e, i, WARP_SIZE); + e += other; + } + return e; +} + +//#define COR_WIN_RADIUS 17 +//#define COR_WIN_SIZE (COR_WIN_RADIUS * COR_WIN_RADIUS) + +template<int COR_WIN_RADIUS> +__global__ void correspondence_energy_vector_kernel( + TextureObject<float4> p1, + TextureObject<float4> p2, + TextureObject<uchar4> c1, + TextureObject<uchar4> c2, + TextureObject<float4> vout, + TextureObject<float> eout, + float4x4 pose1, // Inverse + float4x4 pose2, // Inverse + Camera cam2, ftl::cuda::ILWParams params) { + + // Each warp picks point in p1 + const int tid = (threadIdx.x + threadIdx.y * blockDim.x); + const int x = (blockIdx.x*blockDim.x + threadIdx.x) / WARP_SIZE; + const int y = blockIdx.y*blockDim.y + threadIdx.y; + + const float3 world1 = make_float3(p1.tex2D(x, y)); + const uchar4 colour1 = c1.tex2D(x, y); + if (world1.x == MINF) return; + const float3 camPos2 = pose2 * world1; + const uint2 screen2 = cam2.camToScreen<uint2>(camPos2); + + float bestcost = 1.1f; + float avgcost = 0.0f; + float3 bestpoint; + int count = 0; + + // Project to p2 using cam2 + // Each thread takes a possible correspondence and calculates a weighting + const int lane = tid % WARP_SIZE; + for (int i=lane; i<COR_WIN_RADIUS*COR_WIN_RADIUS; i+=WARP_SIZE) { + const float u = (i % COR_WIN_RADIUS) - (COR_WIN_RADIUS / 2); + const float v = (i / COR_WIN_RADIUS) - (COR_WIN_RADIUS / 2); + + const float3 world2 = make_float3(p2.tex2D(screen2.x+u, screen2.y+v)); + if ((params.flags & ftl::cuda::kILWFlag_IgnoreBad) && world2.x == MINF) continue; + const uchar4 colour2 = c2.tex2D(screen2.x+u, screen2.y+v); + + // Determine degree of correspondence + float cost = 1.0f - ftl::cuda::spatialWeighting(world1, world2, params.spatial_smooth); + // Point is too far away to even count + if (world2.x != MINF && cost == 1.0f) continue; + + // Mix ratio of colour and distance costs + const float ccost = 1.0f - ftl::cuda::colourWeighting(colour1, colour2, params.colour_smooth); + if ((params.flags & ftl::cuda::kILWFlag_SkipBadColour) && ccost == 1.0f) continue; + cost = params.cost_ratio * (ccost) + (1.0f - params.cost_ratio) * cost; + //cost /= 2.0f; + + ++count; + avgcost += cost; + if (world2.x != MINF && cost < bestcost) { + bestpoint = world2; + bestcost = cost; + } + } + + count = warpSum(count); + const float mincost = warpMin(bestcost); + bool best = mincost == bestcost; + avgcost = warpSum(avgcost) / count; + const float confidence = (avgcost - mincost); + + if (best && mincost < 1.0f) { + float3 tvecA = pose1 * bestpoint; + float3 tvecB = pose1 * world1; + if (params.flags & ftl::cuda::kILWFlag_RestrictZ) { + tvecA.x = tvecB.x; + tvecA.y = tvecB.y; + } + tvecA = (pose1.getInverse() * tvecA) - world1; + vout(x,y) = vout.tex2D(x, y) + make_float4( + tvecA.x, // * (1.0f - mincost) * confidence, + tvecA.y, // * (1.0f - mincost) * confidence, + tvecA.z, // * (1.0f - mincost) * confidence, + (1.0f - mincost) * confidence); + + //eout(x,y) = max(eout(x,y), (length(bestpoint-world1) / 0.04f) * 7.0f); + //eout(x,y) = max(eout(x,y), (1.0f - mincost) * 7.0f); + //eout(x,y) = max(eout(x, y), (1.0f - mincost) * confidence * (length(bestpoint-world1) / 0.04f) * 12.0f); + eout(x,y) = max(eout(x, y), (1.0f - mincost) * confidence * 12.0f); + //eout(x,y) = max(eout(x, y), confidence * 12.0f); + } else if (mincost >= 1.0f && lane == 0) { + //vout(x,y) = make_float4(0.0f); + //eout(x,y) = 0.0f; + } +} + +void ftl::cuda::correspondence_energy_vector( + TextureObject<float4> &p1, + TextureObject<float4> &p2, + TextureObject<uchar4> &c1, + TextureObject<uchar4> &c2, + TextureObject<float4> &vout, + TextureObject<float> &eout, + float4x4 &pose1, + float4x4 &pose2, + const Camera &cam2, const ILWParams ¶ms, int win, + cudaStream_t stream) { + + const dim3 gridSize((p1.width() + 2 - 1)/2, (p1.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(2*WARP_SIZE, T_PER_BLOCK); + + //printf("COR SIZE %d,%d\n", p1.width(), p1.height()); + + switch (win) { + case 17 : correspondence_energy_vector_kernel<17><<<gridSize, blockSize, 0, stream>>>(p1, p2, c1, c2, vout, eout, pose1, pose2, cam2, params); break; + case 9 : correspondence_energy_vector_kernel<9><<<gridSize, blockSize, 0, stream>>>(p1, p2, c1, c2, vout, eout, pose1, pose2, cam2, params); break; + case 5 : correspondence_energy_vector_kernel<5><<<gridSize, blockSize, 0, stream>>>(p1, p2, c1, c2, vout, eout, pose1, pose2, cam2, params); break; + } + cudaSafeCall( cudaGetLastError() ); +} + +//============================================================================== + +//#define MOTION_RADIUS 9 + +template <int MOTION_RADIUS> +__global__ void move_points_kernel( + ftl::cuda::TextureObject<float4> p, + ftl::cuda::TextureObject<float4> ev, + ftl::rgbd::Camera camera, + float rate) { + + const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; + const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x < p.width() && y < p.height()) { + const float4 world = p(x,y); + if (world.x == MINF) return; + + float4 vec = make_float4(0.0f, 0.0f, 0.0f, 0.0f); //ev.tex2D((int)x,(int)y); + float contrib = 0.0f; + + // Calculate screen space distortion with neighbours + for (int v=-MOTION_RADIUS; v<=MOTION_RADIUS; ++v) { + for (int u=-MOTION_RADIUS; u<=MOTION_RADIUS; ++u) { + const float4 vecn = ev.tex2D((int)x+u,(int)y+v); + const float3 pn = make_float3(p.tex2D((int)x+u,(int)y+v)); + if (pn.x == MINF) continue; + + const float s = ftl::cuda::spatialWeighting(pn, make_float3(world), 0.01f); + contrib += vecn.w * s; + vec += vecn.w * s * vecn; + } + } + + if (vec.w > 0.0f) { + p(x,y) = world + rate * (vec / contrib); + } + } +} + + +void ftl::cuda::move_points( + ftl::cuda::TextureObject<float4> &p, + ftl::cuda::TextureObject<float4> &v, + const ftl::rgbd::Camera &camera, + float rate, + int radius, + cudaStream_t stream) { + + const dim3 gridSize((p.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (p.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + switch (radius) { + case 9 : move_points_kernel<9><<<gridSize, blockSize, 0, stream>>>(p,v,camera,rate); break; + case 5 : move_points_kernel<5><<<gridSize, blockSize, 0, stream>>>(p,v,camera,rate); break; + case 3 : move_points_kernel<3><<<gridSize, blockSize, 0, stream>>>(p,v,camera,rate); break; + case 1 : move_points_kernel<1><<<gridSize, blockSize, 0, stream>>>(p,v,camera,rate); break; + case 0 : move_points_kernel<0><<<gridSize, blockSize, 0, stream>>>(p,v,camera,rate); break; + } + + cudaSafeCall( cudaGetLastError() ); +} diff --git a/applications/reconstruct/src/ilw.hpp b/applications/reconstruct/src/ilw/ilw.hpp similarity index 81% rename from applications/reconstruct/src/ilw.hpp rename to applications/reconstruct/src/ilw/ilw.hpp index 0be45d015e976b540263a2c16cc5605376092a43..66de927a285716b2a1d9efd1b2fdb8577a1f6149 100644 --- a/applications/reconstruct/src/ilw.hpp +++ b/applications/reconstruct/src/ilw/ilw.hpp @@ -6,6 +6,8 @@ #include <ftl/configurable.hpp> #include <vector> +#include "ilw_cuda.hpp" + namespace ftl { namespace detail { @@ -42,6 +44,8 @@ class ILW : public ftl::Configurable { */ bool process(ftl::rgbd::FrameSet &fs, cudaStream_t stream=0); + inline bool isLabColour() const { return use_lab_; } + private: /* * Initialise data. @@ -51,14 +55,20 @@ class ILW : public ftl::Configurable { /* * Find possible correspondences and a confidence value. */ - bool _phase1(ftl::rgbd::FrameSet &fs, cudaStream_t stream); + bool _phase1(ftl::rgbd::FrameSet &fs, int win, cudaStream_t stream); /* * Calculate energies and move the points. */ - bool _phase2(ftl::rgbd::FrameSet &fs); + bool _phase2(ftl::rgbd::FrameSet &fs, float rate, cudaStream_t stream); std::vector<detail::ILWData> data_; + bool enabled_; + ftl::cuda::ILWParams params_; + int iterations_; + float motion_rate_; + int motion_window_; + bool use_lab_; }; } diff --git a/applications/reconstruct/src/ilw_cuda.hpp b/applications/reconstruct/src/ilw/ilw_cuda.hpp similarity index 52% rename from applications/reconstruct/src/ilw_cuda.hpp rename to applications/reconstruct/src/ilw/ilw_cuda.hpp index a01af75149409fe033ba39ffb0170489ee926be9..8c6c925b1fe501d955350c82c66268c275ce0eb4 100644 --- a/applications/reconstruct/src/ilw_cuda.hpp +++ b/applications/reconstruct/src/ilw/ilw_cuda.hpp @@ -8,6 +8,18 @@ namespace ftl { namespace cuda { +struct ILWParams { + float spatial_smooth; + float colour_smooth; + float cost_ratio; + float threshold; + uint flags; +}; + +static const uint kILWFlag_IgnoreBad = 0x0001; +static const uint kILWFlag_RestrictZ = 0x0002; +static const uint kILWFlag_SkipBadColour = 0x0004; + void correspondence_energy_vector( ftl::cuda::TextureObject<float4> &p1, ftl::cuda::TextureObject<float4> &p2, @@ -15,8 +27,19 @@ void correspondence_energy_vector( ftl::cuda::TextureObject<uchar4> &c2, ftl::cuda::TextureObject<float4> &vout, ftl::cuda::TextureObject<float> &eout, + float4x4 &pose1, float4x4 &pose2, const ftl::rgbd::Camera &cam2, + const ILWParams ¶ms, int win, + cudaStream_t stream +); + +void move_points( + ftl::cuda::TextureObject<float4> &p, + ftl::cuda::TextureObject<float4> &v, + const ftl::rgbd::Camera &camera, + float rate, + int radius, cudaStream_t stream ); diff --git a/applications/reconstruct/src/main.cpp b/applications/reconstruct/src/main.cpp index f6548cf32efd565f581695ef82da7f55724beabf..92a65ea44c3598fd257d53ac6c10c22dafdba0e6 100644 --- a/applications/reconstruct/src/main.cpp +++ b/applications/reconstruct/src/main.cpp @@ -16,7 +16,7 @@ #include <ftl/rgbd/group.hpp> #include <ftl/threads.hpp> -#include "ilw.hpp" +#include "ilw/ilw.hpp" #include <ftl/render/splat_render.hpp> #include <string> @@ -29,6 +29,8 @@ #include <ftl/registration.hpp> +#include <cuda_profiler_api.h> + #ifdef WIN32 #pragma comment(lib, "Rpcrt4.lib") #endif @@ -135,8 +137,15 @@ static void run(ftl::Configurable *root) { ftl::ILW *align = ftl::create<ftl::ILW>(root, "merge"); // Generate virtual camera render when requested by streamer - virt->onRender([splat,virt,&scene_B](ftl::rgbd::Frame &out) { + virt->onRender([splat,virt,&scene_B,align](ftl::rgbd::Frame &out) { virt->setTimestamp(scene_B.timestamp); + // Do we need to convert Lab to BGR? + if (align->isLabColour()) { + for (auto &f : scene_B.frames) { + auto &col = f.get<cv::cuda::GpuMat>(Channel::Colour); + cv::cuda::cvtColor(col,col, cv::COLOR_Lab2BGR); + } + } splat->render(virt, out); }); stream->add(virt); @@ -177,7 +186,7 @@ static void run(ftl::Configurable *root) { // Send all frames to GPU, block until done? scene_A.upload(Channel::Colour + Channel::Depth); // TODO: (Nick) Add scene stream. - //align->process(scene_A); + align->process(scene_A); // TODO: To use second GPU, could do a download, swap, device change, // then upload to other device. Or some direct device-2-device copy. @@ -188,14 +197,22 @@ static void run(ftl::Configurable *root) { return true; }); + LOG(INFO) << "Shutting down..."; ftl::timer::stop(); slave.stop(); net->shutdown(); + ftl::pool.stop(); + + cudaProfilerStop(); + + LOG(INFO) << "Deleting..."; + delete align; delete splat; delete stream; delete virt; delete net; + LOG(INFO) << "Done."; } int main(int argc, char **argv) { diff --git a/applications/reconstruct/src/mls.cu b/applications/reconstruct/src/mls/mls.cu similarity index 100% rename from applications/reconstruct/src/mls.cu rename to applications/reconstruct/src/mls/mls.cu diff --git a/applications/reconstruct/src/point_cloud.hpp b/applications/reconstruct/src/point_cloud.hpp deleted file mode 100644 index 7b5f49c07ea5826736feb861bcd42f133367c291..0000000000000000000000000000000000000000 --- a/applications/reconstruct/src/point_cloud.hpp +++ /dev/null @@ -1,36 +0,0 @@ -#ifndef _FTL_POINT_CLOUD_HPP_ -#define _FTL_POINT_CLOUD_HPP_ - -namespace ftl { -namespace pointcloud { - - -struct VoxelPoint { - union { - uint64_t val; - struct { - uint16_t x : 12; // Block X - uint16_t y : 12; // Block Y - uint16_t z : 12; // Block Z - uint16_t v : 9; // Voxel offset in block 0-511 - }; - }; -}; - -struct VoxelColour { - union { - uint32_t val; - struct { - uint8_t b; - uint8_t g; - uint8_t r; - uint8_t a; - }; - }; -}; - - -} -} - -#endif // _FTL_POINT_CLOUD_HPP_ diff --git a/applications/reconstruct/src/ray_cast_sdf.cpp b/applications/reconstruct/src/ray_cast_sdf.cpp deleted file mode 100644 index 1c12a762cd56b82d8dd8be585c0fe75733b010f5..0000000000000000000000000000000000000000 --- a/applications/reconstruct/src/ray_cast_sdf.cpp +++ /dev/null @@ -1,106 +0,0 @@ -//#include <stdafx.h> - -#include <ftl/voxel_hash.hpp> -#include "compactors.hpp" -#include "splat_render_cuda.hpp" - -//#include "Util.h" - -#include <ftl/ray_cast_sdf.hpp> - - -extern "C" void renderCS( - const ftl::voxhash::HashData& hashData, - const RayCastData &rayCastData, - const RayCastParams &rayCastParams, cudaStream_t stream); - -extern "C" void computeNormals(float4* d_output, float3* d_input, unsigned int width, unsigned int height); -extern "C" void convertDepthFloatToCameraSpaceFloat3(float3* d_output, float* d_input, float4x4 intrinsicsInv, unsigned int width, unsigned int height, const DepthCameraData& depthCameraData); - -extern "C" void resetRayIntervalSplatCUDA(RayCastData& data, const RayCastParams& params); -extern "C" void rayIntervalSplatCUDA(const ftl::voxhash::HashData& hashData, - const RayCastData &rayCastData, const RayCastParams &rayCastParams); - -extern "C" void nickRenderCUDA(const ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const RayCastData &rayCastData, const RayCastParams ¶ms, cudaStream_t stream); - - - -void CUDARayCastSDF::create(const RayCastParams& params) -{ - m_params = params; - m_data.allocate(m_params); - //m_rayIntervalSplatting.OnD3D11CreateDevice(DXUTGetD3D11Device(), params.m_width, params.m_height); -} - -void CUDARayCastSDF::destroy(void) -{ - m_data.free(); - //m_rayIntervalSplatting.OnD3D11DestroyDevice(); -} - -//extern "C" unsigned int compactifyHashAllInOneCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams); - - -void CUDARayCastSDF::compactifyHashEntries(ftl::voxhash::HashData& hashData, ftl::voxhash::HashParams& hashParams, cudaStream_t stream) { //const DepthCameraData& depthCameraData) { - - ftl::cuda::compactifyVisible(hashData, hashParams, m_params.camera, stream); //this version uses atomics over prefix sums, which has a much better performance - //std::cout << "Ray blocks = " << hashParams.m_numOccupiedBlocks << std::endl; - //hashData.updateParams(hashParams); //make sure numOccupiedBlocks is updated on the GPU -} - -void CUDARayCastSDF::render(ftl::voxhash::HashData& hashData, ftl::voxhash::HashParams& hashParams, const DepthCameraParams& cameraParams, const Eigen::Matrix4f& lastRigidTransform, cudaStream_t stream) -{ - //updateConstantDepthCameraParams(cameraParams); - //rayIntervalSplatting(hashData, hashParams, lastRigidTransform); - //m_data.d_rayIntervalSplatMinArray = m_rayIntervalSplatting.mapMinToCuda(); - //m_data.d_rayIntervalSplatMaxArray = m_rayIntervalSplatting.mapMaxToCuda(); - - m_params.camera = cameraParams; - //m_params.m_numOccupiedSDFBlocks = hashParams.m_numOccupiedBlocks; - m_params.m_viewMatrix = MatrixConversion::toCUDA(lastRigidTransform.inverse()); - m_params.m_viewMatrixInverse = MatrixConversion::toCUDA(lastRigidTransform); - //m_data.updateParams(m_params); - - compactifyHashEntries(hashData, hashParams, stream); - - if (hash_render_) { - //ftl::cuda::isosurface_point_image(hashData, hashParams, m_data, m_params, stream); - } else renderCS(hashData, m_data, m_params, stream); - - //convertToCameraSpace(cameraData); - //if (!m_params.m_useGradients) - //{ - // computeNormals(m_data.d_normals, m_data.d_depth3, m_params.m_width, m_params.m_height); - //} - - //m_rayIntervalSplatting.unmapCuda(); - -} - - -/*void CUDARayCastSDF::convertToCameraSpace(const DepthCameraData& cameraData) -{ - convertDepthFloatToCameraSpaceFloat3(m_data.d_depth3, m_data.d_depth, m_params.m_intrinsicsInverse, m_params.m_width, m_params.m_height, cameraData); - - if(!m_params.m_useGradients) { - computeNormals(m_data.d_normals, m_data.d_depth3, m_params.m_width, m_params.m_height); - } -}*/ - -/*void CUDARayCastSDF::rayIntervalSplatting(const ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const Eigen::Matrix4f& lastRigidTransform) -{ - if (hashParams.m_numOccupiedBlocks == 0) return; - - if (m_params.m_maxNumVertices <= 6*hashParams.m_numOccupiedBlocks) { // 6 verts (2 triangles) per block - throw std::runtime_error("not enough space for vertex buffer for ray interval splatting"); - } - - m_params.m_numOccupiedSDFBlocks = hashParams.m_numOccupiedBlocks; - m_params.m_viewMatrix = MatrixConversion::toCUDA(lastRigidTransform.inverse()); - m_params.m_viewMatrixInverse = MatrixConversion::toCUDA(lastRigidTransform); - - m_data.updateParams(m_params); // !!! debugging - - //don't use ray interval splatting (cf CUDARayCastSDF.cu -> line 40 - //m_rayIntervalSplatting.rayIntervalSplatting(DXUTGetD3D11DeviceContext(), hashData, cameraData, m_data, m_params, m_params.m_numOccupiedSDFBlocks*6); -}*/ \ No newline at end of file diff --git a/applications/reconstruct/src/ray_cast_sdf.cu b/applications/reconstruct/src/ray_cast_sdf.cu deleted file mode 100644 index 10fd3e0b7b84ca694432abc7dc68fc513ad483eb..0000000000000000000000000000000000000000 --- a/applications/reconstruct/src/ray_cast_sdf.cu +++ /dev/null @@ -1,189 +0,0 @@ - -//#include <cuda_runtime.h> - -#include <ftl/cuda_matrix_util.hpp> - -#include <ftl/depth_camera.hpp> -#include <ftl/voxel_hash.hpp> -#include <ftl/ray_cast_util.hpp> - -#define T_PER_BLOCK 8 -#define NUM_GROUPS_X 1024 - -#define NUM_CUDA_BLOCKS 10000 - -//texture<float, cudaTextureType2D, cudaReadModeElementType> rayMinTextureRef; -//texture<float, cudaTextureType2D, cudaReadModeElementType> rayMaxTextureRef; - -__global__ void renderKernel(ftl::voxhash::HashData hashData, RayCastData rayCastData, RayCastParams rayCastParams) -{ - const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; - const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; - - //const RayCastParams& rayCastParams = c_rayCastParams; - - if (x < rayCastParams.m_width && y < rayCastParams.m_height) { - rayCastData.d_depth[y*rayCastParams.m_width+x] = MINF; - rayCastData.d_depth3[y*rayCastParams.m_width+x] = make_float3(MINF,MINF,MINF); - rayCastData.d_normals[y*rayCastParams.m_width+x] = make_float4(MINF,MINF,MINF,MINF); - rayCastData.d_colors[y*rayCastParams.m_width+x] = make_uchar3(0,0,0); - - float3 camDir = normalize(rayCastParams.camera.kinectProjToCamera(x, y, 1.0f)); - float3 worldCamPos = rayCastParams.m_viewMatrixInverse * make_float3(0.0f, 0.0f, 0.0f); - float4 w = rayCastParams.m_viewMatrixInverse * make_float4(camDir, 0.0f); - float3 worldDir = normalize(make_float3(w.x, w.y, w.z)); - - ////use ray interval splatting - //float minInterval = tex2D(rayMinTextureRef, x, y); - //float maxInterval = tex2D(rayMaxTextureRef, x, y); - - //don't use ray interval splatting - float minInterval = rayCastParams.m_minDepth; - float maxInterval = rayCastParams.m_maxDepth; - - //if (minInterval == 0 || minInterval == MINF) minInterval = rayCastParams.m_minDepth; - //if (maxInterval == 0 || maxInterval == MINF) maxInterval = rayCastParams.m_maxDepth; - //TODO MATTHIAS: shouldn't this return in the case no interval is found? - if (minInterval == 0 || minInterval == MINF) return; - if (maxInterval == 0 || maxInterval == MINF) return; - - // debugging - //if (maxInterval < minInterval) { - // printf("ERROR (%d,%d): [ %f, %f ]\n", x, y, minInterval, maxInterval); - //} - - rayCastData.traverseCoarseGridSimpleSampleAll(hashData, rayCastParams, worldCamPos, worldDir, camDir, make_int3(x,y,1), minInterval, maxInterval); - } -} - -extern "C" void renderCS(const ftl::voxhash::HashData& hashData, const RayCastData &rayCastData, const RayCastParams &rayCastParams, cudaStream_t stream) -{ - - const dim3 gridSize((rayCastParams.m_width + T_PER_BLOCK - 1)/T_PER_BLOCK, (rayCastParams.m_height + T_PER_BLOCK - 1)/T_PER_BLOCK); - const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); - - //cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat); - //cudaBindTextureToArray(rayMinTextureRef, rayCastData.d_rayIntervalSplatMinArray, channelDesc); - //cudaBindTextureToArray(rayMaxTextureRef, rayCastData.d_rayIntervalSplatMaxArray, channelDesc); - - //printf("Ray casting render...\n"); - - renderKernel<<<gridSize, blockSize, 0, stream>>>(hashData, rayCastData, rayCastParams); - -#ifdef _DEBUG - cudaSafeCall(cudaDeviceSynchronize()); - //cutilCheckMsg(__FUNCTION__); -#endif -} - - -///////////////////////////////////////////////////////////////////////// -// ray interval splatting -///////////////////////////////////////////////////////////////////////// - -/*__global__ void resetRayIntervalSplatKernel(RayCastData data) -{ - uint idx = blockIdx.x + blockIdx.y * NUM_GROUPS_X; - data.point_cloud_[idx] = make_float3(MINF); -} - -extern "C" void resetRayIntervalSplatCUDA(RayCastData& data, const RayCastParams& params) -{ - const dim3 gridSize(NUM_GROUPS_X, (params.m_maxNumVertices + NUM_GROUPS_X - 1) / NUM_GROUPS_X, 1); // ! todo check if need third dimension? - const dim3 blockSize(1, 1, 1); - - resetRayIntervalSplatKernel<<<gridSize, blockSize>>>(data); - -#ifdef _DEBUG - cudaSafeCall(cudaDeviceSynchronize()); - //cutilCheckMsg(__FUNCTION__); -#endif -}*/ - -/*__global__ void rayIntervalSplatKernel(ftl::voxhash::HashData hashData, DepthCameraData depthCameraData, RayCastData rayCastData, DepthCameraData cameraData) -{ - uint idx = blockIdx.x + blockIdx.y * NUM_GROUPS_X; - - const ftl::voxhash::HashEntry& entry = hashData.d_hashCompactified[idx]; - if (entry.ptr != ftl::voxhash::FREE_ENTRY) { - //if (!hashData.isSDFBlockInCameraFrustumApprox(entry.pos)) return; - const RayCastParams ¶ms = c_rayCastParams; - const float4x4& viewMatrix = params.m_viewMatrix; - - float3 worldCurrentVoxel = hashData.SDFBlockToWorld(entry.pos); - - float3 MINV = worldCurrentVoxel - c_hashParams.m_virtualVoxelSize / 2.0f; - - float3 maxv = MINV+SDF_BLOCK_SIZE*c_hashParams.m_virtualVoxelSize; - - float3 proj000 = cameraData.cameraToKinectProj(viewMatrix * make_float3(MINV.x, MINV.y, MINV.z)); - float3 proj100 = cameraData.cameraToKinectProj(viewMatrix * make_float3(maxv.x, MINV.y, MINV.z)); - float3 proj010 = cameraData.cameraToKinectProj(viewMatrix * make_float3(MINV.x, maxv.y, MINV.z)); - float3 proj001 = cameraData.cameraToKinectProj(viewMatrix * make_float3(MINV.x, MINV.y, maxv.z)); - float3 proj110 = cameraData.cameraToKinectProj(viewMatrix * make_float3(maxv.x, maxv.y, MINV.z)); - float3 proj011 = cameraData.cameraToKinectProj(viewMatrix * make_float3(MINV.x, maxv.y, maxv.z)); - float3 proj101 = cameraData.cameraToKinectProj(viewMatrix * make_float3(maxv.x, MINV.y, maxv.z)); - float3 proj111 = cameraData.cameraToKinectProj(viewMatrix * make_float3(maxv.x, maxv.y, maxv.z)); - - // Tree Reduction Min - float3 min00 = fminf(proj000, proj100); - float3 min01 = fminf(proj010, proj001); - float3 min10 = fminf(proj110, proj011); - float3 min11 = fminf(proj101, proj111); - - float3 min0 = fminf(min00, min01); - float3 min1 = fminf(min10, min11); - - float3 minFinal = fminf(min0, min1); - - // Tree Reduction Max - float3 max00 = fmaxf(proj000, proj100); - float3 max01 = fmaxf(proj010, proj001); - float3 max10 = fmaxf(proj110, proj011); - float3 max11 = fmaxf(proj101, proj111); - - float3 max0 = fmaxf(max00, max01); - float3 max1 = fmaxf(max10, max11); - - float3 maxFinal = fmaxf(max0, max1); - - float depth = maxFinal.z; - if(params.m_splatMinimum == 1) { - depth = minFinal.z; - } - float depthWorld = cameraData.kinectProjToCameraZ(depth); - - //uint addr = idx*4; - //rayCastData.d_vertexBuffer[addr] = make_float4(maxFinal.x, minFinal.y, depth, depthWorld); - //rayCastData.d_vertexBuffer[addr+1] = make_float4(minFinal.x, minFinal.y, depth, depthWorld); - //rayCastData.d_vertexBuffer[addr+2] = make_float4(maxFinal.x, maxFinal.y, depth, depthWorld); - //rayCastData.d_vertexBuffer[addr+3] = make_float4(minFinal.x, maxFinal.y, depth, depthWorld); - - // Note (Nick) : Changed to create point cloud instead of vertex. - uint addr = idx; - rayCastData.point_cloud_[addr] = make_float3(maxFinal.x, maxFinal.y, depth); - //printf("Ray: %f\n", depth); - - uint addr = idx*6; - rayCastData.d_vertexBuffer[addr] = make_float4(maxFinal.x, minFinal.y, depth, depthWorld); - rayCastData.d_vertexBuffer[addr+1] = make_float4(minFinal.x, minFinal.y, depth, depthWorld); - rayCastData.d_vertexBuffer[addr+2] = make_float4(maxFinal.x, maxFinal.y, depth, depthWorld); - rayCastData.d_vertexBuffer[addr+3] = make_float4(minFinal.x, minFinal.y, depth, depthWorld); - rayCastData.d_vertexBuffer[addr+4] = make_float4(maxFinal.x, maxFinal.y, depth, depthWorld); - rayCastData.d_vertexBuffer[addr+5] = make_float4(minFinal.x, maxFinal.y, depth, depthWorld); - } -} - -extern "C" void rayIntervalSplatCUDA(const ftl::voxhash::HashData& hashData, const DepthCameraData& cameraData, const RayCastData &rayCastData, const RayCastParams &rayCastParams) -{ - //printf("Ray casting...\n"); - const dim3 gridSize(NUM_GROUPS_X, (rayCastParams.m_numOccupiedSDFBlocks + NUM_GROUPS_X - 1) / NUM_GROUPS_X, 1); - const dim3 blockSize(1, 1, 1); - - rayIntervalSplatKernel<<<gridSize, blockSize>>>(hashData, cameraData, rayCastData, cameraData); - -#ifdef _DEBUG - cudaSafeCall(cudaDeviceSynchronize()); - //cutilCheckMsg(__FUNCTION__); -#endif -} */ diff --git a/applications/reconstruct/src/scene_rep_hash_sdf.cu b/applications/reconstruct/src/scene_rep_hash_sdf.cu deleted file mode 100644 index 4750d3e7ed4f7aeb68875e0b5050d76efed5b715..0000000000000000000000000000000000000000 --- a/applications/reconstruct/src/scene_rep_hash_sdf.cu +++ /dev/null @@ -1,282 +0,0 @@ -// From: https://github.com/niessner/VoxelHashing/blob/master/DepthSensingCUDA/Source/CUDASceneRepHashSDF.cu - -//#include <cutil_inline.h> -//#include <cutil_math.h> -//#include <vector_types.h> -//#include <cuda_runtime.h> - -#include <ftl/cuda_matrix_util.hpp> - -#include <ftl/voxel_hash.hpp> -#include <ftl/depth_camera.hpp> -#include <ftl/ray_cast_params.hpp> - -#define T_PER_BLOCK 8 - -using ftl::voxhash::HashData; -using ftl::voxhash::HashParams; -using ftl::voxhash::Voxel; -using ftl::voxhash::HashEntry; -using ftl::voxhash::FREE_ENTRY; - -// TODO (Nick) Use ftl::cuda::Texture (texture objects) -//texture<float, cudaTextureType2D, cudaReadModeElementType> depthTextureRef; -//texture<float4, cudaTextureType2D, cudaReadModeElementType> colorTextureRef; - -__device__ __constant__ HashParams c_hashParams; -__device__ __constant__ RayCastParams c_rayCastParams; -//__device__ __constant__ DepthCameraParams c_depthCameraParams; -__device__ __constant__ ftl::voxhash::DepthCameraCUDA c_cameras[MAX_CAMERAS]; - -extern "C" void updateConstantHashParams(const HashParams& params) { - - size_t size; - cudaSafeCall(cudaGetSymbolSize(&size, c_hashParams)); - cudaSafeCall(cudaMemcpyToSymbol(c_hashParams, ¶ms, size, 0, cudaMemcpyHostToDevice)); - -#ifdef DEBUG - cudaSafeCall(cudaDeviceSynchronize()); - //cutilCheckMsg(__FUNCTION__); -#endif - } - - -/*extern "C" void updateConstantRayCastParams(const RayCastParams& params) { - //printf("Update ray cast params\n"); - size_t size; - cudaSafeCall(cudaGetSymbolSize(&size, c_rayCastParams)); - cudaSafeCall(cudaMemcpyToSymbol(c_rayCastParams, ¶ms, size, 0, cudaMemcpyHostToDevice)); - -#ifdef DEBUG - cudaSafeCall(cudaDeviceSynchronize()); - //cutilCheckMsg(__FUNCTION__); -#endif - -}*/ - -extern "C" void updateCUDACameraConstant(ftl::voxhash::DepthCameraCUDA *data, int count) { - if (count == 0 || count >= MAX_CAMERAS) return; - //printf("Update depth camera params\n"); - size_t size; - cudaSafeCall(cudaGetSymbolSize(&size, c_cameras)); - cudaSafeCall(cudaMemcpyToSymbol(c_cameras, data, sizeof(ftl::voxhash::DepthCameraCUDA)*count, 0, cudaMemcpyHostToDevice)); - -#ifdef DEBUG - cudaSafeCall(cudaDeviceSynchronize()); - //cutilCheckMsg(__FUNCTION__); -#endif - -} - -/*__global__ void resetHeapKernel(HashData hashData) -{ - const HashParams& hashParams = c_hashParams; - unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x; - - if (idx == 0) { - hashData.d_heapCounter[0] = hashParams.m_numSDFBlocks - 1; //points to the last element of the array - } - - if (idx < hashParams.m_numSDFBlocks) { - - hashData.d_heap[idx] = hashParams.m_numSDFBlocks - idx - 1; - uint blockSize = SDF_BLOCK_SIZE * SDF_BLOCK_SIZE * SDF_BLOCK_SIZE; - uint base_idx = idx * blockSize; - for (uint i = 0; i < blockSize; i++) { - hashData.deleteVoxel(base_idx+i); - } - } -}*/ - -__global__ void resetHashKernel(HashData hashData) -{ - const HashParams& hashParams = c_hashParams; - const unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x; - if (idx < hashParams.m_hashNumBuckets) { - hashData.deleteHashEntry(hashData.d_hash[idx]); - //hashData.deleteHashEntry(hashData.d_hashCompactified[idx]); - } -} - - -__global__ void resetHashBucketMutexKernel(HashData hashData) -{ - const HashParams& hashParams = c_hashParams; - const unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x; - if (idx < hashParams.m_hashNumBuckets) { - hashData.d_hashBucketMutex[idx] = FREE_ENTRY; - } -} - -extern "C" void resetCUDA(HashData& hashData, const HashParams& hashParams) -{ - //{ - //resetting the heap and SDF blocks - //const dim3 gridSize((hashParams.m_numSDFBlocks + (T_PER_BLOCK*T_PER_BLOCK) - 1)/(T_PER_BLOCK*T_PER_BLOCK), 1); - //const dim3 blockSize((T_PER_BLOCK*T_PER_BLOCK), 1); - - //resetHeapKernel<<<gridSize, blockSize>>>(hashData); - - - //#ifdef _DEBUG - // cudaSafeCall(cudaDeviceSynchronize()); - //cutilCheckMsg(__FUNCTION__); - //#endif - - //} - - { - //resetting the hash - const dim3 gridSize((hashParams.m_hashNumBuckets + (T_PER_BLOCK*T_PER_BLOCK) - 1)/(T_PER_BLOCK*T_PER_BLOCK), 1); - const dim3 blockSize((T_PER_BLOCK*T_PER_BLOCK), 1); - - resetHashKernel<<<gridSize, blockSize>>>(hashData); - - #ifdef _DEBUG - cudaSafeCall(cudaDeviceSynchronize()); - //cutilCheckMsg(__FUNCTION__); - #endif - } - - { - //resetting the mutex - const dim3 gridSize((hashParams.m_hashNumBuckets + (T_PER_BLOCK*T_PER_BLOCK) - 1)/(T_PER_BLOCK*T_PER_BLOCK), 1); - const dim3 blockSize((T_PER_BLOCK*T_PER_BLOCK), 1); - - resetHashBucketMutexKernel<<<gridSize, blockSize>>>(hashData); - - #ifdef _DEBUG - cudaSafeCall(cudaDeviceSynchronize()); - //cutilCheckMsg(__FUNCTION__); - #endif - } - - -} - -extern "C" void resetHashBucketMutexCUDA(HashData& hashData, const HashParams& hashParams, cudaStream_t stream) -{ - const dim3 gridSize((hashParams.m_hashNumBuckets + (T_PER_BLOCK*T_PER_BLOCK) - 1)/(T_PER_BLOCK*T_PER_BLOCK), 1); - const dim3 blockSize((T_PER_BLOCK*T_PER_BLOCK), 1); - - resetHashBucketMutexKernel<<<gridSize, blockSize, 0, stream>>>(hashData); - -#ifdef _DEBUG - cudaSafeCall(cudaDeviceSynchronize()); - //cutilCheckMsg(__FUNCTION__); -#endif -} - - -// Note: bitMask used for Streaming out code... could be set to nullptr if not streaming out -// Note: Allocations might need to be around fat rays since multiple voxels could correspond -// to same depth map pixel at larger distances. -__global__ void allocKernel(HashData hashData, HashParams hashParams, int camnum) -{ - //const HashParams& hashParams = c_hashParams; - const ftl::voxhash::DepthCameraCUDA camera = c_cameras[camnum]; - const DepthCameraParams &cameraParams = camera.params; - - const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; - const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; - - if (x < cameraParams.m_imageWidth && y < cameraParams.m_imageHeight) - { - float d = tex2D<float>(camera.depth, x, y); - //if (d == MINF || d < cameraParams.m_sensorDepthWorldMin || d > cameraParams.m_sensorDepthWorldMax) return; - if (d == MINF || d == 0.0f) return; - - if (d >= hashParams.m_maxIntegrationDistance) return; - - // TODO (Nick) Use covariance to include a frustrum of influence - float t = hashData.getTruncation(d); - float minDepth = min(hashParams.m_maxIntegrationDistance, d-t); - float maxDepth = min(hashParams.m_maxIntegrationDistance, d+t); - if (minDepth >= maxDepth) return; - - // Convert ray from image coords to world - // Does kinectDepthToSkeleton convert pixel values to coordinates using - // camera intrinsics? Same as what reprojectTo3D does in OpenCV? - float3 rayMin = cameraParams.kinectDepthToSkeleton(x, y, minDepth); - // Is the rigid transform then the estimated camera pose? - rayMin = camera.pose * rayMin; - //printf("Ray min: %f,%f,%f\n", rayMin.x, rayMin.y, rayMin.z); - float3 rayMax = cameraParams.kinectDepthToSkeleton(x, y, maxDepth); - rayMax = camera.pose * rayMax; - - float3 rayDir = normalize(rayMax - rayMin); - - // Only ray cast from min possible depth to max depth - int3 idCurrentVoxel = hashData.worldToSDFBlock(rayMin); - int3 idEnd = hashData.worldToSDFBlock(rayMax); - - float3 step = make_float3(sign(rayDir)); - float3 boundaryPos = hashData.SDFBlockToWorld(idCurrentVoxel+make_int3(clamp(step, 0.0, 1.0f)))-0.5f*hashParams.m_virtualVoxelSize; - float3 tMax = (boundaryPos-rayMin)/rayDir; - float3 tDelta = (step*SDF_BLOCK_SIZE*hashParams.m_virtualVoxelSize)/rayDir; - int3 idBound = make_int3(make_float3(idEnd)+step); - - //#pragma unroll - //for(int c = 0; c < 3; c++) { - // if (rayDir[c] == 0.0f) { tMax[c] = PINF; tDelta[c] = PINF; } - // if (boundaryPos[c] - rayMin[c] == 0.0f) { tMax[c] = PINF; tDelta[c] = PINF; } - //} - if (rayDir.x == 0.0f) { tMax.x = PINF; tDelta.x = PINF; } - if (boundaryPos.x - rayMin.x == 0.0f) { tMax.x = PINF; tDelta.x = PINF; } - - if (rayDir.y == 0.0f) { tMax.y = PINF; tDelta.y = PINF; } - if (boundaryPos.y - rayMin.y == 0.0f) { tMax.y = PINF; tDelta.y = PINF; } - - if (rayDir.z == 0.0f) { tMax.z = PINF; tDelta.z = PINF; } - if (boundaryPos.z - rayMin.z == 0.0f) { tMax.z = PINF; tDelta.z = PINF; } - - - unsigned int iter = 0; // iter < g_MaxLoopIterCount - unsigned int g_MaxLoopIterCount = 1024; -#pragma unroll 1 - while(iter < g_MaxLoopIterCount) { - - //check if it's in the frustum and not checked out - if (hashData.isInBoundingBox(hashParams, idCurrentVoxel)) { //} && !isSDFBlockStreamedOut(idCurrentVoxel, hashData, d_bitMask)) { - hashData.allocBlock(idCurrentVoxel); - //printf("Allocate block: %d\n",idCurrentVoxel.x); - } - - // Traverse voxel grid - if(tMax.x < tMax.y && tMax.x < tMax.z) { - idCurrentVoxel.x += step.x; - if(idCurrentVoxel.x == idBound.x) return; - tMax.x += tDelta.x; - } - else if(tMax.z < tMax.y) { - idCurrentVoxel.z += step.z; - if(idCurrentVoxel.z == idBound.z) return; - tMax.z += tDelta.z; - } - else { - idCurrentVoxel.y += step.y; - if(idCurrentVoxel.y == idBound.y) return; - tMax.y += tDelta.y; - } - - iter++; - } - } -} - -extern "C" void allocCUDA(HashData& hashData, const HashParams& hashParams, int camnum, const DepthCameraParams &depthCameraParams, cudaStream_t stream) -{ - - //printf("Allocating: %d\n",depthCameraParams.m_imageWidth); - - const dim3 gridSize((depthCameraParams.m_imageWidth + T_PER_BLOCK - 1)/T_PER_BLOCK, (depthCameraParams.m_imageHeight + T_PER_BLOCK - 1)/T_PER_BLOCK); - const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); - - allocKernel<<<gridSize, blockSize, 0, stream>>>(hashData, hashParams, camnum); - - - #ifdef _DEBUG - cudaSafeCall(cudaDeviceSynchronize()); - //cutilCheckMsg(__FUNCTION__); - #endif -} diff --git a/applications/reconstruct/src/virtual_source.cpp b/applications/reconstruct/src/virtual_source.cpp deleted file mode 100644 index c8dc380743373c591ffcfcea6aae0f713c1068c1..0000000000000000000000000000000000000000 --- a/applications/reconstruct/src/virtual_source.cpp +++ /dev/null @@ -1,89 +0,0 @@ -#include <ftl/virtual_source.hpp> -#include <ftl/depth_camera.hpp> -#include <ftl/voxel_scene.hpp> -#include <ftl/ray_cast_sdf.hpp> - -#define LOGURU_WITH_STREAMS 1 -#include <loguru.hpp> - -using ftl::rgbd::VirtualSource; - -VirtualSource::VirtualSource(ftl::rgbd::Source *host) - : ftl::rgbd::detail::Source(host) { - rays_ = ftl::create<CUDARayCastSDF>(host, "raycaster"); //new CUDARayCastSDF(host->getConfig()); - scene_ = nullptr; - - params_.fx = rays_->value("focal", 430.0f); - params_.fy = params_.fx; - params_.width = rays_->value("width", 640); - params_.height = rays_->value("height", 480); - params_.cx = -((double)params_.width / 2); - params_.cy = -((double)params_.height / 2); - params_.maxDepth = rays_->value("max_depth", 10.0f); - params_.minDepth = rays_->value("min_depth", 0.1f); - - capabilities_ = kCapMovable | kCapVideo | kCapStereo; - - rgb_ = cv::Mat(cv::Size(params_.width,params_.height), CV_8UC3); - idepth_ = cv::Mat(cv::Size(params_.width,params_.height), CV_32SC1); - depth_ = cv::Mat(cv::Size(params_.width,params_.height), CV_32FC1); - - rays_->on("focal", [this](const ftl::config::Event &e) { - params_.fx = rays_->value("focal", 430.0f); - params_.fy = params_.fx; - }); - - rays_->on("width", [this](const ftl::config::Event &e) { - params_.width = rays_->value("width", 640); - }); - - rays_->on("height", [this](const ftl::config::Event &e) { - params_.height = rays_->value("height", 480); - }); -} - -VirtualSource::~VirtualSource() { - if (scene_) delete scene_; - if (rays_) delete rays_; -} - -void VirtualSource::setScene(ftl::voxhash::SceneRep *scene) { - scene_ = scene; -} - -bool VirtualSource::grab(int n, int b) { - if (scene_) { - // Ensure this host thread is using correct GPU. - - cudaSafeCall(cudaSetDevice(scene_->getCUDADevice())); - DepthCameraParams params; - params.fx = params_.fx; - params.fy = params_.fy; - params.mx = -params_.cx; - params.my = -params_.cy; - params.m_imageWidth = params_.width; - params.m_imageHeight = params_.height; - params.m_sensorDepthWorldMin = params_.minDepth; - params.m_sensorDepthWorldMax = params_.maxDepth; - - // TODO(Nick) Use double precision pose here - rays_->render(scene_->getHashData(), scene_->getHashParams(), params, host_->getPose().cast<float>(), scene_->getIntegrationStream()); - - //unique_lock<mutex> lk(mutex_); - if (rays_->isIntegerDepth()) { - rays_->getRayCastData().download((int*)idepth_.data, (uchar3*)rgb_.data, rays_->getRayCastParams(), scene_->getIntegrationStream()); - - cudaSafeCall(cudaStreamSynchronize(scene_->getIntegrationStream())); - idepth_.convertTo(depth_, CV_32FC1, 1.0f / 100.0f); - } else { - rays_->getRayCastData().download((int*)depth_.data, (uchar3*)rgb_.data, rays_->getRayCastParams(), scene_->getIntegrationStream()); - cudaSafeCall(cudaStreamSynchronize(scene_->getIntegrationStream())); - } - } - - return true; -} - -bool VirtualSource::isReady() { - return true; -} diff --git a/applications/reconstruct/src/voxel_scene.cpp b/applications/reconstruct/src/voxel_scene.cpp deleted file mode 100644 index 09067b1f2334e0190e27022b64d374e31532f8c2..0000000000000000000000000000000000000000 --- a/applications/reconstruct/src/voxel_scene.cpp +++ /dev/null @@ -1,466 +0,0 @@ -#include <ftl/voxel_scene.hpp> -#include "depth_camera_cuda.hpp" - -#include <opencv2/core/cuda_stream_accessor.hpp> - -#include <vector> - -using namespace ftl::voxhash; -using ftl::rgbd::Source; -using ftl::rgbd::Channel; -using ftl::Configurable; -using cv::Mat; -using std::vector; - -#define SAFE_DELETE_ARRAY(a) { delete [] (a); (a) = NULL; } - -//extern "C" void resetCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams); -//extern "C" void resetHashBucketMutexCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, cudaStream_t); -//extern "C" void allocCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, int camid, const DepthCameraParams &depthCameraParams, cudaStream_t); -//extern "C" void fillDecisionArrayCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const DepthCameraData& depthCameraData); -//extern "C" void compactifyHashCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams); -//extern "C" unsigned int compactifyHashAllInOneCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams); -//extern "C" void integrateDepthMapCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, cudaStream_t); -//extern "C" void bindInputDepthColorTextures(const DepthCameraData& depthCameraData); - - -SceneRep::SceneRep(nlohmann::json &config) : Configurable(config), m_frameCount(0), do_reset_(false) { - _initCUDA(); - - // Allocates voxel structure on GPU - _create(_parametersFromConfig()); - - on("SDFVoxelSize", [this](const ftl::config::Event &e) { - do_reset_ = true; - }); - on("hashNumSDFBlocks", [this](const ftl::config::Event &e) { - do_reset_ = true; - }); - on("hashNumBuckets", [this](const ftl::config::Event &e) { - do_reset_ = true; - }); - on("hashMaxCollisionLinkedListSize", [this](const ftl::config::Event &e) { - do_reset_ = true; - }); - on("SDFTruncation", [this](const ftl::config::Event &e) { - m_hashParams.m_truncation = value("SDFTruncation", 0.1f); - }); - on("SDFTruncationScale", [this](const ftl::config::Event &e) { - m_hashParams.m_truncScale = value("SDFTruncationScale", 0.01f); - }); - on("SDFMaxIntegrationDistance", [this](const ftl::config::Event &e) { - m_hashParams.m_maxIntegrationDistance = value("SDFMaxIntegrationDistance", 10.0f); - }); - on("showRegistration", [this](const ftl::config::Event &e) { - reg_mode_ = value("showRegistration", false); - }); - - reg_mode_ = value("showRegistration", false); - - cudaSafeCall(cudaStreamCreate(&integ_stream_)); - //integ_stream_ = 0; -} - -SceneRep::~SceneRep() { - _destroy(); - cudaStreamDestroy(integ_stream_); -} - -bool SceneRep::_initCUDA() { - // Do an initial CUDA check - int cuda_device_count = 0; - cudaSafeCall(cudaGetDeviceCount(&cuda_device_count)); - CHECK_GE(cuda_device_count, 1) << "No CUDA devices found"; - - LOG(INFO) << "CUDA Devices (" << cuda_device_count << "):"; - - vector<cudaDeviceProp> properties(cuda_device_count); - for (int i=0; i<cuda_device_count; i++) { - cudaSafeCall(cudaGetDeviceProperties(&properties[i], i)); - LOG(INFO) << " - " << properties[i].name; - } - - int desired_device = value("cudaDevice", 0); - cuda_device_ = (desired_device < cuda_device_count) ? desired_device : cuda_device_count-1; - cudaSafeCall(cudaSetDevice(cuda_device_)); - - // TODO:(Nick) Check memory is sufficient - // TODO:(Nick) Find out what our compute capability should be. - - LOG(INFO) << "CUDA Compute: " << properties[cuda_device_].major << "." << properties[cuda_device_].minor; - - return true; -} - -void SceneRep::addSource(ftl::rgbd::Source *src) { - auto &cam = cameras_.emplace_back(); - cam.source = src; - cam.params.m_imageWidth = 0; - src->setChannel(Channel::Depth); -} - -extern "C" void updateCUDACameraConstant(ftl::voxhash::DepthCameraCUDA *data, int count); - -void SceneRep::_updateCameraConstant() { - std::vector<ftl::voxhash::DepthCameraCUDA> cams(cameras_.size()); - for (size_t i=0; i<cameras_.size(); ++i) { - cameras_[i].gpu.data.pose = MatrixConversion::toCUDA(cameras_[i].source->getPose().cast<float>()); - cameras_[i].gpu.data.poseInverse = MatrixConversion::toCUDA(cameras_[i].source->getPose().cast<float>().inverse()); - cams[i] = cameras_[i].gpu.data; - } - updateCUDACameraConstant(cams.data(), cams.size()); -} - -int SceneRep::upload() { - int active = 0; - - for (size_t i=0; i<cameras_.size(); ++i) { - cameras_[i].source->grab(); - } - - for (size_t i=0; i<cameras_.size(); ++i) { - auto &cam = cameras_[i]; - - if (!cam.source->isReady()) { - cam.params.m_imageWidth = 0; - // TODO(Nick) : Free gpu allocs if was ready before - LOG(INFO) << "Source not ready: " << cam.source->getURI(); - continue; - } else { - auto in = cam.source; - - cam.params.fx = in->parameters().fx; - cam.params.fy = in->parameters().fy; - cam.params.mx = -in->parameters().cx; - cam.params.my = -in->parameters().cy; - - // Only now do we have camera parameters for allocations... - if (cam.params.m_imageWidth == 0) { - cam.params.m_imageWidth = in->parameters().width; - cam.params.m_imageHeight = in->parameters().height; - cam.params.m_sensorDepthWorldMax = in->parameters().maxDepth; - cam.params.m_sensorDepthWorldMin = in->parameters().minDepth; - cam.gpu.alloc(cam.params, true); - LOG(INFO) << "GPU Allocated camera " << i; - } - } - - cam.params.flags = m_frameCount; - } - - _updateCameraConstant(); - //cudaSafeCall(cudaDeviceSynchronize()); - - for (size_t i=0; i<cameras_.size(); ++i) { - auto &cam = cameras_[i]; - - // Get the RGB-Depth frame from input - Source *input = cam.source; - Mat rgb, depth; - - // TODO(Nick) Direct GPU upload to save copy - input->getFrames(rgb,depth); - - active += 1; - - if (depth.cols == 0) continue; - - // Must be in RGBA for GPU - Mat rgbt, rgba; - cv::cvtColor(rgb,rgbt, cv::COLOR_BGR2Lab); - cv::cvtColor(rgbt,rgba, cv::COLOR_BGR2BGRA); - - // Send to GPU and merge view into scene - //cam.gpu.updateParams(cam.params); - cam.gpu.updateData(depth, rgba, cam.stream); - - //setLastRigidTransform(input->getPose().cast<float>()); - - //make the rigid transform available on the GPU - //m_hashData.updateParams(m_hashParams, cv::cuda::StreamAccessor::getStream(cam.stream)); - - //if (i > 0) cudaSafeCall(cudaStreamSynchronize(cv::cuda::StreamAccessor::getStream(cameras_[i-1].stream))); - - //allocate all hash blocks which are corresponding to depth map entries - //if (value("voxels", false)) _alloc(i, cv::cuda::StreamAccessor::getStream(cam.stream)); - - // Calculate normals - } - - // Must have finished all allocations and rendering before next integration - cudaSafeCall(cudaDeviceSynchronize()); - - return active; -} - -int SceneRep::upload(ftl::rgbd::FrameSet &fs) { - int active = 0; - - for (size_t i=0; i<cameras_.size(); ++i) { - auto &cam = cameras_[i]; - - if (!cam.source->isReady()) { - cam.params.m_imageWidth = 0; - // TODO(Nick) : Free gpu allocs if was ready before - LOG(INFO) << "Source not ready: " << cam.source->getURI(); - continue; - } else { - auto in = cam.source; - - cam.params.fx = in->parameters().fx; - cam.params.fy = in->parameters().fy; - cam.params.mx = -in->parameters().cx; - cam.params.my = -in->parameters().cy; - - // Only now do we have camera parameters for allocations... - if (cam.params.m_imageWidth == 0) { - cam.params.m_imageWidth = in->parameters().width; - cam.params.m_imageHeight = in->parameters().height; - cam.params.m_sensorDepthWorldMax = in->parameters().maxDepth; - cam.params.m_sensorDepthWorldMin = in->parameters().minDepth; - cam.gpu.alloc(cam.params, true); - LOG(INFO) << "GPU Allocated camera " << i; - } - } - - cam.params.flags = m_frameCount; - } - - _updateCameraConstant(); - //cudaSafeCall(cudaDeviceSynchronize()); - - for (size_t i=0; i<cameras_.size(); ++i) { - auto &cam = cameras_[i]; - auto &chan1 = fs.frames[i].get<cv::Mat>(Channel::Colour); - auto &chan2 = fs.frames[i].get<cv::Mat>(fs.sources[i]->getChannel()); - - auto test = fs.frames[i].createTexture<uchar4>(Channel::Flow, ftl::rgbd::Format<uchar4>(100,100)); - - // Get the RGB-Depth frame from input - Source *input = cam.source; - //Mat rgb, depth; - - // TODO(Nick) Direct GPU upload to save copy - //input->getFrames(rgb,depth); - - active += 1; - - //if (depth.cols == 0) continue; - - // Must be in RGBA for GPU - Mat rgbt, rgba; - cv::cvtColor(chan1,rgbt, cv::COLOR_BGR2Lab); - cv::cvtColor(rgbt,rgba, cv::COLOR_BGR2BGRA); - - // Send to GPU and merge view into scene - //cam.gpu.updateParams(cam.params); - cam.gpu.updateData(chan2, rgba, cam.stream); - - //setLastRigidTransform(input->getPose().cast<float>()); - - //make the rigid transform available on the GPU - //m_hashData.updateParams(m_hashParams, cv::cuda::StreamAccessor::getStream(cam.stream)); - - //if (i > 0) cudaSafeCall(cudaStreamSynchronize(cv::cuda::StreamAccessor::getStream(cameras_[i-1].stream))); - - //allocate all hash blocks which are corresponding to depth map entries - //if (value("voxels", false)) _alloc(i, cv::cuda::StreamAccessor::getStream(cam.stream)); - - // Calculate normals - } - - // Must have finished all allocations and rendering before next integration - cudaSafeCall(cudaDeviceSynchronize()); - - return active; -} - -void SceneRep::integrate() { - /*for (size_t i=0; i<cameras_.size(); ++i) { - auto &cam = cameras_[i]; - - setLastRigidTransform(cam.source->getPose().cast<float>()); - //m_hashData.updateParams(m_hashParams); - - //generate a linear hash array with only occupied entries - _compactifyVisible(cam.params); - - //volumetrically integrate the depth data into the depth SDFBlocks - //_integrateDepthMap(cam.gpu, cam.params); - - //_garbageCollect(); - - m_numIntegratedFrames++; - }*/ - - _compactifyAllocated(); - _integrateDepthMaps(); -} - -void SceneRep::garbage() { - //_compactifyAllocated(); - //if (value("voxels", false)) _garbageCollect(); - - //cudaSafeCall(cudaStreamSynchronize(integ_stream_)); -} - -/*void SceneRep::integrate(const Eigen::Matrix4f& lastRigidTransform, const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, unsigned int* d_bitMask) { - - setLastRigidTransform(lastRigidTransform); - - //make the rigid transform available on the GPU - m_hashData.updateParams(m_hashParams); - - //allocate all hash blocks which are corresponding to depth map entries - _alloc(depthCameraData, depthCameraParams, d_bitMask); - - //generate a linear hash array with only occupied entries - _compactifyHashEntries(); - - //volumetrically integrate the depth data into the depth SDFBlocks - _integrateDepthMap(depthCameraData, depthCameraParams); - - _garbageCollect(depthCameraData); - - m_numIntegratedFrames++; -}*/ - -/*void SceneRep::setLastRigidTransformAndCompactify(const Eigen::Matrix4f& lastRigidTransform, const DepthCameraData& depthCameraData) { - setLastRigidTransform(lastRigidTransform); - _compactifyHashEntries(); -}*/ - -/* Nick: To reduce weights between frames */ -void SceneRep::nextFrame() { - if (do_reset_) { - do_reset_ = false; - _destroy(); - _create(_parametersFromConfig()); - } else { - //ftl::cuda::compactifyAllocated(m_hashData, m_hashParams, integ_stream_); - //if (reg_mode_) ftl::cuda::clearVoxels(m_hashData, m_hashParams); - //else ftl::cuda::starveVoxels(m_hashData, m_hashParams, integ_stream_); - m_numIntegratedFrames = 0; - } -} - -//! resets the hash to the initial state (i.e., clears all data) -void SceneRep::reset() { - m_numIntegratedFrames = 0; - m_hashData.updateParams(m_hashParams); - resetCUDA(m_hashData, m_hashParams); -} - -unsigned int SceneRep::getOccupiedCount() { - unsigned int count; - cudaSafeCall(cudaMemcpy(&count, m_hashData.d_hashCompactifiedCounter, sizeof(unsigned int), cudaMemcpyDeviceToHost)); - return count+1; //there is one more free than the address suggests (0 would be also a valid address) -} - -HashParams SceneRep::_parametersFromConfig() { - //auto &cfg = ftl::config::resolve(config); - HashParams params; - // First camera view is set to identity pose to be at the centre of - // the virtual coordinate space. - params.m_hashNumBuckets = value("hashNumBuckets", 100000); - params.m_virtualVoxelSize = value("SDFVoxelSize", 0.006f); - params.m_maxIntegrationDistance = value("SDFMaxIntegrationDistance", 10.0f); - params.m_truncation = value("SDFTruncation", 0.1f); - params.m_truncScale = value("SDFTruncationScale", 0.01f); - params.m_integrationWeightSample = value("SDFIntegrationWeightSample", 10); - params.m_integrationWeightMax = value("SDFIntegrationWeightMax", 255); - params.m_spatialSmoothing = value("spatialSmoothing", 0.04f); // 4cm - params.m_colourSmoothing = value("colourSmoothing", 20.0f); - params.m_confidenceThresh = value("confidenceThreshold", 20.0f); - params.m_flags = 0; - params.m_flags |= (value("clipping", false)) ? ftl::voxhash::kFlagClipping : 0; - params.m_flags |= (value("mls", false)) ? ftl::voxhash::kFlagMLS : 0; - params.m_maxBounds = make_float3( - value("bbox_x_max", 2.0f), - value("bbox_y_max", 2.0f), - value("bbox_z_max", 2.0f)); - params.m_minBounds = make_float3( - value("bbox_x_min", -2.0f), - value("bbox_y_min", -2.0f), - value("bbox_z_min", -2.0f)); - return params; -} - -void SceneRep::_create(const HashParams& params) { - m_hashParams = params; - m_hashData.allocate(m_hashParams); - - reset(); -} - -void SceneRep::_destroy() { - m_hashData.free(); -} - -void SceneRep::_alloc(int camid, cudaStream_t stream) { - // NOTE (nick): We might want this later... - /*if (false) { - // TODO(Nick) Make this work without memcpy to host first - allocate until all blocks are allocated - unsigned int prevFree = 0; //getHeapFreeCount(); - while (1) { - resetHashBucketMutexCUDA(m_hashData, m_hashParams, stream); - allocCUDA(m_hashData, m_hashParams, camid, cameras_[camid].params, stream); - - unsigned int currFree = getHeapFreeCount(); - - if (prevFree != currFree) { - prevFree = currFree; - } - else { - break; - } - } - } - else {*/ - //this version is faster, but it doesn't guarantee that all blocks are allocated (staggers alloc to the next frame) - //resetHashBucketMutexCUDA(m_hashData, m_hashParams, stream); - //allocCUDA(m_hashData, m_hashParams, camid, cameras_[camid].params, stream); - //} -} - - -void SceneRep::_compactifyVisible(const DepthCameraParams &camera) { //const DepthCameraData& depthCameraData) { - //ftl::cuda::compactifyOccupied(m_hashData, m_hashParams, integ_stream_); //this version uses atomics over prefix sums, which has a much better performance - //m_hashData.updateParams(m_hashParams); //make sure numOccupiedBlocks is updated on the GPU -} - -void SceneRep::_compactifyAllocated() { - //ftl::cuda::compactifyAllocated(m_hashData, m_hashParams, integ_stream_); //this version uses atomics over prefix sums, which has a much better performance - //std::cout << "Occ blocks = " << m_hashParams.m_numOccupiedBlocks << std::endl; - //m_hashData.updateParams(m_hashParams); //make sure numOccupiedBlocks is updated on the GPU -} - -/*void SceneRep::_integrateDepthMap(const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams) { - if (!reg_mode_) ftl::cuda::integrateDepthMap(m_hashData, m_hashParams, depthCameraData, depthCameraParams, integ_stream_); - else ftl::cuda::integrateRegistration(m_hashData, m_hashParams, depthCameraData, depthCameraParams, integ_stream_); -}*/ - -//extern "C" void bilateralFilterFloatMap(float* d_output, float* d_input, float sigmaD, float sigmaR, unsigned int width, unsigned int height); - -void SceneRep::_integrateDepthMaps() { - //cudaSafeCall(cudaDeviceSynchronize()); - - for (size_t i=0; i<cameras_.size(); ++i) { - if (!cameras_[i].source->isReady()) continue; - //ftl::cuda::clear_depth(*(cameras_[i].gpu.depth2_tex_), integ_stream_); - cameras_[i].gpu.computeNormals(integ_stream_); - ftl::cuda::clear_points(*(cameras_[i].gpu.points_tex_), integ_stream_); - ftl::cuda::mls_smooth(*(cameras_[i].gpu.points_tex_), m_hashParams, cameras_.size(), i, integ_stream_); - //ftl::cuda::int_to_float(*(cameras_[i].gpu.depth2_tex_), *(cameras_[i].gpu.depth_tex_), 1.0f / 1000.0f, integ_stream_); - //ftl::cuda::hole_fill(*(cameras_[i].gpu.depth2_tex_), *(cameras_[i].gpu.depth_tex_), cameras_[i].params, integ_stream_); - //bilateralFilterFloatMap(cameras_[i].gpu.depth_tex_->devicePtr(), cameras_[i].gpu.depth3_tex_->devicePtr(), 3, 7, cameras_[i].gpu.depth_tex_->width(), cameras_[i].gpu.depth_tex_->height()); - } - //if (value("voxels", false)) ftl::cuda::integrateDepthMaps(m_hashData, m_hashParams, cameras_.size(), integ_stream_); -} - -void SceneRep::_garbageCollect() { - //ftl::cuda::garbageCollectIdentify(m_hashData, m_hashParams, integ_stream_); - //resetHashBucketMutexCUDA(m_hashData, m_hashParams, integ_stream_); //needed if linked lists are enabled -> for memeory deletion - //ftl::cuda::garbageCollectFree(m_hashData, m_hashParams, integ_stream_); -} diff --git a/components/codecs/src/nvpipe_encoder.cpp b/components/codecs/src/nvpipe_encoder.cpp index 42374bedecf95a749e842a3a2ed22b5226d6d390..f1b068d740d6c9ff5210cc6598765061752535c3 100644 --- a/components/codecs/src/nvpipe_encoder.cpp +++ b/components/codecs/src/nvpipe_encoder.cpp @@ -47,6 +47,10 @@ bool NvPipeEncoder::encode(const cv::Mat &in, definition_t odefinition, bitrate_ //LOG(INFO) << "Definition: " << ftl::codecs::getWidth(definition) << "x" << ftl::codecs::getHeight(definition); + if (in.empty()) { + LOG(ERROR) << "Missing data for Nvidia encoder"; + return false; + } if (!_createEncoder(in, definition, bitrate)) return false; //LOG(INFO) << "NvPipe Encode: " << int(definition) << " " << in.cols; diff --git a/components/renderers/cpp/include/ftl/cuda/points.hpp b/components/renderers/cpp/include/ftl/cuda/points.hpp index ddbac8ba9d09ef44bed37481575361bf6d714263..cb996c4ec68a794b2b9ede7a03cf2b2c5d92047d 100644 --- a/components/renderers/cpp/include/ftl/cuda/points.hpp +++ b/components/renderers/cpp/include/ftl/cuda/points.hpp @@ -21,6 +21,12 @@ void point_cloud(ftl::cuda::TextureObject<float4> &output, void clipping(ftl::cuda::TextureObject<float4> &points, const ClipSpace &clip, cudaStream_t stream); +void point_cloud(ftl::cuda::TextureObject<float> &output, ftl::cuda::TextureObject<float4> &points, const ftl::rgbd::Camera ¶ms, const float4x4 &poseinv, cudaStream_t stream); + +void world_to_cam(ftl::cuda::TextureObject<float4> &points, const float4x4 &poseinv, cudaStream_t stream); + +void cam_to_world(ftl::cuda::TextureObject<float4> &points, const float4x4 &pose, cudaStream_t stream); + } } diff --git a/components/renderers/cpp/include/ftl/cuda/weighting.hpp b/components/renderers/cpp/include/ftl/cuda/weighting.hpp index 9498d0508605087306db2658b2a1ae1943cde536..15d3dbcec387f97d8ffe60690bdb5c1fda2c098c 100644 --- a/components/renderers/cpp/include/ftl/cuda/weighting.hpp +++ b/components/renderers/cpp/include/ftl/cuda/weighting.hpp @@ -4,19 +4,41 @@ namespace ftl { namespace cuda { +__device__ inline float weighting(float r, float h) { + if (r >= h) return 0.0f; + float rh = r / h; + rh = 1.0f - rh*rh; + return rh*rh*rh*rh; +} + /* * Guennebaud, G.; Gross, M. Algebraic point set surfaces. ACMTransactions on Graphics Vol. 26, No. 3, Article No. 23, 2007. * Used in: FusionMLS: Highly dynamic 3D reconstruction with consumer-grade RGB-D cameras * r = distance between points * h = smoothing parameter in meters (default 4cm) */ -__device__ inline float spatialWeighting(float r, float h) { +__device__ inline float spatialWeighting(const float3 &a, const float3 &b, float h) { + const float r = length(a-b); if (r >= h) return 0.0f; float rh = r / h; rh = 1.0f - rh*rh; return rh*rh*rh*rh; } +/* + * Colour weighting as suggested in: + * C. Kuster et al. Spatio-Temporal Geometry Fusion for Multiple Hybrid Cameras using Moving Least Squares Surfaces. 2014. + * c = colour distance + */ + __device__ inline float colourWeighting(uchar4 a, uchar4 b, float h) { + const float3 delta = make_float3((float)a.x - (float)b.x, (float)a.y - (float)b.y, (float)a.z - (float)b.z); + const float c = length(delta); + if (c >= h) return 0.0f; + float ch = c / h; + ch = 1.0f - ch*ch; + return ch*ch*ch*ch; +} + } } diff --git a/components/renderers/cpp/src/splat_render.cpp b/components/renderers/cpp/src/splat_render.cpp index aed04ad897072ea67947d50f9c4d5552c52cb634..6a6bbb9679112cc80c0bf8f307ae5536af3ba4f2 100644 --- a/components/renderers/cpp/src/splat_render.cpp +++ b/components/renderers/cpp/src/splat_render.cpp @@ -54,6 +54,7 @@ void Splatter::renderChannel( ftl::render::SplatParams ¶ms, ftl::rgbd::Frame &out, const Channel &channel, cudaStream_t stream) { + if (channel == Channel::None) return; cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream); temp_.get<GpuMat>(Channel::Depth).setTo(cv::Scalar(0x7FFFFFFF), cvstream); temp_.get<GpuMat>(Channel::Depth2).setTo(cv::Scalar(0x7FFFFFFF), cvstream); @@ -105,7 +106,7 @@ void Splatter::renderChannel( // Accumulate attribute contributions for each pixel for (auto &f : scene_->frames) { // Convert colour from BGR to BGRA if needed - if (f.get<GpuMat>(Channel::Colour).type() == CV_8UC3) { + if (f.get<GpuMat>(channel).type() == CV_8UC3) { // Convert to 4 channel colour auto &col = f.get<GpuMat>(Channel::Colour); GpuMat tmp(col.size(), CV_8UC4); @@ -247,6 +248,15 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cuda out.create<GpuMat>(Channel::Right, Format<uchar4>(camera.width, camera.height)); out.get<GpuMat>(Channel::Right).setTo(cv::Scalar(76,76,76), cvstream); renderChannel(params, out, Channel::Right, stream); + } else if (chan != Channel::None) { + if (ftl::rgbd::isFloatChannel(chan)) { + out.create<GpuMat>(chan, Format<float>(camera.width, camera.height)); + out.get<GpuMat>(chan).setTo(cv::Scalar(0.0f), cvstream); + } else { + out.create<GpuMat>(chan, Format<uchar4>(camera.width, camera.height)); + out.get<GpuMat>(chan).setTo(cv::Scalar(76,76,76,255), cvstream); + } + renderChannel(params, out, chan, stream); } return true; diff --git a/components/renderers/cpp/src/splatter.cu b/components/renderers/cpp/src/splatter.cu index 3b1ae4b47ef0fe6b29b15d1fa20fdc9a0fd0b9bb..c7456e57bf4425a6f16020a66c885d58dbdda5a4 100644 --- a/components/renderers/cpp/src/splatter.cu +++ b/components/renderers/cpp/src/splatter.cu @@ -63,6 +63,8 @@ __device__ inline float4 make_float4(const uchar4 &c) { #define SMOOTHING_MULTIPLIER_B 4.0f // For z contribution #define SMOOTHING_MULTIPLIER_C 4.0f // For colour contribution +#define ACCUM_DIAMETER 8 + /* * Pass 2: Accumulate attribute contributions if the points pass a visibility test. */ @@ -92,7 +94,7 @@ __global__ void dibr_attribute_contrib_kernel( if (camPos.z > params.camera.maxDepth) return; const uint2 screenPos = params.camera.camToScreen<uint2>(camPos); - const int upsample = 8; //min(UPSAMPLE_MAX, int((5.0f*r) * params.camera.fx / camPos.z)); + //const int upsample = 8; //min(UPSAMPLE_MAX, int((5.0f*r) * params.camera.fx / camPos.z)); // Not on screen so stop now... if (screenPos.x >= depth_in.width() || screenPos.y >= depth_in.height()) return; @@ -107,16 +109,16 @@ __global__ void dibr_attribute_contrib_kernel( // Each thread in warp takes an upsample point and updates corresponding depth buffer. const int lane = tid % WARP_SIZE; - for (int i=lane; i<upsample*upsample; i+=WARP_SIZE) { - const float u = (i % upsample) - (upsample / 2); - const float v = (i / upsample) - (upsample / 2); + for (int i=lane; i<ACCUM_DIAMETER*ACCUM_DIAMETER; i+=WARP_SIZE) { + const float u = (i % ACCUM_DIAMETER) - (ACCUM_DIAMETER / 2); + const float v = (i / ACCUM_DIAMETER) - (ACCUM_DIAMETER / 2); // Use the depth buffer to determine this pixels 3D position in camera space const float d = ((float)depth_in.tex2D(screenPos.x+u, screenPos.y+v)/1000.0f); const float3 nearest = params.camera.screenToCam((int)(screenPos.x+u),(int)(screenPos.y+v),d); // What is contribution of our current point at this pixel? - const float weight = ftl::cuda::spatialWeighting(length(nearest - camPos), SMOOTHING_MULTIPLIER_C*(nearest.z/params.camera.fx)); + const float weight = ftl::cuda::spatialWeighting(nearest, camPos, SMOOTHING_MULTIPLIER_C*(nearest.z/params.camera.fx)); if (screenPos.x+u < colour_out.width() && screenPos.y+v < colour_out.height() && weight > 0.0f) { // TODO: Use confidence threshold here const float4 wcolour = colour * weight; //const float4 wnormal = normal * weight; @@ -187,7 +189,7 @@ __global__ void dibr_attribute_contrib_kernel( const float3 nearest = params.camera.screenToCam((int)(screenPos.x+u),(int)(screenPos.y+v),d); // What is contribution of our current point at this pixel? - const float weight = ftl::cuda::spatialWeighting(length(nearest - camPos), SMOOTHING_MULTIPLIER_C*(nearest.z/params.camera.fx)); + const float weight = ftl::cuda::spatialWeighting(nearest, camPos, SMOOTHING_MULTIPLIER_C*(nearest.z/params.camera.fx)); if (screenPos.x+u < colour_out.width() && screenPos.y+v < colour_out.height() && weight > 0.0f) { // TODO: Use confidence threshold here const float wcolour = colour * weight; //const float4 wnormal = normal * weight; diff --git a/components/rgbd-sources/src/streamer.cpp b/components/rgbd-sources/src/streamer.cpp index 676cf58cf3045111cf966d78d587c0e918a351e7..7a9118c9f47975a31d6389982b2adb818ed8a046 100644 --- a/components/rgbd-sources/src/streamer.cpp +++ b/components/rgbd-sources/src/streamer.cpp @@ -496,7 +496,7 @@ void Streamer::_transmitPacket(StreamSource *src, const ftl::codecs::Packet &pkt frame_no_, static_cast<uint8_t>((chan & 0x1) | ((hasChan2) ? 0x2 : 0x0)) }; - LOG(INFO) << "codec:" << (int) pkt.codec; + // Lock to prevent clients being added / removed //SHARED_LOCK(src->mutex,lk); auto c = src->clients.begin(); diff --git a/components/rgbd-sources/src/virtual.cpp b/components/rgbd-sources/src/virtual.cpp index 0e6db973884a3c8361fcaae764cc1688d2434d9d..0166626957f2ae16ea5b9e3fa9f960dc2fc22538 100644 --- a/components/rgbd-sources/src/virtual.cpp +++ b/components/rgbd-sources/src/virtual.cpp @@ -47,8 +47,6 @@ class VirtualImpl : public ftl::rgbd::detail::Source { frame.hasChannel(host_->getChannel())) { frame.download(host_->getChannel()); cv::swap(frame.get<cv::Mat>(host_->getChannel()), depth_); - } else { - LOG(ERROR) << "Channel 2 frame in rendering"; } auto cb = host_->callback();