diff --git a/applications/reconstruct/CMakeLists.txt b/applications/reconstruct/CMakeLists.txt index 906d7ebdaca59ce249c520e8440dd2cdaa16f34d..4db3fbf4285e8022c7af9f908d87ef1966e9ecd4 100644 --- a/applications/reconstruct/CMakeLists.txt +++ b/applications/reconstruct/CMakeLists.txt @@ -10,12 +10,14 @@ set(REPSRC src/garbage.cu src/integrators.cu src/ray_cast_sdf.cu + src/splat_render.cu src/camera_util.cu src/voxel_hash.cu src/voxel_hash.cpp src/ray_cast_sdf.cpp src/registration.cpp src/virtual_source.cpp + src/splat_render.cpp ) add_executable(ftl-reconstruct ${REPSRC}) diff --git a/applications/reconstruct/src/main.cpp b/applications/reconstruct/src/main.cpp index 1f04714547306d25fa7e1597eebaeac5d0143ea8..712b3232ba6d2d2c4561cf5325a8e3d657c0c4e0 100644 --- a/applications/reconstruct/src/main.cpp +++ b/applications/reconstruct/src/main.cpp @@ -15,6 +15,8 @@ #include <ftl/rgbd/streamer.hpp> #include <ftl/slave.hpp> +#include "splat_render.hpp" + #include <string> #include <vector> #include <thread> @@ -90,14 +92,16 @@ static void run(ftl::Configurable *root) { ftl::voxhash::SceneRep *scene = ftl::create<ftl::voxhash::SceneRep>(root, "voxelhash"); ftl::rgbd::Streamer *stream = ftl::create<ftl::rgbd::Streamer>(root, "stream", net); ftl::rgbd::Source *virt = ftl::create<ftl::rgbd::Source>(root, "virtual", net); + ftl::render::Splatter *splat = new ftl::render::Splatter(scene); - auto virtimpl = new ftl::rgbd::VirtualSource(virt); - virt->customImplementation(virtimpl); - virtimpl->setScene(scene); + //auto virtimpl = new ftl::rgbd::VirtualSource(virt); + //virt->customImplementation(virtimpl); + //virtimpl->setScene(scene); stream->add(virt); for (int i=0; i<sources.size(); i++) { Source *in = sources[i]; + in->setChannel(ftl::rgbd::kChanDepth); stream->add(in); scene->addSource(in); } @@ -124,6 +128,8 @@ static void run(ftl::Configurable *root) { // Make sure previous virtual camera frame has finished rendering stream->wait(); + LOG(INFO) << "Heap: " << scene->getHeapFreeCount(); + // Merge new frames into the voxel structure scene->integrate(); @@ -134,6 +140,8 @@ static void run(ftl::Configurable *root) { active = 1; } + splat->render(virt, scene->getIntegrationStream()); + // Start virtual camera rendering and previous frame compression stream->poll(); } diff --git a/applications/reconstruct/src/point_cloud.hpp b/applications/reconstruct/src/point_cloud.hpp new file mode 100644 index 0000000000000000000000000000000000000000..7b5f49c07ea5826736feb861bcd42f133367c291 --- /dev/null +++ b/applications/reconstruct/src/point_cloud.hpp @@ -0,0 +1,36 @@ +#ifndef _FTL_POINT_CLOUD_HPP_ +#define _FTL_POINT_CLOUD_HPP_ + +namespace ftl { +namespace pointcloud { + + +struct VoxelPoint { + union { + uint64_t val; + struct { + uint16_t x : 12; // Block X + uint16_t y : 12; // Block Y + uint16_t z : 12; // Block Z + uint16_t v : 9; // Voxel offset in block 0-511 + }; + }; +}; + +struct VoxelColour { + union { + uint32_t val; + struct { + uint8_t b; + uint8_t g; + uint8_t r; + uint8_t a; + }; + }; +}; + + +} +} + +#endif // _FTL_POINT_CLOUD_HPP_ diff --git a/applications/reconstruct/src/ray_cast_sdf.cpp b/applications/reconstruct/src/ray_cast_sdf.cpp index c7c04a7293a1e92fac3de0f46c14a4eb30098b3f..1c12a762cd56b82d8dd8be585c0fe75733b010f5 100644 --- a/applications/reconstruct/src/ray_cast_sdf.cpp +++ b/applications/reconstruct/src/ray_cast_sdf.cpp @@ -2,6 +2,7 @@ #include <ftl/voxel_hash.hpp> #include "compactors.hpp" +#include "splat_render_cuda.hpp" //#include "Util.h" @@ -62,8 +63,9 @@ void 
CUDARayCastSDF::render(ftl::voxhash::HashData& hashData, ftl::voxhash::Hash compactifyHashEntries(hashData, hashParams, stream); - if (hash_render_) nickRenderCUDA(hashData, hashParams, m_data, m_params, stream); - else renderCS(hashData, m_data, m_params, stream); + if (hash_render_) { + //ftl::cuda::isosurface_point_image(hashData, hashParams, m_data, m_params, stream); + } else renderCS(hashData, m_data, m_params, stream); //convertToCameraSpace(cameraData); //if (!m_params.m_useGradients) diff --git a/applications/reconstruct/src/ray_cast_sdf.cu b/applications/reconstruct/src/ray_cast_sdf.cu index 09795894869cdf6138b265020d97a5cd25a443dd..a43b608429b1fad1ac160b188c9be6b084274154 100644 --- a/applications/reconstruct/src/ray_cast_sdf.cu +++ b/applications/reconstruct/src/ray_cast_sdf.cu @@ -76,326 +76,6 @@ extern "C" void renderCS(const ftl::voxhash::HashData& hashData, const RayCastDa #endif } -//////////////////////////////////////////////////////////////////////////////// -// Nicks render approach -//////////////////////////////////////////////////////////////////////////////// - -__global__ void clearDepthKernel(ftl::voxhash::HashData hashData, RayCastData rayCastData, RayCastParams rayCastParams) -{ - const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; - const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; - - //const RayCastParams& rayCastParams = c_rayCastParams; - - if (x < rayCastParams.m_width && y < rayCastParams.m_height) { - rayCastData.d_depth_i[y*rayCastParams.m_width+x] = 0x7FFFFFFF; //PINF; - rayCastData.d_colors[y*rayCastParams.m_width+x] = make_uchar3(76,76,82); - } -} - -#define SDF_BLOCK_SIZE_PAD 9 -#define SDF_BLOCK_BUFFER 1024 // > 9x9x9 -#define SDF_DX 1 -#define SDF_DY SDF_BLOCK_SIZE_PAD -#define SDF_DZ (SDF_BLOCK_SIZE_PAD*SDF_BLOCK_SIZE_PAD) - -__device__ -float frac(float val) { - return (val - floorf(val)); -} -__device__ -float3 frac(const float3& val) { - return make_float3(frac(val.x), frac(val.y), frac(val.z)); -} - -__host__ size_t nickSharedMem() { - return sizeof(float)*SDF_BLOCK_BUFFER + - sizeof(uchar)*SDF_BLOCK_BUFFER + - sizeof(float)*SDF_BLOCK_BUFFER + - sizeof(float3)*SDF_BLOCK_BUFFER; -} - -/*__device__ void loadVoxel(const ftl::voxhash::HashData &hash, const int3 &vox, float *sdf, uint *weight, float3 *colour) { - ftl::voxhash::Voxel &v = hashData.getVoxel(vox); - *sdf = v.sdf; - *weight = v.weight; - *colour = v.color; -}*/ - -//! computes the (local) virtual voxel pos of an index; idx in [0;511] -__device__ -int3 pdelinVoxelIndex(uint idx) { - int x = idx % SDF_BLOCK_SIZE_PAD; - int y = (idx % (SDF_BLOCK_SIZE_PAD * SDF_BLOCK_SIZE_PAD)) / SDF_BLOCK_SIZE_PAD; - int z = idx / (SDF_BLOCK_SIZE_PAD * SDF_BLOCK_SIZE_PAD); - return make_int3(x,y,z); -} - -//! 
computes the linearized index of a local virtual voxel pos; pos in [0;7]^3 -__device__ -uint plinVoxelPos(const int3& virtualVoxelPos) { - return - virtualVoxelPos.z * SDF_BLOCK_SIZE_PAD * SDF_BLOCK_SIZE_PAD + - virtualVoxelPos.y * SDF_BLOCK_SIZE_PAD + - virtualVoxelPos.x; -} - -__device__ -void deleteVoxel(ftl::voxhash::Voxel& v) { - v.color = make_uchar3(0,0,0); - v.weight = 0; - v.sdf = PINF; -} - -__device__ inline int3 blockDelinear(const int3 &base, uint i) { - return make_int3(base.x + (i & 0x1), base.y + (i & 0x2), base.z + (i & 0x4)); -} - -__device__ inline uint blockLinear(int x, int y, int z) { - return x + (y << 1) + (z << 2); -} - -__device__ inline void trilinearInterp(const ftl::voxhash::HashData &hashData, const ftl::voxhash::Voxel *voxels, const uint *ix, const float3 &pos, float &depth, uchar3 &colour) { - float3 colorFloat = make_float3(0.0f, 0.0f, 0.0f); - const float3 weight = frac(hashData.worldToVirtualVoxelPosFloat(pos)); // Should be world position of ray, not voxel?? - float dist = 0.0f; - dist+= (1.0f-weight.x)*(1.0f-weight.y)*(1.0f-weight.z)*voxels[ix[0]].sdf; colorFloat+= (1.0f-weight.x)*(1.0f-weight.y)*(1.0f-weight.z)*make_float3(voxels[ix[0]].color); - dist+= weight.x *(1.0f-weight.y)*(1.0f-weight.z)*voxels[ix[1]].sdf; colorFloat+= weight.x *(1.0f-weight.y)*(1.0f-weight.z)*make_float3(voxels[ix[1]].color); - dist+= (1.0f-weight.x)* weight.y *(1.0f-weight.z)*voxels[ix[2]].sdf; colorFloat+= (1.0f-weight.x)* weight.y *(1.0f-weight.z)*make_float3(voxels[ix[2]].color); - dist+= (1.0f-weight.x)*(1.0f-weight.y)* weight.z *voxels[ix[3]].sdf; colorFloat+= (1.0f-weight.x)*(1.0f-weight.y)* weight.z *make_float3(voxels[ix[3]].color); - dist+= weight.x * weight.y *(1.0f-weight.z)*voxels[ix[4]].sdf; colorFloat+= weight.x * weight.y *(1.0f-weight.z)*make_float3(voxels[ix[4]].color); - dist+= (1.0f-weight.x)* weight.y * weight.z *voxels[ix[5]].sdf; colorFloat+= (1.0f-weight.x)* weight.y * weight.z *make_float3(voxels[ix[5]].color); - dist+= weight.x *(1.0f-weight.y)* weight.z *voxels[ix[6]].sdf; colorFloat+= weight.x *(1.0f-weight.y)* weight.z *make_float3(voxels[ix[6]].color); - dist+= weight.x * weight.y * weight.z *voxels[ix[7]].sdf; colorFloat+= weight.x * weight.y * weight.z *make_float3(voxels[ix[7]].color); - depth = dist; - colour = make_uchar3(colorFloat); -} - -__global__ void nickRenderKernel(ftl::voxhash::HashData hashData, RayCastData rayCastData, RayCastParams params) { - // TODO(Nick) Reduce bank conflicts by aligning these - __shared__ ftl::voxhash::Voxel voxels[SDF_BLOCK_BUFFER]; - __shared__ ftl::voxhash::HashEntry blocks[8]; - - // Stride over all allocated blocks - for (int bi=blockIdx.x; bi<*hashData.d_hashCompactifiedCounter; bi+=NUM_CUDA_BLOCKS) { - __syncthreads(); - - const uint i = threadIdx.x; //inside of an SDF block - - //TODO (Nick) Either don't use compactified or re-run compacitification using render cam frustrum - if (i == 0) blocks[0] = hashData.d_hashCompactified[bi]; - //else if (i <= 7) blocks[i] = hashData.getHashEntryForSDFBlockPos(blockDelinear(blocks[0].pos, i)); - - // Make sure all hash entries are cached - __syncthreads(); - - const int3 pi_base = hashData.SDFBlockToVirtualVoxelPos(blocks[0].pos); - const int3 vp = make_int3(hashData.delinearizeVoxelIndex(i)); - const int3 pi = pi_base + vp; - const uint j = plinVoxelPos(vp); // Padded linear index - const float3 worldPos = hashData.virtualVoxelPosToWorld(pi); - - // Load distances and colours into shared memory + padding - const ftl::voxhash::Voxel &v = 
hashData.d_SDFBlocks[blocks[0].ptr + i]; - voxels[j] = v; - - // TODO (Nick) Coalesce memory better? - uint imod = i & 0x7; - bool v_do = imod < 3; - - // Voxels 8 * * combinations - if (v_do) { - const uint v_a = (i >> 3) & 0x7; - const uint v_c = (i >> 6); - const uint v_b = (imod >= 1) ? v_c : v_a; - const int3 v_cache = make_int3(((imod == 0) ? 8 : v_a), ((imod == 1) ? 8 : v_b), ((imod == 2) ? 8 : v_c)); - //const int3 v_ii = make_int3((imod == 0) ? 0 : v_a, (imod == 1) ? 0 : v_b, (imod == 2) ? 0 : v_c); - //const int v_block = blockLinear((imod == 0) ? 1 : 0, (imod == 1) ? 1 : 0, (imod == 2) ? 1 : 0); - ftl::voxhash::Voxel &padVox = voxels[plinVoxelPos(v_cache)]; - //const uint ii = hashData.linearizeVoxelPos(v_ii); - //if (blocks[v_block].ptr != ftl::voxhash::FREE_ENTRY) padVox = hashData.d_SDFBlocks[blocks[v_block].ptr + ii]; - //else deleteVoxel(padVox); - - deleteVoxel(padVox); - } - - // Voxels 8 8 * combinations - if ((i >> 3) < 3) { - const uint batch = i >> 3; - const uint v_a = imod; - const int3 v_cache = make_int3(((batch != 0) ? 8 : v_a), ((batch != 1) ? 8 : v_a), ((batch != 2) ? 8 : v_a)); - //const int3 v_ii = make_int3((batch != 0) ? 0 : v_a, (batch != 1) ? 0 : v_a, (batch != 2) ? 0 : v_a); - //const int v_block = blockLinear((batch != 0) ? 1 : 0, (batch != 1) ? 1 : 0, (batch != 2) ? 1 : 0); - ftl::voxhash::Voxel &padVox = voxels[plinVoxelPos(v_cache)]; - //const uint ii = hashData.linearizeVoxelPos(v_ii); - //if (blocks[v_block].ptr != ftl::voxhash::FREE_ENTRY) padVox = hashData.d_SDFBlocks[blocks[v_block].ptr + ii]; - //else deleteVoxel(padVox); - - deleteVoxel(padVox); - } - - // Voxel 8 8 8 - if (i == 0) { - const int3 v_cache = make_int3(8,8,8); - ftl::voxhash::Voxel &padVox = voxels[plinVoxelPos(v_cache)]; - //const int v_block = blockLinear(1, 1, 1); - //if (blocks[v_block].ptr != ftl::voxhash::FREE_ENTRY) padVox = hashData.d_SDFBlocks[blocks[v_block].ptr]; - //else deleteVoxel(padVox); - - deleteVoxel(padVox); - } - - // Indexes of the 8 neighbor voxels in one direction - const uint ix[8] = { - j, j+SDF_DX, j+SDF_DY, j+SDF_DZ, j+SDF_DX+SDF_DY, j+SDF_DY+SDF_DZ, - j+SDF_DX+SDF_DZ, j+SDF_DX+SDF_DY+SDF_DZ - }; - - __syncthreads(); - - // If any weight is 0, skip this voxel - // FIXME(Nick) Unloaded voxels result in random weight values here... - //const bool missweight = voxels[ix[0]].weight == 0 || voxels[ix[1]].weight == 0 || voxels[ix[2]].weight == 0 || - // voxels[ix[3]].weight == 0 || voxels[ix[4]].weight == 0 || voxels[ix[5]].weight == 0 || - // voxels[ix[6]].weight == 0; // || voxels[ix[7]].weight == 0; - //if (missweight) return; - if (voxels[j].weight == 0) continue; - - // Trilinear Interpolation (simple and fast) - /*float3 colorFloat = make_float3(0.0f, 0.0f, 0.0f); - const float3 weight = frac(hashData.worldToVirtualVoxelPosFloat(worldPos)); // Should be world position of ray, not voxel?? 
- float dist = 0.0f; - dist+= (1.0f-weight.x)*(1.0f-weight.y)*(1.0f-weight.z)*voxels[ix[0]].sdf; colorFloat+= (1.0f-weight.x)*(1.0f-weight.y)*(1.0f-weight.z)*make_float3(voxels[ix[0]].color); - dist+= weight.x *(1.0f-weight.y)*(1.0f-weight.z)*voxels[ix[1]].sdf; colorFloat+= weight.x *(1.0f-weight.y)*(1.0f-weight.z)*make_float3(voxels[ix[1]].color); - dist+= (1.0f-weight.x)* weight.y *(1.0f-weight.z)*voxels[ix[2]].sdf; colorFloat+= (1.0f-weight.x)* weight.y *(1.0f-weight.z)*make_float3(voxels[ix[2]].color); - dist+= (1.0f-weight.x)*(1.0f-weight.y)* weight.z *voxels[ix[3]].sdf; colorFloat+= (1.0f-weight.x)*(1.0f-weight.y)* weight.z *make_float3(voxels[ix[3]].color); - dist+= weight.x * weight.y *(1.0f-weight.z)*voxels[ix[4]].sdf; colorFloat+= weight.x * weight.y *(1.0f-weight.z)*make_float3(voxels[ix[4]].color); - dist+= (1.0f-weight.x)* weight.y * weight.z *voxels[ix[5]].sdf; colorFloat+= (1.0f-weight.x)* weight.y * weight.z *make_float3(voxels[ix[5]].color); - dist+= weight.x *(1.0f-weight.y)* weight.z *voxels[ix[6]].sdf; colorFloat+= weight.x *(1.0f-weight.y)* weight.z *make_float3(voxels[ix[6]].color); - dist+= weight.x * weight.y * weight.z *voxels[ix[7]].sdf; colorFloat+= weight.x * weight.y * weight.z *make_float3(voxels[ix[7]].color); - - // Must finish using colours before updating colours - __syncthreads(); - - //voxels[j].color = make_uchar3(colorFloat); - //voxels[j].sdf = dist; - - // What happens if fitlered voxel is put back? - //hashData.d_SDFBlocks[blocks[0].ptr + i] = voxels[j]; - - //return;*/ - - int edgeX = (vp.x == 0 ) ? 1 : 0; - int edgeY = (vp.y == 0 ) ? 1 : 0; - int edgeZ = (vp.z == 0 ) ? 1 : 0; - - bool is_surface = ((params.m_flags & kShowBlockBorders) && edgeX + edgeY + edgeZ >= 2); - if (is_surface) voxels[j].color = make_uchar3(255,(vp.x == 0 && vp.y == 0 && vp.z == 0) ? 255 : 0,0); - - if (!is_surface && voxels[j].sdf >= 0.0f) continue; - - //if (vp.z == 7) voxels[j].color = make_uchar3(0,255,(voxels[j].sdf < 0.0f) ? 255 : 0); - - // Identify surfaces through sign change. Since we only check in one direction - // it is fine to check for any sign change? -#pragma unroll - for (int u=0; u<=1; u++) { - for (int v=0; v<=1; v++) { - for (int w=0; w<=1; w++) { - const int3 uvi = make_int3(vp.x+u,vp.y+v,vp.z+w); - - // Skip these cases since we didn't load voxels properly - if (uvi.x == 8 && uvi.y == 8 && uvi.z == 8) continue; - //if (uvi.x == 8 && uvi.y == 8 && uvi.z == 8 || uvi.x == 8 && uvi.z == 8 || uvi.y == 8 && uvi.z == 8) continue; - - const auto &vox = voxels[plinVoxelPos(uvi)]; - if (vox.weight > 0 && vox.sdf < 0.0f) { - is_surface = true; - break; - } - } - } - } - - // Only for surface voxels, work out screen coordinates - // TODO Could adjust weights, strengthen on surface, weaken otherwise?? 
- if (!is_surface) continue; - - const float3 camPos = params.m_viewMatrix * worldPos; - const float2 screenPosf = params.camera.cameraToKinectScreenFloat(camPos); - const uint2 screenPos = make_uint2(make_int2(screenPosf)); // + make_float2(0.5f, 0.5f) - - if (camPos.z < params.m_minDepth) continue; - - /*if (screenPos.x < params.m_width && screenPos.y < params.m_height && - rayCastData.d_depth[(screenPos.y)*params.m_width+screenPos.x] > camPos.z) { - rayCastData.d_depth[(screenPos.y)*params.m_width+screenPos.x] = camPos.z; - rayCastData.d_colors[(screenPos.y)*params.m_width+screenPos.x] = voxels[j].color; - }*/ - - //return; - - // For this voxel in hash, get its screen position and check it is on screen - // Convert depth map to int by x1000 and use atomicMin - const int pixsize = static_cast<int>((c_hashParams.m_virtualVoxelSize*params.camera.fx/(camPos.z*0.8f)))+1; // Magic number increase voxel to ensure coverage - int pixsizeX = pixsize; // Max voxel pixels - int pixsizeY = pixsize; - - for (int y=0; y<pixsizeY; y++) { - for (int x=0; x<pixsizeX; x++) { - // TODO(Nick) Within a window, check pixels that have same voxel id - // Then trilinear interpolate between current voxel and neighbors. - const float3 pixelWorldPos = params.m_viewMatrixInverse * params.camera.kinectDepthToSkeleton(screenPos.x+x,screenPos.y+y, camPos.z); - const float3 posInVoxel = (pixelWorldPos - worldPos) / make_float3(c_hashParams.m_virtualVoxelSize,c_hashParams.m_virtualVoxelSize,c_hashParams.m_virtualVoxelSize); - - //if (posInVoxel.x >= 1.0f || posInVoxel.y >= 1.0f || posInVoxel.z >= 1.0f) { - // pixsizeX = x; - // continue; - //} - - /*float depth; - uchar3 col; - trilinearInterp(hashData, voxels, ix, pixelWorldPos, depth, col);*/ - int idepth = static_cast<int>(camPos.z * 100.0f); - - if (screenPos.x+x < params.m_width && screenPos.y+y < params.m_height) { - int index = (screenPos.y+y)*params.m_width+screenPos.x+x; - // TODO(Nick) Merge colors at "same" depth based upon distance from voxel centre. 
- // The above may need to be done using shared memory - if (rayCastData.d_depth_i[index] > idepth && atomicMin(&rayCastData.d_depth_i[index], idepth) != idepth) { - //rayCastData.d_depth[index] = idepth; - rayCastData.d_colors[index] = voxels[j].color; - } - } - } - if (pixsizeX == 0) break; - } - - } -} - -extern "C" void nickRenderCUDA(const ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const RayCastData &rayCastData, const RayCastParams ¶ms, cudaStream_t stream) -{ - const dim3 clear_gridSize((params.m_width + T_PER_BLOCK - 1)/T_PER_BLOCK, (params.m_height + T_PER_BLOCK - 1)/T_PER_BLOCK); - const dim3 clear_blockSize(T_PER_BLOCK, T_PER_BLOCK); - - clearDepthKernel<<<clear_gridSize, clear_blockSize, 0, stream>>>(hashData, rayCastData, params); - - const unsigned int threadsPerBlock = SDF_BLOCK_SIZE*SDF_BLOCK_SIZE*SDF_BLOCK_SIZE; - const dim3 gridSize(NUM_CUDA_BLOCKS, 1); - const dim3 blockSize(threadsPerBlock, 1); - - //if (hashParams.m_numOccupiedBlocks > 0) { //this guard is important if there is no depth in the current frame (i.e., no blocks were allocated) - nickRenderKernel << <gridSize, blockSize, 0, stream >> >(hashData, rayCastData, params); - //} - - cudaSafeCall( cudaGetLastError() ); - #ifdef _DEBUG - cudaSafeCall(cudaDeviceSynchronize()); - //cutilCheckMsg(__FUNCTION__); - #endif -} - ///////////////////////////////////////////////////////////////////////// // ray interval splatting diff --git a/applications/reconstruct/src/splat_params.hpp b/applications/reconstruct/src/splat_params.hpp new file mode 100644 index 0000000000000000000000000000000000000000..e38e4447286bf6c70f6c753deed35154b6aafd8a --- /dev/null +++ b/applications/reconstruct/src/splat_params.hpp @@ -0,0 +1,26 @@ +#ifndef _FTL_RENDER_SPLAT_PARAMS_HPP_ +#define _FTL_RENDER_SPLAT_PARAMS_HPP_ + +#include <ftl/cuda_util.hpp> +#include <ftl/cuda_matrix_util.hpp> +#include <ftl/depth_camera_params.hpp> + +namespace ftl { +namespace render { + +static const uint kShowBlockBorders = 0x0001; + +struct __align__(16) SplatParams { + float4x4 m_viewMatrix; + float4x4 m_viewMatrixInverse; + + uint m_flags; + float voxelSize; + + DepthCameraParams camera; +}; + +} +} + +#endif diff --git a/applications/reconstruct/src/splat_render.cpp b/applications/reconstruct/src/splat_render.cpp new file mode 100644 index 0000000000000000000000000000000000000000..267620895c460ab9cfc2113e6bb6369f74883573 --- /dev/null +++ b/applications/reconstruct/src/splat_render.cpp @@ -0,0 +1,61 @@ +#include "splat_render.hpp" +#include "splat_render_cuda.hpp" +#include "compactors.hpp" + +using ftl::render::Splatter; + +Splatter::Splatter(ftl::voxhash::SceneRep *scene) : scene_(scene) { + +} + +Splatter::~Splatter() { + +} + +void Splatter::render(ftl::rgbd::Source *src, cudaStream_t stream) { + if (!src->isReady()) return; + + const auto &camera = src->parameters(); + + cudaSafeCall(cudaSetDevice(scene_->getCUDADevice())); + + if (depth1_.width() != camera.width || depth1_.height() != camera.height) { + depth1_ = ftl::cuda::TextureObject<uint>(camera.width, camera.height); + } + if (colour1_.width() != camera.width || colour1_.height() != camera.height) { + colour1_ = ftl::cuda::TextureObject<uchar4>(camera.width, camera.height); + } + if (depth2_.width() != camera.width || depth2_.height() != camera.height) { + depth2_ = ftl::cuda::TextureObject<float>(camera.width, camera.height); + } + if (colour2_.width() != camera.width || colour2_.height() != camera.height) { + colour2_ = 
ftl::cuda::TextureObject<uchar4>(camera.width, camera.height); + } + + SplatParams params; + params.m_flags = 0; + params.m_viewMatrix = MatrixConversion::toCUDA(src->getPose().cast<float>().inverse()); + params.m_viewMatrixInverse = MatrixConversion::toCUDA(src->getPose().cast<float>()); + params.voxelSize = scene_->getHashParams().m_virtualVoxelSize; + params.camera.flags = 0; + params.camera.fx = camera.fx; + params.camera.fy = camera.fy; + params.camera.mx = -camera.cx; + params.camera.my = -camera.cy; + params.camera.m_imageWidth = camera.width; + params.camera.m_imageHeight = camera.height; + params.camera.m_sensorDepthWorldMax = camera.maxDepth; + params.camera.m_sensorDepthWorldMin = camera.minDepth; + + ftl::cuda::compactifyVisible(scene_->getHashData(), scene_->getHashParams(), params.camera, stream); + ftl::cuda::isosurface_point_image(scene_->getHashData(), depth1_, colour1_, params, stream); + ftl::cuda::splat_points(depth1_, colour1_, depth2_, colour2_, params, stream); + + // TODO: Second pass + + src->writeFrames(colour2_, depth2_, stream); +} + +void Splatter::setOutputDevice(int device) { + device_ = device; +} diff --git a/applications/reconstruct/src/splat_render.cu b/applications/reconstruct/src/splat_render.cu new file mode 100644 index 0000000000000000000000000000000000000000..4ef0b233c807e7caa96271da63c365b22165c2c9 --- /dev/null +++ b/applications/reconstruct/src/splat_render.cu @@ -0,0 +1,345 @@ +#include "splat_render_cuda.hpp" +#include <cuda_runtime.h> + +#include <ftl/cuda_matrix_util.hpp> + +#include "splat_params.hpp" + +#define T_PER_BLOCK 8 +#define NUM_GROUPS_X 1024 + +#define NUM_CUDA_BLOCKS 10000 + +using ftl::cuda::TextureObject; +using ftl::render::SplatParams; + +__global__ void clearDepthKernel(ftl::voxhash::HashData hashData, TextureObject<uint> depth, TextureObject<uchar4> colour) { + const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; + const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x < depth.width() && y < depth.height()) { + depth(x,y) = 0x7f800000; //PINF; + colour(x,y) = make_uchar4(76,76,82,0); + } +} + +#define SDF_BLOCK_SIZE_PAD 8 +#define SDF_BLOCK_BUFFER 512 // > 8x8x8 +#define SDF_DX 1 +#define SDF_DY SDF_BLOCK_SIZE_PAD +#define SDF_DZ (SDF_BLOCK_SIZE_PAD*SDF_BLOCK_SIZE_PAD) + +#define LOCKED 0x7FFFFFFF + +//! computes the (local) virtual voxel pos of an index; idx in [0;511] +__device__ +int3 pdelinVoxelIndex(uint idx) { + int x = idx % SDF_BLOCK_SIZE_PAD; + int y = (idx % (SDF_BLOCK_SIZE_PAD * SDF_BLOCK_SIZE_PAD)) / SDF_BLOCK_SIZE_PAD; + int z = idx / (SDF_BLOCK_SIZE_PAD * SDF_BLOCK_SIZE_PAD); + return make_int3(x,y,z); +} + +//! computes the linearized index of a local virtual voxel pos; pos in [0;7]^3 +__device__ +uint plinVoxelPos(const int3& virtualVoxelPos) { + return + virtualVoxelPos.z * SDF_BLOCK_SIZE_PAD * SDF_BLOCK_SIZE_PAD + + virtualVoxelPos.y * SDF_BLOCK_SIZE_PAD + + virtualVoxelPos.x; +} + +//! 
computes the linearized index of a local virtual voxel pos; pos in [0;7]^3 +__device__ +uint plinVoxelPos(int x, int y, int z) { + return + z * SDF_BLOCK_SIZE_PAD * SDF_BLOCK_SIZE_PAD + + y * SDF_BLOCK_SIZE_PAD + x; +} + +__device__ +void deleteVoxel(ftl::voxhash::Voxel& v) { + v.color = make_uchar3(0,0,0); + v.weight = 0; + v.sdf = PINF; +} + +__device__ inline int3 blockDelinear(const int3 &base, uint i) { + return make_int3(base.x + (i & 0x1), base.y + (i & 0x2), base.z + (i & 0x4)); +} + +__device__ inline uint blockLinear(int x, int y, int z) { + return x + (y << 1) + (z << 2); +} + +__global__ void isosurface_image_kernel(ftl::voxhash::HashData hashData, TextureObject<uint> depth, TextureObject<uchar4> colour, SplatParams params) { + // TODO:(Nick) Reduce bank conflicts by aligning these + __shared__ ftl::voxhash::Voxel voxels[SDF_BLOCK_BUFFER]; + __shared__ ftl::voxhash::HashEntry block; + + // Stride over all allocated blocks + for (int bi=blockIdx.x; bi<*hashData.d_hashCompactifiedCounter; bi+=NUM_CUDA_BLOCKS) { + __syncthreads(); + + const uint i = threadIdx.x; //inside of an SDF block + + if (i == 0) block = hashData.d_hashCompactified[bi]; + + // Make sure all hash entries are cached + __syncthreads(); + + const int3 pi_base = hashData.SDFBlockToVirtualVoxelPos(block.pos); + const int3 vp = make_int3(hashData.delinearizeVoxelIndex(i)); + const int3 pi = pi_base + vp; + const uint j = plinVoxelPos(vp); // Padded linear index + const float3 worldPos = hashData.virtualVoxelPosToWorld(pi); + + // Load distances and colours into shared memory + padding + const ftl::voxhash::Voxel &v = hashData.d_SDFBlocks[block.ptr + i]; + voxels[j] = v; + + __syncthreads(); + + if (voxels[j].weight == 0) continue; + + + int edgeX = (vp.x == 0 ) ? 1 : 0; + int edgeY = (vp.y == 0 ) ? 1 : 0; + int edgeZ = (vp.z == 0 ) ? 1 : 0; + + bool is_surface = ((params.m_flags & kShowBlockBorders) && edgeX + edgeY + edgeZ >= 2); + if (is_surface) voxels[j].color = make_uchar3(255,(vp.x == 0 && vp.y == 0 && vp.z == 0) ? 255 : 0,0); + + if (!is_surface && voxels[j].sdf <= 0.0f) continue; + + //if (vp.z == 7) voxels[j].color = make_uchar3(0,255,(voxels[j].sdf < 0.0f) ? 255 : 0); + + // Identify surfaces through sign change. Since we only check in one direction + // it is fine to check for any sign change? + + +#pragma unroll + for (int u=0; u<=1; u++) { + for (int v=0; v<=1; v++) { + for (int w=0; w<=1; w++) { + const int3 uvi = make_int3(vp.x+u,vp.y+v,vp.z+w); + + // Skip these cases since we didn't load voxels properly + if (uvi.x == 8 || uvi.z == 8 || uvi.y == 8) continue; + + const auto &vox = voxels[plinVoxelPos(uvi)]; + if (vox.weight > 0 && vox.sdf <= 0.0f) { + is_surface = true; + // Should break but is slower? 
+				}
+			}
+		}
+	}
+
+		// Only for surface voxels, work out screen coordinates
+		if (!is_surface) continue;
+
+		const float3 camPos = params.m_viewMatrix * worldPos;
+		const float2 screenPosf = params.camera.cameraToKinectScreenFloat(camPos);
+		const uint2 screenPos = make_uint2(make_int2(screenPosf)); //  + make_float2(0.5f, 0.5f)
+
+		//printf("Worldpos: %f,%f,%f\n", camPos.x, camPos.y, camPos.z);
+
+		if (camPos.z < params.camera.m_sensorDepthWorldMin) continue;
+
+		// For this voxel in hash, get its screen position and check it is on screen
+		// Convert depth map to int by x1000 and use atomicMin
+		//const int pixsize = static_cast<int>((c_hashParams.m_virtualVoxelSize*params.camera.fx/(camPos.z*0.8f)))+1;  // Magic number increase voxel to ensure coverage
+
+		const unsigned int x = screenPos.x;
+		const unsigned int y = screenPos.y;
+		const int idepth = static_cast<int>(camPos.z * 1000.0f);
+
+		// See: Gunther et al. 2013. A GPGPU-based Pipeline for Accelerated Rendering of Point Clouds
+		if (x < depth.width() && y < depth.height()) {
+			// TODO:(Nick) Would warp and shared mem considerations improve things?
+			// Probably not given we now have thin surfaces...
+
+			// FIXME: Needs a lock here to ensure correct colour is written.
+			if (atomicMin(&depth(x,y), idepth) > idepth) {
+				colour(x,y) = make_uchar4(voxels[j].color.x, voxels[j].color.y, voxels[j].color.z, 255);
+			}
+
+			/*bool p = false;
+			while (!p) {
+				int ld = atomicExch(&depth(x,y), LOCKED);
+				if (ld != LOCKED) {
+					p = true;
+					if (ld > idepth) {
+						colour(x,y) = make_uchar4(voxels[j].color.x, voxels[j].color.y, voxels[j].color.z, 255);
+						depth(x,y) = idepth;
+					} else {
+						depth(x,y) = ld;
+					}
+				}
+			}*/
+		}
+
+	}  // Stride
+}
+
+void ftl::cuda::isosurface_point_image(const ftl::voxhash::HashData& hashData,
+			const TextureObject<uint> &depth,
+			const TextureObject<uchar4> &colour,
+			const SplatParams &params, cudaStream_t stream) {
+
+	const dim3 clear_gridSize((depth.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+	const dim3 clear_blockSize(T_PER_BLOCK, T_PER_BLOCK);
+
+	clearDepthKernel<<<clear_gridSize, clear_blockSize, 0, stream>>>(hashData, depth, colour);
+
+	//cudaSafeCall( cudaDeviceSynchronize() );
+
+	const unsigned int threadsPerBlock = SDF_BLOCK_SIZE*SDF_BLOCK_SIZE*SDF_BLOCK_SIZE;
+	const dim3 gridSize(NUM_CUDA_BLOCKS, 1);
+	const dim3 blockSize(threadsPerBlock, 1);
+
+	isosurface_image_kernel<<<gridSize, blockSize, 0, stream>>>(hashData, depth, colour, params);
+
+	cudaSafeCall( cudaGetLastError() );
+	//cudaSafeCall( cudaDeviceSynchronize() );
+}
+
+// ---- Pass 2: Expand the point splats ----------------------------------------
+
+#define SPLAT_RADIUS 7
+#define SPLAT_BOUNDS (2*SPLAT_RADIUS+T_PER_BLOCK+1)
+#define SPLAT_BUFFER_SIZE (SPLAT_BOUNDS*SPLAT_BOUNDS)
+#define MAX_VALID 8
+
+__device__ float distance2(float3 a, float3 b) {
+	const float x = a.x-b.x;
+	const float y = a.y-b.y;
+	const float z = a.z-b.z;
+	return x*x+y*y+z*z;
+}
+
+__global__ void splatting_kernel(
+		TextureObject<uint> depth_in,
+		TextureObject<uchar4> colour_in,
+		TextureObject<float> depth_out,
+		TextureObject<uchar4> colour_out, SplatParams params) {
+	// Read an NxN region and
+	// - interpolate a depth value for this pixel
+	// - interpolate an rgb value for this pixel
+	// Must respect depth discontinuities.
+ // How much influence a given neighbour has depends on its depth value + + __shared__ float3 positions[SPLAT_BUFFER_SIZE]; + + const int i = threadIdx.y*blockDim.y + threadIdx.x; + const int bx = blockIdx.x*blockDim.x; + const int by = blockIdx.y*blockDim.y; + const int x = bx + threadIdx.x; + const int y = by + threadIdx.y; + + const float camMinDepth = params.camera.m_sensorDepthWorldMin; + const float camMaxDepth = params.camera.m_sensorDepthWorldMax; + + for (int j=i; j<SPLAT_BUFFER_SIZE; j+=T_PER_BLOCK) { + const unsigned int sx = (j % SPLAT_BOUNDS)+bx-SPLAT_RADIUS; + const unsigned int sy = (j / SPLAT_BOUNDS)+by-SPLAT_RADIUS; + if (sx >= depth_in.width() || sy >= depth_in.height()) { + positions[j] = make_float3(1000.0f,1000.0f, 1000.0f); + } else { + positions[j] = params.camera.kinectDepthToSkeleton(sx, sy, (float)depth_in.tex2D((int)sx,(int)sy) / 1000.0f); + } + } + + __syncthreads(); + + if (x >= depth_in.width() && y >= depth_in.height()) return; + + const float voxelSquared = params.voxelSize*params.voxelSize; + float mindepth = 1000.0f; + int minidx = -1; + float3 minpos; + + //float3 validPos[MAX_VALID]; + int validIndices[MAX_VALID]; + int validix = 0; + + for (int v=-SPLAT_RADIUS; v<=SPLAT_RADIUS; ++v) { + for (int u=-SPLAT_RADIUS; u<=SPLAT_RADIUS; ++u) { + //const int idx = (threadIdx.y+v)*SPLAT_BOUNDS+threadIdx.x+u; + const int idx = (threadIdx.y+v+SPLAT_RADIUS)*SPLAT_BOUNDS+threadIdx.x+u+SPLAT_RADIUS; + + float3 posp = positions[idx]; + const float d = posp.z; + //if (d < camMinDepth || d > camMaxDepth) continue; + + float3 pos = params.camera.kinectDepthToSkeleton(x, y, d); + float dist = distance2(pos, posp); + + if (dist < voxelSquared) { + // Valid so check for minimum + //validPos[validix] = pos; + validIndices[validix++] = idx; + if (d < mindepth) { + mindepth = d; + minidx = idx; + minpos = pos; + } + } + } + } + + if (minidx == -1) { + depth_out(x,y) = 0.0f; + colour_out(x,y) = make_uchar4(76,76,82,255); + return; + } + + float3 colour = make_float3(0.0f, 0.0f, 0.0f); + float depth = 0.0f; + float contrib = 0.0f; + float3 pos = params.camera.kinectDepthToSkeleton(x, y, mindepth); // TODO:(Nick) Mindepth assumption is poor choice. 
+	for (int j=0; j<validix; ++j) {
+		const int idx = validIndices[j];
+		float3 posp = positions[idx];
+		//float3 pos = params.camera.kinectDepthToSkeleton(x, y, posp.z);
+		float3 delta = (posp - pos) / (2.0f*params.voxelSize);
+		float dist = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
+
+		// It contributes to this pixel
+		if (dist < 1.0f && fabs(posp.z - mindepth) < 2*params.voxelSize) {
+			const unsigned int sx = (idx % SPLAT_BOUNDS)+bx-SPLAT_RADIUS;
+			const unsigned int sy = (idx / SPLAT_BOUNDS)+by-SPLAT_RADIUS;
+
+			// Fast and simple trilinear interpolation
+			float c = fabs((1.0f - delta.x) * (1.0f - delta.y) * (1.0f - delta.z));
+			uchar4 col = colour_in.tex2D((int)sx, (int)sy);
+			colour.x += col.x*c;
+			colour.y += col.y*c;
+			colour.z += col.z*c;
+			contrib += c;
+			depth += posp.z * c;
+		}
+	}
+
+	// Normalise
+	colour.x /= contrib;
+	colour.y /= contrib;
+	colour.z /= contrib;
+	depth /= contrib;
+
+	depth_out(x,y) = depth;
+	colour_out(x,y) = make_uchar4(colour.x, colour.y, colour.z, 255);
+}
+
+void ftl::cuda::splat_points(const TextureObject<uint> &depth_in, const TextureObject<uchar4> &colour_in,
+		const TextureObject<float> &depth_out, const TextureObject<uchar4> &colour_out, const SplatParams &params, cudaStream_t stream)
+{
+
+	const dim3 gridSize((depth_in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
+
+	splatting_kernel<<<gridSize, blockSize, 0, stream>>>(depth_in, colour_in, depth_out, colour_out, params);
+	cudaSafeCall( cudaGetLastError() );
+}
diff --git a/applications/reconstruct/src/splat_render.hpp b/applications/reconstruct/src/splat_render.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..1e03801349fe0049b5eab1000a1566541383e35f
--- /dev/null
+++ b/applications/reconstruct/src/splat_render.hpp
@@ -0,0 +1,46 @@
+#ifndef _FTL_RECONSTRUCTION_SPLAT_HPP_
+#define _FTL_RECONSTRUCTION_SPLAT_HPP_
+
+#include <ftl/configurable.hpp>
+#include <ftl/rgbd/source.hpp>
+#include <ftl/depth_camera.hpp>
+#include <ftl/voxel_scene.hpp>
+#include <ftl/ray_cast_util.hpp>
+#include <ftl/cuda_common.hpp>
+
+#include "splat_params.hpp"
+
+namespace ftl {
+namespace render {
+
+/**
+ * Render the voxel hash structure by generating image points for surface
+ * voxels and expanding those into interpolated splats. This is a two pass
+ * algorithm with the option of completing the second pass on a separate GPU.
+ * It is also possible to complete only the first pass and perform the second
+ * step on a separate machine or at a later time; the advantage is that local
+ * processing resources are saved and the first pass result may compress better.
+ */
+class Splatter {
+	public:
+	explicit Splatter(ftl::voxhash::SceneRep *scene);
+	~Splatter();
+
+	void render(ftl::rgbd::Source *src, cudaStream_t stream=0);
+
+	void setOutputDevice(int);
+
+	private:
+	int device_;
+	ftl::cuda::TextureObject<uint> depth1_;
+	ftl::cuda::TextureObject<uchar4> colour1_;
+	ftl::cuda::TextureObject<float> depth2_;
+	ftl::cuda::TextureObject<uchar4> colour2_;
+	SplatParams params_;
+	ftl::voxhash::SceneRep *scene_;
+};
+
+}
+}
+
+#endif // _FTL_RECONSTRUCTION_SPLAT_HPP_
diff --git a/applications/reconstruct/src/splat_render_cuda.hpp b/applications/reconstruct/src/splat_render_cuda.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..c4bd724add53d72c478cd169c323edcc20ad3d9e
--- /dev/null
+++ b/applications/reconstruct/src/splat_render_cuda.hpp
@@ -0,0 +1,38 @@
+#ifndef _FTL_RECONSTRUCTION_SPLAT_CUDA_HPP_
+#define _FTL_RECONSTRUCTION_SPLAT_CUDA_HPP_
+
+#include <ftl/depth_camera.hpp>
+#include <ftl/voxel_hash.hpp>
+#include <ftl/ray_cast_util.hpp>
+
+#include "splat_params.hpp"
+
+namespace ftl {
+namespace cuda {
+
+/**
+ * NOTE: Not strictly isosurface currently since it includes the internals
+ * of objects up to at most truncation depth.
+ */
+void isosurface_point_image(const ftl::voxhash::HashData& hashData,
+			const ftl::cuda::TextureObject<uint> &depth,
+			const ftl::cuda::TextureObject<uchar4> &colour,
+			const ftl::render::SplatParams &params, cudaStream_t stream);
+
+//void isosurface_point_image_stereo(const ftl::voxhash::HashData& hashData,
+//		const ftl::voxhash::HashParams& hashParams,
+//		const RayCastData &rayCastData, const RayCastParams &params,
+//		cudaStream_t stream);
+
+// TODO: isosurface_point_cloud
+
+void splat_points(const ftl::cuda::TextureObject<uint> &depth_in,
+	const ftl::cuda::TextureObject<uchar4> &colour_in,
+	const ftl::cuda::TextureObject<float> &depth_out,
+	const ftl::cuda::TextureObject<uchar4> &colour_out,
+	const ftl::render::SplatParams &params, cudaStream_t stream);
+
+}
+}
+
+#endif // _FTL_RECONSTRUCTION_SPLAT_CUDA_HPP_
diff --git a/applications/reconstruct/src/voxel_hash.cpp b/applications/reconstruct/src/voxel_hash.cpp
index f5e6b94b6ddb79c67cdf56723d596b09e372911a..d939556cfe364190597e3adc41295bed83ac41cf 100644
--- a/applications/reconstruct/src/voxel_hash.cpp
+++ b/applications/reconstruct/src/voxel_hash.cpp
@@ -107,3 +107,9 @@ HashData HashData::upload() const {
 
 	return hashData;
 }
+
+size_t HashData::getAllocatedBlocks() const {
+	unsigned int count = 0;
+	cudaSafeCall(cudaMemcpy(&count, d_heapCounter, sizeof(unsigned int), cudaMemcpyDeviceToHost));
+	return count;
+}
diff --git a/applications/reconstruct/src/voxel_scene.cpp b/applications/reconstruct/src/voxel_scene.cpp
index 14351d4eaf19c77fa03e946aa665275260c125f1..e514e2640e6aa19f89da4b69b75154b88f7cccc8 100644
--- a/applications/reconstruct/src/voxel_scene.cpp
+++ b/applications/reconstruct/src/voxel_scene.cpp
@@ -85,8 +85,8 @@ bool SceneRep::_initCUDA() {
 	cuda_device_ = (desired_device < cuda_device_count) ? desired_device : cuda_device_count-1;
 	cudaSafeCall(cudaSetDevice(cuda_device_));
 
-	// TODO(Nick) Check memory is sufficient
-	// TODO(Nick) Find out what our compute capability should be.
+	// TODO:(Nick) Check memory is sufficient
+	// TODO:(Nick) Find out what our compute capability should be.
return true; } @@ -110,6 +110,7 @@ int SceneRep::upload() { if (!cam.source->isReady()) { cam.params.m_imageWidth = 0; // TODO(Nick) : Free gpu allocs if was ready before + LOG(INFO) << "Source not ready: " << cam.source->getURI(); continue; } else { auto in = cam.source; diff --git a/components/common/cpp/include/ftl/configurable.hpp b/components/common/cpp/include/ftl/configurable.hpp index 03197ce1e52b485e9579aedd2fb6631bae986a22..5bae67b4a9972d5f69d0947871d5be259df36be3 100644 --- a/components/common/cpp/include/ftl/configurable.hpp +++ b/components/common/cpp/include/ftl/configurable.hpp @@ -99,7 +99,7 @@ class Configurable { void patchPtr(nlohmann::json &newcfg) { config_ = &newcfg; } protected: - nlohmann::json *config_; // FIXME(Nick) Should be a reference but this can be invalidated... + nlohmann::json *config_; private: std::map<std::string, std::list<std::function<void(const config::Event&)>>> observers_; @@ -130,8 +130,8 @@ std::optional<T> ftl::Configurable::get(const std::string &name) { return {}; } } else if ((*config_)["$ref"].is_string()) { - // TODO(Nick) Add # if missing - // TODO(Nick) Cache result of ref loopkup + // FIXME:(Nick) Add # if missing + // TODO:(Nick) Cache result of ref loopkup std::string res_uri = (*config_)["$ref"].get<std::string>()+"/"+name; auto &r = ftl::config::resolve(res_uri); diff --git a/components/common/cpp/include/ftl/cuda_common.hpp b/components/common/cpp/include/ftl/cuda_common.hpp index 1df4b1b2fa89bb495f4e0108740774e8753a2afa..5a2d1de0823acea508747527fd839280d01fc710 100644 --- a/components/common/cpp/include/ftl/cuda_common.hpp +++ b/components/common/cpp/include/ftl/cuda_common.hpp @@ -27,23 +27,29 @@ class HisteresisTexture { template <typename T> class TextureObject { public: - __host__ __device__ TextureObject() : texobj_(0), ptr_(nullptr) {}; + __host__ __device__ TextureObject() + : texobj_(0), pitch_(0), pitch2_(0), width_(0), height_(0), ptr_(nullptr), needsfree_(false) {}; TextureObject(const cv::cuda::PtrStepSz<T> &d); TextureObject(T *ptr, int pitch, int width, int height); TextureObject(size_t width, size_t height); - TextureObject(const TextureObject &t); + TextureObject(const TextureObject<T> &t); + __host__ __device__ TextureObject(TextureObject<T> &&); ~TextureObject(); + + __host__ TextureObject<T> &operator=(const TextureObject<T> &); + __host__ __device__ TextureObject<T> &operator=(TextureObject<T> &&); size_t pitch() const { return pitch_; } - T *devicePtr() { return ptr_; }; - __host__ __device__ T *devicePtr(int v) { return &ptr_[v*pitch2_]; } + size_t pixelPitch() const { return pitch2_; } + T *devicePtr() const { return ptr_; }; + __host__ __device__ T *devicePtr(int v) const { return &ptr_[v*pitch2_]; } __host__ __device__ int width() const { return width_; } __host__ __device__ int height() const { return height_; } __host__ __device__ cudaTextureObject_t cudaTexture() const { return texobj_; } #ifdef __CUDACC__ - __device__ inline T tex2D(int u, int v) { return ::tex2D<T>(texobj_, u, v); } - __device__ inline T tex2D(float u, float v) { return ::tex2D<T>(texobj_, u, v); } + __device__ inline T tex2D(int u, int v) const { return ::tex2D<T>(texobj_, u, v); } + __device__ inline T tex2D(float u, float v) const { return ::tex2D<T>(texobj_, u, v); } #endif __host__ __device__ inline const T &operator()(int u, int v) const { return ptr_[u+v*pitch2_]; } @@ -131,22 +137,25 @@ TextureObject<T>::TextureObject(T *ptr, int pitch, int width, int height) { template <typename T> TextureObject<T>::TextureObject(size_t 
width, size_t height) { cudaMallocPitch((void**)&ptr_,&pitch_,width*sizeof(T),height); + cudaTextureObject_t tex = 0; - cudaResourceDesc resDesc; - memset(&resDesc, 0, sizeof(resDesc)); - resDesc.resType = cudaResourceTypePitch2D; - resDesc.res.pitch2D.devPtr = ptr_; - resDesc.res.pitch2D.pitchInBytes = pitch_; - resDesc.res.pitch2D.desc = cudaCreateChannelDesc<T>(); - resDesc.res.pitch2D.width = width; - resDesc.res.pitch2D.height = height; - - cudaTextureDesc texDesc; - memset(&texDesc, 0, sizeof(texDesc)); - texDesc.readMode = cudaReadModeElementType; + // Must be an even + //if (!(sizeof(T) & 0x1)) { + cudaResourceDesc resDesc; + memset(&resDesc, 0, sizeof(resDesc)); + resDesc.resType = cudaResourceTypePitch2D; + resDesc.res.pitch2D.devPtr = ptr_; + resDesc.res.pitch2D.pitchInBytes = pitch_; + resDesc.res.pitch2D.desc = cudaCreateChannelDesc<T>(); + resDesc.res.pitch2D.width = width; + resDesc.res.pitch2D.height = height; + + cudaTextureDesc texDesc; + memset(&texDesc, 0, sizeof(texDesc)); + texDesc.readMode = cudaReadModeElementType; + cudaCreateTextureObject(&tex, &resDesc, &texDesc, NULL); + //} - cudaTextureObject_t tex = 0; - cudaCreateTextureObject(&tex, &resDesc, &texDesc, NULL); texobj_ = tex; width_ = (int)width; height_ = (int)height; @@ -156,7 +165,44 @@ TextureObject<T>::TextureObject(size_t width, size_t height) { } template <typename T> -TextureObject<T>::TextureObject(const TextureObject &p) { +TextureObject<T>::TextureObject(const TextureObject<T> &p) { + texobj_ = p.texobj_; + ptr_ = p.ptr_; + width_ = p.width_; + height_ = p.height_; + pitch_ = p.pitch_; + pitch2_ = pitch_ / sizeof(T); + needsfree_ = false; +} + +template <typename T> +TextureObject<T>::TextureObject(TextureObject<T> &&p) { + texobj_ = p.texobj_; + ptr_ = p.ptr_; + width_ = p.width_; + height_ = p.height_; + pitch_ = p.pitch_; + pitch2_ = pitch_ / sizeof(T); + needsfree_ = p.needsfree_; + p.texobj_ = 0; + p.needsfree_ = false; + p.ptr_ = nullptr; +} + +template <typename T> +TextureObject<T> &TextureObject<T>::operator=(const TextureObject<T> &p) { + texobj_ = p.texobj_; + ptr_ = p.ptr_; + width_ = p.width_; + height_ = p.height_; + pitch_ = p.pitch_; + pitch2_ = pitch_ / sizeof(T); + needsfree_ = false; + return *this; +} + +template <typename T> +TextureObject<T> &TextureObject<T>::operator=(TextureObject<T> &&p) { texobj_ = p.texobj_; ptr_ = p.ptr_; width_ = p.width_; @@ -164,7 +210,10 @@ TextureObject<T>::TextureObject(const TextureObject &p) { pitch_ = p.pitch_; pitch2_ = pitch_ / sizeof(T); needsfree_ = p.needsfree_; - //needsdestroy_ = false; + p.texobj_ = 0; + p.needsfree_ = false; + p.ptr_ = nullptr; + return *this; } template <typename T> diff --git a/components/common/cpp/src/configuration.cpp b/components/common/cpp/src/configuration.cpp index 20c07b978f7b2db92387659267518e2e42478796..79caa6d418b433b624597be801bdcac0b6ee5d41 100644 --- a/components/common/cpp/src/configuration.cpp +++ b/components/common/cpp/src/configuration.cpp @@ -95,7 +95,7 @@ bool ftl::is_video(const string &file) { bool ftl::create_directory(const std::string &path) { #ifdef WIN32 - // TODO(nick) + // TODO:(nick) return false; #else if (!is_directory(path)) { @@ -180,7 +180,7 @@ static void _indexConfig(json_t &cfg) { _indexConfig(cfg[i.key()]); } } - } // TODO(Nick) Arrays.... + } // TODO:(Nick) Arrays.... 
} ftl::Configurable *ftl::config::find(const std::string &uri) { @@ -205,7 +205,7 @@ void ftl::config::registerConfigurable(ftl::Configurable *cfg) { } auto ix = config_instance.find(*uri); if (ix != config_instance.end()) { - // FIXME HACK NOTE TODO SHOULD BE FATAL + // FIXME: HACK NOTE TODO SHOULD BE FATAL LOG(ERROR) << "Attempting to create a duplicate object: " << *uri; } else { config_instance[*uri] = cfg; diff --git a/components/net/cpp/src/peer.cpp b/components/net/cpp/src/peer.cpp index c874510963afca5b8f9a7e182bcf4b7719c2b09a..8e279090d1a439f10dd63de2c750df73e56b7ed3 100644 --- a/components/net/cpp/src/peer.cpp +++ b/components/net/cpp/src/peer.cpp @@ -61,7 +61,7 @@ ftl::UUID ftl::net::this_peer; //static ctpl::thread_pool pool(5); -// TODO(nick) Move to tcp_internal.cpp +// TODO:(nick) Move to tcp_internal.cpp static SOCKET tcpConnect(URI &uri) { int rc; //sockaddr_in destAddr; @@ -109,13 +109,13 @@ static SOCKET tcpConnect(URI &uri) { fcntl(csocket, F_SETFL, arg); #endif - // TODO(Nick) - Check all returned addresses. + // TODO:(Nick) - Check all returned addresses. auto addr = addrs; rc = ::connect(csocket, addr->ai_addr, (socklen_t)addr->ai_addrlen); if (rc < 0) { if (errno == EINPROGRESS) { - // TODO(Nick) Move to main select thread to prevent blocking + // FIXME:(Nick) Move to main select thread to prevent blocking fd_set myset; struct timeval tv; tv.tv_sec = 1; @@ -306,14 +306,14 @@ bool Peer::reconnect() { } } - // TODO(Nick) allow for other protocols in reconnect + // TODO:(Nick) allow for other protocols in reconnect return false; } void Peer::_updateURI() { sockaddr_storage addr; - // TODO(Nick) Get actual protocol... + // FIXME:(Nick) Get actual protocol... scheme_ = ftl::URI::SCHEME_TCP; int rsize = sizeof(sockaddr_storage); @@ -333,8 +333,6 @@ void Peer::_updateURI() { port = s->sin6_port; } - // TODO verify tcp or udp etc. - uri_ = std::string("tcp://")+addrbuf; uri_ += ":"; uri_ += std::to_string(port); @@ -394,10 +392,6 @@ void Peer::error(int e) { } void Peer::data() { - // TODO(Nick) Should not enter here twice if recv call has yet to be - // processed. - //if (!is_waiting_) return; - //is_waiting_ = false; UNIQUE_LOCK(recv_mtx_,lk); recv_buf_.reserve_buffer(kMaxMessage); @@ -496,7 +490,7 @@ bool Peer::_data() { } void Peer::_dispatchResponse(uint32_t id, msgpack::object &res) { - // TODO Handle error reporting... + // TODO: Handle error reporting... UNIQUE_LOCK(cb_mtx_,lk); if (callbacks_.count(id) > 0) { DLOG(1) << "Received return RPC value"; @@ -569,7 +563,7 @@ int Peer::_send() { size_t len = 0; const iovec *sendvec = send_buf_.vector(); size_t size = send_buf_.vector_size(); - char buf[20]; // TODO(nick) Should not be a stack buffer. + char buf[20]; // Calculate total size of message for (size_t i=1; i < size; i++) { @@ -592,8 +586,6 @@ int Peer::_send() { } #ifdef WIN32 - // TODO(nick) Use WSASend instead as equivalent to writev - auto send_vec = send_buf_.vector(); auto send_size = send_buf_.vector_size(); vector<WSABUF> wsabuf(send_size); diff --git a/components/net/cpp/src/universe.cpp b/components/net/cpp/src/universe.cpp index 2282af25208d48214ff5c1b4d4d40cf912c6c237..bc4f24fca3fbe53911a18b52850f931cb8e58f29 100644 --- a/components/net/cpp/src/universe.cpp +++ b/components/net/cpp/src/universe.cpp @@ -118,7 +118,7 @@ int Universe::_setDescriptors() { SOCKET n = 0; - // TODO Shared lock for some of the time... + // TODO: Shared lock for some of the time... UNIQUE_LOCK(net_mutex_,lk); //Set file descriptor for the listening sockets. 
@@ -199,49 +199,6 @@ Peer *Universe::getPeer(const UUID &id) const { else return ix->second; } -/*optional<UUID> Universe::findOwner(const string &res) { - // TODO(nick) cache this information - return findOne<UUID>("__owner__", res); -} - -bool Universe::createResource(const std::string &uri) { - owned_.insert(uri); - subscribers_[uri]; - return true; -} - -// TODO (nick) Add URI version and correctly parse URI query parameters -int Universe::numberOfSubscribers(const std::string &res) const { - auto s = subscribers_.find(res); - if (s != subscribers_.end()) { - return (int)s->second.size(); - } else { - return -1; - } -} - -bool Universe::hasSubscribers(const std::string &res) const { - // FIXME (nick) Need to parse URI and correct query order - return numberOfSubscribers(res) > 0; -} - -bool Universe::hasSubscribers(const ftl::URI &res) const { - return numberOfSubscribers(res.to_string()) > 0; -} - -bool Universe::_subscribe(const std::string &res) { - // Need to find who owns the resource - optional<UUID> pid = findOwner(res); - - if (pid) { - return call<bool>(*pid, "__subscribe__", this_peer, res); - } else { - // No resource found - LOG(WARNING) << "Subscribe to unknown resource: " << res; - return false; - } -}*/ - void Universe::_periodic() { auto i = reconnects_.begin(); while (i != reconnects_.end()) { @@ -344,7 +301,6 @@ void Universe::_run() { } } - // TODO(Nick) Might switch to shared lock here? { SHARED_LOCK(net_mutex_, lk); @@ -368,9 +324,6 @@ void Universe::_run() { } } } - - // TODO(Nick) Don't always need to call this - //_cleanupPeers(); } } diff --git a/components/rgbd-sources/include/ftl/rgbd/source.hpp b/components/rgbd-sources/include/ftl/rgbd/source.hpp index b084258bcd9d5a7365fd83136aec599d9701a406..a1190849e26a3bea763c1e7eaa5930af73dc1052 100644 --- a/components/rgbd-sources/include/ftl/rgbd/source.hpp +++ b/components/rgbd-sources/include/ftl/rgbd/source.hpp @@ -11,6 +11,8 @@ #include <Eigen/Eigen> #include <string> +#include <ftl/cuda_common.hpp> + namespace ftl { namespace net { @@ -68,7 +70,7 @@ class Source : public ftl::Configurable { /** * Is this source valid and ready to grab?. */ - bool isReady() { return (impl_) ? impl_->isReady() : false; } + bool isReady() { return (impl_) ? impl_->isReady() : params_.width != 0; } /** * Change the second channel source. @@ -106,6 +108,16 @@ class Source : public ftl::Configurable { */ void getDepth(cv::Mat &d); + /** + * Write frames into source buffers from an external renderer. Virtual + * sources do not have an internal generator of frames but instead have + * their data provided from an external rendering class. This function only + * works when there is no internal generator. + */ + void writeFrames(const cv::Mat &rgb, const cv::Mat &depth); + void writeFrames(const ftl::cuda::TextureObject<uchar4> &rgb, const ftl::cuda::TextureObject<uint> &depth, cudaStream_t stream); + void writeFrames(const ftl::cuda::TextureObject<uchar4> &rgb, const ftl::cuda::TextureObject<float> &depth, cudaStream_t stream); + /** * Directly upload source RGB and Depth to GPU. 
*/ diff --git a/components/rgbd-sources/src/calibrate.cpp b/components/rgbd-sources/src/calibrate.cpp index ad9b704621a05af1e249f517f51fd2b2f9684e2a..24aa650810be7810c09bc3d5dd7947c9895aa787 100644 --- a/components/rgbd-sources/src/calibrate.cpp +++ b/components/rgbd-sources/src/calibrate.cpp @@ -122,7 +122,7 @@ void Calibrate::updateCalibration(const ftl::rgbd::Camera &p) { Q_.at<double>(2,3) = p.fx; Q_.at<double>(0,3) = p.cx; Q_.at<double>(1,3) = p.cy; - // TODO(Nick) Update camera matrix also... + // FIXME:(Nick) Update camera matrix also... initUndistortRectifyMap(M1_, D1_, R1_, P1_, img_size_, CV_32FC1, map1.first, map2.first); initUndistortRectifyMap(M2_, D2_, R2_, P2_, img_size_, CV_32FC1, map1.second, map2.second); diff --git a/components/rgbd-sources/src/colour.cpp b/components/rgbd-sources/src/colour.cpp index c476ee8e22a569df4e64cd981166b7c43c9fd4ab..7168350b1cac691d374d0cfcc2e7c45309982994 100644 --- a/components/rgbd-sources/src/colour.cpp +++ b/components/rgbd-sources/src/colour.cpp @@ -46,6 +46,7 @@ inline float kelvinFactor(int temp) { return (float)std::get<C>(kelvin_table[index]) / 255.0f; } +// TODO: Implement CUDA version void ftl::rgbd::colourCorrection(cv::Mat &img, float gamma, int temp) { using namespace cv; @@ -53,7 +54,7 @@ void ftl::rgbd::colourCorrection(cv::Mat &img, float gamma, int temp) { Mat lutGreen(1, 256, CV_8U); Mat lutBlue(1, 256, CV_8U); - // TODO(Nick): Cache these lookup tables if used every frame... + // TODO:(Nick) Cache these lookup tables if used every frame... uchar* pr = lutRed.ptr(); uchar* pg = lutGreen.ptr(); uchar* pb = lutBlue.ptr(); diff --git a/components/rgbd-sources/src/disparity.cpp b/components/rgbd-sources/src/disparity.cpp index 9341a37af6b93201c9a2abad9f1d21ef4847b24f..e92c72de765c7dcd69da1aa279de9181e3170087 100644 --- a/components/rgbd-sources/src/disparity.cpp +++ b/components/rgbd-sources/src/disparity.cpp @@ -37,7 +37,7 @@ void Disparity::_register(const std::string &n, (*algorithms__)[n] = f; } -// TODO(Nick) Add remaining algorithms +// TODO:(Nick) Add remaining algorithms /* #include "algorithms/rtcensus.hpp" static ftl::rgbd::detail::Disparity::Register rtcensus("rtcensus", ftl::algorithms::RTCensus::create); diff --git a/components/rgbd-sources/src/net.cpp b/components/rgbd-sources/src/net.cpp index a5cc73e08d0c441fecf9392c9d5d3f0170492783..210a0e6249457604f350917c8cd04b00614d64e6 100644 --- a/components/rgbd-sources/src/net.cpp +++ b/components/rgbd-sources/src/net.cpp @@ -202,10 +202,8 @@ void NetSource::_updateURI() { chunks_dim_ = ftl::rgbd::kChunkDim; chunk_width_ = params_.width / chunks_dim_; chunk_height_ = params_.height / chunks_dim_; - // TODO(Nick) Must lock before changing these below since some thread may still be updating chunk rgb_ = cv::Mat(cv::Size(params_.width, params_.height), CV_8UC3, cv::Scalar(0,0,0)); depth_ = cv::Mat(cv::Size(params_.width, params_.height), CV_32FC1, 0.0f); - //idepth_ = cv::Mat(cv::Size(params_.width, params_.height), CV_16UC1, cv::Scalar(0)); uri_ = *uri; active_ = true; diff --git a/components/rgbd-sources/src/source.cpp b/components/rgbd-sources/src/source.cpp index dbf23f203f987c35a0f0a2bf9b27bdc01d73a4a3..c5ad451de46d4d6bebac2e774bd3e97c42a0bad1 100644 --- a/components/rgbd-sources/src/source.cpp +++ b/components/rgbd-sources/src/source.cpp @@ -57,10 +57,10 @@ cv::Mat Source::cameraMatrix() const { return m; } -void Source::customImplementation(ftl::rgbd::detail::Source *impl) { +/*void Source::customImplementation(ftl::rgbd::detail::Source *impl) { if (impl_) delete 
impl_; impl_ = impl; -} +}*/ ftl::rgbd::detail::Source *Source::_createImplementation() { auto uristr = get<string>("uri"); @@ -116,7 +116,7 @@ ftl::rgbd::detail::Source *Source::_createFileImpl(const ftl::URI &uri) { } else if (ext == "tar" || ext == "gz") { #ifdef HAVE_LIBARCHIVE ftl::rgbd::SnapshotReader reader(path); - return new ftl::rgbd::detail::SnapshotSource(this, reader, std::to_string(value("index", 0))); // TODO Get ID from config + return new ftl::rgbd::detail::SnapshotSource(this, reader, std::to_string(value("index", 0))); // TODO: Use URI fragment #else LOG(ERROR) << "Cannot read snapshots, libarchive not installed"; return nullptr; @@ -144,6 +144,17 @@ ftl::rgbd::detail::Source *Source::_createDeviceImpl(const ftl::URI &uri) { #else LOG(ERROR) << "You do not have 'librealsense2' installed"; #endif + } else { + params_.width = value("width", 1280); + params_.height = value("height", 720); + params_.fx = value("focal", 700.0f); + params_.fy = params_.fx; + params_.cx = -(double)params_.width / 2.0; + params_.cy = -(double)params_.height / 2.0; + params_.minDepth = value("minDepth", 0.1f); + params_.maxDepth = value("maxDepth", 20.0f); + params_.doffs = 0; + params_.baseline = value("baseline", 0.0f); } return nullptr; } @@ -197,7 +208,7 @@ bool Source::hasCapabilities(capability_t c) { capability_t Source::getCapabilities() const { if (impl_) return impl_->capabilities_; - else return 0; + else return kCapMovable | kCapVideo | kCapStereo; // FIXME: Don't assume these } void Source::reset() { @@ -217,8 +228,41 @@ bool Source::grab() { return false; } +void Source::writeFrames(const cv::Mat &rgb, const cv::Mat &depth) { + if (!impl_) { + UNIQUE_LOCK(mutex_,lk); + rgb.copyTo(rgb_); + depth.copyTo(depth_); + } +} + +void Source::writeFrames(const ftl::cuda::TextureObject<uchar4> &rgb, const ftl::cuda::TextureObject<uint> &depth, cudaStream_t stream) { + if (!impl_) { + UNIQUE_LOCK(mutex_,lk); + rgb_.create(rgb.height(), rgb.width(), CV_8UC4); + cudaSafeCall(cudaMemcpy2DAsync(rgb_.data, rgb_.step, rgb.devicePtr(), rgb.pitch(), rgb_.cols * sizeof(uchar4), rgb_.rows, cudaMemcpyDeviceToHost, stream)); + depth_.create(depth.height(), depth.width(), CV_32SC1); + cudaSafeCall(cudaMemcpy2DAsync(depth_.data, depth_.step, depth.devicePtr(), depth.pitch(), depth_.cols * sizeof(uint), depth_.rows, cudaMemcpyDeviceToHost, stream)); + cudaSafeCall(cudaStreamSynchronize(stream)); // TODO:(Nick) Don't wait here. + + depth_.convertTo(depth_, CV_32F, 1.0f / 1000.0f); + } else { + LOG(ERROR) << "writeFrames cannot be done on this source: " << getURI(); + } +} + +void Source::writeFrames(const ftl::cuda::TextureObject<uchar4> &rgb, const ftl::cuda::TextureObject<float> &depth, cudaStream_t stream) { + if (!impl_) { + UNIQUE_LOCK(mutex_,lk); + rgb_.create(rgb.height(), rgb.width(), CV_8UC4); + cudaSafeCall(cudaMemcpy2DAsync(rgb_.data, rgb_.step, rgb.devicePtr(), rgb.pitch(), rgb_.cols * sizeof(uchar4), rgb_.rows, cudaMemcpyDeviceToHost, stream)); + depth_.create(depth.height(), depth.width(), CV_32FC1); + cudaSafeCall(cudaMemcpy2DAsync(depth_.data, depth_.step, depth.devicePtr(), depth.pitch(), depth_.cols * sizeof(float), depth_.rows, cudaMemcpyDeviceToHost, stream)); + cudaSafeCall(cudaStreamSynchronize(stream)); // TODO:(Nick) Don't wait here. 
+ } +} + bool Source::thumbnail(cv::Mat &t) { - // TODO(Nick) periodic refresh if (impl_) { UNIQUE_LOCK(mutex_,lk); impl_->grab(1, 9); @@ -236,6 +280,6 @@ bool Source::thumbnail(cv::Mat &t) { bool Source::setChannel(ftl::rgbd::channel_t c) { channel_ = c; - // TODO(Nick) Verify channel is supported by this source... + // FIXME:(Nick) Verify channel is supported by this source... return true; } diff --git a/components/rgbd-sources/src/streamer.cpp b/components/rgbd-sources/src/streamer.cpp index 815701cf4facce74dc2ddf26925129ebdf58d071..74738e58755ed1045315519de79077ec800e8b48 100644 --- a/components/rgbd-sources/src/streamer.cpp +++ b/components/rgbd-sources/src/streamer.cpp @@ -189,7 +189,7 @@ void Streamer::stop() { } void Streamer::poll() { - double wait = 1.0f / 25.0f; // TODO(Nick) Should be in config + double wait = 1.0f / 25.0f; // TODO:(Nick) Should be in config auto start = std::chrono::high_resolution_clock::now(); // Create frame jobs at correct FPS interval _schedule(); @@ -202,8 +202,8 @@ void Streamer::poll() { } else { //LOG(INFO) << "Frame rate @ " << (1.0f / elapsed.count()); // Otherwise, wait until next frame should start. - // CHECK(Nick) Is this accurate enough? Almost certainly not - // TODO(Nick) Synchronise by time corrections and use of fixed time points + // FIXME:(Nick) Is this accurate enough? Almost certainly not + // TODO:(Nick) Synchronise by time corrections and use of fixed time points // but this only works if framerate can be achieved. sleep_for(milliseconds((long long)((wait - elapsed.count()) * 1000.0f))); } diff --git a/components/rgbd-sources/test/CMakeLists.txt b/components/rgbd-sources/test/CMakeLists.txt index 538c4d757d7d942a13417ca151f90c6b7473fa7d..96e1441c5da6134eb17070d77e5ce9dff3a355f1 100644 --- a/components/rgbd-sources/test/CMakeLists.txt +++ b/components/rgbd-sources/test/CMakeLists.txt @@ -5,6 +5,6 @@ add_executable(source_unit ) target_include_directories(source_unit PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../include") target_link_libraries(source_unit - ftlcommon Eigen3::Eigen) + ftlcommon Eigen3::Eigen ${CUDA_LIBRARIES}) add_test(SourceUnitTest source_unit)
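
Note: the per-frame wiring of the new splat renderer is spread across the changes to main.cpp and splat_render.cpp above. As a reference only, the sketch below collects those calls into one place. splatFrame is a hypothetical helper and not part of this change; it uses only the classes and methods added or touched in this diff, and omits the network/shutdown handling that main.cpp performs around the loop body.

#include <ftl/voxel_scene.hpp>
#include <ftl/rgbd/source.hpp>
#include <ftl/rgbd/streamer.hpp>
#include "splat_render.hpp"

// Hypothetical helper summarising one iteration of the frame loop in main.cpp:
// wait for the previous virtual frame, integrate new depth into the voxel hash,
// then let the Splatter render the virtual view and hand it to the streamer.
static void splatFrame(ftl::voxhash::SceneRep *scene,
                       ftl::rgbd::Streamer *stream,
                       ftl::rgbd::Source *virt,
                       ftl::render::Splatter *splat) {
	stream->wait();                                       // previous virtual frame finished
	scene->integrate();                                   // merge new frames into the voxel structure
	splat->render(virt, scene->getIntegrationStream());   // pass 1 + pass 2, then Source::writeFrames()
	stream->poll();                                       // start virtual camera streaming/compression
}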