diff --git a/applications/reconstruct/CMakeLists.txt b/applications/reconstruct/CMakeLists.txt index 376852843a588446365acfe380bf12580f60591c..dcee7afa0147378ffaabd91263e200f8fe284c98 100644 --- a/applications/reconstruct/CMakeLists.txt +++ b/applications/reconstruct/CMakeLists.txt @@ -4,24 +4,19 @@ set(REPSRC src/main.cpp - src/voxel_scene.cpp - src/scene_rep_hash_sdf.cu - src/compactors.cu - src/garbage.cu - src/integrators.cu + #src/voxel_scene.cpp #src/ray_cast_sdf.cu - src/voxel_render.cu src/camera_util.cu - src/voxel_hash.cu - src/voxel_hash.cpp #src/ray_cast_sdf.cpp src/registration.cpp #src/virtual_source.cpp - src/splat_render.cpp - src/dibr.cu - src/mls.cu - src/depth_camera.cu - src/depth_camera.cpp + #src/splat_render.cpp + #src/dibr.cu + #src/mls.cu + #src/depth_camera.cu + #src/depth_camera.cpp + src/ilw.cpp + src/ilw.cu ) add_executable(ftl-reconstruct ${REPSRC}) @@ -38,6 +33,6 @@ set_property(TARGET ftl-reconstruct PROPERTY CUDA_SEPARABLE_COMPILATION ON) endif() #target_include_directories(cv-node PUBLIC ${PROJECT_SOURCE_DIR}/include) -target_link_libraries(ftl-reconstruct ftlcommon ftlrgbd Threads::Threads ${OpenCV_LIBS} ftlctrl ftlnet) +target_link_libraries(ftl-reconstruct ftlcommon ftlrgbd Threads::Threads ${OpenCV_LIBS} ftlctrl ftlnet ftlrender) diff --git a/applications/reconstruct/include/ftl/voxel_hash.hpp b/applications/reconstruct/include/ftl/voxel_hash.hpp deleted file mode 100644 index 98c2eca90530c309b5d2e46848493634aaaa053c..0000000000000000000000000000000000000000 --- a/applications/reconstruct/include/ftl/voxel_hash.hpp +++ /dev/null @@ -1,428 +0,0 @@ -// From: https://github.com/niessner/VoxelHashing/blob/master/DepthSensingCUDA/Source/VoxelUtilHashSDF.h - -#pragma once - -#ifndef sint -typedef signed int sint; -#endif - -#ifndef uint -typedef unsigned int uint; -#endif - -#ifndef slong -typedef signed long slong; -#endif - -#ifndef ulong -typedef unsigned long ulong; -#endif - -#ifndef uchar -typedef unsigned char uchar; -#endif - -#ifndef schar -typedef signed char schar; -#endif - - - - -#include <ftl/cuda_util.hpp> - -#include <ftl/cuda_matrix_util.hpp> -#include <ftl/voxel_hash_params.hpp> - -#include <ftl/depth_camera.hpp> - -#define SDF_BLOCK_SIZE 8 -#define SDF_BLOCK_SIZE_OLAP 8 - -#ifndef MINF -#define MINF __int_as_float(0xff800000) -#endif - -#ifndef PINF -#define PINF __int_as_float(0x7f800000) -#endif - -extern __constant__ ftl::voxhash::HashParams c_hashParams; -extern "C" void updateConstantHashParams(const ftl::voxhash::HashParams& hashParams); - -namespace ftl { -namespace voxhash { - -//status flags for hash entries -static const int LOCK_ENTRY = -1; -static const int FREE_ENTRY = -2147483648; -static const int NO_OFFSET = 0; - -static const uint kFlagSurface = 0x00000001; - -struct __align__(16) HashEntryHead { - union { - short4 posXYZ; // hash position (lower left corner of SDFBlock)) - uint64_t pos; - }; - int offset; // offset for collisions - uint flags; -}; - -struct __align__(16) HashEntry -{ - HashEntryHead head; - uint voxels[16]; // 512 bits, 1 bit per voxel - //uint validity[16]; // Is the voxel valid, 512 bit - - /*__device__ void operator=(const struct HashEntry& e) { - ((long long*)this)[0] = ((const long long*)&e)[0]; - ((long long*)this)[1] = ((const long long*)&e)[1]; - //((int*)this)[4] = ((const int*)&e)[4]; - ((long long*)this)[2] = ((const long long*)&e)[2]; - ((long long*)this)[2] = ((const long long*)&e)[3]; - ((long long*)this)[2] = ((const long long*)&e)[4]; - ((long long*)this)[2] = ((const long long*)&e)[5]; - 
((long long*)this)[2] = ((const long long*)&e)[6]; - ((long long*)this)[2] = ((const long long*)&e)[7]; - ((long long*)this)[2] = ((const long long*)&e)[8]; - ((long long*)this)[2] = ((const long long*)&e)[9]; - ((long long*)this)[2] = ((const long long*)&e)[10]; - }*/ -}; - -struct __align__(8) Voxel { - float sdf; //signed distance function - uchar3 color; //color - uchar weight; //accumulated sdf weight - - __device__ void operator=(const struct Voxel& v) { - ((long long*)this)[0] = ((const long long*)&v)[0]; - } - -}; - -/** - * Voxel Hash Table structure and operations. Works on both CPU and GPU with - * host <-> device transfer included. - */ -struct HashData { - - /////////////// - // Host part // - /////////////// - - __device__ __host__ - HashData() { - //d_heap = NULL; - //d_heapCounter = NULL; - d_hash = NULL; - d_hashDecision = NULL; - d_hashDecisionPrefix = NULL; - d_hashCompactified = NULL; - d_hashCompactifiedCounter = NULL; - //d_SDFBlocks = NULL; - d_hashBucketMutex = NULL; - m_bIsOnGPU = false; - } - - /** - * Create all the data structures, either on GPU or system memory. - */ - __host__ void allocate(const HashParams& params, bool dataOnGPU = true); - - __host__ void updateParams(const HashParams& params); - - __host__ void free(); - - /** - * Download entire hash table from GPU to CPU memory. - */ - __host__ HashData download() const; - - /** - * Upload entire hash table from CPU to GPU memory. - */ - __host__ HashData upload() const; - - __host__ size_t getAllocatedBlocks() const; - - __host__ size_t getFreeBlocks() const; - - __host__ size_t getCollisionCount() const; - - - - ///////////////// - // Device part // - ///////////////// -//#define __CUDACC__ -#ifdef __CUDACC__ - - __device__ - const HashParams& params() const { - return c_hashParams; - } - - //! see teschner et al. (but with correct prime values) - __device__ - uint computeHashPos(const int3& virtualVoxelPos) const { - const int p0 = 73856093; - const int p1 = 19349669; - const int p2 = 83492791; - - int res = ((virtualVoxelPos.x * p0) ^ (virtualVoxelPos.y * p1) ^ (virtualVoxelPos.z * p2)) % params().m_hashNumBuckets; - if (res < 0) res += params().m_hashNumBuckets; - return (uint)res; - } - - //merges two voxels (v0 the currently stored voxel, v1 is the input voxel) - __device__ - void combineVoxel(const Voxel &v0, const Voxel& v1, Voxel &out) const { - - //v.color = (10*v0.weight * v0.color + v1.weight * v1.color)/(10*v0.weight + v1.weight); //give the currently observed color more weight - //v.color = (v0.weight * v0.color + v1.weight * v1.color)/(v0.weight + v1.weight); - //out.color = 0.5f * (v0.color + v1.color); //exponential running average - - - float3 c0 = make_float3(v0.color.x, v0.color.y, v0.color.z); - float3 c1 = make_float3(v1.color.x, v1.color.y, v1.color.z); - - //float3 res = (c0.x+c0.y+c0.z == 0) ? c1 : 0.5f*c0 + 0.5f*c1; - //float3 res = (c0+c1)/2; - float3 res = (c0 * (float)v0.weight + c1 * (float)v1.weight) / ((float)v0.weight + (float)v1.weight); - //float3 res = c1; - - out.color.x = (uchar)(res.x+0.5f); out.color.y = (uchar)(res.y+0.5f); out.color.z = (uchar)(res.z+0.5f); - - // Nick: reduces colour flicker but not ideal.. - //out.color = v1.color; - - // Option 3 (Nick): Use colour with minimum SDF since it should be closest to surface. - // Results in stable but pixelated output - //out.color = (v0.weight > 0 && (fabs(v0.sdf) < fabs(v1.sdf))) ? 
v0.color : v1.color; - - // Option 4 (Nick): Merge colours based upon relative closeness - /*float3 c0 = make_float3(v0.color.x, v0.color.y, v0.color.z); - float3 c1 = make_float3(v1.color.x, v1.color.y, v1.color.z); - float factor = fabs(v0.sdf - v1.sdf) / 0.05f / 2.0f; - if (factor > 0.5f) factor = 0.5f; - float factor0 = (fabs(v0.sdf) < fabs(v1.sdf)) ? 1.0f - factor : factor; - float factor1 = 1.0f - factor0; - out.color.x = (v0.weight > 0) ? (uchar)(c0.x * factor0 + c1.x * factor1) : c1.x; - out.color.y = (v0.weight > 0) ? (uchar)(c0.y * factor0 + c1.y * factor1) : c1.y; - out.color.z = (v0.weight > 0) ? (uchar)(c0.z * factor0 + c1.z * factor1) : c1.z;*/ - - out.sdf = (v0.sdf * (float)v0.weight + v1.sdf * (float)v1.weight) / ((float)v0.weight + (float)v1.weight); - out.weight = min(params().m_integrationWeightMax, (unsigned int)v0.weight + (unsigned int)v1.weight); - } - - - //! returns the truncation of the SDF for a given distance value - __device__ - float getTruncation(float z) const { - return params().m_truncation + params().m_truncScale * z; - } - - - __device__ - float3 worldToVirtualVoxelPosFloat(const float3& pos) const { - return pos / params().m_virtualVoxelSize; - } - - __device__ - int3 worldToVirtualVoxelPos(const float3& pos) const { - //const float3 p = pos*g_VirtualVoxelResolutionScalar; - const float3 p = pos / params().m_virtualVoxelSize; - return make_int3(p+make_float3(sign(p))*0.5f); - } - - __device__ - int3 virtualVoxelPosToSDFBlock(int3 virtualVoxelPos) const { - if (virtualVoxelPos.x < 0) virtualVoxelPos.x -= SDF_BLOCK_SIZE_OLAP-1; - if (virtualVoxelPos.y < 0) virtualVoxelPos.y -= SDF_BLOCK_SIZE_OLAP-1; - if (virtualVoxelPos.z < 0) virtualVoxelPos.z -= SDF_BLOCK_SIZE_OLAP-1; - - return make_int3( - virtualVoxelPos.x/SDF_BLOCK_SIZE_OLAP, - virtualVoxelPos.y/SDF_BLOCK_SIZE_OLAP, - virtualVoxelPos.z/SDF_BLOCK_SIZE_OLAP); - } - - // Computes virtual voxel position of corner sample position - __device__ - int3 SDFBlockToVirtualVoxelPos(const int3& sdfBlock) const { - return sdfBlock*SDF_BLOCK_SIZE_OLAP; - } - - __device__ - float3 virtualVoxelPosToWorld(const int3& pos) const { - return make_float3(pos)*params().m_virtualVoxelSize; - } - - __device__ - float3 SDFBlockToWorld(const int3& sdfBlock) const { - return virtualVoxelPosToWorld(SDFBlockToVirtualVoxelPos(sdfBlock)); - } - - __device__ - int3 worldToSDFBlock(const float3& worldPos) const { - return virtualVoxelPosToSDFBlock(worldToVirtualVoxelPos(worldPos)); - } - - __device__ - bool isInBoundingBox(const HashParams &hashParams, const int3& sdfBlock) { - // NOTE (Nick): Changed, just assume all voxels are potentially in frustrum - //float3 posWorld = virtualVoxelPosToWorld(SDFBlockToVirtualVoxelPos(sdfBlock)) + hashParams.m_virtualVoxelSize * 0.5f * (SDF_BLOCK_SIZE - 1.0f); - //return camera.isInCameraFrustumApprox(hashParams.m_rigidTransformInverse, posWorld); - return !(hashParams.m_flags & ftl::voxhash::kFlagClipping) || sdfBlock.x > hashParams.m_minBounds.x && sdfBlock.x < hashParams.m_maxBounds.x && - sdfBlock.y > hashParams.m_minBounds.y && sdfBlock.y < hashParams.m_maxBounds.y && - sdfBlock.z > hashParams.m_minBounds.z && sdfBlock.z < hashParams.m_maxBounds.z; - } - - //! computes the (local) virtual voxel pos of an index; idx in [0;511] - __device__ - uint3 delinearizeVoxelIndex(uint idx) const { - uint x = idx % SDF_BLOCK_SIZE; - uint y = (idx % (SDF_BLOCK_SIZE * SDF_BLOCK_SIZE)) / SDF_BLOCK_SIZE; - uint z = idx / (SDF_BLOCK_SIZE * SDF_BLOCK_SIZE); - return make_uint3(x,y,z); - } - - //! 
computes the linearized index of a local virtual voxel pos; pos in [0;7]^3 - __device__ - uint linearizeVoxelPos(const int3& virtualVoxelPos) const { - return - virtualVoxelPos.z * SDF_BLOCK_SIZE * SDF_BLOCK_SIZE + - virtualVoxelPos.y * SDF_BLOCK_SIZE + - virtualVoxelPos.x; - } - - __device__ - int virtualVoxelPosToLocalSDFBlockIndex(const int3& virtualVoxelPos) const { - int3 localVoxelPos = make_int3( - virtualVoxelPos.x % SDF_BLOCK_SIZE, - virtualVoxelPos.y % SDF_BLOCK_SIZE, - virtualVoxelPos.z % SDF_BLOCK_SIZE); - - if (localVoxelPos.x < 0) localVoxelPos.x += SDF_BLOCK_SIZE; - if (localVoxelPos.y < 0) localVoxelPos.y += SDF_BLOCK_SIZE; - if (localVoxelPos.z < 0) localVoxelPos.z += SDF_BLOCK_SIZE; - - return linearizeVoxelPos(localVoxelPos); - } - - __device__ - int worldToLocalSDFBlockIndex(const float3& world) const { - int3 virtualVoxelPos = worldToVirtualVoxelPos(world); - return virtualVoxelPosToLocalSDFBlockIndex(virtualVoxelPos); - } - - - //! returns the hash entry for a given worldPos; if there was no hash entry the returned entry will have a ptr with FREE_ENTRY set - __device__ - int getHashEntry(const float3& worldPos) const { - //int3 blockID = worldToSDFVirtualVoxelPos(worldPos)/SDF_BLOCK_SIZE; //position of sdf block - int3 blockID = worldToSDFBlock(worldPos); - return getHashEntryForSDFBlockPos(blockID); - } - - - __device__ - void deleteHashEntry(uint id) { - deleteHashEntry(d_hash[id]); - } - - __device__ - void deleteHashEntry(HashEntry& hashEntry) { - hashEntry.head.pos = 0; - hashEntry.head.offset = FREE_ENTRY; - for (int i=0; i<16; ++i) hashEntry.voxels[i] = 0; - } - - __device__ - bool voxelExists(const float3& worldPos) const { - int hashEntry = getHashEntry(worldPos); - return (hashEntry != -1); - } - - __device__ - void deleteVoxel(Voxel& v) const { - v.color = make_uchar3(0,0,0); - v.weight = 0; - v.sdf = 0.0f; - } - - - __device__ - bool getVoxel(const float3& worldPos) const { - int hashEntry = getHashEntry(worldPos); - if (hashEntry == -1) { - return false; - } else { - int3 virtualVoxelPos = worldToVirtualVoxelPos(worldPos); - int ix = virtualVoxelPosToLocalSDFBlockIndex(virtualVoxelPos); - return d_hash[hashEntry].voxels[ix/32] & (0x1 << (ix % 32)); - } - } - - __device__ - bool getVoxel(const int3& virtualVoxelPos) const { - int hashEntry = getHashEntryForSDFBlockPos(virtualVoxelPosToSDFBlock(virtualVoxelPos)); - if (hashEntry == -1) { - return false; - } else { - int ix = virtualVoxelPosToLocalSDFBlockIndex(virtualVoxelPos); - return d_hash[hashEntry].voxels[ix >> 5] & (0x1 << (ix & 0x1F)); - } - } - - /*__device__ - void setVoxel(const int3& virtualVoxelPos, bool voxelInput) const { - int hashEntry = getHashEntryForSDFBlockPos(virtualVoxelPosToSDFBlock(virtualVoxelPos)); - if (hashEntry == -1) { - d_SDFBlocks[hashEntry.ptr + virtualVoxelPosToLocalSDFBlockIndex(virtualVoxelPos)] = voxelInput; - int ix = virtualVoxelPosToLocalSDFBlockIndex(virtualVoxelPos); - d_hash[hashEntry].voxels[ix >> 5] |= (0x1 << (ix & 0x1F)); - } - }*/ - - //! 
returns the hash entry for a given sdf block id; if there was no hash entry the returned entry will have a ptr with FREE_ENTRY set - __device__ - int getHashEntryForSDFBlockPos(const int3& sdfBlock) const; - - //for histogram (no collision traversal) - __device__ - unsigned int getNumHashEntriesPerBucket(unsigned int bucketID); - - //for histogram (collisions traversal only) - __device__ - unsigned int getNumHashLinkedList(unsigned int bucketID); - - - //pos in SDF block coordinates - __device__ - void allocBlock(const int3& pos); - - //!inserts a hash entry without allocating any memory: used by streaming: TODO MATTHIAS check the atomics in this function - __device__ - bool insertHashEntry(HashEntry entry); - - //! deletes a hash entry position for a given sdfBlock index (returns true uppon successful deletion; otherwise returns false) - __device__ - bool deleteHashEntryElement(const int3& sdfBlock); - -#endif //CUDACC - - int* d_hashDecision; // - int* d_hashDecisionPrefix; // - HashEntry* d_hash; //hash that stores pointers to sdf blocks - HashEntry** d_hashCompactified; //same as before except that only valid pointers are there - int* d_hashCompactifiedCounter; //atomic counter to add compactified entries atomically - int* d_hashBucketMutex; //binary flag per hash bucket; used for allocation to atomically lock a bucket - - bool m_bIsOnGPU; //the class be be used on both cpu and gpu -}; - -} // namespace voxhash -} // namespace ftl diff --git a/applications/reconstruct/src/compactors.cu b/applications/reconstruct/src/compactors.cu deleted file mode 100644 index b7cdd5028f0f5ec78de47d8bf6f9099c5448a494..0000000000000000000000000000000000000000 --- a/applications/reconstruct/src/compactors.cu +++ /dev/null @@ -1,236 +0,0 @@ -#include "compactors.hpp" - -using ftl::voxhash::HashData; -using ftl::voxhash::HashParams; -using ftl::voxhash::Voxel; -using ftl::voxhash::HashEntry; -using ftl::voxhash::FREE_ENTRY; - -#define COMPACTIFY_HASH_THREADS_PER_BLOCK 256 -//#define COMPACTIFY_HASH_SIMPLE - - -/*__global__ void fillDecisionArrayKernel(HashData hashData, DepthCameraData depthCameraData) -{ - const HashParams& hashParams = c_hashParams; - const unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x; - - if (idx < hashParams.m_hashNumBuckets * HASH_BUCKET_SIZE) { - hashData.d_hashDecision[idx] = 0; - if (hashData.d_hash[idx].ptr != FREE_ENTRY) { - if (hashData.isSDFBlockInCameraFrustumApprox(hashData.d_hash[idx].pos)) { - hashData.d_hashDecision[idx] = 1; //yes - } - } - } -}*/ - -/*extern "C" void fillDecisionArrayCUDA(HashData& hashData, const HashParams& hashParams, const DepthCameraData& depthCameraData) -{ - const dim3 gridSize((HASH_BUCKET_SIZE * hashParams.m_hashNumBuckets + (T_PER_BLOCK*T_PER_BLOCK) - 1)/(T_PER_BLOCK*T_PER_BLOCK), 1); - const dim3 blockSize((T_PER_BLOCK*T_PER_BLOCK), 1); - - fillDecisionArrayKernel<<<gridSize, blockSize>>>(hashData, depthCameraData); - -#ifdef _DEBUG - cudaSafeCall(cudaDeviceSynchronize()); - //cutilCheckMsg(__FUNCTION__); -#endif - -}*/ - -/*__global__ void compactifyHashKernel(HashData hashData) -{ - const HashParams& hashParams = c_hashParams; - const unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x; - if (idx < hashParams.m_hashNumBuckets * HASH_BUCKET_SIZE) { - if (hashData.d_hashDecision[idx] == 1) { - hashData.d_hashCompactified[hashData.d_hashDecisionPrefix[idx]-1] = hashData.d_hash[idx]; - } - } -}*/ - -/*extern "C" void compactifyHashCUDA(HashData& hashData, const HashParams& hashParams) -{ - const dim3 gridSize((HASH_BUCKET_SIZE * 
hashParams.m_hashNumBuckets + (T_PER_BLOCK*T_PER_BLOCK) - 1)/(T_PER_BLOCK*T_PER_BLOCK), 1); - const dim3 blockSize((T_PER_BLOCK*T_PER_BLOCK), 1); - - compactifyHashKernel<<<gridSize, blockSize>>>(hashData); - -#ifdef _DEBUG - cudaSafeCall(cudaDeviceSynchronize()); - //cutilCheckMsg(__FUNCTION__); -#endif -}*/ - -/*__global__ void compactifyVisibleKernel(HashData hashData, HashParams hashParams, DepthCameraParams camera) -{ - //const HashParams& hashParams = c_hashParams; - const unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x; -#ifdef COMPACTIFY_HASH_SIMPLE - if (idx < hashParams.m_hashNumBuckets) { - if (hashData.d_hash[idx].ptr != FREE_ENTRY) { - if (hashData.isSDFBlockInCameraFrustumApprox(hashParams, camera, hashData.d_hash[idx].pos)) - { - int addr = atomicAdd(hashData.d_hashCompactifiedCounter, 1); - hashData.d_hashCompactified[addr] = hashData.d_hash[idx]; - } - } - } -#else - __shared__ int localCounter; - if (threadIdx.x == 0) localCounter = 0; - __syncthreads(); - - int addrLocal = -1; - if (idx < hashParams.m_hashNumBuckets) { - if (hashData.d_hash[idx].ptr != FREE_ENTRY) { - if (hashData.isSDFBlockInCameraFrustumApprox(hashParams, camera, hashData.d_hash[idx].pos)) - { - addrLocal = atomicAdd(&localCounter, 1); - } - } - } - - __syncthreads(); - - __shared__ int addrGlobal; - if (threadIdx.x == 0 && localCounter > 0) { - addrGlobal = atomicAdd(hashData.d_hashCompactifiedCounter, localCounter); - } - __syncthreads(); - - if (addrLocal != -1) { - const unsigned int addr = addrGlobal + addrLocal; - hashData.d_hashCompactified[addr] = hashData.d_hash[idx]; - } -#endif -} - -void ftl::cuda::compactifyVisible(HashData& hashData, const HashParams& hashParams, const DepthCameraParams &camera, cudaStream_t stream) { - const unsigned int threadsPerBlock = COMPACTIFY_HASH_THREADS_PER_BLOCK; - const dim3 gridSize((hashParams.m_hashNumBuckets + threadsPerBlock - 1) / threadsPerBlock, 1); - const dim3 blockSize(threadsPerBlock, 1); - - cudaSafeCall(cudaMemsetAsync(hashData.d_hashCompactifiedCounter, 0, sizeof(int),stream)); - compactifyVisibleKernel << <gridSize, blockSize, 0, stream >> >(hashData, hashParams, camera); - //unsigned int res = 0; - //cudaSafeCall(cudaMemcpyAsync(&res, hashData.d_hashCompactifiedCounter, sizeof(unsigned int), cudaMemcpyDeviceToHost, stream)); - -#ifdef _DEBUG - cudaSafeCall(cudaDeviceSynchronize()); - //cutilCheckMsg(__FUNCTION__); -#endif - //return res; -}*/ - -__global__ void compactifyAllocatedKernel(HashData hashData) -{ - const HashParams& hashParams = c_hashParams; - const unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x; -#ifdef COMPACTIFY_HASH_SIMPLE - if (idx < hashParams.m_hashNumBuckets) { - if (hashData.d_hash[idx].head.offset != FREE_ENTRY) { - int addr = atomicAdd(hashData.d_hashCompactifiedCounter, 1); - hashData.d_hashCompactified[addr] = &hashData.d_hash[idx]; - } - } -#else - __shared__ int localCounter; - if (threadIdx.x == 0) localCounter = 0; - __syncthreads(); - - int addrLocal = -1; - if (idx < hashParams.m_hashNumBuckets) { - if (hashData.d_hash[idx].head.offset != FREE_ENTRY) { - addrLocal = atomicAdd(&localCounter, 1); - } - } - - __syncthreads(); - - __shared__ int addrGlobal; - if (threadIdx.x == 0 && localCounter > 0) { - addrGlobal = atomicAdd(hashData.d_hashCompactifiedCounter, localCounter); - } - __syncthreads(); - - if (addrLocal != -1) { - const unsigned int addr = addrGlobal + addrLocal; - hashData.d_hashCompactified[addr] = &hashData.d_hash[idx]; - } -#endif -} - -void ftl::cuda::compactifyAllocated(HashData& 
hashData, const HashParams& hashParams, cudaStream_t stream) { - const unsigned int threadsPerBlock = COMPACTIFY_HASH_THREADS_PER_BLOCK; - const dim3 gridSize((hashParams.m_hashNumBuckets + threadsPerBlock - 1) / threadsPerBlock, 1); - const dim3 blockSize(threadsPerBlock, 1); - - cudaSafeCall(cudaMemsetAsync(hashData.d_hashCompactifiedCounter, 0, sizeof(int), stream)); - compactifyAllocatedKernel << <gridSize, blockSize, 0, stream >> >(hashData); - //unsigned int res = 0; - //cudaSafeCall(cudaMemcpyAsync(&res, hashData.d_hashCompactifiedCounter, sizeof(unsigned int), cudaMemcpyDeviceToHost, stream)); - -#ifdef _DEBUG - cudaSafeCall(cudaDeviceSynchronize()); - //cutilCheckMsg(__FUNCTION__); -#endif - //return res; -} - - -__global__ void compactifyOccupiedKernel(HashData hashData) -{ - const HashParams& hashParams = c_hashParams; - const unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x; -#ifdef COMPACTIFY_HASH_SIMPLE - if (idx < hashParams.m_hashNumBuckets) { - if (hashData.d_hash[idx].head.offset != FREE_ENTRY && hashData.d_hash[idx].head.flags & ftl::voxhash::kFlagSurface) { - int addr = atomicAdd(hashData.d_hashCompactifiedCounter, 1); - hashData.d_hashCompactified[addr] = &hashData.d_hash[idx]; - } - } -#else - __shared__ int localCounter; - if (threadIdx.x == 0) localCounter = 0; - __syncthreads(); - - int addrLocal = -1; - if (idx < hashParams.m_hashNumBuckets) { - if (hashData.d_hash[idx].head.offset != FREE_ENTRY && (hashData.d_hash[idx].head.flags & ftl::voxhash::kFlagSurface)) { // TODO:(Nick) Check voxels for all 0 or all 1 - addrLocal = atomicAdd(&localCounter, 1); - } - } - - __syncthreads(); - - __shared__ int addrGlobal; - if (threadIdx.x == 0 && localCounter > 0) { - addrGlobal = atomicAdd(hashData.d_hashCompactifiedCounter, localCounter); - } - __syncthreads(); - - if (addrLocal != -1) { - const unsigned int addr = addrGlobal + addrLocal; - hashData.d_hashCompactified[addr] = &hashData.d_hash[idx]; - } -#endif -} - -void ftl::cuda::compactifyOccupied(HashData& hashData, const HashParams& hashParams, cudaStream_t stream) { - const unsigned int threadsPerBlock = COMPACTIFY_HASH_THREADS_PER_BLOCK; - const dim3 gridSize((hashParams.m_hashNumBuckets + threadsPerBlock - 1) / threadsPerBlock, 1); - const dim3 blockSize(threadsPerBlock, 1); - - cudaSafeCall(cudaMemsetAsync(hashData.d_hashCompactifiedCounter, 0, sizeof(int), stream)); - compactifyAllocatedKernel << <gridSize, blockSize, 0, stream >> >(hashData); - //unsigned int res = 0; - //cudaSafeCall(cudaMemcpyAsync(&res, hashData.d_hashCompactifiedCounter, sizeof(unsigned int), cudaMemcpyDeviceToHost, stream)); - -#ifdef _DEBUG - cudaSafeCall(cudaDeviceSynchronize()); - //cutilCheckMsg(__FUNCTION__); -#endif - //return res; -} diff --git a/applications/reconstruct/src/compactors.hpp b/applications/reconstruct/src/compactors.hpp deleted file mode 100644 index 6c61985eea8448a078b8abe3e821992519c9425f..0000000000000000000000000000000000000000 --- a/applications/reconstruct/src/compactors.hpp +++ /dev/null @@ -1,21 +0,0 @@ -#ifndef _FTL_RECONSTRUCT_COMPACTORS_HPP_ -#define _FTL_RECONSTRUCT_COMPACTORS_HPP_ - -#include <ftl/voxel_hash.hpp> - -namespace ftl { -namespace cuda { - -// Compact visible -//void compactifyVisible(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const DepthCameraParams &camera, cudaStream_t); - -// Compact allocated -void compactifyAllocated(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, cudaStream_t); - -// Compact visible surfaces -void 
compactifyOccupied(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, cudaStream_t stream); - -} -} - -#endif // _FTL_RECONSTRUCT_COMPACTORS_HPP_ diff --git a/applications/reconstruct/src/garbage.cu b/applications/reconstruct/src/garbage.cu deleted file mode 100644 index b685e9e6b7d94434ff425eff268699a715261522..0000000000000000000000000000000000000000 --- a/applications/reconstruct/src/garbage.cu +++ /dev/null @@ -1,135 +0,0 @@ -#include <ftl/voxel_hash.hpp> -#include "garbage.hpp" - -using namespace ftl::voxhash; - -#define T_PER_BLOCK 8 -#define NUM_CUDA_BLOCKS 10000 - -/*__global__ void starveVoxelsKernel(HashData hashData) { - int ptr; - - // Stride over all allocated blocks - for (int bi=blockIdx.x; bi<*hashData.d_hashCompactifiedCounter; bi+=NUM_CUDA_BLOCKS) { - - ptr = hashData.d_hashCompactified[bi].ptr; - int weight = hashData.d_SDFBlocks[ptr + threadIdx.x].weight; - weight = max(0, weight-2); - hashData.d_SDFBlocks[ptr + threadIdx.x].weight = weight; //CHECK Remove to totally clear previous frame (Nick) - - } -} - -void ftl::cuda::starveVoxels(HashData& hashData, const HashParams& hashParams, cudaStream_t stream) { - const unsigned int threadsPerBlock = SDF_BLOCK_SIZE*SDF_BLOCK_SIZE*SDF_BLOCK_SIZE; - const dim3 gridSize(NUM_CUDA_BLOCKS, 1); - const dim3 blockSize(threadsPerBlock, 1); - - //if (hashParams.m_numOccupiedBlocks > 0) { - starveVoxelsKernel << <gridSize, blockSize, 0, stream >> >(hashData); - //} -#ifdef _DEBUG - cudaSafeCall(cudaDeviceSynchronize()); - //cutilCheckMsg(__FUNCTION__); -#endif -}*/ - -#define ENTRIES_PER_BLOCK 4 - -__global__ void clearVoxelsKernel(HashData hashData) { - const int lane = threadIdx.x % 16; - const int halfWarp = threadIdx.x / 16; - - // Stride over all allocated blocks - for (int bi=blockIdx.x+halfWarp; bi<*hashData.d_hashCompactifiedCounter; bi+=NUM_CUDA_BLOCKS*ENTRIES_PER_BLOCK) { - - HashEntry *entry = hashData.d_hashCompactified[bi]; - //hashData.d_SDFBlocks[entry.ptr + threadIdx.x].weight = 0; - entry->voxels[lane] = 0; - - } -} - -void ftl::cuda::clearVoxels(HashData& hashData, const HashParams& hashParams) { - const unsigned int threadsPerBlock = 16 * ENTRIES_PER_BLOCK; - const dim3 gridSize(NUM_CUDA_BLOCKS, 1); - const dim3 blockSize(threadsPerBlock, 1); - - clearVoxelsKernel << <gridSize, blockSize >> >(hashData); -} - - -__global__ void garbageCollectIdentifyKernel(HashData hashData) { - const int lane = threadIdx.x % 16; - const int halfWarp = threadIdx.x / 16; - - // Stride over all allocated blocks - for (int bi=blockIdx.x+halfWarp; bi<*hashData.d_hashCompactifiedCounter; bi+=NUM_CUDA_BLOCKS * ENTRIES_PER_BLOCK) { - - const HashEntry *entry = hashData.d_hashCompactified[bi]; - - const uint v = entry->voxels[lane]; - const uint mask = (halfWarp & 0x1) ? 0xFFFF0000 : 0x0000FFFF; - uint ballot_result = __ballot_sync(mask, v == 0 || v == 0xFFFFFFFF); - - if (lane == 0) hashData.d_hashDecision[bi] = (ballot_result == mask) ? 
1 : 0; - - } -} - -void ftl::cuda::garbageCollectIdentify(HashData& hashData, const HashParams& hashParams, cudaStream_t stream) { - - const unsigned int threadsPerBlock = SDF_BLOCK_SIZE * SDF_BLOCK_SIZE * SDF_BLOCK_SIZE / 2; - const dim3 gridSize(NUM_CUDA_BLOCKS, 1); - const dim3 blockSize(threadsPerBlock, 1); - - //if (hashParams.m_numOccupiedBlocks > 0) { - garbageCollectIdentifyKernel << <gridSize, blockSize, 0, stream >> >(hashData); - //} -#ifdef _DEBUG - cudaSafeCall(cudaDeviceSynchronize()); - //cutilCheckMsg(__FUNCTION__); -#endif -} - - -__global__ void garbageCollectFreeKernel(HashData hashData) { - - // Stride over all allocated blocks - for (int bi=blockIdx.x*blockDim.x + threadIdx.x; bi<*hashData.d_hashCompactifiedCounter; bi+=NUM_CUDA_BLOCKS*blockDim.x) { - - HashEntry *entry = hashData.d_hashCompactified[bi]; - - if ((entry->head.flags & ftl::voxhash::kFlagSurface) == 0) { //decision to delete the hash entry - - - //if (entry->head.offset == FREE_ENTRY) return; //should never happen since we did compactify before - - int3 posI3 = make_int3(entry->head.posXYZ.x, entry->head.posXYZ.y, entry->head.posXYZ.z); - - if (hashData.deleteHashEntryElement(posI3)) { //delete hash entry from hash (and performs heap append) - //#pragma unroll - //for (uint i = 0; i < 16; i++) { //clear sdf block: CHECK TODO another kernel? - // entry->voxels[i] = 0; - //} - } - } - - } -} - - -void ftl::cuda::garbageCollectFree(HashData& hashData, const HashParams& hashParams, cudaStream_t stream) { - - const unsigned int threadsPerBlock = T_PER_BLOCK*T_PER_BLOCK; - const dim3 gridSize(NUM_CUDA_BLOCKS, 1); // (hashParams.m_numOccupiedBlocks + threadsPerBlock - 1) / threadsPerBlock - const dim3 blockSize(threadsPerBlock, 1); - - //if (hashParams.m_numOccupiedBlocks > 0) { - garbageCollectFreeKernel << <gridSize, blockSize, 0, stream >> >(hashData); - //} -#ifdef _DEBUG - cudaSafeCall(cudaDeviceSynchronize()); - //cutilCheckMsg(__FUNCTION__); -#endif -} diff --git a/applications/reconstruct/src/garbage.hpp b/applications/reconstruct/src/garbage.hpp deleted file mode 100644 index 5d1d7574d252b40da18008da39f1bf89a7d667fb..0000000000000000000000000000000000000000 --- a/applications/reconstruct/src/garbage.hpp +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef _FTL_RECONSTRUCTION_GARBAGE_HPP_ -#define _FTL_RECONSTRUCTION_GARBAGE_HPP_ - -namespace ftl { -namespace cuda { - -void clearVoxels(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams); -void starveVoxels(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, cudaStream_t stream); -void garbageCollectIdentify(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, cudaStream_t stream); -void garbageCollectFree(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, cudaStream_t stream); - -} -} - -#endif // _FTL_RECONSTRUCTION_GARBAGE_HPP_ diff --git a/applications/reconstruct/src/ilw.cpp b/applications/reconstruct/src/ilw.cpp new file mode 100644 index 0000000000000000000000000000000000000000..86a4cca5e4f82047ed6591a137b694788da879eb --- /dev/null +++ b/applications/reconstruct/src/ilw.cpp @@ -0,0 +1,120 @@ +#include "ilw.hpp" +#include <ftl/utility/matrix_conversion.hpp> +#include <ftl/rgbd/source.hpp> +#include <ftl/cuda/points.hpp> +#include <loguru.hpp> + +#include "ilw_cuda.hpp" + +using ftl::ILW; +using ftl::detail::ILWData; +using ftl::rgbd::Channel; +using ftl::rgbd::Channels; +using ftl::rgbd::Format; +using cv::cuda::GpuMat; + +ILW::ILW(nlohmann::json &config) : 
ftl::Configurable(config) {
+
+}
+
+ILW::~ILW() {
+
+}
+
+bool ILW::process(ftl::rgbd::FrameSet &fs, cudaStream_t stream) {
+    _phase0(fs, stream);
+
+    //for (int i=0; i<2; ++i) {
+        _phase1(fs, stream);
+        //for (int j=0; j<3; ++j) {
+        //    _phase2(fs);
+        //}
+
+        // TODO: Break if no time left
+    //}
+
+    return true;
+}
+
+bool ILW::_phase0(ftl::rgbd::FrameSet &fs, cudaStream_t stream) {
+    // Make points channel...
+    for (size_t i=0; i<fs.frames.size(); ++i) {
+        auto &f = fs.frames[i];
+        auto *s = fs.sources[i];
+
+        if (f.empty(Channel::Depth + Channel::Colour)) {
+            LOG(ERROR) << "Missing required channel";
+            continue;
+        }
+
+        auto &t = f.createTexture<float4>(Channel::Points, Format<float4>(f.get<GpuMat>(Channel::Colour).size()));
+        auto pose = MatrixConversion::toCUDA(s->getPose().cast<float>()); //.inverse());
+        ftl::cuda::point_cloud(t, f.createTexture<float>(Channel::Depth), s->parameters(), pose, stream);
+
+        // TODO: Create energy vector texture and clear it
+        // Create energy and clear it
+
+        // Convert colour from BGR to BGRA if needed
+        if (f.get<GpuMat>(Channel::Colour).type() == CV_8UC3) {
+            // Convert to 4 channel colour
+            auto &col = f.get<GpuMat>(Channel::Colour);
+            GpuMat tmp(col.size(), CV_8UC4);
+            cv::cuda::swap(col, tmp);
+            cv::cuda::cvtColor(tmp,col, cv::COLOR_BGR2BGRA);
+        }
+
+        f.createTexture<float4>(Channel::EnergyVector, Format<float4>(f.get<GpuMat>(Channel::Colour).size()));
+        f.createTexture<float>(Channel::Energy, Format<float>(f.get<GpuMat>(Channel::Colour).size()));
+        f.createTexture<uchar4>(Channel::Colour);
+    }
+
+    return true;
+}
+
+bool ILW::_phase1(ftl::rgbd::FrameSet &fs, cudaStream_t stream) {
+    // Run correspondence kernel to create an energy vector
+
+    // For each camera combination
+    for (size_t i=0; i<fs.frames.size(); ++i) {
+        for (size_t j=0; j<fs.frames.size(); ++j) {
+            if (i == j) continue;
+
+            LOG(INFO) << "Running phase1";
+
+            auto &f1 = fs.frames[i];
+            auto &f2 = fs.frames[j];
+            //auto s1 = fs.frames[i];
+            auto s2 = fs.sources[j];
+
+            auto pose = MatrixConversion::toCUDA(s2->getPose().cast<float>().inverse());
+
+            try {
+                //Calculate energy vector to best correspondence
+                ftl::cuda::correspondence_energy_vector(
+                    f1.getTexture<float4>(Channel::Points),
+                    f2.getTexture<float4>(Channel::Points),
+                    f1.getTexture<uchar4>(Channel::Colour),
+                    f2.getTexture<uchar4>(Channel::Colour),
+                    // TODO: Add normals and other things...
+                    f1.getTexture<float4>(Channel::EnergyVector),
+                    f1.getTexture<float>(Channel::Energy),
+                    pose,
+                    s2->parameters(),
+                    stream
+                );
+            } catch (ftl::exception &e) {
+                LOG(ERROR) << "Exception in correspondence: " << e.what();
+            }
+
+            LOG(INFO) << "Correspondences done... " << i;
+        }
+    }
+
+    return true;
+}
+
+bool ILW::_phase2(ftl::rgbd::FrameSet &fs) {
+    // Run energies and motion kernel
+
+    return true;
+}
diff --git a/applications/reconstruct/src/ilw.cu b/applications/reconstruct/src/ilw.cu
new file mode 100644
index 0000000000000000000000000000000000000000..90133a3a57800ee87a91fd50902deea5f701258a
--- /dev/null
+++ b/applications/reconstruct/src/ilw.cu
@@ -0,0 +1,86 @@
+#include "ilw_cuda.hpp"
+
+using ftl::cuda::TextureObject;
+using ftl::rgbd::Camera;
+
+#define WARP_SIZE 32
+#define T_PER_BLOCK 8
+#define FULL_MASK 0xffffffff
+
+__device__ inline float warpMax(float e) {
+    for (int i = WARP_SIZE/2; i > 0; i /= 2) {
+        const float other = __shfl_xor_sync(FULL_MASK, e, i, WARP_SIZE);
+        e = max(e, other);
+    }
+    return e;
+}
+
+__global__ void correspondence_energy_vector_kernel(
+        TextureObject<float4> p1,
+        TextureObject<float4> p2,
+        TextureObject<uchar4> c1,
+        TextureObject<uchar4> c2,
+        TextureObject<float4> vout,
+        TextureObject<float> eout,
+        float4x4 pose2,  // Inverse
+        Camera cam2) {
+
+    // Each warp picks point in p1
+    const int tid = (threadIdx.x + threadIdx.y * blockDim.x);
+    const int x = (blockIdx.x*blockDim.x + threadIdx.x) / WARP_SIZE;
+    const int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+    const float3 world1 = make_float3(p1.tex2D(x, y));
+    const float3 camPos2 = pose2 * world1;
+    const uint2 screen2 = cam2.camToScreen<uint2>(camPos2);
+
+    const int upsample = 8;
+
+    // Project to p2 using cam2
+    // Each thread takes a possible correspondence and calculates a weighting
+    const int lane = tid % WARP_SIZE;
+    for (int i=lane; i<upsample*upsample; i+=WARP_SIZE) {
+        const float u = (i % upsample) - (upsample / 2);
+        const float v = (i / upsample) - (upsample / 2);
+
+        const float3 world2 = make_float3(p2.tex2D(screen2.x+u, screen2.y+v));
+
+        // Determine degree of correspondence
+        const float confidence = 1.0f / length(world1 - world2);
+
+        printf("conf %f\n", confidence);
+        const float maxconf = warpMax(confidence);
+
+        // This thread has best confidence value
+        if (maxconf == confidence) {
+            vout(x,y) = vout.tex2D(x, y) + make_float4(
+                (world1.x - world2.x) * maxconf,
+                (world1.y - world2.y) * maxconf,
+                (world1.z - world2.z) * maxconf,
+                maxconf);
+            eout(x,y) = eout.tex2D(x,y) + length(world1 - world2)*maxconf;
+        }
+    }
+}
+
+void ftl::cuda::correspondence_energy_vector(
+        TextureObject<float4> &p1,
+        TextureObject<float4> &p2,
+        TextureObject<uchar4> &c1,
+        TextureObject<uchar4> &c2,
+        TextureObject<float4> &vout,
+        TextureObject<float> &eout,
+        float4x4 &pose2,
+        const Camera &cam2,
+        cudaStream_t stream) {
+
+    const dim3 gridSize((p1.width() + 2 - 1)/2, (p1.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+    const dim3 blockSize(2*WARP_SIZE, T_PER_BLOCK);
+
+    printf("COR SIZE %d,%d\n", p1.width(), p1.height());
+
+    correspondence_energy_vector_kernel<<<gridSize, blockSize, 0, stream>>>(
+        p1, p2, c1, c2, vout, eout, pose2, cam2
+    );
+    cudaSafeCall( cudaGetLastError() );
+}
diff --git a/applications/reconstruct/src/ilw.hpp b/applications/reconstruct/src/ilw.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..0be45d015e976b540263a2c16cc5605376092a43
--- /dev/null
+++ b/applications/reconstruct/src/ilw.hpp
@@ -0,0 +1,66 @@
+#ifndef _FTL_RECONSTRUCT_ILW_HPP_
+#define _FTL_RECONSTRUCT_ILW_HPP_
+
+#include <ftl/cuda_common.hpp>
+#include <ftl/rgbd/frameset.hpp>
+#include <ftl/configurable.hpp>
+#include <vector>
+
+namespace ftl {
+
+namespace detail {
+struct ILWData{
+    // x,y,z + confidence
+    ftl::cuda::TextureObject<float4> correspondence;
+
+    ftl::cuda::TextureObject<float4> points;
+
+    // Residual potential energy
+    ftl::cuda::TextureObject<float> residual;
+
+    // Flow magnitude
+    ftl::cuda::TextureObject<float> flow;
+};
+}
+
+/**
+ * For a set of sources, perform Iterative Lattice Warping to correct the
+ * location of points between the cameras. The algorithm finds possible
+ * correspondences and warps the original pixel lattice of points in each
+ * camera towards the correspondences, iterating the process as many times as
+ * possible. The result is that both local and global adjustment is made to the
+ * point clouds to improve micro alignment that may have been incorrect due to
+ * either inaccurate camera pose estimation or noise/errors in the depth maps.
+ */
+class ILW : public ftl::Configurable {
+    public:
+    explicit ILW(nlohmann::json &config);
+    ~ILW();
+
+    /**
+     * Take a frameset and perform the iterative lattice warping.
+     */
+    bool process(ftl::rgbd::FrameSet &fs, cudaStream_t stream=0);
+
+    private:
+    /*
+     * Initialise data.
+     */
+    bool _phase0(ftl::rgbd::FrameSet &fs, cudaStream_t stream);
+
+    /*
+     * Find possible correspondences and a confidence value.
+     */
+    bool _phase1(ftl::rgbd::FrameSet &fs, cudaStream_t stream);
+
+    /*
+     * Calculate energies and move the points.
+     */
+    bool _phase2(ftl::rgbd::FrameSet &fs);
+
+    std::vector<detail::ILWData> data_;
+};
+
+}
+
+#endif  // _FTL_RECONSTRUCT_ILW_HPP_
diff --git a/applications/reconstruct/src/ilw_cuda.hpp b/applications/reconstruct/src/ilw_cuda.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..a01af75149409fe033ba39ffb0170489ee926be9
--- /dev/null
+++ b/applications/reconstruct/src/ilw_cuda.hpp
@@ -0,0 +1,26 @@
+#ifndef _FTL_ILW_CUDA_HPP_
+#define _FTL_ILW_CUDA_HPP_
+
+#include <ftl/cuda_common.hpp>
+#include <ftl/rgbd/camera.hpp>
+#include <ftl/cuda_matrix_util.hpp>
+
+namespace ftl {
+namespace cuda {
+
+void correspondence_energy_vector(
+    ftl::cuda::TextureObject<float4> &p1,
+    ftl::cuda::TextureObject<float4> &p2,
+    ftl::cuda::TextureObject<uchar4> &c1,
+    ftl::cuda::TextureObject<uchar4> &c2,
+    ftl::cuda::TextureObject<float4> &vout,
+    ftl::cuda::TextureObject<float> &eout,
+    float4x4 &pose2,
+    const ftl::rgbd::Camera &cam2,
+    cudaStream_t stream
+);
+
+}
+}
+
+#endif  // _FTL_ILW_CUDA_HPP_
diff --git a/applications/reconstruct/src/integrators.cu b/applications/reconstruct/src/integrators.cu
deleted file mode 100644
index e97d55e3c8e81e397f4128b9c3e6a5093f429917..0000000000000000000000000000000000000000
--- a/applications/reconstruct/src/integrators.cu
+++ /dev/null
@@ -1,342 +0,0 @@
-#include "integrators.hpp"
-//#include <ftl/ray_cast_params.hpp>
-//#include <vector_types.h>
-//#include <cuda_runtime.h>
-#include <ftl/cuda_matrix_util.hpp>
-#include <ftl/cuda_util.hpp>
-#include <ftl/cuda_common.hpp>
-
-#define T_PER_BLOCK 8
-#define NUM_CUDA_BLOCKS 10000
-#define WARP_SIZE 32
-
-using ftl::voxhash::HashData;
-using ftl::voxhash::HashParams;
-using ftl::voxhash::Voxel;
-using ftl::voxhash::HashEntry;
-using ftl::voxhash::HashEntryHead;
-using ftl::voxhash::FREE_ENTRY;
-
-extern __constant__ ftl::voxhash::DepthCameraCUDA c_cameras[MAX_CAMERAS];
-extern __constant__ HashParams c_hashParams;
-
-__device__ float4 make_float4(uchar4 c) {
-    return make_float4(static_cast<float>(c.x), static_cast<float>(c.y), static_cast<float>(c.z), static_cast<float>(c.w));
-}
-
-__device__ float colourDistance(const uchar4 &c1, const uchar3 &c2) {
-    float x = c1.x-c2.x;
-    float y = c1.y-c2.y;
-    float z = c1.z-c2.z;
-    return x*x +
y*y + z*z; -} - -/* - * Kim, K., Chalidabhongse, T. H., Harwood, D., & Davis, L. (2005). - * Real-time foreground-background segmentation using codebook model. - * Real-Time Imaging. https://doi.org/10.1016/j.rti.2004.12.004 - */ -__device__ bool colordiff(const uchar4 &pa, const uchar3 &pb, float epsilon) { - float x_2 = pb.x * pb.x + pb.y * pb.y + pb.z * pb.z; - float v_2 = pa.x * pa.x + pa.y * pa.y + pa.z * pa.z; - float xv_2 = powf(float(pb.x * pa.x + pb.y * pa.y + pb.z * pa.z), 2.0f); - float p_2 = xv_2 / v_2; - return sqrt(x_2 - p_2) < epsilon; -} - -/* - * Guennebaud, G.; Gross, M. Algebraic point set surfaces. ACMTransactions on Graphics Vol. 26, No. 3, Article No. 23, 2007. - * Used in: FusionMLS: Highly dynamic 3D reconstruction with consumer-grade RGB-D cameras - * r = distance between points - * h = smoothing parameter in meters (default 4cm) - */ -__device__ float spatialWeighting(float r) { - const float h = c_hashParams.m_spatialSmoothing; - if (r >= h) return 0.0f; - float rh = r / h; - rh = 1.0f - rh*rh; - return rh*rh*rh*rh; -} - -__device__ float spatialWeighting(float r, float h) { - //const float h = c_hashParams.m_spatialSmoothing; - if (r >= h) return 0.0f; - float rh = r / h; - rh = 1.0f - rh*rh; - return rh*rh*rh*rh; -} - - -__global__ void integrateDepthMapsKernel(HashData hashData, HashParams hashParams, int numcams) { - __shared__ uint all_warp_ballot; - __shared__ uint voxels[16]; - - const uint i = threadIdx.x; //inside of an SDF block - const int3 po = make_int3(hashData.delinearizeVoxelIndex(i)); - - // Stride over all allocated blocks - for (int bi=blockIdx.x; bi<*hashData.d_hashCompactifiedCounter; bi+=NUM_CUDA_BLOCKS) { - - //TODO check if we should load this in shared memory - //HashEntryHead entry = hashData.d_hashCompactified[bi]->head; - - int3 pi_base = hashData.SDFBlockToVirtualVoxelPos(make_int3(hashData.d_hashCompactified[bi]->head.posXYZ)); - - //uint idx = entry.offset + i; - int3 pi = pi_base + po; - float3 pfb = hashData.virtualVoxelPosToWorld(pi); - int count = 0; - //float camdepths[MAX_CAMERAS]; - - Voxel oldVoxel; // = hashData.d_SDFBlocks[idx]; - hashData.deleteVoxel(oldVoxel); - - for (uint cam=0; cam<numcams; ++cam) { - const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam]; - - float3 pf = camera.poseInverse * pfb; - uint2 screenPos = make_uint2(camera.params.cameraToKinectScreenInt(pf)); - - // For this voxel in hash, get its screen position and check it is on screen - if (screenPos.x < camera.params.m_imageWidth && screenPos.y < camera.params.m_imageHeight) { //on screen - - //float depth = g_InputDepth[screenPos]; - float depth = tex2D<float>(camera.depth, screenPos.x, screenPos.y); - //if (depth > 20.0f) return; - - //uchar4 color = make_uchar4(0, 0, 0, 0); - //if (cameraData.d_colorData) { - //color = (cam == 0) ? 
make_uchar4(255,0,0,255) : make_uchar4(0,0,255,255); - //color = tex2D<uchar4>(camera.colour, screenPos.x, screenPos.y); - //color = bilinearFilterColor(cameraData.cameraToKinectScreenFloat(pf)); - //} - - //printf("screen pos %d\n", color.x); - //return; - - // TODO:(Nick) Accumulate weighted positions - // TODO:(Nick) Accumulate weighted normals - // TODO:(Nick) Accumulate weights - - // Depth is within accepted max distance from camera - if (depth > 0.01f && depth < hashParams.m_maxIntegrationDistance) { // valid depth and color (Nick: removed colour check) - //camdepths[count] = depth; - ++count; - - // Calculate SDF of this voxel wrt the depth map value - float sdf = depth - pf.z; - float truncation = hashData.getTruncation(depth); - float depthZeroOne = camera.params.cameraToKinectProjZ(depth); - - // Is this voxel close enough to cam for depth map value - // CHECK Nick: If is too close then free space violation so remove? - if (sdf > -truncation) // && depthZeroOne >= 0.0f && depthZeroOne <= 1.0f) //check if in truncation range should already be made in depth map computation - { - float weightUpdate = max(hashParams.m_integrationWeightSample * 1.5f * (1.0f-depthZeroOne), 1.0f); - - Voxel curr; //construct current voxel - curr.sdf = sdf; - curr.weight = weightUpdate; - //curr.color = make_uchar3(color.x, color.y, color.z); - - - //if (entry.flags != cameraParams.flags & 0xFF) { - // entry.flags = cameraParams.flags & 0xFF; - //hashData.d_SDFBlocks[idx].color = make_uchar3(0,0,0); - //} - - Voxel newVoxel; - //if (color.x == MINF) hashData.combineVoxelDepthOnly(hashData.d_SDFBlocks[idx], curr, newVoxel); - //else hashData.combineVoxel(hashData.d_SDFBlocks[idx], curr, newVoxel); - hashData.combineVoxel(oldVoxel, curr, newVoxel); - - oldVoxel = newVoxel; - - //Voxel prev = getVoxel(g_SDFBlocksSDFUAV, g_SDFBlocksRGBWUAV, idx); - //Voxel newVoxel = combineVoxel(curr, prev); - //setVoxel(g_SDFBlocksSDFUAV, g_SDFBlocksRGBWUAV, idx, newVoxel); - } - } else { - // Depth is invalid so what to do here? - // TODO(Nick) Use past voxel if available (set weight from 0 to 1) - - // Naive: need to know if this is a foreground voxel - //bool coldist = colordiff(color, hashData.d_SDFBlocks[idx].color, 5.0f); - //if (!coldist) ++count; - - } - } - } - - // Calculate voxel sign values across a warp - int warpNum = i / WARP_SIZE; - //uint ballot_result = __ballot_sync(0xFFFFFFFF, (oldVoxel.sdf >= 0.0f) ? 0 : 1); - uint ballot_result = __ballot_sync(0xFFFFFFFF, (fabs(oldVoxel.sdf) <= hashParams.m_virtualVoxelSize && oldVoxel.weight > 0) ? 1 : 0); - - // Aggregate each warp result into voxel mask - if (i % WARP_SIZE == 0) { - voxels[warpNum] = ballot_result; - } - - __syncthreads(); - - // Work out if block is occupied or not and save voxel masks - // TODO:(Nick) Is it faster to do this in a separate garbage kernel? 
- if (i < 16) { - const uint v = voxels[i]; - hashData.d_hashCompactified[bi]->voxels[i] = v; - const uint mask = 0x0000FFFF; - uint b1 = __ballot_sync(mask, v == 0xFFFFFFFF); - uint b2 = __ballot_sync(mask, v == 0); - if (i == 0) { - if (b1 != mask && b2 != mask) hashData.d_hashCompactified[bi]->head.flags |= ftl::voxhash::kFlagSurface; - else hashData.d_hashCompactified[bi]->head.flags &= ~ftl::voxhash::kFlagSurface; - } - } - - } -} - -#define WINDOW_RADIUS 1 -#define PATCH_SIZE 32 - -__global__ void integrateMLSKernel(HashData hashData, HashParams hashParams, int numcams) { - __shared__ uint voxels[16]; - - const uint i = threadIdx.x; //inside of an SDF block - const int3 po = make_int3(hashData.delinearizeVoxelIndex(i)); - const int warpNum = i / WARP_SIZE; - const int lane = i % WARP_SIZE; - - // Stride over all allocated blocks - for (int bi=blockIdx.x; bi<*hashData.d_hashCompactifiedCounter; bi+=NUM_CUDA_BLOCKS) { - - //TODO check if we should load this in shared memory - //HashEntryHead entry = hashData.d_hashCompactified[bi]->head; - - const int3 pi_base = hashData.SDFBlockToVirtualVoxelPos(make_int3(hashData.d_hashCompactified[bi]->head.posXYZ)); - - //uint idx = entry.offset + i; - const int3 pi = pi_base + po; - const float3 pfb = hashData.virtualVoxelPosToWorld(pi); - //int count = 0; - //float camdepths[MAX_CAMERAS]; - - //Voxel oldVoxel; // = hashData.d_SDFBlocks[idx]; - //hashData.deleteVoxel(oldVoxel); - - //float3 awpos = make_float3(0.0f); - //float3 awnorm = make_float3(0.0f); - //float aweights = 0.0f; - float sdf = 0.0f; - float weights = 0.0f; - - // Preload depth values - // 1. Find min and max screen positions - // 2. Subtract/Add WINDOW_RADIUS to min/max - // ... check that the buffer is not too small to cover this - // ... if buffer not big enough then don't buffer at all. - // 3. Populate shared mem depth map buffer using all threads - // 4. Adjust window lookups to use shared mem buffer - - //uint cam=0; - for (uint cam=0; cam<numcams; ++cam) { - const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam]; - const uint height = camera.params.m_imageHeight; - const uint width = camera.params.m_imageWidth; - - const float3 pf = camera.poseInverse * pfb; - const uint2 screenPos = make_uint2(camera.params.cameraToKinectScreenInt(pf)); - - //float3 wpos = make_float3(0.0f); - float3 wnorm = make_float3(0.0f); - - - #pragma unroll - for (int v=-WINDOW_RADIUS; v<=WINDOW_RADIUS; ++v) { - for (int u=-WINDOW_RADIUS; u<=WINDOW_RADIUS; ++u) { - if (screenPos.x+u < width && screenPos.y+v < height) { //on screen - float4 depth = tex2D<float4>(camera.points, screenPos.x+u, screenPos.y+v); - if (depth.z == MINF) continue; - - //float4 normal = tex2D<float4>(camera.normal, screenPos.x+u, screenPos.y+v); - const float3 camPos = camera.poseInverse * make_float3(depth); //camera.pose * camera.params.kinectDepthToSkeleton(screenPos.x+u, screenPos.y+v, depth); - const float weight = spatialWeighting(length(pf - camPos)); - - //wpos += weight*worldPos; - sdf += weight*(camPos.z - pf.z); - //sdf += camPos.z - pf.z; - //wnorm += weight*make_float3(normal); - //weights += 1.0f; - weights += weight; - } - } - } - - //awpos += wpos; - //aweights += weights; - } - - //awpos /= aweights; - //wnorm /= weights; - - sdf /= weights; - - //float sdf = (aweights == 0.0f) ? 
MINF : length(pfb - awpos); - //float sdf = wnorm.x * (pfb.x - wpos.x) + wnorm.y * (pfb.y - wpos.y) + wnorm.z * (pfb.z - wpos.z); - - //printf("WEIGHTS: %f\n", weights); - - //if (weights < 0.00001f) sdf = 0.0f; - - // Calculate voxel sign values across a warp - int warpNum = i / WARP_SIZE; - - //uint solid_ballot = __ballot_sync(0xFFFFFFFF, (fabs(sdf) < hashParams.m_virtualVoxelSize && aweights >= 0.5f) ? 1 : 0); - //uint solid_ballot = __ballot_sync(0xFFFFFFFF, (fabs(sdf) <= hashParams.m_virtualVoxelSize) ? 1 : 0); - //uint solid_ballot = __ballot_sync(0xFFFFFFFF, (aweights >= 0.0f) ? 1 : 0); - uint solid_ballot = __ballot_sync(0xFFFFFFFF, (sdf < 0.0f ) ? 1 : 0); - - // Aggregate each warp result into voxel mask - if (i % WARP_SIZE == 0) { - voxels[warpNum] = solid_ballot; - //valid[warpNum] = valid_ballot; - } - - __syncthreads(); - - // Work out if block is occupied or not and save voxel masks - // TODO:(Nick) Is it faster to do this in a separate garbage kernel? - if (i < 16) { - const uint v = voxels[i]; - hashData.d_hashCompactified[bi]->voxels[i] = v; - //hashData.d_hashCompactified[bi]->validity[i] = valid[i]; - const uint mask = 0x0000FFFF; - uint b1 = __ballot_sync(mask, v == 0xFFFFFFFF); - uint b2 = __ballot_sync(mask, v == 0); - if (i == 0) { - if (b1 != mask && b2 != mask) hashData.d_hashCompactified[bi]->head.flags |= ftl::voxhash::kFlagSurface; - else hashData.d_hashCompactified[bi]->head.flags &= ~ftl::voxhash::kFlagSurface; - } - } - - } -} - - - -void ftl::cuda::integrateDepthMaps(HashData& hashData, const HashParams& hashParams, int numcams, cudaStream_t stream) { -const unsigned int threadsPerBlock = SDF_BLOCK_SIZE*SDF_BLOCK_SIZE*SDF_BLOCK_SIZE; -const dim3 gridSize(NUM_CUDA_BLOCKS, 1); -const dim3 blockSize(threadsPerBlock, 1); - -//if (hashParams.m_numOccupiedBlocks > 0) { //this guard is important if there is no depth in the current frame (i.e., no blocks were allocated) - integrateMLSKernel << <gridSize, blockSize, 0, stream >> >(hashData, hashParams, numcams); -//} - -//cudaSafeCall( cudaGetLastError() ); -#ifdef _DEBUG -cudaSafeCall(cudaDeviceSynchronize()); -//cutilCheckMsg(__FUNCTION__); -#endif -} diff --git a/applications/reconstruct/src/integrators.hpp b/applications/reconstruct/src/integrators.hpp deleted file mode 100644 index 789551dd1fa7347bf02c518c8c5a73f6ae4269b4..0000000000000000000000000000000000000000 --- a/applications/reconstruct/src/integrators.hpp +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef _FTL_RECONSTRUCTION_INTEGRATORS_HPP_ -#define _FTL_RECONSTRUCTION_INTEGRATORS_HPP_ - -#include <ftl/voxel_hash.hpp> -#include <ftl/depth_camera.hpp> - -namespace ftl { -namespace cuda { - -/*void integrateDepthMap(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, - const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, cudaStream_t stream); - -void integrateRegistration(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, - const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, cudaStream_t stream); -*/ - -void integrateDepthMaps(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, int numcams, cudaStream_t stream); - -} -} - -#endif // _FTL_RECONSTRUCTION_INTEGRATORS_HPP_ diff --git a/applications/reconstruct/src/main.cpp b/applications/reconstruct/src/main.cpp index f381d9de555d7e534703e02b809f2a7568394b47..107ed5978cb65b28f19e4a1e6fa2cb346846b64a 100644 --- a/applications/reconstruct/src/main.cpp +++ 
b/applications/reconstruct/src/main.cpp @@ -9,14 +9,15 @@ #include <ftl/config.h> #include <ftl/configuration.hpp> #include <ftl/depth_camera.hpp> -#include <ftl/voxel_scene.hpp> #include <ftl/rgbd.hpp> -#include <ftl/virtual_source.hpp> +#include <ftl/rgbd/virtual.hpp> #include <ftl/rgbd/streamer.hpp> #include <ftl/slave.hpp> #include <ftl/rgbd/group.hpp> +#include <ftl/threads.hpp> -#include "splat_render.hpp" +#include "ilw.hpp" +#include <ftl/render/splat_render.hpp> #include <string> #include <vector> @@ -92,102 +93,77 @@ static void run(ftl::Configurable *root) { } } - ftl::voxhash::SceneRep *scene = ftl::create<ftl::voxhash::SceneRep>(root, "voxelhash"); + ftl::rgbd::FrameSet scene_A; // Output of align process + ftl::rgbd::FrameSet scene_B; // Input of render process + + //ftl::voxhash::SceneRep *scene = ftl::create<ftl::voxhash::SceneRep>(root, "voxelhash"); ftl::rgbd::Streamer *stream = ftl::create<ftl::rgbd::Streamer>(root, "stream", net); - ftl::rgbd::Source *virt = ftl::create<ftl::rgbd::Source>(root, "virtual", net); - ftl::render::Splatter *splat = new ftl::render::Splatter(scene); + ftl::rgbd::VirtualSource *virt = ftl::create<ftl::rgbd::VirtualSource>(root, "virtual"); + ftl::render::Splatter *splat = ftl::create<ftl::render::Splatter>(root, "renderer", &scene_B); ftl::rgbd::Group group; + ftl::ILW *align = ftl::create<ftl::ILW>(root, "merge"); - //auto virtimpl = new ftl::rgbd::VirtualSource(virt); - //virt->customImplementation(virtimpl); - //virtimpl->setScene(scene); + // Generate virtual camera render when requested by streamer + virt->onRender([splat,virt,&scene_B](ftl::rgbd::Frame &out) { + virt->setTimestamp(scene_B.timestamp); + splat->render(virt, out); + }); stream->add(virt); for (size_t i=0; i<sources.size(); i++) { Source *in = sources[i]; in->setChannel(Channel::Depth); - //stream->add(in); - scene->addSource(in); group.addSource(in); } + stream->setLatency(4); // FIXME: This depends on source!? stream->run(); bool busy = false; + group.setLatency(4); group.setName("ReconGroup"); - group.sync([scene,splat,virt,&busy,&slave](ftl::rgbd::FrameSet &fs) -> bool { - cudaSetDevice(scene->getCUDADevice()); + group.sync([splat,virt,&busy,&slave,&scene_A,&scene_B,&align](ftl::rgbd::FrameSet &fs) -> bool { + //cudaSetDevice(scene->getCUDADevice()); + + if (slave.isPaused()) return true; if (busy) { LOG(INFO) << "Group frameset dropped: " << fs.timestamp; return true; } busy = true; - scene->nextFrame(); - // Send all frames to GPU, block until done? - // TODO: Allow non-block and keep frameset locked until later - if (!slave.isPaused()) scene->upload(fs); + // Swap the entire frameset to allow rapid return + fs.swapTo(scene_A); - int64_t ts = fs.timestamp; - - ftl::pool.push([scene,splat,virt,&busy,ts,&slave](int id) { - cudaSetDevice(scene->getCUDADevice()); + ftl::pool.push([&scene_B,&scene_A,&busy,&slave,&align](int id) { + //cudaSetDevice(scene->getCUDADevice()); // TODO: Release frameset here... - cudaSafeCall(cudaStreamSynchronize(scene->getIntegrationStream())); + //cudaSafeCall(cudaStreamSynchronize(scene->getIntegrationStream())); - if (!slave.isPaused()) { - scene->integrate(); - scene->garbage(); - } + UNIQUE_LOCK(scene_A.mtx, lk); - // Don't render here... but update timestamp. - splat->render(ts, virt, scene->getIntegrationStream()); + // Send all frames to GPU, block until done? + scene_A.upload(Channel::Colour + Channel::Depth); // TODO: (Nick) Add scene stream. 
+ //align->process(scene_A); + + // TODO: To use second GPU, could do a download, swap, device change, + // then upload to other device. Or some direct device-2-device copy. + scene_A.swapTo(scene_B); + LOG(INFO) << "Align complete... " << scene_A.timestamp; busy = false; }); return true; }); - - /*int active = sources.size(); - while (ftl::running) { - if (active == 0) { - LOG(INFO) << "Waiting for sources..."; - sleep_for(milliseconds(1000)); - } - - active = 0; - - if (!slave.isPaused()) { - // Mark voxels as cleared - scene->nextFrame(); - - // Grab, upload frames and allocate voxel blocks - active = scene->upload(); - - // Make sure previous virtual camera frame has finished rendering - //stream->wait(); - cudaSafeCall(cudaStreamSynchronize(scene->getIntegrationStream())); - - - // Merge new frames into the voxel structure - scene->integrate(); - - //LOG(INFO) << "Allocated: " << scene->getOccupiedCount(); - - // Remove any redundant voxels - scene->garbage(); - - } else { - active = 1; - } - - splat->render(virt, scene->getIntegrationStream()); - - // Start virtual camera rendering and previous frame compression - stream->poll(); - }*/ + ftl::timer::stop(); + net->shutdown(); + delete align; + delete splat; + delete virt; + delete stream; + delete net; } int main(int argc, char **argv) { diff --git a/applications/reconstruct/src/splat_render.cpp b/applications/reconstruct/src/splat_render.cpp deleted file mode 100644 index 21e1b1db3952422e82be757fec56d3c75de0b4c8..0000000000000000000000000000000000000000 --- a/applications/reconstruct/src/splat_render.cpp +++ /dev/null @@ -1,138 +0,0 @@ -#include "splat_render.hpp" -#include "splat_render_cuda.hpp" -#include "compactors.hpp" -#include "depth_camera_cuda.hpp" - -using ftl::render::Splatter; -using ftl::rgbd::Channel; - -Splatter::Splatter(ftl::voxhash::SceneRep *scene) : scene_(scene) { - -} - -Splatter::~Splatter() { - -} - -void Splatter::render(int64_t ts, ftl::rgbd::Source *src, cudaStream_t stream) { - if (!src->isReady()) return; - - const auto &camera = src->parameters(); - - cudaSafeCall(cudaSetDevice(scene_->getCUDADevice())); - - // Create buffers if they don't exists - if ((unsigned int)depth1_.width() != camera.width || (unsigned int)depth1_.height() != camera.height) { - depth1_ = ftl::cuda::TextureObject<int>(camera.width, camera.height); - } - if ((unsigned int)depth3_.width() != camera.width || (unsigned int)depth3_.height() != camera.height) { - depth3_ = ftl::cuda::TextureObject<int>(camera.width, camera.height); - } - if ((unsigned int)colour1_.width() != camera.width || (unsigned int)colour1_.height() != camera.height) { - colour1_ = ftl::cuda::TextureObject<uchar4>(camera.width, camera.height); - } - if ((unsigned int)colour_tmp_.width() != camera.width || (unsigned int)colour_tmp_.height() != camera.height) { - colour_tmp_ = ftl::cuda::TextureObject<float4>(camera.width, camera.height); - } - if ((unsigned int)normal1_.width() != camera.width || (unsigned int)normal1_.height() != camera.height) { - normal1_ = ftl::cuda::TextureObject<float4>(camera.width, camera.height); - } - if ((unsigned int)depth2_.width() != camera.width || (unsigned int)depth2_.height() != camera.height) { - depth2_ = ftl::cuda::TextureObject<float>(camera.width, camera.height); - } - if ((unsigned int)colour2_.width() != camera.width || (unsigned int)colour2_.height() != camera.height) { - colour2_ = ftl::cuda::TextureObject<uchar4>(camera.width, camera.height); - } - - // Parameters object to pass to CUDA describing the camera - 
SplatParams params; - params.m_flags = 0; - if (src->value("splatting", true) == false) params.m_flags |= ftl::render::kNoSplatting; - if (src->value("upsampling", true) == false) params.m_flags |= ftl::render::kNoUpsampling; - if (src->value("texturing", true) == false) params.m_flags |= ftl::render::kNoTexturing; - - params.m_viewMatrix = MatrixConversion::toCUDA(src->getPose().cast<float>().inverse()); - params.m_viewMatrixInverse = MatrixConversion::toCUDA(src->getPose().cast<float>()); - params.voxelSize = scene_->getHashParams().m_virtualVoxelSize; - params.camera.flags = 0; - params.camera.fx = camera.fx; - params.camera.fy = camera.fy; - params.camera.mx = -camera.cx; - params.camera.my = -camera.cy; - params.camera.m_imageWidth = camera.width; - params.camera.m_imageHeight = camera.height; - params.camera.m_sensorDepthWorldMax = camera.maxDepth; - params.camera.m_sensorDepthWorldMin = camera.minDepth; - - //ftl::cuda::compactifyAllocated(scene_->getHashData(), scene_->getHashParams(), stream); - //LOG(INFO) << "Occupied: " << scene_->getOccupiedCount(); - - if (scene_->value("voxels", false)) { - // TODO:(Nick) Stereo for voxel version - ftl::cuda::isosurface_point_image(scene_->getHashData(), depth1_, params, stream); - //ftl::cuda::splat_points(depth1_, depth2_, params, stream); - //ftl::cuda::dibr(depth2_, colour1_, scene_->cameraCount(), params, stream); - src->writeFrames(ts, colour1_, depth2_, stream); - } else { - ftl::cuda::clear_depth(depth1_, stream); - ftl::cuda::clear_depth(depth3_, stream); - ftl::cuda::clear_depth(depth2_, stream); - ftl::cuda::clear_colour(colour2_, stream); - ftl::cuda::dibr(depth1_, colour1_, normal1_, depth2_, colour_tmp_, depth3_, scene_->cameraCount(), params, stream); - - // Step 1: Put all points into virtual view to gather them - //ftl::cuda::dibr_raw(depth1_, scene_->cameraCount(), params, stream); - - // Step 2: For each point, use a warp to do MLS and up sample - //ftl::cuda::mls_render_depth(depth1_, depth3_, params, scene_->cameraCount(), stream); - - if (src->getChannel() == Channel::Depth) { - //ftl::cuda::int_to_float(depth1_, depth2_, 1.0f / 1000.0f, stream); - if (src->value("splatting", false)) { - //ftl::cuda::splat_points(depth1_, colour1_, normal1_, depth2_, colour2_, params, stream); - ftl::cuda::int_to_float(depth1_, depth2_, 1.0f / 1000.0f, stream); - src->writeFrames(ts, colour1_, depth2_, stream); - } else { - ftl::cuda::int_to_float(depth1_, depth2_, 1.0f / 1000.0f, stream); - src->writeFrames(ts, colour1_, depth2_, stream); - } - } else if (src->getChannel() == Channel::Energy) { - //ftl::cuda::int_to_float(depth1_, depth2_, 1.0f / 1000.0f, stream); - //if (src->value("splatting", false)) { - //ftl::cuda::splat_points(depth1_, colour1_, normal1_, depth2_, colour2_, params, stream); - //ftl::cuda::int_to_float(depth1_, depth2_, 1.0f / 1000.0f, stream); - src->writeFrames(ts, colour1_, depth2_, stream); - //} else { - //ftl::cuda::int_to_float(depth1_, depth2_, 1.0f / 1000.0f, stream); - // src->writeFrames(colour1_, depth2_, stream); - //} - } else if (src->getChannel() == Channel::Right) { - // Adjust pose to right eye position - Eigen::Affine3f transform(Eigen::Translation3f(camera.baseline,0.0f,0.0f)); - Eigen::Matrix4f matrix = src->getPose().cast<float>() * transform.matrix(); - params.m_viewMatrix = MatrixConversion::toCUDA(matrix.inverse()); - params.m_viewMatrixInverse = MatrixConversion::toCUDA(matrix); - - ftl::cuda::clear_depth(depth1_, stream); - ftl::cuda::dibr(depth1_, colour1_, normal1_, depth2_, 
colour_tmp_, depth3_, scene_->cameraCount(), params, stream); - src->writeFrames(ts, colour1_, colour2_, stream); - } else { - if (src->value("splatting", false)) { - //ftl::cuda::splat_points(depth1_, colour1_, normal1_, depth2_, colour2_, params, stream); - src->writeFrames(ts, colour1_, depth2_, stream); - } else { - ftl::cuda::int_to_float(depth1_, depth2_, 1.0f / 1000.0f, stream); - src->writeFrames(ts, colour1_, depth2_, stream); - } - } - } - - //ftl::cuda::median_filter(depth1_, depth2_, stream); - //ftl::cuda::splat_points(depth1_, depth2_, params, stream); - - // TODO: Second pass -} - -void Splatter::setOutputDevice(int device) { - device_ = device; -} diff --git a/applications/reconstruct/src/voxel_hash.cpp b/applications/reconstruct/src/voxel_hash.cpp deleted file mode 100644 index 6f929c746d66cae5c382bf15b2d25e26f6b46702..0000000000000000000000000000000000000000 --- a/applications/reconstruct/src/voxel_hash.cpp +++ /dev/null @@ -1,95 +0,0 @@ -#include <ftl/voxel_hash.hpp> -#include <loguru.hpp> - -using ftl::voxhash::HashData; -using ftl::voxhash::HashParams; - -void HashData::allocate(const HashParams& params, bool dataOnGPU) { - m_bIsOnGPU = dataOnGPU; - if (m_bIsOnGPU) { - cudaSafeCall(cudaMalloc(&d_hash, sizeof(HashEntry)* params.m_hashNumBuckets)); - cudaSafeCall(cudaMalloc(&d_hashDecision, sizeof(int)* params.m_hashNumBuckets)); - cudaSafeCall(cudaMalloc(&d_hashDecisionPrefix, sizeof(int)* params.m_hashNumBuckets)); - cudaSafeCall(cudaMalloc(&d_hashCompactified, sizeof(HashEntry*)* params.m_hashNumBuckets)); - cudaSafeCall(cudaMalloc(&d_hashCompactifiedCounter, sizeof(int))); - cudaSafeCall(cudaMalloc(&d_hashBucketMutex, sizeof(int)* params.m_hashNumBuckets)); - } else { - d_hash = new HashEntry[params.m_hashNumBuckets]; - d_hashDecision = new int[params.m_hashNumBuckets]; - d_hashDecisionPrefix = new int[params.m_hashNumBuckets]; - d_hashCompactified = new HashEntry*[params.m_hashNumBuckets]; - d_hashCompactifiedCounter = new int[1]; - d_hashBucketMutex = new int[params.m_hashNumBuckets]; - } - - updateParams(params); -} - -void HashData::updateParams(const HashParams& params) { - if (m_bIsOnGPU) { - updateConstantHashParams(params); - } -} - -void HashData::free() { - if (m_bIsOnGPU) { - cudaSafeCall(cudaFree(d_hash)); - cudaSafeCall(cudaFree(d_hashDecision)); - cudaSafeCall(cudaFree(d_hashDecisionPrefix)); - cudaSafeCall(cudaFree(d_hashCompactified)); - cudaSafeCall(cudaFree(d_hashCompactifiedCounter)); - cudaSafeCall(cudaFree(d_hashBucketMutex)); - } else { - if (d_hash) delete[] d_hash; - if (d_hashDecision) delete[] d_hashDecision; - if (d_hashDecisionPrefix) delete[] d_hashDecisionPrefix; - if (d_hashCompactified) delete[] d_hashCompactified; - if (d_hashCompactifiedCounter) delete[] d_hashCompactifiedCounter; - if (d_hashBucketMutex) delete[] d_hashBucketMutex; - } - - d_hash = NULL; - d_hashDecision = NULL; - d_hashDecisionPrefix = NULL; - d_hashCompactified = NULL; - d_hashCompactifiedCounter = NULL; - d_hashBucketMutex = NULL; -} - -HashData HashData::download() const { - if (!m_bIsOnGPU) return *this; - HashParams params; - - HashData hashData; - hashData.allocate(params, false); //allocate the data on the CPU - cudaSafeCall(cudaMemcpy(hashData.d_hash, d_hash, sizeof(HashEntry)* params.m_hashNumBuckets, cudaMemcpyDeviceToHost)); - cudaSafeCall(cudaMemcpy(hashData.d_hashDecision, d_hashDecision, sizeof(int)*params.m_hashNumBuckets, cudaMemcpyDeviceToHost)); - cudaSafeCall(cudaMemcpy(hashData.d_hashDecisionPrefix, d_hashDecisionPrefix, 
sizeof(int)*params.m_hashNumBuckets, cudaMemcpyDeviceToHost)); - cudaSafeCall(cudaMemcpy(hashData.d_hashCompactified, d_hashCompactified, sizeof(HashEntry*)* params.m_hashNumBuckets, cudaMemcpyDeviceToHost)); - cudaSafeCall(cudaMemcpy(hashData.d_hashCompactifiedCounter, d_hashCompactifiedCounter, sizeof(unsigned int), cudaMemcpyDeviceToHost)); - cudaSafeCall(cudaMemcpy(hashData.d_hashBucketMutex, d_hashBucketMutex, sizeof(int)* params.m_hashNumBuckets, cudaMemcpyDeviceToHost)); - - return hashData; -} - -HashData HashData::upload() const { - if (m_bIsOnGPU) return *this; - HashParams params; - - HashData hashData; - hashData.allocate(params, false); //allocate the data on the CPU - cudaSafeCall(cudaMemcpy(hashData.d_hash, d_hash, sizeof(HashEntry)* params.m_hashNumBuckets, cudaMemcpyHostToDevice)); - cudaSafeCall(cudaMemcpy(hashData.d_hashDecision, d_hashDecision, sizeof(int)*params.m_hashNumBuckets, cudaMemcpyHostToDevice)); - cudaSafeCall(cudaMemcpy(hashData.d_hashDecisionPrefix, d_hashDecisionPrefix, sizeof(int)*params.m_hashNumBuckets, cudaMemcpyHostToDevice)); - cudaSafeCall(cudaMemcpy(hashData.d_hashCompactified, d_hashCompactified, sizeof(HashEntry)* params.m_hashNumBuckets, cudaMemcpyHostToDevice)); - cudaSafeCall(cudaMemcpy(hashData.d_hashCompactifiedCounter, d_hashCompactifiedCounter, sizeof(unsigned int), cudaMemcpyHostToDevice)); - cudaSafeCall(cudaMemcpy(hashData.d_hashBucketMutex, d_hashBucketMutex, sizeof(int)* params.m_hashNumBuckets, cudaMemcpyHostToDevice)); - - return hashData; -} - -/*size_t HashData::getAllocatedBlocks() const { - unsigned int count; - cudaSafeCall(cudaMemcpy(d_heapCounter, &count, sizeof(unsigned int), cudaMemcpyDeviceToHost)); - return count; -}*/ diff --git a/applications/reconstruct/src/voxel_hash.cu b/applications/reconstruct/src/voxel_hash.cu deleted file mode 100644 index c2d07c391a6e48d2b45cc23dbf32b00878ffd5c9..0000000000000000000000000000000000000000 --- a/applications/reconstruct/src/voxel_hash.cu +++ /dev/null @@ -1,257 +0,0 @@ -#include <ftl/voxel_hash.hpp> - -using namespace ftl::voxhash; - -#define COLLISION_LIST_SIZE 6 - -__device__ inline uint64_t compactPosition(const int3 &pos) { - union __align__(8) { - short4 posXYZ; - uint64_t pos64; - }; - posXYZ.x = pos.x; posXYZ.y = pos.y; posXYZ.z = pos.z; posXYZ.w = 0; - return pos64; -} - -//! 
returns the hash entry for a given sdf block id; if there was no hash entry the returned entry will have a ptr with FREE_ENTRY set -__device__ -int HashData::getHashEntryForSDFBlockPos(const int3& sdfBlock) const { - uint h = computeHashPos(sdfBlock); //hash - uint64_t pos = compactPosition(sdfBlock); - - HashEntryHead curr; - - int i = h; - unsigned int maxIter = 0; - - #pragma unroll 2 - while (maxIter < COLLISION_LIST_SIZE) { - curr = d_hash[i].head; - - if (curr.pos == pos && curr.offset != FREE_ENTRY) return i; - if (curr.offset == 0 || curr.offset == FREE_ENTRY) break; - - i += curr.offset; //go to next element in the list - i %= (params().m_hashNumBuckets); //check for overflow - ++maxIter; - } - - // Could not find - return -1; -} - -//for histogram (collisions traversal only) -__device__ -unsigned int HashData::getNumHashLinkedList(unsigned int bucketID) { - unsigned int listLen = 0; - - unsigned int i = bucketID; //start with the last entry of the current bucket - HashEntryHead curr; curr.offset = 0; - - unsigned int maxIter = 0; - - #pragma unroll 2 - while (maxIter < COLLISION_LIST_SIZE) { - curr = d_hash[i].head; - - if (curr.offset == 0 || curr.offset == FREE_ENTRY) break; - - i += curr.offset; //go to next element in the list - i %= (params().m_hashNumBuckets); //check for overflow - ++listLen; - ++maxIter; - } - - return listLen; -} - -//pos in SDF block coordinates -__device__ -void HashData::allocBlock(const int3& pos) { - uint h = computeHashPos(pos); //hash bucket - uint i = h; - HashEntryHead curr; //curr.offset = 0; - const uint64_t pos64 = compactPosition(pos); - - unsigned int maxIter = 0; - #pragma unroll 2 - while (maxIter < COLLISION_LIST_SIZE) { - //offset = curr.offset; - curr = d_hash[i].head; //TODO MATTHIAS do by reference - if (curr.pos == pos64 && curr.offset != FREE_ENTRY) return; - if (curr.offset == 0 || curr.offset == FREE_ENTRY) break; - - i += curr.offset; //go to next element in the list - i %= (params().m_hashNumBuckets); //check for overflow - ++maxIter; - } - - // Limit reached... 
- //if (maxIter == COLLISION_LIST_SIZE) return; - - int j = i; - while (maxIter < COLLISION_LIST_SIZE) { - //offset = curr.offset; - - if (curr.offset == FREE_ENTRY) { - int prevValue = atomicExch(&d_hashBucketMutex[i], LOCK_ENTRY); - if (prevValue != LOCK_ENTRY) { - if (i == j) { - HashEntryHead& entry = d_hash[j].head; - entry.pos = pos64; - entry.offset = 0; - entry.flags = 0; - } else { - //InterlockedExchange(g_HashBucketMutex[h], LOCK_ENTRY, prevValue); //lock the hash bucket where we have found a free entry - prevValue = atomicExch(&d_hashBucketMutex[j], LOCK_ENTRY); - if (prevValue != LOCK_ENTRY) { //only proceed if the bucket has been locked - HashEntryHead& entry = d_hash[j].head; - entry.pos = pos64; - entry.offset = 0; - entry.flags = 0; // Flag block as valid in this frame (Nick) - //entry.ptr = consumeHeap() * SDF_BLOCK_SIZE*SDF_BLOCK_SIZE*SDF_BLOCK_SIZE; //memory alloc - d_hash[i].head.offset = j-i; - //setHashEntry(g_Hash, idxLastEntryInBucket, lastEntryInBucket); - } - } - } - return; //bucket was already locked - } - - ++j; - j %= (params().m_hashNumBuckets); //check for overflow - curr = d_hash[j].head; //TODO MATTHIAS do by reference - ++maxIter; - } -} - - -//!inserts a hash entry without allocating any memory: used by streaming: TODO MATTHIAS check the atomics in this function -/*__device__ -bool HashData::insertHashEntry(HashEntry entry) -{ - uint h = computeHashPos(entry.pos); - uint hp = h * HASH_BUCKET_SIZE; - - for (uint j = 0; j < HASH_BUCKET_SIZE; j++) { - uint i = j + hp; - //const HashEntry& curr = d_hash[i]; - int prevWeight = 0; - //InterlockedCompareExchange(hash[3*i+2], FREE_ENTRY, LOCK_ENTRY, prevWeight); - prevWeight = atomicCAS(&d_hash[i].ptr, FREE_ENTRY, LOCK_ENTRY); - if (prevWeight == FREE_ENTRY) { - d_hash[i] = entry; - //setHashEntry(hash, i, entry); - return true; - } - } - -#ifdef HANDLE_COLLISIONS - //updated variables as after the loop - const uint idxLastEntryInBucket = (h+1)*HASH_BUCKET_SIZE - 1; //get last index of bucket - - uint i = idxLastEntryInBucket; //start with the last entry of the current bucket - HashEntry curr; - - unsigned int maxIter = 0; - //[allow_uav_condition] - uint g_MaxLoopIterCount = params().m_hashMaxCollisionLinkedListSize; - #pragma unroll 1 - while (maxIter < g_MaxLoopIterCount) { //traverse list until end // why find the end? we you are inserting at the start !!! 
- //curr = getHashEntry(hash, i); - curr = d_hash[i]; //TODO MATTHIAS do by reference - if (curr.offset == 0) break; //we have found the end of the list - i = idxLastEntryInBucket + curr.offset; //go to next element in the list - i %= (HASH_BUCKET_SIZE * params().m_hashNumBuckets); //check for overflow - - maxIter++; - } - - maxIter = 0; - int offset = 0; - #pragma unroll 1 - while (maxIter < g_MaxLoopIterCount) { //linear search for free entry - offset++; - uint i = (idxLastEntryInBucket + offset) % (HASH_BUCKET_SIZE * params().m_hashNumBuckets); //go to next hash element - if ((offset % HASH_BUCKET_SIZE) == 0) continue; //cannot insert into a last bucket element (would conflict with other linked lists) - - int prevWeight = 0; - //InterlockedCompareExchange(hash[3*i+2], FREE_ENTRY, LOCK_ENTRY, prevWeight); //check for a free entry - uint* d_hashUI = (uint*)d_hash; - prevWeight = prevWeight = atomicCAS(&d_hashUI[3*idxLastEntryInBucket+1], (uint)FREE_ENTRY, (uint)LOCK_ENTRY); - if (prevWeight == FREE_ENTRY) { //if free entry found set prev->next = curr & curr->next = prev->next - //[allow_uav_condition] - //while(hash[3*idxLastEntryInBucket+2] == LOCK_ENTRY); // expects setHashEntry to set the ptr last, required because pos.z is packed into the same value -> prev->next = curr -> might corrput pos.z - - HashEntry lastEntryInBucket = d_hash[idxLastEntryInBucket]; //get prev (= lastEntry in Bucket) - - int newOffsetPrev = (offset << 16) | (lastEntryInBucket.pos.z & 0x0000ffff); //prev->next = curr (maintain old z-pos) - int oldOffsetPrev = 0; - //InterlockedExchange(hash[3*idxLastEntryInBucket+1], newOffsetPrev, oldOffsetPrev); //set prev offset atomically - uint* d_hashUI = (uint*)d_hash; - oldOffsetPrev = prevWeight = atomicExch(&d_hashUI[3*idxLastEntryInBucket+1], newOffsetPrev); - entry.offset = oldOffsetPrev >> 16; //remove prev z-pos from old offset - - //setHashEntry(hash, i, entry); //sets the current hashEntry with: curr->next = prev->next - d_hash[i] = entry; - return true; - } - - maxIter++; - } -#endif - - return false; -}*/ - - - -//! deletes a hash entry position for a given sdfBlock index (returns true uppon successful deletion; otherwise returns false) -__device__ -bool HashData::deleteHashEntryElement(const int3& sdfBlock) { - uint h = computeHashPos(sdfBlock); //hash bucket - const uint64_t pos = compactPosition(sdfBlock); - - int i = h; - int prev = -1; - HashEntryHead curr; - unsigned int maxIter = 0; - - #pragma unroll 2 - while (maxIter < COLLISION_LIST_SIZE) { - curr = d_hash[i].head; - - //found that dude that we need/want to delete - if (curr.pos == pos && curr.offset != FREE_ENTRY) { - //int prevValue = 0; - //InterlockedExchange(bucketMutex[h], LOCK_ENTRY, prevValue); //lock the hash bucket - int prevValue = atomicExch(&d_hashBucketMutex[i], LOCK_ENTRY); - if (prevValue == LOCK_ENTRY) return false; - if (prevValue != LOCK_ENTRY) { - prevValue = (prev >= 0) ? 
atomicExch(&d_hashBucketMutex[prev], LOCK_ENTRY) : 0; - if (prevValue == LOCK_ENTRY) return false; - if (prevValue != LOCK_ENTRY) { - //const uint linBlockSize = SDF_BLOCK_SIZE * SDF_BLOCK_SIZE * SDF_BLOCK_SIZE; - //appendHeap(curr.ptr / linBlockSize); - deleteHashEntry(i); - - if (prev >= 0) { - d_hash[prev].head.offset = curr.offset; - } - return true; - } - } - } - - if (curr.offset == 0 || curr.offset == FREE_ENTRY) { //we have found the end of the list - return false; //should actually never happen because we need to find that guy before - } - prev = i; - i += curr.offset; //go to next element in the list - i %= (params().m_hashNumBuckets); //check for overflow - - ++maxIter; - } - - return false; -} \ No newline at end of file diff --git a/applications/reconstruct/src/voxel_render.cu b/applications/reconstruct/src/voxel_render.cu deleted file mode 100644 index dbdde22cbcad34e5054748115a0232106b3ae361..0000000000000000000000000000000000000000 --- a/applications/reconstruct/src/voxel_render.cu +++ /dev/null @@ -1,261 +0,0 @@ -#include "splat_render_cuda.hpp" -//#include <cuda_runtime.h> - -#include <ftl/cuda_matrix_util.hpp> - -#include "splat_params.hpp" - -#define T_PER_BLOCK 8 -#define NUM_GROUPS_X 1024 - -#define NUM_CUDA_BLOCKS 10000 - -using ftl::cuda::TextureObject; -using ftl::render::SplatParams; - -__global__ void clearDepthKernel(ftl::voxhash::HashData hashData, TextureObject<int> depth) { - const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; - const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; - - if (x < depth.width() && y < depth.height()) { - depth(x,y) = 0x7f800000; //PINF; - //colour(x,y) = make_uchar4(76,76,82,0); - } -} - -#define SDF_BLOCK_SIZE_PAD 8 -#define SDF_BLOCK_BUFFER 512 // > 8x8x8 -#define SDF_DX 1 -#define SDF_DY SDF_BLOCK_SIZE_PAD -#define SDF_DZ (SDF_BLOCK_SIZE_PAD*SDF_BLOCK_SIZE_PAD) - -#define LOCKED 0x7FFFFFFF - -//! computes the (local) virtual voxel pos of an index; idx in [0;511] -__device__ -int3 pdelinVoxelIndex(uint idx) { - int x = idx % SDF_BLOCK_SIZE_PAD; - int y = (idx % (SDF_BLOCK_SIZE_PAD * SDF_BLOCK_SIZE_PAD)) / SDF_BLOCK_SIZE_PAD; - int z = idx / (SDF_BLOCK_SIZE_PAD * SDF_BLOCK_SIZE_PAD); - return make_int3(x,y,z); -} - -//! computes the linearized index of a local virtual voxel pos; pos in [0;7]^3 -__device__ -uint plinVoxelPos(const int3& virtualVoxelPos) { - return - virtualVoxelPos.z * SDF_BLOCK_SIZE_PAD * SDF_BLOCK_SIZE_PAD + - virtualVoxelPos.y * SDF_BLOCK_SIZE_PAD + - virtualVoxelPos.x; -} - -//! 
computes the linearized index of a local virtual voxel pos; pos in [0;7]^3 -__device__ -uint plinVoxelPos(int x, int y, int z) { - return - z * SDF_BLOCK_SIZE_PAD * SDF_BLOCK_SIZE_PAD + - y * SDF_BLOCK_SIZE_PAD + x; -} - -__device__ -void deleteVoxel(ftl::voxhash::Voxel& v) { - v.color = make_uchar3(0,0,0); - v.weight = 0; - v.sdf = PINF; -} - -__device__ inline int3 blockDelinear(const int3 &base, uint i) { - return make_int3(base.x + (i & 0x1), base.y + (i & 0x2), base.z + (i & 0x4)); -} - -__device__ inline uint blockLinear(int x, int y, int z) { - return x + (y << 1) + (z << 2); -} - -__device__ inline bool getVoxel(uint *voxels, int ix) { - return voxels[ix/32] & (0x1 << (ix % 32)); -} - -__global__ void occupied_image_kernel(ftl::voxhash::HashData hashData, TextureObject<int> depth, SplatParams params) { - __shared__ uint voxels[16]; - __shared__ ftl::voxhash::HashEntryHead block; - - // Stride over all allocated blocks - for (int bi=blockIdx.x; bi<*hashData.d_hashCompactifiedCounter; bi+=NUM_CUDA_BLOCKS) { - __syncthreads(); - - const uint i = threadIdx.x; //inside of an SDF block - - if (i == 0) block = hashData.d_hashCompactified[bi]->head; - if (i < 16) { - voxels[i] = hashData.d_hashCompactified[bi]->voxels[i]; - //valid[i] = hashData.d_hashCompactified[bi]->validity[i]; - } - - // Make sure all hash entries are cached - __syncthreads(); - - const int3 pi_base = hashData.SDFBlockToVirtualVoxelPos(make_int3(block.posXYZ)); - const int3 vp = make_int3(hashData.delinearizeVoxelIndex(i)); - const int3 pi = pi_base + vp; - const float3 worldPos = hashData.virtualVoxelPosToWorld(pi); - - const bool v = getVoxel(voxels, i); - - uchar4 color = make_uchar4(255,0,0,255); - bool is_surface = v; //((params.m_flags & ftl::render::kShowBlockBorders) && edgeX + edgeY + edgeZ >= 2); - - - // Only for surface voxels, work out screen coordinates - if (!is_surface) continue; - - // TODO: For each original camera, render a new depth map - - const float3 camPos = params.m_viewMatrix * worldPos; - const float2 screenPosf = params.camera.cameraToKinectScreenFloat(camPos); - const uint2 screenPos = make_uint2(make_int2(screenPosf)); // + make_float2(0.5f, 0.5f) - - //printf("Worldpos: %f,%f,%f\n", camPos.x, camPos.y, camPos.z); - - if (camPos.z < params.camera.m_sensorDepthWorldMin) continue; - - const unsigned int x = screenPos.x; - const unsigned int y = screenPos.y; - const int idepth = static_cast<int>(camPos.z * 1000.0f); - - // See: Gunther et al. 2013. 
A GPGPU-based Pipeline for Accelerated Rendering of Point Clouds - if (x < depth.width() && y < depth.height()) { - atomicMin(&depth(x,y), idepth); - } - - } // Stride -} - -__global__ void isosurface_image_kernel(ftl::voxhash::HashData hashData, TextureObject<int> depth, SplatParams params) { - // TODO:(Nick) Reduce bank conflicts by aligning these - __shared__ uint voxels[16]; - //__shared__ uint valid[16]; - __shared__ ftl::voxhash::HashEntryHead block; - - // Stride over all allocated blocks - for (int bi=blockIdx.x; bi<*hashData.d_hashCompactifiedCounter; bi+=NUM_CUDA_BLOCKS) { - __syncthreads(); - - const uint i = threadIdx.x; //inside of an SDF block - - if (i == 0) block = hashData.d_hashCompactified[bi]->head; - if (i < 16) { - voxels[i] = hashData.d_hashCompactified[bi]->voxels[i]; - //valid[i] = hashData.d_hashCompactified[bi]->validity[i]; - } - - // Make sure all hash entries are cached - __syncthreads(); - - const int3 pi_base = hashData.SDFBlockToVirtualVoxelPos(make_int3(block.posXYZ)); - const int3 vp = make_int3(hashData.delinearizeVoxelIndex(i)); - const int3 pi = pi_base + vp; - //const uint j = plinVoxelPos(vp); // Padded linear index - const float3 worldPos = hashData.virtualVoxelPosToWorld(pi); - - // Load distances and colours into shared memory + padding - //const ftl::voxhash::Voxel &v = hashData.d_SDFBlocks[block.ptr + i]; - //voxels[j] = v; - const bool v = getVoxel(voxels, i); - - //__syncthreads(); - - //if (voxels[j].weight == 0) continue; - if (vp.x == 7 || vp.y == 7 || vp.z == 7) continue; - - - int edgeX = (vp.x == 0 ) ? 1 : 0; - int edgeY = (vp.y == 0 ) ? 1 : 0; - int edgeZ = (vp.z == 0 ) ? 1 : 0; - - uchar4 color = make_uchar4(255,0,0,255); - bool is_surface = v; //((params.m_flags & ftl::render::kShowBlockBorders) && edgeX + edgeY + edgeZ >= 2); - //if (is_surface) color = make_uchar4(255,(vp.x == 0 && vp.y == 0 && vp.z == 0) ? 255 : 0,0,255); - - if (v) continue; // !getVoxel(valid, i) - - //if (vp.z == 7) voxels[j].color = make_uchar3(0,255,(voxels[j].sdf < 0.0f) ? 255 : 0); - - // Identify surfaces through sign change. Since we only check in one direction - // it is fine to check for any sign change? - - -#pragma unroll - for (int u=0; u<=1; u++) { - for (int v=0; v<=1; v++) { - for (int w=0; w<=1; w++) { - const int3 uvi = make_int3(vp.x+u,vp.y+v,vp.z+w); - - // Skip these cases since we didn't load voxels properly - //if (uvi.x == 8 || uvi.z == 8 || uvi.y == 8) continue; - - const bool vox = getVoxel(voxels, hashData.linearizeVoxelPos(uvi)); - if (vox) { //getVoxel(valid, hashData.linearizeVoxelPos(uvi))) { - is_surface = true; - // Should break but is slower? 
- } - } - } - } - - // Only for surface voxels, work out screen coordinates - if (!is_surface) continue; - - // TODO: For each original camera, render a new depth map - - const float3 camPos = params.m_viewMatrix * worldPos; - const float2 screenPosf = params.camera.cameraToKinectScreenFloat(camPos); - const uint2 screenPos = make_uint2(make_int2(screenPosf)); // + make_float2(0.5f, 0.5f) - - //printf("Worldpos: %f,%f,%f\n", camPos.x, camPos.y, camPos.z); - - if (camPos.z < params.camera.m_sensorDepthWorldMin) continue; - - // For this voxel in hash, get its screen position and check it is on screen - // Convert depth map to int by x1000 and use atomicMin - //const int pixsize = static_cast<int>((c_hashParams.m_virtualVoxelSize*params.camera.fx/(camPos.z*0.8f)))+1; // Magic number increase voxel to ensure coverage - - const unsigned int x = screenPos.x; - const unsigned int y = screenPos.y; - const int idepth = static_cast<int>(camPos.z * 1000.0f); - - // See: Gunther et al. 2013. A GPGPU-based Pipeline for Accelerated Rendering of Point Clouds - if (x < depth.width() && y < depth.height()) { - atomicMin(&depth(x,y), idepth); - } - - } // Stride -} - -void ftl::cuda::isosurface_point_image(const ftl::voxhash::HashData& hashData, - const TextureObject<int> &depth, - const SplatParams ¶ms, cudaStream_t stream) { - - const dim3 clear_gridSize((depth.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); - const dim3 clear_blockSize(T_PER_BLOCK, T_PER_BLOCK); - - clearDepthKernel<<<clear_gridSize, clear_blockSize, 0, stream>>>(hashData, depth); - -#ifdef _DEBUG - cudaSafeCall(cudaDeviceSynchronize()); -#endif - - const unsigned int threadsPerBlock = SDF_BLOCK_SIZE*SDF_BLOCK_SIZE*SDF_BLOCK_SIZE; - const dim3 gridSize(NUM_CUDA_BLOCKS, 1); - const dim3 blockSize(threadsPerBlock, 1); - - occupied_image_kernel<<<gridSize, blockSize, 0, stream>>>(hashData, depth, params); - - cudaSafeCall( cudaGetLastError() ); - -#ifdef _DEBUG - cudaSafeCall(cudaDeviceSynchronize()); -#endif -} - - diff --git a/applications/reconstruct/src/voxel_scene.cpp b/applications/reconstruct/src/voxel_scene.cpp index 0ab442372d2c94981a5aa959408d449f5b8a703c..09067b1f2334e0190e27022b64d374e31532f8c2 100644 --- a/applications/reconstruct/src/voxel_scene.cpp +++ b/applications/reconstruct/src/voxel_scene.cpp @@ -1,7 +1,4 @@ #include <ftl/voxel_scene.hpp> -#include "compactors.hpp" -#include "garbage.hpp" -#include "integrators.hpp" #include "depth_camera_cuda.hpp" #include <opencv2/core/cuda_stream_accessor.hpp> @@ -17,9 +14,9 @@ using std::vector; #define SAFE_DELETE_ARRAY(a) { delete [] (a); (a) = NULL; } -extern "C" void resetCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams); -extern "C" void resetHashBucketMutexCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, cudaStream_t); -extern "C" void allocCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, int camid, const DepthCameraParams &depthCameraParams, cudaStream_t); +//extern "C" void resetCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams); +//extern "C" void resetHashBucketMutexCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, cudaStream_t); +//extern "C" void allocCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, int camid, const DepthCameraParams &depthCameraParams, cudaStream_t); //extern "C" void fillDecisionArrayCUDA(ftl::voxhash::HashData& 
hashData, const ftl::voxhash::HashParams& hashParams, const DepthCameraData& depthCameraData); //extern "C" void compactifyHashCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams); //extern "C" unsigned int compactifyHashAllInOneCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams); @@ -185,7 +182,7 @@ int SceneRep::upload() { //if (i > 0) cudaSafeCall(cudaStreamSynchronize(cv::cuda::StreamAccessor::getStream(cameras_[i-1].stream))); //allocate all hash blocks which are corresponding to depth map entries - if (value("voxels", false)) _alloc(i, cv::cuda::StreamAccessor::getStream(cam.stream)); + //if (value("voxels", false)) _alloc(i, cv::cuda::StreamAccessor::getStream(cam.stream)); // Calculate normals } @@ -267,7 +264,7 @@ int SceneRep::upload(ftl::rgbd::FrameSet &fs) { //if (i > 0) cudaSafeCall(cudaStreamSynchronize(cv::cuda::StreamAccessor::getStream(cameras_[i-1].stream))); //allocate all hash blocks which are corresponding to depth map entries - if (value("voxels", false)) _alloc(i, cv::cuda::StreamAccessor::getStream(cam.stream)); + //if (value("voxels", false)) _alloc(i, cv::cuda::StreamAccessor::getStream(cam.stream)); // Calculate normals } @@ -302,7 +299,7 @@ void SceneRep::integrate() { void SceneRep::garbage() { //_compactifyAllocated(); - if (value("voxels", false)) _garbageCollect(); + //if (value("voxels", false)) _garbageCollect(); //cudaSafeCall(cudaStreamSynchronize(integ_stream_)); } @@ -422,19 +419,19 @@ void SceneRep::_alloc(int camid, cudaStream_t stream) { } else {*/ //this version is faster, but it doesn't guarantee that all blocks are allocated (staggers alloc to the next frame) - resetHashBucketMutexCUDA(m_hashData, m_hashParams, stream); - allocCUDA(m_hashData, m_hashParams, camid, cameras_[camid].params, stream); + //resetHashBucketMutexCUDA(m_hashData, m_hashParams, stream); + //allocCUDA(m_hashData, m_hashParams, camid, cameras_[camid].params, stream); //} } void SceneRep::_compactifyVisible(const DepthCameraParams &camera) { //const DepthCameraData& depthCameraData) { - ftl::cuda::compactifyOccupied(m_hashData, m_hashParams, integ_stream_); //this version uses atomics over prefix sums, which has a much better performance + //ftl::cuda::compactifyOccupied(m_hashData, m_hashParams, integ_stream_); //this version uses atomics over prefix sums, which has a much better performance //m_hashData.updateParams(m_hashParams); //make sure numOccupiedBlocks is updated on the GPU } void SceneRep::_compactifyAllocated() { - ftl::cuda::compactifyAllocated(m_hashData, m_hashParams, integ_stream_); //this version uses atomics over prefix sums, which has a much better performance + //ftl::cuda::compactifyAllocated(m_hashData, m_hashParams, integ_stream_); //this version uses atomics over prefix sums, which has a much better performance //std::cout << "Occ blocks = " << m_hashParams.m_numOccupiedBlocks << std::endl; //m_hashData.updateParams(m_hashParams); //make sure numOccupiedBlocks is updated on the GPU } @@ -444,7 +441,7 @@ void SceneRep::_compactifyAllocated() { else ftl::cuda::integrateRegistration(m_hashData, m_hashParams, depthCameraData, depthCameraParams, integ_stream_); }*/ -extern "C" void bilateralFilterFloatMap(float* d_output, float* d_input, float sigmaD, float sigmaR, unsigned int width, unsigned int height); +//extern "C" void bilateralFilterFloatMap(float* d_output, float* d_input, float sigmaD, float sigmaR, unsigned int width, unsigned int height); void SceneRep::_integrateDepthMaps() { 
//cudaSafeCall(cudaDeviceSynchronize()); @@ -459,11 +456,11 @@ void SceneRep::_integrateDepthMaps() { //ftl::cuda::hole_fill(*(cameras_[i].gpu.depth2_tex_), *(cameras_[i].gpu.depth_tex_), cameras_[i].params, integ_stream_); //bilateralFilterFloatMap(cameras_[i].gpu.depth_tex_->devicePtr(), cameras_[i].gpu.depth3_tex_->devicePtr(), 3, 7, cameras_[i].gpu.depth_tex_->width(), cameras_[i].gpu.depth_tex_->height()); } - if (value("voxels", false)) ftl::cuda::integrateDepthMaps(m_hashData, m_hashParams, cameras_.size(), integ_stream_); + //if (value("voxels", false)) ftl::cuda::integrateDepthMaps(m_hashData, m_hashParams, cameras_.size(), integ_stream_); } void SceneRep::_garbageCollect() { //ftl::cuda::garbageCollectIdentify(m_hashData, m_hashParams, integ_stream_); - resetHashBucketMutexCUDA(m_hashData, m_hashParams, integ_stream_); //needed if linked lists are enabled -> for memeory deletion - ftl::cuda::garbageCollectFree(m_hashData, m_hashParams, integ_stream_); + //resetHashBucketMutexCUDA(m_hashData, m_hashParams, integ_stream_); //needed if linked lists are enabled -> for memeory deletion + //ftl::cuda::garbageCollectFree(m_hashData, m_hashParams, integ_stream_); } diff --git a/components/codecs/src/nvpipe_encoder.cpp b/components/codecs/src/nvpipe_encoder.cpp index e03dc7073349a97bb7e090ce5b9a563b8ea2117a..42374bedecf95a749e842a3a2ed22b5226d6d390 100644 --- a/components/codecs/src/nvpipe_encoder.cpp +++ b/components/codecs/src/nvpipe_encoder.cpp @@ -91,7 +91,7 @@ bool NvPipeEncoder::encode(const cv::Mat &in, definition_t odefinition, bitrate_ bool NvPipeEncoder::_encoderMatch(const cv::Mat &in, definition_t def) { return ((in.type() == CV_32F && is_float_channel_) || - (in.type() == CV_8UC3 && !is_float_channel_)) && current_definition_ == def; + ((in.type() == CV_8UC3 || in.type() == CV_8UC4) && !is_float_channel_)) && current_definition_ == def; } bool NvPipeEncoder::_createEncoder(const cv::Mat &in, definition_t def, bitrate_t rate) { diff --git a/applications/reconstruct/include/ftl/cuda_matrix_util.hpp b/components/common/cpp/include/ftl/cuda_matrix_util.hpp similarity index 100% rename from applications/reconstruct/include/ftl/cuda_matrix_util.hpp rename to components/common/cpp/include/ftl/cuda_matrix_util.hpp diff --git a/components/renderers/cpp/CMakeLists.txt b/components/renderers/cpp/CMakeLists.txt index 33f910ca0342096bb551b430374776a86de89b2f..b575721587262e2e468d6cb48cf8c44c6771e6fc 100644 --- a/components/renderers/cpp/CMakeLists.txt +++ b/components/renderers/cpp/CMakeLists.txt @@ -1,6 +1,7 @@ add_library(ftlrender - src/display.cpp - src/rgbd_display.cpp + src/splat_render.cpp + src/splatter.cu + src/points.cu ) # These cause errors in CI build and are being removed from PCL in newer versions @@ -11,6 +12,6 @@ target_include_directories(ftlrender PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include> $<INSTALL_INTERFACE:include> PRIVATE src) -target_link_libraries(ftlrender ftlrgbd ftlcommon ftlnet Eigen3::Eigen Threads::Threads glog::glog ${OpenCV_LIBS} ${PCL_LIBRARIES}) +target_link_libraries(ftlrender ftlrgbd ftlcommon Eigen3::Eigen Threads::Threads ${OpenCV_LIBS}) #ADD_SUBDIRECTORY(test) diff --git a/applications/reconstruct/src/splat_render_cuda.hpp b/components/renderers/cpp/include/ftl/cuda/intersections.hpp similarity index 56% rename from applications/reconstruct/src/splat_render_cuda.hpp rename to components/renderers/cpp/include/ftl/cuda/intersections.hpp index e60fc8c27c0d39ef8798803a526891c5da2fca62..9cfdbc2544d9c1bd32c9f5e12a0a161f45c50d54 100644 
--- a/applications/reconstruct/src/splat_render_cuda.hpp +++ b/components/renderers/cpp/include/ftl/cuda/intersections.hpp @@ -1,11 +1,9 @@ -#ifndef _FTL_RECONSTRUCTION_SPLAT_CUDA_HPP_ -#define _FTL_RECONSTRUCTION_SPLAT_CUDA_HPP_ +#ifndef _FTL_CUDA_INTERSECTIONS_HPP_ +#define _FTL_CUDA_INTERSECTIONS_HPP_ -#include <ftl/depth_camera.hpp> -#include <ftl/voxel_hash.hpp> -//#include <ftl/ray_cast_util.hpp> - -#include "splat_params.hpp" +#ifndef PINF +#define PINF __int_as_float(0x7f800000) +#endif namespace ftl { namespace cuda { @@ -84,45 +82,7 @@ __device__ inline float intersectDistance(const float3 &n, const float3 &p0, con return PINF; } -/** - * NOTE: Not strictly isosurface currently since it includes the internals - * of objects up to at most truncation depth. - */ -void isosurface_point_image(const ftl::voxhash::HashData& hashData, - const ftl::cuda::TextureObject<int> &depth, - const ftl::render::SplatParams ¶ms, cudaStream_t stream); - -//void isosurface_point_image_stereo(const ftl::voxhash::HashData& hashData, -// const ftl::voxhash::HashParams& hashParams, -// const RayCastData &rayCastData, const RayCastParams ¶ms, -// cudaStream_t stream); - -// TODO: isosurface_point_cloud - -void splat_points(const ftl::cuda::TextureObject<int> &depth_in, - const ftl::cuda::TextureObject<uchar4> &colour_in, - const ftl::cuda::TextureObject<float4> &normal_in, - const ftl::cuda::TextureObject<float> &depth_out, - const ftl::cuda::TextureObject<uchar4> &colour_out, const ftl::render::SplatParams ¶ms, cudaStream_t stream); - -void dibr(const ftl::cuda::TextureObject<int> &depth_out, - const ftl::cuda::TextureObject<uchar4> &colour_out, - const ftl::cuda::TextureObject<float4> &normal_out, - const ftl::cuda::TextureObject<float> &confidence_out, - const ftl::cuda::TextureObject<float4> &tmp_colour, - const ftl::cuda::TextureObject<int> &tmp_depth, int numcams, - const ftl::render::SplatParams ¶ms, cudaStream_t stream); - -/** - * Directly render input depth maps to virtual view with clipping. 
- */ -void dibr_raw(const ftl::cuda::TextureObject<int> &depth_out, int numcams, - const ftl::render::SplatParams ¶ms, cudaStream_t stream); - -void dibr(const ftl::cuda::TextureObject<float> &depth_out, - const ftl::cuda::TextureObject<uchar4> &colour_out, int numcams, const ftl::render::SplatParams ¶ms, cudaStream_t stream); - } } -#endif // _FTL_RECONSTRUCTION_SPLAT_CUDA_HPP_ +#endif // _FTL_CUDA_INTERSECTIONS_HPP_ diff --git a/components/renderers/cpp/include/ftl/cuda/points.hpp b/components/renderers/cpp/include/ftl/cuda/points.hpp new file mode 100644 index 0000000000000000000000000000000000000000..deffe32777789e2b58a96aef2106975ad37e0cdd --- /dev/null +++ b/components/renderers/cpp/include/ftl/cuda/points.hpp @@ -0,0 +1,16 @@ +#ifndef _FTL_CUDA_POINTS_HPP_ +#define _FTL_CUDA_POINTS_HPP_ + +#include <ftl/cuda_common.hpp> +#include <ftl/rgbd/camera.hpp> +#include <ftl/cuda_matrix_util.hpp> + +namespace ftl { +namespace cuda { + +void point_cloud(ftl::cuda::TextureObject<float4> &output, ftl::cuda::TextureObject<float> &depth, const ftl::rgbd::Camera ¶ms, const float4x4 &pose, cudaStream_t stream); + +} +} + +#endif // _FTL_CUDA_POINTS_HPP_ diff --git a/components/renderers/cpp/include/ftl/cuda/weighting.hpp b/components/renderers/cpp/include/ftl/cuda/weighting.hpp new file mode 100644 index 0000000000000000000000000000000000000000..9498d0508605087306db2658b2a1ae1943cde536 --- /dev/null +++ b/components/renderers/cpp/include/ftl/cuda/weighting.hpp @@ -0,0 +1,23 @@ +#ifndef _FTL_CUDA_WEIGHTING_HPP_ +#define _FTL_CUDA_WEIGHTING_HPP_ + +namespace ftl { +namespace cuda { + +/* + * Guennebaud, G.; Gross, M. Algebraic point set surfaces. ACMTransactions on Graphics Vol. 26, No. 3, Article No. 23, 2007. + * Used in: FusionMLS: Highly dynamic 3D reconstruction with consumer-grade RGB-D cameras + * r = distance between points + * h = smoothing parameter in meters (default 4cm) + */ +__device__ inline float spatialWeighting(float r, float h) { + if (r >= h) return 0.0f; + float rh = r / h; + rh = 1.0f - rh*rh; + return rh*rh*rh*rh; +} + +} +} + +#endif // _FTL_CUDA_WEIGHTING_HPP_ diff --git a/components/renderers/cpp/include/ftl/display.hpp b/components/renderers/cpp/include/ftl/display.hpp deleted file mode 100644 index 05ae0bf11e5ebc3a5b8d54339bc85166effbb4a9..0000000000000000000000000000000000000000 --- a/components/renderers/cpp/include/ftl/display.hpp +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright 2019 Nicolas Pope - */ - -#ifndef _FTL_DISPLAY_HPP_ -#define _FTL_DISPLAY_HPP_ - -#include <ftl/config.h> -#include <ftl/configurable.hpp> -#include "../../../rgbd-sources/include/ftl/rgbd/camera.hpp" - -#include <nlohmann/json.hpp> -#include <opencv2/opencv.hpp> -#include "opencv2/highgui.hpp" - -#if defined HAVE_PCL -#include <pcl/common/common_headers.h> -#include <pcl/visualization/pcl_visualizer.h> -#endif // HAVE_PCL - -namespace ftl { - -/** - * Multiple local display options for disparity or point clouds. 
- */ -class Display : public ftl::Configurable { - private: - std::string name_; - public: - enum style_t { - STYLE_NORMAL, STYLE_DISPARITY, STYLE_DEPTH - }; - - public: - explicit Display(nlohmann::json &config, std::string name); - ~Display(); - - bool render(const cv::Mat &rgb, const cv::Mat &depth, const ftl::rgbd::Camera &p); - -#if defined HAVE_PCL - bool render(pcl::PointCloud<pcl::PointXYZRGB>::ConstPtr); -#endif // HAVE_PCL - bool render(const cv::Mat &img, style_t s=STYLE_NORMAL); - - bool active() const; - - bool hasDisplays(); - - void wait(int ms); - - void onKey(const std::function<void(int)> &h) { key_handlers_.push_back(h); } - - private: -#if defined HAVE_VIZ - cv::viz::Viz3d *window_; -#endif // HAVE_VIZ - -#if defined HAVE_PCL - pcl::visualization::PCLVisualizer::Ptr pclviz_; -#endif // HAVE_PCL - - bool active_; - std::vector<std::function<void(int)>> key_handlers_; -}; -}; - -#endif // _FTL_DISPLAY_HPP_ - diff --git a/components/renderers/cpp/include/ftl/render/renderer.hpp b/components/renderers/cpp/include/ftl/render/renderer.hpp new file mode 100644 index 0000000000000000000000000000000000000000..1871b9f9f2a8e1fda0766e1c2e74d2169f47f3fa --- /dev/null +++ b/components/renderers/cpp/include/ftl/render/renderer.hpp @@ -0,0 +1,35 @@ +#ifndef _FTL_RENDER_RENDERER_HPP_ +#define _FTL_RENDER_RENDERER_HPP_ + +#include <ftl/configurable.hpp> +#include <ftl/rgbd/virtual.hpp> +#include <ftl/cuda_common.hpp> + +namespace ftl { +namespace render { + +/** + * Abstract class for all renderers. A renderer takes some 3D scene and + * generates a virtual camera perspective of that scene. The scene might be + * based upon a point cloud, or an entirely virtual mesh or procedural scene. + * It is intended that multiple scenes can be rendered into a single virtual + * view using a compositing renderer, such a renderer accepting any kind of + * renderer for compositing and hence relying on this base class. + */ +class Renderer : public ftl::Configurable { + public: + explicit Renderer(nlohmann::json &config) : Configurable(config) {}; + virtual ~Renderer() {}; + + /** + * Generate a single virtual camera frame. The frame takes its pose from + * the virtual camera object passed, and writes the result into the + * virtual camera. 
+ */ + virtual bool render(ftl::rgbd::VirtualSource *, ftl::rgbd::Frame &, cudaStream_t)=0; +}; + +} +} + +#endif // _FTL_RENDER_RENDERER_HPP_ diff --git a/applications/reconstruct/src/splat_params.hpp b/components/renderers/cpp/include/ftl/render/splat_params.hpp similarity index 86% rename from applications/reconstruct/src/splat_params.hpp rename to components/renderers/cpp/include/ftl/render/splat_params.hpp index 8ae5bf345e4e0d348c414cc1ce7bb52190d5ffff..4f9c8882b161d7774388e8d9fff7337cb1d6e685 100644 --- a/applications/reconstruct/src/splat_params.hpp +++ b/components/renderers/cpp/include/ftl/render/splat_params.hpp @@ -3,7 +3,7 @@ #include <ftl/cuda_util.hpp> #include <ftl/cuda_matrix_util.hpp> -#include <ftl/depth_camera_params.hpp> +#include <ftl/rgbd/camera.hpp> namespace ftl { namespace render { @@ -18,10 +18,10 @@ struct __align__(16) SplatParams { float4x4 m_viewMatrixInverse; uint m_flags; - float voxelSize; + //float voxelSize; float depthThreshold; - DepthCameraParams camera; + ftl::rgbd::Camera camera; }; } diff --git a/applications/reconstruct/src/splat_render.hpp b/components/renderers/cpp/include/ftl/render/splat_render.hpp similarity index 63% rename from applications/reconstruct/src/splat_render.hpp rename to components/renderers/cpp/include/ftl/render/splat_render.hpp index 828db11fad6bbb5e3aeeae0899bc15549fab1708..55522c483c25f58d843543ee8d5ba42aae9c32c8 100644 --- a/applications/reconstruct/src/splat_render.hpp +++ b/components/renderers/cpp/include/ftl/render/splat_render.hpp @@ -1,14 +1,9 @@ #ifndef _FTL_RECONSTRUCTION_SPLAT_HPP_ #define _FTL_RECONSTRUCTION_SPLAT_HPP_ -#include <ftl/configurable.hpp> -#include <ftl/rgbd/source.hpp> -#include <ftl/depth_camera.hpp> -#include <ftl/voxel_scene.hpp> -//#include <ftl/ray_cast_util.hpp> -#include <ftl/cuda_common.hpp> - -#include "splat_params.hpp" +#include <ftl/render/renderer.hpp> +#include <ftl/rgbd/frameset.hpp> +#include <ftl/render/splat_params.hpp> namespace ftl { namespace render { @@ -21,26 +16,28 @@ namespace render { * on a separate machine or at a later time, the advantage being to save local * processing resources and that the first pass result may compress better. 
*/ -class Splatter { +class Splatter : public ftl::render::Renderer { public: - explicit Splatter(ftl::voxhash::SceneRep *scene); + explicit Splatter(nlohmann::json &config, ftl::rgbd::FrameSet *fs); ~Splatter(); - void render(int64_t ts, ftl::rgbd::Source *src, cudaStream_t stream=0); + bool render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cudaStream_t stream=0) override; - void setOutputDevice(int); + //void setOutputDevice(int); private: int device_; - ftl::cuda::TextureObject<int> depth1_; + /*ftl::cuda::TextureObject<int> depth1_; ftl::cuda::TextureObject<int> depth3_; ftl::cuda::TextureObject<uchar4> colour1_; ftl::cuda::TextureObject<float4> colour_tmp_; ftl::cuda::TextureObject<float> depth2_; ftl::cuda::TextureObject<uchar4> colour2_; - ftl::cuda::TextureObject<float4> normal1_; - SplatParams params_; - ftl::voxhash::SceneRep *scene_; + ftl::cuda::TextureObject<float4> normal1_;*/ + //SplatParams params_; + + ftl::rgbd::Frame temp_; + ftl::rgbd::FrameSet *scene_; }; } diff --git a/components/renderers/cpp/include/ftl/rgbd_display.hpp b/components/renderers/cpp/include/ftl/rgbd_display.hpp deleted file mode 100644 index 9c1be76fb5f38a584d3032d50b6ef046f0d0b605..0000000000000000000000000000000000000000 --- a/components/renderers/cpp/include/ftl/rgbd_display.hpp +++ /dev/null @@ -1,47 +0,0 @@ -#ifndef _FTL_RGBD_DISPLAY_HPP_ -#define _FTL_RGBD_DISPLAY_HPP_ - -#include <nlohmann/json.hpp> -#include <ftl/rgbd/source.hpp> - -using MouseAction = std::function<void(int, int, int, int)>; - -namespace ftl { -namespace rgbd { - -class Display : public ftl::Configurable { - public: - explicit Display(nlohmann::json &); - Display(nlohmann::json &, Source *); - ~Display(); - - void setSource(Source *src) { source_ = src; } - void update(); - - bool active() const { return active_; } - - void onKey(const std::function<void(int)> &h) { key_handlers_.push_back(h); } - - void wait(int ms); - - private: - Source *source_; - std::string name_; - std::vector<std::function<void(int)>> key_handlers_; - Eigen::Vector3d eye_; - Eigen::Vector3d centre_; - Eigen::Vector3d up_; - Eigen::Vector3d lookPoint_; - float lerpSpeed_; - bool active_; - MouseAction mouseaction_; - - static int viewcount__; - - void init(); -}; - -} -} - -#endif // _FTL_RGBD_DISPLAY_HPP_ diff --git a/applications/reconstruct/include/ftl/matrix_conversion.hpp b/components/renderers/cpp/include/ftl/utility/matrix_conversion.hpp similarity index 100% rename from applications/reconstruct/include/ftl/matrix_conversion.hpp rename to components/renderers/cpp/include/ftl/utility/matrix_conversion.hpp diff --git a/applications/reconstruct/src/dibr.cu b/components/renderers/cpp/src/dibr.cu similarity index 100% rename from applications/reconstruct/src/dibr.cu rename to components/renderers/cpp/src/dibr.cu diff --git a/components/renderers/cpp/src/display.cpp b/components/renderers/cpp/src/display.cpp deleted file mode 100644 index 0f838df751d0c0a315f4d0acca9798d2d7741066..0000000000000000000000000000000000000000 --- a/components/renderers/cpp/src/display.cpp +++ /dev/null @@ -1,287 +0,0 @@ -/* - * Copyright 2019 Nicolas Pope - */ - -#include <loguru.hpp> - -#include <ftl/display.hpp> -#include <ftl/utility/opencv_to_pcl.hpp> - -using ftl::Display; -using cv::Mat; -using cv::Vec3f; - -Display::Display(nlohmann::json &config, std::string name) : ftl::Configurable(config) { - name_ = name; -#if defined HAVE_VIZ - window_ = new cv::viz::Viz3d("FTL: " + name); - window_->setBackgroundColor(cv::viz::Color::white()); -#endif // HAVE_VIZ - - 
//cv::namedWindow("Image", cv::WINDOW_KEEPRATIO); - -#if defined HAVE_PCL - if (value("points", false)) { - pclviz_ = pcl::visualization::PCLVisualizer::Ptr(new pcl::visualization::PCLVisualizer ("FTL Cloud: " + name)); - pclviz_->setBackgroundColor (255, 255, 255); - pclviz_->addCoordinateSystem (1.0); - pclviz_->setShowFPS(true); - pclviz_->initCameraParameters (); - - pclviz_->registerPointPickingCallback( - [](const pcl::visualization::PointPickingEvent& event, void* viewer_void) { - if (event.getPointIndex () == -1) return; - float x, y, z; - event.getPoint(x, y, z); - LOG(INFO) << "( " << x << ", " << y << ", " << z << ")"; - }, (void*) &pclviz_); - - pclviz_->registerKeyboardCallback ( - [](const pcl::visualization::KeyboardEvent &event, void* viewer_void) { - auto viewer = *static_cast<pcl::visualization::PCLVisualizer::Ptr*>(viewer_void); - pcl::visualization::Camera cam; - viewer->getCameraParameters(cam); - - Eigen::Vector3f pos(cam.pos[0], cam.pos[1], cam.pos[2]); - Eigen::Vector3f focal(cam.focal[0], cam.focal[1], cam.focal[2]); - Eigen::Vector3f dir = focal - pos; //.normalize(); - dir.normalize(); - - const float speed = 40.0f; - - if (event.getKeySym() == "Up") { - pos += speed*dir; - focal += speed*dir; - } else if (event.getKeySym() == "Down") { - pos -= speed*dir; - focal -= speed*dir; - } else if (event.getKeySym() == "Left") { - Eigen::Matrix3f m = Eigen::AngleAxisf(-0.5f*M_PI, Eigen::Vector3f::UnitY()).toRotationMatrix(); - dir = m*dir; - pos += speed*dir; - focal += speed*dir; - } else if (event.getKeySym() == "Right") { - Eigen::Matrix3f m = Eigen::AngleAxisf(0.5f*M_PI, Eigen::Vector3f::UnitY()).toRotationMatrix(); - dir = m*dir; - pos += speed*dir; - focal += speed*dir; - } - - - cam.pos[0] = pos[0]; - cam.pos[1] = pos[1]; - cam.pos[2] = pos[2]; - cam.focal[0] = focal[0]; - cam.focal[1] = focal[1]; - cam.focal[2] = focal[2]; - viewer->setCameraParameters(cam); - - }, (void*)&pclviz_); - } -#endif // HAVE_PCL - - active_ = true; -} - -Display::~Display() { - #if defined HAVE_VIZ - delete window_; - #endif // HAVE_VIZ -} - -#ifdef HAVE_PCL -/** - * Convert an OpenCV RGB and Depth Mats to a PCL XYZRGB point cloud. 
- */ -static pcl::PointCloud<pcl::PointXYZRGB>::Ptr rgbdToPointXYZ(const cv::Mat &rgb, const cv::Mat &depth, const ftl::rgbd::Camera &p) { - const double CX = p.cx; - const double CY = p.cy; - const double FX = p.fx; - const double FY = p.fy; - - pcl::PointCloud<pcl::PointXYZRGB>::Ptr point_cloud_ptr(new pcl::PointCloud<pcl::PointXYZRGB>); - point_cloud_ptr->width = rgb.cols * rgb.rows; - point_cloud_ptr->height = 1; - - for(int i=0;i<rgb.rows;i++) { - const float *sptr = depth.ptr<float>(i); - for(int j=0;j<rgb.cols;j++) { - float d = sptr[j] * 1000.0f; - - pcl::PointXYZRGB point; - point.x = (((double)j + CX) / FX) * d; - point.y = (((double)i + CY) / FY) * d; - point.z = d; - - if (point.x == INFINITY || point.y == INFINITY || point.z > 20000.0f || point.z < 0.04f) { - point.x = 0.0f; point.y = 0.0f; point.z = 0.0f; - } - - cv::Point3_<uchar> prgb = rgb.at<cv::Point3_<uchar>>(i, j); - uint32_t rgb = (static_cast<uint32_t>(prgb.z) << 16 | static_cast<uint32_t>(prgb.y) << 8 | static_cast<uint32_t>(prgb.x)); - point.rgb = *reinterpret_cast<float*>(&rgb); - - point_cloud_ptr -> points.push_back(point); - } - } - - return point_cloud_ptr; -} -#endif // HAVE_PCL - -bool Display::render(const cv::Mat &rgb, const cv::Mat &depth, const ftl::rgbd::Camera &p) { - Mat idepth; - - if (value("points", false) && rgb.rows != 0) { -#if defined HAVE_PCL - auto pc = rgbdToPointXYZ(rgb, depth, p); - - pcl::visualization::PointCloudColorHandlerRGBField<pcl::PointXYZRGB> rgb(pc); - if (!pclviz_->updatePointCloud<pcl::PointXYZRGB> (pc, rgb, "reconstruction")) { - pclviz_->addPointCloud<pcl::PointXYZRGB> (pc, rgb, "reconstruction"); - pclviz_->setCameraPosition(-878.0, -71.0, -2315.0, -0.1, -0.99, 0.068, 0.0, -1.0, 0.0); - pclviz_->setPointCloudRenderingProperties (pcl::visualization::PCL_VISUALIZER_POINT_SIZE, 2, "reconstruction"); - } -#elif defined HAVE_VIZ - //cv::Mat Q_32F; - //calibrate_.getQ().convertTo(Q_32F, CV_32F); - /*cv::Mat_<cv::Vec3f> XYZ(depth.rows, depth.cols); // Output point cloud - reprojectImageTo3D(depth+20.0f, XYZ, q, true); - - // Remove all invalid pixels from point cloud - XYZ.setTo(Vec3f(NAN, NAN, NAN), depth == 0.0f); - - cv::viz::WCloud cloud_widget = cv::viz::WCloud(XYZ, rgb); - cloud_widget.setRenderingProperty(cv::viz::POINT_SIZE, 2); - - window_->showWidget("coosys", cv::viz::WCoordinateSystem()); - window_->showWidget("Depth", cloud_widget); - - //window_->spinOnce(40, true);*/ - -#else // HAVE_VIZ - - LOG(ERROR) << "Need OpenCV Viz module to display points"; - -#endif // HAVE_VIZ - } - - if (value("left", false)) { - if (value("crosshair", false)) { - cv::line(rgb, cv::Point(0, rgb.rows/2), cv::Point(rgb.cols-1, rgb.rows/2), cv::Scalar(0,0,255), 1); - cv::line(rgb, cv::Point(rgb.cols/2, 0), cv::Point(rgb.cols/2, rgb.rows-1), cv::Scalar(0,0,255), 1); - } - cv::namedWindow("Left: " + name_, cv::WINDOW_KEEPRATIO); - cv::imshow("Left: " + name_, rgb); - } - if (value("right", false)) { - /*if (config_["crosshair"]) { - cv::line(rgbr, cv::Point(0, rgbr.rows/2), cv::Point(rgbr.cols-1, rgbr.rows/2), cv::Scalar(0,0,255), 1); - cv::line(rgbr, cv::Point(rgbr.cols/2, 0), cv::Point(rgbr.cols/2, rgbr.rows-1), cv::Scalar(0,0,255), 1); - } - cv::namedWindow("Right: " + name_, cv::WINDOW_KEEPRATIO); - cv::imshow("Right: " + name_, rgbr);*/ - } - - if (value("disparity", false)) { - /*Mat depth32F = (focal * (float)l.cols * base_line) / depth; - normalize(depth32F, depth32F, 0, 255, NORM_MINMAX, CV_8U); - cv::imshow("Depth", depth32F); - if(cv::waitKey(10) == 27){ - //exit if ESC is pressed - 
active_ = false; - }*/ - } else if (value("depth", false)) { - if (value("flip_vert", false)) { - cv::flip(depth, idepth, 0); - } else { - idepth = depth; - } - - idepth.convertTo(idepth, CV_8U, 255.0f / 10.0f); // TODO(nick) - - applyColorMap(idepth, idepth, cv::COLORMAP_JET); - cv::imshow("Depth: " + name_, idepth); - //if(cv::waitKey(40) == 27) { - // exit if ESC is pressed - // active_ = false; - //} - } - - return true; -} - -#if defined HAVE_PCL -bool Display::render(pcl::PointCloud<pcl::PointXYZRGB>::ConstPtr pc) { - pcl::visualization::PointCloudColorHandlerRGBField<pcl::PointXYZRGB> rgb(pc); - if (pclviz_ && !pclviz_->updatePointCloud<pcl::PointXYZRGB> (pc, rgb, "reconstruction")) { - pclviz_->addPointCloud<pcl::PointXYZRGB> (pc, rgb, "reconstruction"); - pclviz_->setCameraPosition(-878.0, -71.0, -2315.0, -0.1, -0.99, 0.068, 0.0, -1.0, 0.0); - pclviz_->setPointCloudRenderingProperties (pcl::visualization::PCL_VISUALIZER_POINT_SIZE, 2, "reconstruction"); - } - return true; -} -#endif // HAVE_PCL -bool Display::render(const cv::Mat &img, style_t s) { - if (s == STYLE_NORMAL) { - cv::imshow("Image", img); - } else if (s == STYLE_DISPARITY) { - Mat idepth; - - if (value("flip_vert", false)) { - cv::flip(img, idepth, 0); - } else { - idepth = img; - } - - idepth.convertTo(idepth, CV_8U, 255.0f / 256.0f); - - applyColorMap(idepth, idepth, cv::COLORMAP_JET); - cv::imshow("Disparity", idepth); - } - - return true; -} - -bool Display::hasDisplays() { - return value("depth", false) || value("left", false) || value("right", false) || value("points", false); -} - -void Display::wait(int ms) { - if (value("points", false)) { - #if defined HAVE_PCL - if (pclviz_) pclviz_->spinOnce(20); - #elif defined HAVE_VIZ - window_->spinOnce(1, true); - #endif // HAVE_VIZ - } - - if (value("depth", false) || value("left", false) || value("right", false)) { - while (true) { - int key = cv::waitKey(ms); - - if(key == 27) { - // exit if ESC is pressed - active_ = false; - } else if (key == -1) { - return; - } else { - ms = 1; - for (auto &h : key_handlers_) { - h(key); - } - } - } - } -} - -bool Display::active() const { - #if defined HAVE_PCL - return active_ && (!pclviz_ || !pclviz_->wasStopped()); - #elif defined HAVE_VIZ - return active_ && !window_->wasStopped(); - #else - return active_; - #endif -} - diff --git a/applications/reconstruct/src/mls_cuda.hpp b/components/renderers/cpp/src/mls_cuda.hpp similarity index 100% rename from applications/reconstruct/src/mls_cuda.hpp rename to components/renderers/cpp/src/mls_cuda.hpp diff --git a/components/renderers/cpp/src/points.cu b/components/renderers/cpp/src/points.cu new file mode 100644 index 0000000000000000000000000000000000000000..39764e4c8aba523caf2758262d9f41f8782ac9dc --- /dev/null +++ b/components/renderers/cpp/src/points.cu @@ -0,0 +1,28 @@ +#include <ftl/cuda/points.hpp> + +#define T_PER_BLOCK 8 + +__global__ void point_cloud_kernel(ftl::cuda::TextureObject<float4> output, ftl::cuda::TextureObject<float> depth, ftl::rgbd::Camera params, float4x4 pose) +{ + const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; + const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x < params.width && y < params.height) { + float d = depth.tex2D((int)x, (int)y); + + output(x,y) = (d >= params.minDepth && d <= params.maxDepth) ? 
+ make_float4(pose * params.screenToCam(x, y, d), 0.0f) : + make_float4(MINF, MINF, MINF, MINF); + } +} + +void ftl::cuda::point_cloud(ftl::cuda::TextureObject<float4> &output, ftl::cuda::TextureObject<float> &depth, const ftl::rgbd::Camera ¶ms, const float4x4 &pose, cudaStream_t stream) { + const dim3 gridSize((params.width + T_PER_BLOCK - 1)/T_PER_BLOCK, (params.height + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + point_cloud_kernel<<<gridSize, blockSize, 0, stream>>>(output, depth, params, pose); + +#ifdef _DEBUG + cudaSafeCall(cudaDeviceSynchronize()); +#endif +} diff --git a/components/renderers/cpp/src/rgbd_display.cpp b/components/renderers/cpp/src/rgbd_display.cpp deleted file mode 100644 index a0d79f8aeeb42b0a0a1fd281c5f3e9065b43780c..0000000000000000000000000000000000000000 --- a/components/renderers/cpp/src/rgbd_display.cpp +++ /dev/null @@ -1,129 +0,0 @@ -#include <ftl/rgbd_display.hpp> -#include <opencv2/opencv.hpp> - -using ftl::rgbd::Source; -using ftl::rgbd::Display; -using std::string; -using cv::Mat; - -int Display::viewcount__ = 0; - -template<class T> -Eigen::Matrix<T,4,4> lookAt -( - Eigen::Matrix<T,3,1> const & eye, - Eigen::Matrix<T,3,1> const & center, - Eigen::Matrix<T,3,1> const & up -) -{ - typedef Eigen::Matrix<T,4,4> Matrix4; - typedef Eigen::Matrix<T,3,1> Vector3; - - Vector3 f = (center - eye).normalized(); - Vector3 u = up.normalized(); - Vector3 s = f.cross(u).normalized(); - u = s.cross(f); - - Matrix4 res; - res << s.x(),s.y(),s.z(),-s.dot(eye), - u.x(),u.y(),u.z(),-u.dot(eye), - -f.x(),-f.y(),-f.z(),f.dot(eye), - 0,0,0,1; - - return res; -} - -static void setMouseAction(const std::string& winName, const MouseAction &action) -{ - cv::setMouseCallback(winName, - [] (int event, int x, int y, int flags, void* userdata) { - (*(MouseAction*)userdata)(event, x, y, flags); - }, (void*)&action); -} - -Display::Display(nlohmann::json &config) : ftl::Configurable(config) { - name_ = value("name", string("View [")+std::to_string(viewcount__)+string("]")); - viewcount__++; - - init(); -} - -Display::Display(nlohmann::json &config, Source *source) - : ftl::Configurable(config) { - name_ = value("name", string("View [")+std::to_string(viewcount__)+string("]")); - viewcount__++; - init(); -} - -Display::~Display() { - -} - -void Display::init() { - active_ = true; - source_ = nullptr; - cv::namedWindow(name_, cv::WINDOW_KEEPRATIO); - - eye_ = Eigen::Vector3d(0.0, 0.0, 0.0); - centre_ = Eigen::Vector3d(0.0, 0.0, -4.0); - up_ = Eigen::Vector3d(0,1.0,0); - lookPoint_ = Eigen::Vector3d(0.0,0.0,-4.0); - lerpSpeed_ = 0.4f; - - // Keyboard camera controls - onKey([this](int key) { - //LOG(INFO) << "Key = " << key; - if (key == 81 || key == 83) { - Eigen::Quaternion<double> q; q = Eigen::AngleAxis<double>((key == 81) ? 0.01 : -0.01, up_); - eye_ = (q * (eye_ - centre_)) + centre_; - } else if (key == 84 || key == 82) { - double scalar = (key == 84) ? 0.99 : 1.01; - eye_ = ((eye_ - centre_) * scalar) + centre_; - } - }); - - // TODO(Nick) Calculate "camera" properties of viewport. 
- mouseaction_ = [this]( int event, int ux, int uy, int) { - //LOG(INFO) << "Mouse " << ux << "," << uy; - if (event == 1 && source_) { // click - Eigen::Vector4d camPos = source_->point(ux,uy); - camPos *= -1.0f; - Eigen::Vector4d worldPos = source_->getPose() * camPos; - lookPoint_ = Eigen::Vector3d(worldPos[0],worldPos[1],worldPos[2]); - LOG(INFO) << "Depth at click = " << -camPos[2]; - } - }; - ::setMouseAction(name_, mouseaction_); -} - -void Display::wait(int ms) { - while (true) { - int key = cv::waitKey(ms); - - if(key == 27) { - // exit if ESC is pressed - active_ = false; - } else if (key == -1) { - return; - } else { - ms = 1; - for (auto &h : key_handlers_) { - h(key); - } - } - } -} - -void Display::update() { - if (!source_) return; - - centre_ += (lookPoint_ - centre_) * (lerpSpeed_ * 0.1f); - Eigen::Matrix4d viewPose = lookAt<double>(eye_,centre_,up_).inverse(); - source_->setPose(viewPose); - - Mat rgb, depth; - source_->grab(); - source_->getFrames(rgb, depth); - if (rgb.rows > 0) cv::imshow(name_, rgb); - wait(1); -} diff --git a/components/renderers/cpp/src/splat_render.cpp b/components/renderers/cpp/src/splat_render.cpp new file mode 100644 index 0000000000000000000000000000000000000000..daf9f5f64c019d24ae9afb9f2e4540c59b722922 --- /dev/null +++ b/components/renderers/cpp/src/splat_render.cpp @@ -0,0 +1,172 @@ +#include <ftl/render/splat_render.hpp> +#include <ftl/utility/matrix_conversion.hpp> +#include "splatter_cuda.hpp" +#include <ftl/cuda/points.hpp> + +#include <opencv2/core/cuda_stream_accessor.hpp> + +using ftl::render::Splatter; +using ftl::rgbd::Channel; +using ftl::rgbd::Channels; +using ftl::rgbd::Format; +using cv::cuda::GpuMat; + +Splatter::Splatter(nlohmann::json &config, ftl::rgbd::FrameSet *fs) : ftl::render::Renderer(config), scene_(fs) { + +} + +Splatter::~Splatter() { + +} + +bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cudaStream_t stream) { + SHARED_LOCK(scene_->mtx, lk); + if (!src->isReady()) return false; + + const auto &camera = src->parameters(); + + //cudaSafeCall(cudaSetDevice(scene_->getCUDADevice())); + + // Create all the required channels + out.create<GpuMat>(Channel::Depth, Format<float>(camera.width, camera.height)); + out.create<GpuMat>(Channel::Colour, Format<uchar4>(camera.width, camera.height)); + + // FIXME: Use source resolutions, not virtual resolution + temp_.create<GpuMat>(Channel::Colour, Format<float4>(camera.width, camera.height)); + temp_.create<GpuMat>(Channel::Colour2, Format<uchar4>(camera.width, camera.height)); + temp_.create<GpuMat>(Channel::Contribution, Format<float>(camera.width, camera.height)); + temp_.create<GpuMat>(Channel::Depth, Format<int>(camera.width, camera.height)); + temp_.create<GpuMat>(Channel::Depth2, Format<int>(camera.width, camera.height)); + temp_.create<GpuMat>(Channel::Normals, Format<float4>(camera.width, camera.height)); + + cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream); + + // Create buffers if they don't exist + /*if ((unsigned int)depth1_.width() != camera.width || (unsigned int)depth1_.height() != camera.height) { + depth1_ = ftl::cuda::TextureObject<int>(camera.width, camera.height); + } + if ((unsigned int)depth3_.width() != camera.width || (unsigned int)depth3_.height() != camera.height) { + depth3_ = ftl::cuda::TextureObject<int>(camera.width, camera.height); + } + if ((unsigned int)colour1_.width() != camera.width || (unsigned int)colour1_.height() != camera.height) { + colour1_ = 
ftl::cuda::TextureObject<uchar4>(camera.width, camera.height); + } + if ((unsigned int)colour_tmp_.width() != camera.width || (unsigned int)colour_tmp_.height() != camera.height) { + colour_tmp_ = ftl::cuda::TextureObject<float4>(camera.width, camera.height); + } + if ((unsigned int)normal1_.width() != camera.width || (unsigned int)normal1_.height() != camera.height) { + normal1_ = ftl::cuda::TextureObject<float4>(camera.width, camera.height); + } + if ((unsigned int)depth2_.width() != camera.width || (unsigned int)depth2_.height() != camera.height) { + depth2_ = ftl::cuda::TextureObject<float>(camera.width, camera.height); + } + if ((unsigned int)colour2_.width() != camera.width || (unsigned int)colour2_.height() != camera.height) { + colour2_ = ftl::cuda::TextureObject<uchar4>(camera.width, camera.height); + }*/ + + // Parameters object to pass to CUDA describing the camera + SplatParams params; + params.m_flags = 0; + if (src->value("splatting", true) == false) params.m_flags |= ftl::render::kNoSplatting; + if (src->value("upsampling", true) == false) params.m_flags |= ftl::render::kNoUpsampling; + if (src->value("texturing", true) == false) params.m_flags |= ftl::render::kNoTexturing; + params.m_viewMatrix = MatrixConversion::toCUDA(src->getPose().cast<float>().inverse()); + params.m_viewMatrixInverse = MatrixConversion::toCUDA(src->getPose().cast<float>()); + params.camera = camera; + + // Clear all channels to 0 or max depth + temp_.get<GpuMat>(Channel::Depth).setTo(cv::Scalar(0x7FFFFFFF), cvstream); + temp_.get<GpuMat>(Channel::Depth2).setTo(cv::Scalar(0x7FFFFFFF), cvstream); + temp_.get<GpuMat>(Channel::Colour).setTo(cv::Scalar(0.0f,0.0f,0.0f,0.0f), cvstream); + temp_.get<GpuMat>(Channel::Contribution).setTo(cv::Scalar(0.0f), cvstream); + out.get<GpuMat>(Channel::Depth).setTo(cv::Scalar(1000.0f), cvstream); + out.get<GpuMat>(Channel::Colour).setTo(cv::Scalar(76,76,76), cvstream); + + //LOG(INFO) << "Render ready: " << camera.width << "," << camera.height; + + temp_.createTexture<int>(Channel::Depth); + + // Render each camera into virtual view + for (size_t i=0; i<scene_->frames.size(); ++i) { + auto &f = scene_->frames[i]; + auto *s = scene_->sources[i]; + + if (f.empty(Channel::Depth + Channel::Colour)) { + LOG(ERROR) << "Missing required channel"; + continue; + } + + // Needs to create points channel first? + if (!f.hasChannel(Channel::Points)) { + //LOG(INFO) << "Creating points... " << s->parameters().width; + + auto &t = f.createTexture<float4>(Channel::Points, Format<float4>(f.get<GpuMat>(Channel::Colour).size())); + auto pose = MatrixConversion::toCUDA(s->getPose().cast<float>()); //.inverse()); + ftl::cuda::point_cloud(t, f.createTexture<float>(Channel::Depth), s->parameters(), pose, stream); + + //LOG(INFO) << "POINTS Added"; + } + + ftl::cuda::dibr_merge( + f.createTexture<float4>(Channel::Points), + temp_.getTexture<int>(Channel::Depth), + params, stream + ); + + //LOG(INFO) << "DIBR DONE"; + } + + // TODO: Add the depth splatting step.. 
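+	// The remaining passes accumulate weighted colour into a float4 buffer plus a
+	// scalar contribution (total weight) channel, then normalise by that weight to
+	// produce the final 8-bit colour output.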
+ + temp_.createTexture<float4>(Channel::Colour); + temp_.createTexture<float>(Channel::Contribution); + + // Accumulate attribute contributions for each pixel + for (auto &f : scene_->frames) { + // Convert colour from BGR to BGRA if needed + if (f.get<GpuMat>(Channel::Colour).type() == CV_8UC3) { + // Convert to 4 channel colour + auto &col = f.get<GpuMat>(Channel::Colour); + GpuMat tmp(col.size(), CV_8UC4); + cv::cuda::swap(col, tmp); + cv::cuda::cvtColor(tmp,col, cv::COLOR_BGR2BGRA); + } + + ftl::cuda::dibr_attribute( + f.createTexture<uchar4>(Channel::Colour), + f.createTexture<float4>(Channel::Points), + temp_.getTexture<int>(Channel::Depth), + temp_.getTexture<float4>(Channel::Colour), + temp_.getTexture<float>(Channel::Contribution), + params, stream + ); + } + + // Normalise attribute contributions + ftl::cuda::dibr_normalise( + temp_.createTexture<float4>(Channel::Colour), + out.createTexture<uchar4>(Channel::Colour), + temp_.createTexture<float>(Channel::Contribution), + stream + ); + + Channel chan = src->getChannel(); + if (chan == Channel::Depth) { + temp_.get<GpuMat>(Channel::Depth).convertTo(out.get<GpuMat>(Channel::Depth), CV_32F, 1.0f / 1000.0f, cvstream); + } else if (chan == Channel::Energy) { + cv::cuda::swap(temp_.get<GpuMat>(Channel::Energy), out.create<GpuMat>(Channel::Energy)); + } else if (chan == Channel::Right) { + Eigen::Affine3f transform(Eigen::Translation3f(camera.baseline,0.0f,0.0f)); + Eigen::Matrix4f matrix = src->getPose().cast<float>() * transform.matrix(); + params.m_viewMatrix = MatrixConversion::toCUDA(matrix.inverse()); + params.m_viewMatrixInverse = MatrixConversion::toCUDA(matrix); + + // TODO: Repeat rendering process... + } + + return true; +} + +//void Splatter::setOutputDevice(int device) { +// device_ = device; +//} diff --git a/components/renderers/cpp/src/splatter.cu b/components/renderers/cpp/src/splatter.cu new file mode 100644 index 0000000000000000000000000000000000000000..c1b46fc1d5768dc15fe54bcf6e37f4655a076b56 --- /dev/null +++ b/components/renderers/cpp/src/splatter.cu @@ -0,0 +1,190 @@ +#include <ftl/render/splat_params.hpp> +#include "splatter_cuda.hpp" +#include <ftl/rgbd/camera.hpp> +#include <ftl/cuda_common.hpp> + +#include <ftl/cuda/weighting.hpp> + +#define T_PER_BLOCK 8 +#define UPSAMPLE_FACTOR 1.8f +#define WARP_SIZE 32 +#define DEPTH_THRESHOLD 0.05f +#define UPSAMPLE_MAX 60 +#define MAX_ITERATIONS 32 // Note: Must be multiple of 32 +#define SPATIAL_SMOOTHING 0.005f + +using ftl::cuda::TextureObject; +using ftl::render::SplatParams; + +/* + * Pass 1: Directly render each camera into virtual view but with no upsampling + * for sparse points. 
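+ * Depth conflicts are resolved with an atomicMin on an integer depth buffer
+ * quantised to millimetres; colour is not touched in this pass.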
+ */ + __global__ void dibr_merge_kernel(TextureObject<float4> points, TextureObject<int> depth, SplatParams params) { + const int x = blockIdx.x*blockDim.x + threadIdx.x; + const int y = blockIdx.y*blockDim.y + threadIdx.y; + + const float3 worldPos = make_float3(points.tex2D(x, y)); + if (worldPos.x == MINF) return; + + // Find the virtual screen position of current point + const float3 camPos = params.m_viewMatrix * worldPos; + if (camPos.z < params.camera.minDepth) return; + if (camPos.z > params.camera.maxDepth) return; + + const float d = camPos.z; + + const uint2 screenPos = params.camera.camToScreen<uint2>(camPos); + const unsigned int cx = screenPos.x; + const unsigned int cy = screenPos.y; + if (d > params.camera.minDepth && d < params.camera.maxDepth && cx < depth.width() && cy < depth.height()) { + // Transform estimated point to virtual cam space and output z + atomicMin(&depth(cx,cy), d * 1000.0f); + } +} + +void ftl::cuda::dibr_merge(TextureObject<float4> &points, TextureObject<int> &depth, SplatParams params, cudaStream_t stream) { + const dim3 gridSize((depth.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + dibr_merge_kernel<<<gridSize, blockSize, 0, stream>>>(points, depth, params); + cudaSafeCall( cudaGetLastError() ); +} + +//============================================================================== + +__device__ inline float4 make_float4(const uchar4 &c) { + return make_float4(c.x,c.y,c.z,c.w); +} + + +#define ENERGY_THRESHOLD 0.1f +#define SMOOTHING_MULTIPLIER_A 10.0f // For surface search +#define SMOOTHING_MULTIPLIER_B 4.0f // For z contribution +#define SMOOTHING_MULTIPLIER_C 4.0f // For colour contribution + +/* + * Pass 2: Accumulate attribute contributions if the points pass a visibility test. + */ +__global__ void dibr_attribute_contrib_kernel( + TextureObject<uchar4> colour_in, // Original colour image + TextureObject<float4> points, // Original 3D points + TextureObject<int> depth_in, // Virtual depth map + TextureObject<float4> colour_out, // Accumulated output + //TextureObject<float4> normal_out, + TextureObject<float> contrib_out, + SplatParams params) { + + //const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam]; + + const int tid = (threadIdx.x + threadIdx.y * blockDim.x); + //const int warp = tid / WARP_SIZE; + const int x = (blockIdx.x*blockDim.x + threadIdx.x) / WARP_SIZE; + const int y = blockIdx.y*blockDim.y + threadIdx.y; + + const float3 worldPos = make_float3(points.tex2D(x, y)); + //const float3 normal = make_float3(tex2D<float4>(camera.normal, x, y)); + if (worldPos.x == MINF) return; + //const float r = (camera.poseInverse * worldPos).z / camera.params.fx; + + const float3 camPos = params.m_viewMatrix * worldPos; + if (camPos.z < params.camera.minDepth) return; + if (camPos.z > params.camera.maxDepth) return; + const uint2 screenPos = params.camera.camToScreen<uint2>(camPos); + + const int upsample = 8; //min(UPSAMPLE_MAX, int((5.0f*r) * params.camera.fx / camPos.z)); + + // Not on screen so stop now... + if (screenPos.x >= depth_in.width() || screenPos.y >= depth_in.height()) return; + + // Is this point near the actual surface and therefore a contributor? + const float d = ((float)depth_in.tex2D((int)screenPos.x, (int)screenPos.y)/1000.0f); + //if (abs(d - camPos.z) > DEPTH_THRESHOLD) return; + + // TODO:(Nick) Should just one thread load these to shared mem? 
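+	// One warp handles one source pixel: the lanes share that pixel's colour and
+	// scatter its weighted contribution over an upsample x upsample window of the
+	// virtual view, using the pass-1 depth buffer to weight by surface proximity.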
+ const float4 colour = make_float4(colour_in.tex2D(x, y)); + //const float4 normal = tex2D<float4>(camera.normal, x, y); + + // Each thread in warp takes an upsample point and updates corresponding depth buffer. + const int lane = tid % WARP_SIZE; + for (int i=lane; i<upsample*upsample; i+=WARP_SIZE) { + const float u = (i % upsample) - (upsample / 2); + const float v = (i / upsample) - (upsample / 2); + + // Use the depth buffer to determine this pixels 3D position in camera space + const float d = ((float)depth_in.tex2D(screenPos.x+u, screenPos.y+v)/1000.0f); + const float3 nearest = params.camera.screenToCam((int)(screenPos.x+u),(int)(screenPos.y+v),d); + + // What is contribution of our current point at this pixel? + const float weight = ftl::cuda::spatialWeighting(length(nearest - camPos), SMOOTHING_MULTIPLIER_C*(nearest.z/params.camera.fx)); + if (screenPos.x+u < colour_out.width() && screenPos.y+v < colour_out.height() && weight > 0.0f) { // TODO: Use confidence threshold here + const float4 wcolour = colour * weight; + //const float4 wnormal = normal * weight; + + //printf("Z %f\n", d); + + // Add this points contribution to the pixel buffer + atomicAdd((float*)&colour_out(screenPos.x+u, screenPos.y+v), wcolour.x); + atomicAdd((float*)&colour_out(screenPos.x+u, screenPos.y+v)+1, wcolour.y); + atomicAdd((float*)&colour_out(screenPos.x+u, screenPos.y+v)+2, wcolour.z); + atomicAdd((float*)&colour_out(screenPos.x+u, screenPos.y+v)+3, wcolour.w); + //atomicAdd((float*)&normal_out(screenPos.x+u, screenPos.y+v), wnormal.x); + //atomicAdd((float*)&normal_out(screenPos.x+u, screenPos.y+v)+1, wnormal.y); + //atomicAdd((float*)&normal_out(screenPos.x+u, screenPos.y+v)+2, wnormal.z); + //atomicAdd((float*)&normal_out(screenPos.x+u, screenPos.y+v)+3, wnormal.w); + atomicAdd(&contrib_out(screenPos.x+u, screenPos.y+v), weight); + } + } +} + +void ftl::cuda::dibr_attribute( + TextureObject<uchar4> &colour_in, // Original colour image + TextureObject<float4> &points, // Original 3D points + TextureObject<int> &depth_in, // Virtual depth map + TextureObject<float4> &colour_out, // Accumulated output + //TextureObject<float4> normal_out, + TextureObject<float> &contrib_out, + SplatParams ¶ms, cudaStream_t stream) { + const dim3 gridSize((depth_in.width() + 2 - 1)/2, (depth_in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(2*WARP_SIZE, T_PER_BLOCK); + + dibr_attribute_contrib_kernel<<<gridSize, blockSize, 0, stream>>>( + colour_in, + points, + depth_in, + colour_out, + contrib_out, + params + ); + cudaSafeCall( cudaGetLastError() ); +} + +//============================================================================== + +__global__ void dibr_normalise_kernel( + TextureObject<float4> colour_in, + TextureObject<uchar4> colour_out, + //TextureObject<float4> normals, + TextureObject<float> contribs) { + const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; + const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x < colour_in.width() && y < colour_in.height()) { + const float4 colour = colour_in.tex2D((int)x,(int)y); + //const float4 normal = normals.tex2D((int)x,(int)y); + const float contrib = contribs.tex2D((int)x,(int)y); + + if (contrib > 0.0f) { + colour_out(x,y) = make_uchar4(colour.x / contrib, colour.y / contrib, colour.z / contrib, 0); + //normals(x,y) = normal / contrib; + } + } +} + +void ftl::cuda::dibr_normalise(TextureObject<float4> &colour_in, TextureObject<uchar4> &colour_out, TextureObject<float> &contribs, cudaStream_t stream) { + const dim3 
gridSize((colour_in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (colour_in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + dibr_normalise_kernel<<<gridSize, blockSize, 0, stream>>>(colour_in, colour_out, contribs); + cudaSafeCall( cudaGetLastError() ); +} diff --git a/components/renderers/cpp/src/splatter_cuda.hpp b/components/renderers/cpp/src/splatter_cuda.hpp new file mode 100644 index 0000000000000000000000000000000000000000..8f6557b7f3e9d9ce99bceed615946c19b1afbec2 --- /dev/null +++ b/components/renderers/cpp/src/splatter_cuda.hpp @@ -0,0 +1,28 @@ +#ifndef _FTL_RECONSTRUCTION_SPLAT_CUDA_HPP_ +#define _FTL_RECONSTRUCTION_SPLAT_CUDA_HPP_ + +#include <ftl/cuda_common.hpp> +#include <ftl/render/splat_params.hpp> + +namespace ftl { +namespace cuda { + void dibr_merge(ftl::cuda::TextureObject<float4> &points, ftl::cuda::TextureObject<int> &depth, ftl::render::SplatParams params, cudaStream_t stream); + + void dibr_attribute( + ftl::cuda::TextureObject<uchar4> &colour_in, // Original colour image + ftl::cuda::TextureObject<float4> &points, // Original 3D points + ftl::cuda::TextureObject<int> &depth_in, // Virtual depth map + ftl::cuda::TextureObject<float4> &colour_out, // Accumulated output + //TextureObject<float4> normal_out, + ftl::cuda::TextureObject<float> &contrib_out, + ftl::render::SplatParams ¶ms, cudaStream_t stream); + + void dibr_normalise( + ftl::cuda::TextureObject<float4> &colour_in, + ftl::cuda::TextureObject<uchar4> &colour_out, + ftl::cuda::TextureObject<float> &contribs, + cudaStream_t stream); +} +} + +#endif // _FTL_RECONSTRUCTION_SPLAT_CUDA_HPP_ diff --git a/components/rgbd-sources/CMakeLists.txt b/components/rgbd-sources/CMakeLists.txt index 3ac2e2de5f173a12bd7107d8d40b5d2eca3b4b5d..2b056d009a73dd7ee82adaedbf03bb98194d636f 100644 --- a/components/rgbd-sources/CMakeLists.txt +++ b/components/rgbd-sources/CMakeLists.txt @@ -4,6 +4,7 @@ set(RGBDSRC src/disparity.cpp src/source.cpp src/frame.cpp + src/frameset.cpp src/stereovideo.cpp src/middlebury_source.cpp src/net.cpp @@ -17,6 +18,7 @@ set(RGBDSRC src/cb_segmentation.cpp src/abr.cpp src/offilter.cpp + src/virtual.cpp ) if (HAVE_REALSENSE) diff --git a/components/rgbd-sources/include/ftl/rgbd/camera.hpp b/components/rgbd-sources/include/ftl/rgbd/camera.hpp index 8acad9c41d6c4950398cde7d9f44ab8ae0bc08b6..e245be1ea44f3e7e8503f585bf2c387fef821a38 100644 --- a/components/rgbd-sources/include/ftl/rgbd/camera.hpp +++ b/components/rgbd-sources/include/ftl/rgbd/camera.hpp @@ -2,23 +2,69 @@ #ifndef _FTL_RGBD_CAMERA_PARAMS_HPP_ #define _FTL_RGBD_CAMERA_PARAMS_HPP_ +#include <vector_types.h> +#include <cuda_runtime.h> +#include <ftl/cuda_util.hpp> + namespace ftl{ namespace rgbd { -struct Camera { - double fx; - double fy; - double cx; - double cy; - unsigned int width; - unsigned int height; - double minDepth; - double maxDepth; - double baseline; - double doffs; +/** + * All properties associated with cameras. This structure is designed to + * operate on CPU and GPU. + */ +struct __align__(16) Camera { + double fx; // Focal length X + double fy; // Focal length Y (usually same as fx) + double cx; // Principle point Y + double cy; // Principle point Y + unsigned int width; // Pixel width + unsigned int height; // Pixel height + double minDepth; // Near clip in meters + double maxDepth; // Far clip in meters + double baseline; // For stereo pair + double doffs; // Disparity offset + + /** + * Convert camera coordinates into screen coordinates. 
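+	 * The template parameter selects the screen coordinate type; float2, int2 and
+	 * uint2 specialisations are provided below.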
+ */ + template <typename T> __device__ T camToScreen(const float3 &pos) const; + + /** + * Convert screen plus depth into camera coordinates. + */ + __device__ float3 screenToCam(uint ux, uint uy, float depth) const; }; }; }; +// ---- IMPLEMENTATIONS -------------------------------------------------------- + +template <> __device__ +inline float2 ftl::rgbd::Camera::camToScreen<float2>(const float3 &pos) const { + return make_float2( + pos.x*fx/pos.z - cx, + pos.y*fy/pos.z - cy); +} + +template <> __device__ +inline int2 ftl::rgbd::Camera::camToScreen<int2>(const float3 &pos) const { + float2 pImage = camToScreen<float2>(pos); + return make_int2(pImage + make_float2(0.5f, 0.5f)); +} + +template <> __device__ +inline uint2 ftl::rgbd::Camera::camToScreen<uint2>(const float3 &pos) const { + int2 p = camToScreen<int2>(pos); + return make_uint2(p.x, p.y); +} + +__device__ +inline float3 ftl::rgbd::Camera::screenToCam(uint ux, uint uy, float depth) const { + const float x = ((float)ux+cx) / fx; + const float y = ((float)uy+cy) / fy; + return make_float3(depth*x, depth*y, depth); +} + #endif diff --git a/components/rgbd-sources/include/ftl/rgbd/channels.hpp b/components/rgbd-sources/include/ftl/rgbd/channels.hpp index 591e0e312b0c5a161dc40ed0d4966852d8781971..9bf731a5319fa47c501a91e09f1e2acc48c5a4a8 100644 --- a/components/rgbd-sources/include/ftl/rgbd/channels.hpp +++ b/components/rgbd-sources/include/ftl/rgbd/channels.hpp @@ -9,16 +9,21 @@ namespace rgbd { enum struct Channel : int { None = -1, - Colour = 0, + Colour = 0, // 8UC3 or 8UC4 Left = 0, - Depth = 1, - Right, - Disparity, - Deviation, - Normals, - Confidence, - Flow, - Energy, + Depth = 1, // 32S or 32F + Right = 2, // 8UC3 or 8UC4 + Colour2 = 2, + Disparity = 3, + Depth2 = 3, + Deviation = 4, + Normals = 5, // 32FC4 + Points = 6, // 32FC4 + Confidence = 7, // 32F + Contribution = 7, // 32F + EnergyVector, // 32FC4 + Flow, // 32F + Energy, // 32F LeftGray, RightGray, Overlay1 @@ -26,6 +31,21 @@ enum struct Channel : int { class Channels { public: + + class iterator { + public: + iterator(const Channels &c, unsigned int ix) : channels_(c), ix_(ix) { } + iterator operator++(); + iterator operator++(int junk); + inline ftl::rgbd::Channel operator*() { return static_cast<Channel>(static_cast<int>(ix_)); } + //ftl::rgbd::Channel operator->() { return ptr_; } + inline bool operator==(const iterator& rhs) { return ix_ == rhs.ix_; } + inline bool operator!=(const iterator& rhs) { return ix_ != rhs.ix_; } + private: + const Channels &channels_; + unsigned int ix_; + }; + inline Channels() { mask = 0; } inline explicit Channels(unsigned int m) { mask = m; } inline explicit Channels(Channel c) { mask = (c == Channel::None) ? 
0 : 0x1 << static_cast<unsigned int>(c); } @@ -46,6 +66,9 @@ class Channels { return mask & (0x1 << c); } + inline iterator begin() { return iterator(*this, 0); } + inline iterator end() { return iterator(*this, 32); } + inline operator unsigned int() { return mask; } inline operator bool() { return mask > 0; } inline operator Channel() { @@ -61,10 +84,19 @@ class Channels { static const size_t kMax = 32; + static Channels All(); + private: unsigned int mask; }; +inline Channels::iterator Channels::iterator::operator++() { Channels::iterator i = *this; while (++ix_ < 32 && !channels_.has(ix_)); return i; } +inline Channels::iterator Channels::iterator::operator++(int junk) { while (++ix_ < 32 && !channels_.has(ix_)); return *this; } + +inline Channels Channels::All() { + return Channels(0xFFFFFFFFu); +} + static const Channels kNoChannels; static const Channels kAllChannels(0xFFFFFFFFu); diff --git a/components/rgbd-sources/include/ftl/rgbd/frame.hpp b/components/rgbd-sources/include/ftl/rgbd/frame.hpp index 6574de3bf1db29be30d299fa6fe63c84c5827943..252ff271de36336938d51d616cdd5a9e87d52187 100644 --- a/components/rgbd-sources/include/ftl/rgbd/frame.hpp +++ b/components/rgbd-sources/include/ftl/rgbd/frame.hpp @@ -6,6 +6,7 @@ #include <ftl/exception.hpp> #include <opencv2/core.hpp> #include <opencv2/core/cuda.hpp> +#include <opencv2/core/cuda_stream_accessor.hpp> #include <ftl/rgbd/channels.hpp> #include <ftl/rgbd/format.hpp> @@ -23,22 +24,38 @@ namespace rgbd { // NN for depth/disparity/optflow, linear/cubic/etc. for RGB class Frame; +class Source; /** * Manage a set of image channels corresponding to a single camera frame. */ class Frame { public: - Frame() {} + Frame() : src_(nullptr) {} + explicit Frame(ftl::rgbd::Source *src) : src_(src) {} + + inline ftl::rgbd::Source *source() const { return src_; } // Prevent frame copy, instead use a move. //Frame(const Frame &)=delete; //Frame &operator=(const Frame &)=delete; - void download(ftl::rgbd::Channel c, cv::cuda::Stream& stream=cv::cuda::Stream::Null()); - void upload(ftl::rgbd::Channel c, cv::cuda::Stream& stream=cv::cuda::Stream::Null()); - void download(ftl::rgbd::Channels c, cv::cuda::Stream& stream=cv::cuda::Stream::Null()); - void upload(ftl::rgbd::Channels c, cv::cuda::Stream& stream=cv::cuda::Stream::Null()); + void download(ftl::rgbd::Channel c, cv::cuda::Stream stream); + void upload(ftl::rgbd::Channel c, cv::cuda::Stream stream); + void download(ftl::rgbd::Channels c, cv::cuda::Stream stream); + void upload(ftl::rgbd::Channels c, cv::cuda::Stream stream); + + inline void download(ftl::rgbd::Channel c, cudaStream_t stream=0) { download(c, cv::cuda::StreamAccessor::wrapStream(stream)); }; + inline void upload(ftl::rgbd::Channel c, cudaStream_t stream=0) { upload(c, cv::cuda::StreamAccessor::wrapStream(stream)); }; + inline void download(ftl::rgbd::Channels c, cudaStream_t stream=0) { download(c, cv::cuda::StreamAccessor::wrapStream(stream)); }; + inline void upload(ftl::rgbd::Channels c, cudaStream_t stream=0) { upload(c, cv::cuda::StreamAccessor::wrapStream(stream)); }; + + /** + * Perform a buffer swap of the selected channels. This is intended to be + * a copy from `this` to the passed frame object but by buffer swap + * instead of memory copy, meaning `this` may become invalid afterwards. + */ + void swapTo(ftl::rgbd::Channels, Frame &); /** * Create a channel with a given format. 
This will discard any existing @@ -71,6 +88,13 @@ public: */ void reset(); + bool empty(ftl::rgbd::Channels c); + + inline bool empty(ftl::rgbd::Channel c) { + auto &m = _get(c); + return !hasChannel(c) || (m.host.empty() && m.gpu.empty()); + } + /** * Is there valid data in channel (either host or gpu). */ @@ -78,6 +102,8 @@ public: return channels_.has(channel); } + inline ftl::rgbd::Channels getChannels() const { return channels_; } + /** * Is the channel data currently located on GPU. This also returns false if * the channel does not exist. @@ -131,6 +157,8 @@ private: ftl::rgbd::Channels channels_; // Does it have a channel ftl::rgbd::Channels gpu_; // Is the channel on a GPU + ftl::rgbd::Source *src_; + inline ChannelData &_get(ftl::rgbd::Channel c) { return data_[static_cast<unsigned int>(c)]; } inline const ChannelData &_get(ftl::rgbd::Channel c) const { return data_[static_cast<unsigned int>(c)]; } }; @@ -216,7 +244,6 @@ ftl::cuda::TextureObject<T> &Frame::createTexture(ftl::rgbd::Channel c) { LOG(INFO) << "Creating texture object"; m.tex = ftl::cuda::TextureObject<T>(m.gpu); } else if (m.tex.cvType() != ftl::traits::OpenCVType<T>::value || m.tex.width() != m.gpu.cols || m.tex.height() != m.gpu.rows || m.tex.devicePtr() != m.gpu.data) { - LOG(INFO) << "Recreating texture object"; m.tex.free(); m.tex = ftl::cuda::TextureObject<T>(m.gpu); } diff --git a/components/rgbd-sources/include/ftl/rgbd/frameset.hpp b/components/rgbd-sources/include/ftl/rgbd/frameset.hpp index 93839efda9c6dc8cfe53e6775293949d5c1747e8..2fa39e2eacf19339860e98fa98df44f687ac64c7 100644 --- a/components/rgbd-sources/include/ftl/rgbd/frameset.hpp +++ b/components/rgbd-sources/include/ftl/rgbd/frameset.hpp @@ -1,6 +1,7 @@ #ifndef _FTL_RGBD_FRAMESET_HPP_ #define _FTL_RGBD_FRAMESET_HPP_ +#include <ftl/threads.hpp> #include <ftl/rgbd/frame.hpp> #include <opencv2/opencv.hpp> @@ -24,6 +25,10 @@ struct FrameSet { std::atomic<unsigned int> mask; // Mask of all sources that contributed bool stale; // True if buffers have been invalidated SHARED_MUTEX mtx; + + void upload(ftl::rgbd::Channels, cudaStream_t stream=0); + void download(ftl::rgbd::Channels, cudaStream_t stream=0); + void swapTo(ftl::rgbd::FrameSet &); }; } diff --git a/components/rgbd-sources/include/ftl/rgbd/source.hpp b/components/rgbd-sources/include/ftl/rgbd/source.hpp index 07391d76f10f4eec69f4a7df97123dfc59270a0c..0ee163add0009023ec24e6df6bd18a1da927af1e 100644 --- a/components/rgbd-sources/include/ftl/rgbd/source.hpp +++ b/components/rgbd-sources/include/ftl/rgbd/source.hpp @@ -26,6 +26,7 @@ namespace rgbd { static inline bool isValidDepth(float d) { return (d > 0.01f) && (d < 39.99f); } class SnapshotReader; +class VirtualSource; /** * RGBD Generic data source configurable entity. This class hides the @@ -40,6 +41,7 @@ class Source : public ftl::Configurable { public: template <typename T, typename... ARGS> friend T *ftl::config::create(ftl::config::json_t &, ARGS ...); + friend class VirtualSource; //template <typename T, typename... 
ARGS> //friend T *ftl::config::create(ftl::Configurable *, const std::string &, ARGS ...); @@ -51,11 +53,11 @@ class Source : public ftl::Configurable { Source(const Source&)=delete; Source &operator=(const Source&) =delete; - private: + protected: explicit Source(ftl::config::json_t &cfg); Source(ftl::config::json_t &cfg, ftl::rgbd::SnapshotReader *); Source(ftl::config::json_t &cfg, ftl::net::Universe *net); - ~Source(); + virtual ~Source(); public: /** @@ -121,17 +123,6 @@ class Source : public ftl::Configurable { */ void getDepth(cv::Mat &d); - /** - * Write frames into source buffers from an external renderer. Virtual - * sources do not have an internal generator of frames but instead have - * their data provided from an external rendering class. This function only - * works when there is no internal generator. - */ - void writeFrames(int64_t ts, const cv::Mat &rgb, const cv::Mat &depth); - void writeFrames(int64_t ts, const ftl::cuda::TextureObject<uchar4> &rgb, const ftl::cuda::TextureObject<uint> &depth, cudaStream_t stream); - void writeFrames(int64_t ts, const ftl::cuda::TextureObject<uchar4> &rgb, const ftl::cuda::TextureObject<float> &depth, cudaStream_t stream); - void writeFrames(int64_t ts, const ftl::cuda::TextureObject<uchar4> &rgb, const ftl::cuda::TextureObject<uchar4> &rgb2, cudaStream_t stream); - int64_t timestamp() const { return timestamp_; } /** @@ -214,7 +205,7 @@ class Source : public ftl::Configurable { void removeCallback() { callback_ = nullptr; } - private: + protected: detail::Source *impl_; cv::Mat rgb_; cv::Mat depth_; diff --git a/components/rgbd-sources/include/ftl/rgbd/streamer.hpp b/components/rgbd-sources/include/ftl/rgbd/streamer.hpp index 3f6f1c4fa35bfcb4a97877a2d46d53230e2acbec..7c6e6f479afe022cdacefbabf9098e276e0c9f79 100644 --- a/components/rgbd-sources/include/ftl/rgbd/streamer.hpp +++ b/components/rgbd-sources/include/ftl/rgbd/streamer.hpp @@ -117,6 +117,8 @@ class Streamer : public ftl::Configurable { void wait(); + void setLatency(int l) { group_.setLatency(l); } + /** * Alternative to calling run(), it will operate a single frame capture, * compress and stream cycle. diff --git a/components/rgbd-sources/include/ftl/rgbd/virtual.hpp b/components/rgbd-sources/include/ftl/rgbd/virtual.hpp new file mode 100644 index 0000000000000000000000000000000000000000..ed3530258d64b14427802f8e56375635ab26a24a --- /dev/null +++ b/components/rgbd-sources/include/ftl/rgbd/virtual.hpp @@ -0,0 +1,30 @@ +#ifndef _FTL_RGBD_VIRTUAL_HPP_ +#define _FTL_RGBD_VIRTUAL_HPP_ + +#include <ftl/rgbd/source.hpp> + +namespace ftl { +namespace rgbd { + +class VirtualSource : public ftl::rgbd::Source { + public: + explicit VirtualSource(ftl::config::json_t &cfg); + ~VirtualSource(); + + void onRender(const std::function<void(ftl::rgbd::Frame &)> &); + + void setTimestamp(int64_t ts) { timestamp_ = ts; } + + /** + * Write frames into source buffers from an external renderer. Virtual + * sources do not have an internal generator of frames but instead have + * their data provided from an external rendering class. This function only + * works when there is no internal generator. 
+ */ + //void write(int64_t ts, ftl::rgbd::Frame &frame, cudaStream_t stream=0); +}; + +} +} + +#endif // _FTL_RGBD_VIRTUAL_HPP_ diff --git a/components/rgbd-sources/src/frame.cpp b/components/rgbd-sources/src/frame.cpp index 4886ff3cbde3b4835b01ef71339fcf15fc6f16b2..a56a19355526d9cdcdf25faaecdc67c05d09469d 100644 --- a/components/rgbd-sources/src/frame.cpp +++ b/components/rgbd-sources/src/frame.cpp @@ -13,15 +13,15 @@ void Frame::reset() { gpu_.clear(); } -void Frame::download(Channel c, cv::cuda::Stream& stream) { +void Frame::download(Channel c, cv::cuda::Stream stream) { download(Channels(c), stream); } -void Frame::upload(Channel c, cv::cuda::Stream& stream) { +void Frame::upload(Channel c, cv::cuda::Stream stream) { upload(Channels(c), stream); } -void Frame::download(Channels c, cv::cuda::Stream& stream) { +void Frame::download(Channels c, cv::cuda::Stream stream) { for (size_t i=0u; i<Channels::kMax; ++i) { if (c.has(i) && channels_.has(i) && gpu_.has(i)) { data_[i].gpu.download(data_[i].host, stream); @@ -30,7 +30,7 @@ void Frame::download(Channels c, cv::cuda::Stream& stream) { } } -void Frame::upload(Channels c, cv::cuda::Stream& stream) { +void Frame::upload(Channels c, cv::cuda::Stream stream) { for (size_t i=0u; i<Channels::kMax; ++i) { if (c.has(i) && channels_.has(i) && !gpu_.has(i)) { data_[i].gpu.upload(data_[i].host, stream); @@ -39,6 +39,41 @@ void Frame::upload(Channels c, cv::cuda::Stream& stream) { } } +bool Frame::empty(ftl::rgbd::Channels channels) { + for (auto c : channels) { + if (empty(c)) return true; + } + return false; +} + +void Frame::swapTo(ftl::rgbd::Channels channels, Frame &f) { + f.reset(); + + // For all channels in this frame object + for (auto c : channels_) { + // Should we swap this channel? + if (channels.has(c)) { + // Does 'f' have this channel? + //if (!f.hasChannel(c)) { + // No, so create it first + // FIXME: Allocate the memory as well? + if (isCPU(c)) f.create<cv::Mat>(c); + else f.create<cv::cuda::GpuMat>(c); + //} + + auto &m1 = _get(c); + auto &m2 = f._get(c); + + cv::swap(m1.host, m2.host); + cv::cuda::swap(m1.gpu, m2.gpu); + + auto temptex = std::move(m2.tex); + m2.tex = std::move(m1.tex); + m1.tex = std::move(temptex); + } + } +} + template<> cv::Mat& Frame::get(ftl::rgbd::Channel channel) { if (channel == Channel::None) { DLOG(WARNING) << "Cannot get the None channel from a Frame"; diff --git a/components/rgbd-sources/src/frameset.cpp b/components/rgbd-sources/src/frameset.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9b9a807d8599c23141b6c3806546bf8038ef30f9 --- /dev/null +++ b/components/rgbd-sources/src/frameset.cpp @@ -0,0 +1,39 @@ +#include <ftl/rgbd/frameset.hpp> + +using ftl::rgbd::FrameSet; +using ftl::rgbd::Channels; +using ftl::rgbd::Channel; + +void FrameSet::upload(ftl::rgbd::Channels c, cudaStream_t stream) { + for (auto &f : frames) { + f.upload(c, stream); + } +} + +void FrameSet::download(ftl::rgbd::Channels c, cudaStream_t stream) { + for (auto &f : frames) { + f.download(c, stream); + } +} + +void FrameSet::swapTo(ftl::rgbd::FrameSet &fs) { + UNIQUE_LOCK(fs.mtx, lk); + + //if (fs.frames.size() != frames.size()) { + // Assume "this" is correct and "fs" is not. 
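+	// Rebuild the target's source list and resize its frame vector to match this
+	// set before swapping the per-frame buffers below.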
+ fs.sources.clear(); + for (auto s : sources) fs.sources.push_back(s); + fs.frames.resize(frames.size()); + //} + + fs.timestamp = timestamp; + fs.count = static_cast<int>(count); + fs.stale = stale; + fs.mask = static_cast<unsigned int>(mask); + + for (size_t i=0; i<frames.size(); ++i) { + frames[i].swapTo(Channels::All(), fs.frames[i]); + } + + stale = true; +} diff --git a/components/rgbd-sources/src/source.cpp b/components/rgbd-sources/src/source.cpp index d33473fc38fbf55670e8b8844ffa5a03487e606d..35d23f27ad7edac18d3e3e02247296f1382be5e2 100644 --- a/components/rgbd-sources/src/source.cpp +++ b/components/rgbd-sources/src/source.cpp @@ -273,71 +273,6 @@ bool Source::compute(int N, int B) { return false; } -void Source::writeFrames(int64_t ts, const cv::Mat &rgb, const cv::Mat &depth) { - if (!impl_) { - UNIQUE_LOCK(mutex_,lk); - rgb.copyTo(rgb_); - depth.copyTo(depth_); - timestamp_ = ts; - } -} - -void Source::writeFrames(int64_t ts, const ftl::cuda::TextureObject<uchar4> &rgb, const ftl::cuda::TextureObject<uint> &depth, cudaStream_t stream) { - if (!impl_) { - UNIQUE_LOCK(mutex_,lk); - timestamp_ = ts; - rgb_.create(rgb.height(), rgb.width(), CV_8UC4); - cudaSafeCall(cudaMemcpy2DAsync(rgb_.data, rgb_.step, rgb.devicePtr(), rgb.pitch(), rgb_.cols * sizeof(uchar4), rgb_.rows, cudaMemcpyDeviceToHost, stream)); - depth_.create(depth.height(), depth.width(), CV_32SC1); - cudaSafeCall(cudaMemcpy2DAsync(depth_.data, depth_.step, depth.devicePtr(), depth.pitch(), depth_.cols * sizeof(uint), depth_.rows, cudaMemcpyDeviceToHost, stream)); - //cudaSafeCall(cudaStreamSynchronize(stream)); // TODO:(Nick) Don't wait here. - stream_ = stream; - //depth_.convertTo(depth_, CV_32F, 1.0f / 1000.0f); - } else { - LOG(ERROR) << "writeFrames cannot be done on this source: " << getURI(); - } -} - -void Source::writeFrames(int64_t ts, const ftl::cuda::TextureObject<uchar4> &rgb, const ftl::cuda::TextureObject<float> &depth, cudaStream_t stream) { - if (!impl_) { - UNIQUE_LOCK(mutex_,lk); - timestamp_ = ts; - rgb.download(rgb_, stream); - //rgb_.create(rgb.height(), rgb.width(), CV_8UC4); - //cudaSafeCall(cudaMemcpy2DAsync(rgb_.data, rgb_.step, rgb.devicePtr(), rgb.pitch(), rgb_.cols * sizeof(uchar4), rgb_.rows, cudaMemcpyDeviceToHost, stream)); - depth.download(depth_, stream); - //depth_.create(depth.height(), depth.width(), CV_32FC1); - //cudaSafeCall(cudaMemcpy2DAsync(depth_.data, depth_.step, depth.devicePtr(), depth.pitch(), depth_.cols * sizeof(float), depth_.rows, cudaMemcpyDeviceToHost, stream)); - - stream_ = stream; - cudaSafeCall(cudaStreamSynchronize(stream_)); - cv::cvtColor(rgb_,rgb_, cv::COLOR_BGRA2BGR); - cv::cvtColor(rgb_,rgb_, cv::COLOR_Lab2BGR); - - if (callback_) callback_(timestamp_, rgb_, depth_); - } -} - -void Source::writeFrames(int64_t ts, const ftl::cuda::TextureObject<uchar4> &rgb, const ftl::cuda::TextureObject<uchar4> &rgb2, cudaStream_t stream) { - if (!impl_) { - UNIQUE_LOCK(mutex_,lk); - timestamp_ = ts; - rgb.download(rgb_, stream); - //rgb_.create(rgb.height(), rgb.width(), CV_8UC4); - //cudaSafeCall(cudaMemcpy2DAsync(rgb_.data, rgb_.step, rgb.devicePtr(), rgb.pitch(), rgb_.cols * sizeof(uchar4), rgb_.rows, cudaMemcpyDeviceToHost, stream)); - rgb2.download(depth_, stream); - //depth_.create(depth.height(), depth.width(), CV_32FC1); - //cudaSafeCall(cudaMemcpy2DAsync(depth_.data, depth_.step, depth.devicePtr(), depth.pitch(), depth_.cols * sizeof(float), depth_.rows, cudaMemcpyDeviceToHost, stream)); - - stream_ = stream; - cudaSafeCall(cudaStreamSynchronize(stream_)); - 
cv::cvtColor(rgb_,rgb_, cv::COLOR_BGRA2BGR); - cv::cvtColor(rgb_,rgb_, cv::COLOR_Lab2BGR); - cv::cvtColor(depth_,depth_, cv::COLOR_BGRA2BGR); - cv::cvtColor(depth_,depth_, cv::COLOR_Lab2BGR); - } -} - bool Source::thumbnail(cv::Mat &t) { if (!impl_ && stream_ != 0) { cudaSafeCall(cudaStreamSynchronize(stream_)); diff --git a/components/rgbd-sources/src/streamer.cpp b/components/rgbd-sources/src/streamer.cpp index a1019cb8e6ff990ce8cca20afaf8d59cc40571c6..7a9118c9f47975a31d6389982b2adb818ed8a046 100644 --- a/components/rgbd-sources/src/streamer.cpp +++ b/components/rgbd-sources/src/streamer.cpp @@ -47,7 +47,7 @@ Streamer::Streamer(nlohmann::json &config, Universe *net) hq_devices_ = (value("disable_hardware_encode", false)) ? device_t::Software : device_t::Any; //group_.setFPS(value("fps", 20)); - group_.setLatency(10); + group_.setLatency(4); compress_level_ = value("compression", 1); diff --git a/components/rgbd-sources/src/virtual.cpp b/components/rgbd-sources/src/virtual.cpp new file mode 100644 index 0000000000000000000000000000000000000000..62b404155e1f302111e99183347d87c9dbda3a0b --- /dev/null +++ b/components/rgbd-sources/src/virtual.cpp @@ -0,0 +1,138 @@ +#include <ftl/rgbd/virtual.hpp> + +using ftl::rgbd::VirtualSource; +using ftl::rgbd::Source; +using ftl::rgbd::Channel; + +class VirtualImpl : public ftl::rgbd::detail::Source { + public: + explicit VirtualImpl(ftl::rgbd::Source *host, const ftl::rgbd::Camera ¶ms) : ftl::rgbd::detail::Source(host) { + params_ = params; + capabilities_ = ftl::rgbd::kCapMovable | ftl::rgbd::kCapVideo | ftl::rgbd::kCapStereo; + } + + ~VirtualImpl() { + + } + + bool capture(int64_t ts) override { + timestamp_ = ts; + return true; + } + + bool retrieve() override { + return true; + } + + bool compute(int n, int b) override { + if (callback) { + frame.reset(); + + try { + callback(frame); + } catch (std::exception &e) { + LOG(ERROR) << "Exception in render callback: " << e.what(); + } catch (...) 
{ + LOG(ERROR) << "Unknown exception in render callback"; + } + + if (frame.hasChannel(Channel::Colour) && frame.hasChannel(Channel::Depth)) { + frame.download(Channel::Colour + Channel::Depth); + cv::swap(frame.get<cv::Mat>(Channel::Colour), rgb_); + cv::swap(frame.get<cv::Mat>(Channel::Depth), depth_); + LOG(INFO) << "Written: " << rgb_.cols; + } else { + LOG(ERROR) << "Missing colour or depth frame in rendering"; + } + + auto cb = host_->callback(); + if (cb) cb(timestamp_, rgb_, depth_); + } + return true; + } + + bool isReady() override { return true; } + + std::function<void(ftl::rgbd::Frame &)> callback; + ftl::rgbd::Frame frame; +}; + +VirtualSource::VirtualSource(ftl::config::json_t &cfg) : Source(cfg) { + auto params = params_; + impl_ = new VirtualImpl(this, params); +} + +VirtualSource::~VirtualSource() { + +} + +void VirtualSource::onRender(const std::function<void(ftl::rgbd::Frame &)> &f) { + dynamic_cast<VirtualImpl*>(impl_)->callback = f; +} + +/* +void Source::writeFrames(int64_t ts, const cv::Mat &rgb, const cv::Mat &depth) { + if (!impl_) { + UNIQUE_LOCK(mutex_,lk); + rgb.copyTo(rgb_); + depth.copyTo(depth_); + timestamp_ = ts; + } +} + +void Source::writeFrames(int64_t ts, const ftl::cuda::TextureObject<uchar4> &rgb, const ftl::cuda::TextureObject<uint> &depth, cudaStream_t stream) { + if (!impl_) { + UNIQUE_LOCK(mutex_,lk); + timestamp_ = ts; + rgb_.create(rgb.height(), rgb.width(), CV_8UC4); + cudaSafeCall(cudaMemcpy2DAsync(rgb_.data, rgb_.step, rgb.devicePtr(), rgb.pitch(), rgb_.cols * sizeof(uchar4), rgb_.rows, cudaMemcpyDeviceToHost, stream)); + depth_.create(depth.height(), depth.width(), CV_32SC1); + cudaSafeCall(cudaMemcpy2DAsync(depth_.data, depth_.step, depth.devicePtr(), depth.pitch(), depth_.cols * sizeof(uint), depth_.rows, cudaMemcpyDeviceToHost, stream)); + //cudaSafeCall(cudaStreamSynchronize(stream)); // TODO:(Nick) Don't wait here. 
+ stream_ = stream; + //depth_.convertTo(depth_, CV_32F, 1.0f / 1000.0f); + } else { + LOG(ERROR) << "writeFrames cannot be done on this source: " << getURI(); + } +} + +void Source::writeFrames(int64_t ts, const ftl::cuda::TextureObject<uchar4> &rgb, const ftl::cuda::TextureObject<float> &depth, cudaStream_t stream) { + if (!impl_) { + UNIQUE_LOCK(mutex_,lk); + timestamp_ = ts; + rgb.download(rgb_, stream); + //rgb_.create(rgb.height(), rgb.width(), CV_8UC4); + //cudaSafeCall(cudaMemcpy2DAsync(rgb_.data, rgb_.step, rgb.devicePtr(), rgb.pitch(), rgb_.cols * sizeof(uchar4), rgb_.rows, cudaMemcpyDeviceToHost, stream)); + depth.download(depth_, stream); + //depth_.create(depth.height(), depth.width(), CV_32FC1); + //cudaSafeCall(cudaMemcpy2DAsync(depth_.data, depth_.step, depth.devicePtr(), depth.pitch(), depth_.cols * sizeof(float), depth_.rows, cudaMemcpyDeviceToHost, stream)); + + stream_ = stream; + cudaSafeCall(cudaStreamSynchronize(stream_)); + cv::cvtColor(rgb_,rgb_, cv::COLOR_BGRA2BGR); + cv::cvtColor(rgb_,rgb_, cv::COLOR_Lab2BGR); + + if (callback_) callback_(timestamp_, rgb_, depth_); + } +} + +void Source::writeFrames(int64_t ts, const ftl::cuda::TextureObject<uchar4> &rgb, const ftl::cuda::TextureObject<uchar4> &rgb2, cudaStream_t stream) { + if (!impl_) { + UNIQUE_LOCK(mutex_,lk); + timestamp_ = ts; + rgb.download(rgb_, stream); + //rgb_.create(rgb.height(), rgb.width(), CV_8UC4); + //cudaSafeCall(cudaMemcpy2DAsync(rgb_.data, rgb_.step, rgb.devicePtr(), rgb.pitch(), rgb_.cols * sizeof(uchar4), rgb_.rows, cudaMemcpyDeviceToHost, stream)); + rgb2.download(depth_, stream); + //depth_.create(depth.height(), depth.width(), CV_32FC1); + //cudaSafeCall(cudaMemcpy2DAsync(depth_.data, depth_.step, depth.devicePtr(), depth.pitch(), depth_.cols * sizeof(float), depth_.rows, cudaMemcpyDeviceToHost, stream)); + + stream_ = stream; + cudaSafeCall(cudaStreamSynchronize(stream_)); + cv::cvtColor(rgb_,rgb_, cv::COLOR_BGRA2BGR); + cv::cvtColor(rgb_,rgb_, cv::COLOR_Lab2BGR); + cv::cvtColor(depth_,depth_, cv::COLOR_BGRA2BGR); + cv::cvtColor(depth_,depth_, cv::COLOR_Lab2BGR); + } +} +*/ \ No newline at end of file diff --git a/components/rgbd-sources/test/frame_unit.cpp b/components/rgbd-sources/test/frame_unit.cpp index 1d1648b2139440f08c62379bffc6f2dec356112d..6ad528a28fd677e207b05362c0021f7d302784dc 100644 --- a/components/rgbd-sources/test/frame_unit.cpp +++ b/components/rgbd-sources/test/frame_unit.cpp @@ -3,6 +3,7 @@ using ftl::rgbd::Frame; using ftl::rgbd::Channel; +using ftl::rgbd::Channels; using ftl::rgbd::Format; TEST_CASE("Frame::create() cpu mat", "") { @@ -267,3 +268,16 @@ TEST_CASE("Frame::getTexture()", "") { REQUIRE( !hadexception ); } } + +TEST_CASE("Frame::swapTo()", "") { + SECTION("Single host channel to empty frame") { + Frame f1; + Frame f2; + + f1.create<cv::Mat>(Channel::Colour, Format<uchar3>(100,100)); + f1.swapTo(Channels::All(), f2); + + REQUIRE( f2.hasChannel(Channel::Colour) ); + REQUIRE( (f2.get<cv::Mat>(Channel::Colour).cols == 100) ); + } +} diff --git a/components/scene-sources/include/ftl/scene/framescene.hpp b/components/scene-sources/include/ftl/scene/framescene.hpp new file mode 100644 index 0000000000000000000000000000000000000000..0235a60331899ca125514e0905c845bafd1e466c --- /dev/null +++ b/components/scene-sources/include/ftl/scene/framescene.hpp @@ -0,0 +1,28 @@ +#ifndef _FTL_SCENE_FRAMESCENE_HPP_ +#define _FTL_SCENE_FRAMESCENE_HPP_ + +#include <ftl/scene/scene.hpp> + +namespace ftl { +namespace scene { + +/** + * A scene represented internally as a set of 
image frames that together + * define a point cloud. + */ +class FrameScene : public ftl::scene::Scene { + public: + FrameScene(); + ~FrameScene(); + + bool update(ftl::rgbd::FrameSet &); + + bool render(ftl::rgbd::Source *, ftl::rgbd::Frame &); + bool encode(std::vector<uint8_t> &); + bool decode(const std::vector<uint8_t> &); +}; + +} +} + +#endif // _FTL_SCENE_FRAMESCENE_HPP_ diff --git a/components/scene-sources/include/ftl/scene/scene.hpp b/components/scene-sources/include/ftl/scene/scene.hpp new file mode 100644 index 0000000000000000000000000000000000000000..856819cf403982d54ea3fd12a3cd195b9d437919 --- /dev/null +++ b/components/scene-sources/include/ftl/scene/scene.hpp @@ -0,0 +1,21 @@ +#ifndef _FTL_RECONSTRUCT_SCENE_HPP_ +#define _FTL_RECONSTRUCT_SCENE_HPP_ + +namespace ftl { +namespace scene { + +class Scene { + public: + Scene(); + virtual ~Scene(); + + virtual bool render(ftl::rgbd::Source *, ftl::rgbd::Frame &)=0; + + virtual bool encode(std::vector<uint8_t> &)=0; + virtual bool decode(const std::vector<uint8_t> &)=0; +}; + +} // scene +} // ftl + +#endif // _FTL_RECONSTRUCT_SCENE_HPP_
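For orientation, the pieces introduced in this patch are intended to compose roughly as follows. This is only a minimal sketch: the ftl::config::create calls, the configuration names "renderer" and "virtual", and the surrounding setup function are assumptions for illustration and are not part of the patch itself.

#include <ftl/rgbd/virtual.hpp>
#include <ftl/rgbd/frameset.hpp>
#include <ftl/render/splat_render.hpp>

// Hypothetical wiring (assumed helper signatures): 'root' is the application's
// root Configurable and 'scene' the FrameSet being reconstructed.
void setupVirtualCamera(ftl::Configurable *root, ftl::rgbd::FrameSet *scene) {
	auto *renderer = ftl::config::create<ftl::render::Splatter>(root, "renderer", scene);
	auto *virt = ftl::config::create<ftl::rgbd::VirtualSource>(root, "virtual");

	// Whenever the virtual source needs a frame, splat the current scene into it.
	virt->onRender([renderer, virt](ftl::rgbd::Frame &out) {
		renderer->render(virt, out, 0);  // 0 = default CUDA stream
	});
}

The render callback runs inside VirtualImpl::compute(), which then downloads the Colour and Depth channels and forwards them through the source's normal frame callback.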