diff --git a/applications/reconstruct/CMakeLists.txt b/applications/reconstruct/CMakeLists.txt
index ae06aab6d142ba8a2ad3a1e0970905e3a70468e9..1e55c671c87487b995b0dd772495ff8a612a8c78 100644
--- a/applications/reconstruct/CMakeLists.txt
+++ b/applications/reconstruct/CMakeLists.txt
@@ -10,7 +10,7 @@ set(REPSRC
 	src/garbage.cu
 	src/integrators.cu
 	#src/ray_cast_sdf.cu
-	src/splat_render.cu
+	src/voxel_render.cu
 	src/camera_util.cu
 	src/voxel_hash.cu
 	src/voxel_hash.cpp
@@ -19,6 +19,7 @@ set(REPSRC
 	#src/virtual_source.cpp
 	src/splat_render.cpp
 	src/dibr.cu
+	src/mls.cu
 	src/depth_camera.cu
 	src/depth_camera.cpp
 )
diff --git a/applications/reconstruct/include/ftl/cuda_operators.hpp b/applications/reconstruct/include/ftl/cuda_operators.hpp
index b7a3b44426bf6fcf490af67f38461ca849ade428..ae85cfbd785fb72f79225ae28278bf4862b680db 100644
--- a/applications/reconstruct/include/ftl/cuda_operators.hpp
+++ b/applications/reconstruct/include/ftl/cuda_operators.hpp
@@ -406,6 +406,12 @@ inline __host__ __device__ float length(float3 v)
 {
 	return sqrtf(dot(v, v));
 }
 
+// length squared
+inline __host__ __device__ float length2(const float3 &v)
+{
+	return dot(v, v);
+}
+
 // normalize
 inline __host__ __device__ float3 normalize(float3 v)
 {
diff --git a/applications/reconstruct/src/depth_camera.cu b/applications/reconstruct/src/depth_camera.cu
index 9a36ea452225c1a174d0ba6ced0baf98471688eb..7a322eaf733230772e72c50722e849335bf7e891 100644
--- a/applications/reconstruct/src/depth_camera.cu
+++ b/applications/reconstruct/src/depth_camera.cu
@@ -47,6 +47,21 @@ void ftl::cuda::clear_depth(const ftl::cuda::TextureObject<int> &depth, cudaStre
 	clear_depth_kernel<<<clear_gridSize, clear_blockSize, 0, stream>>>(depth);
 }
 
+__global__ void clear_to_zero_kernel(ftl::cuda::TextureObject<float> depth) {
+	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
+	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	if (x < depth.width() && y < depth.height()) {
+		depth(x,y) = 0.0f; //PINF;
+	}
+}
+
+void ftl::cuda::clear_to_zero(const ftl::cuda::TextureObject<float> &depth, cudaStream_t stream) {
+	const dim3 clear_gridSize((depth.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+	const dim3 clear_blockSize(T_PER_BLOCK, T_PER_BLOCK);
+	clear_to_zero_kernel<<<clear_gridSize, clear_blockSize, 0, stream>>>(depth);
+}
+
 __global__ void clear_points_kernel(ftl::cuda::TextureObject<float4> depth) {
 	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
 	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
@@ -79,6 +94,21 @@ void ftl::cuda::clear_colour(const ftl::cuda::TextureObject<uchar4> &depth, cuda
 	clear_colour_kernel<<<clear_gridSize, clear_blockSize, 0, stream>>>(depth);
 }
 
+__global__ void clear_colour_kernel(ftl::cuda::TextureObject<float4> depth) {
+	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
+	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	if (x < depth.width() && y < depth.height()) {
+		depth(x,y) = make_float4(0.0f);
+	}
+}
+
+void ftl::cuda::clear_colour(const ftl::cuda::TextureObject<float4> &depth, cudaStream_t stream) {
+	const dim3 clear_gridSize((depth.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+	const dim3 clear_blockSize(T_PER_BLOCK, T_PER_BLOCK);
+	clear_colour_kernel<<<clear_gridSize, clear_blockSize, 0, stream>>>(depth);
+}
+
 // ===== Type convert =====
 
 template <typename A, typename B>
@@ -103,331 +133,6 @@ void ftl::cuda::int_to_float(const ftl::cuda::TextureObject<int> &in, ftl::cuda:
 	convert_kernel<int,float><<<gridSize, blockSize, 0, stream>>>(in, out, scale);
 }
 
-/// ===== MLS Smooth
-
-// TODO:(Nick) Put this in a common location (used in integrators.cu)
-extern __device__ float spatialWeighting(float r);
-extern __device__ float spatialWeighting(float r, float h);
-
-/*
- * Kim, K., Chalidabhongse, T. H., Harwood, D., & Davis, L. (2005).
- * Real-time foreground-background segmentation using codebook model.
- * Real-Time Imaging. https://doi.org/10.1016/j.rti.2004.12.004
- */
- __device__ float colordiffFloat(const uchar4 &pa, const uchar4 &pb) {
-	const float x_2 = pb.x * pb.x + pb.y * pb.y + pb.z * pb.z;
-	const float v_2 = pa.x * pa.x + pa.y * pa.y + pa.z * pa.z;
-	const float xv_2 = pow(pb.x * pa.x + pb.y * pa.y + pb.z * pa.z, 2);
-	const float p_2 = xv_2 / v_2;
-	return sqrt(x_2 - p_2);
-}
-
-__device__ float colordiffFloat2(const uchar4 &pa, const uchar4 &pb) {
-	float3 delta = make_float3((float)pa.x - (float)pb.x, (float)pa.y - (float)pb.y, (float)pa.z - (float)pb.z);
-	return length(delta);
-}
-
-/*
- * Colour weighting as suggested in:
- * C. Kuster et al. Spatio-Temporal Geometry Fusion for Multiple Hybrid Cameras using Moving Least Squares Surfaces. 2014.
- * c = colour distance
- */
- __device__ float colourWeighting(float c) {
-	const float h = c_hashParams.m_colourSmoothing;
-	if (c >= h) return 0.0f;
-	float ch = c / h;
-	ch = 1.0f - ch*ch;
-	return ch*ch*ch*ch;
-}
-
-#define WINDOW_RADIUS 5
-
-__device__ float mlsCamera(int cam, const float3 &mPos, uchar4 c1, float3 &wpos) {
-	const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam];
-
-	const float3 pf = camera.poseInverse * mPos;
-	float3 pos = make_float3(0.0f, 0.0f, 0.0f);
-	const uint2 screenPos = make_uint2(camera.params.cameraToKinectScreenInt(pf));
-	float weights = 0.0f;
-
-	//#pragma unroll
-	for (int v=-WINDOW_RADIUS; v<=WINDOW_RADIUS; ++v) {
-		for (int u=-WINDOW_RADIUS; u<=WINDOW_RADIUS; ++u) {
-			//if (screenPos.x+u < width && screenPos.y+v < height) { //on screen
-				float depth = tex2D<float>(camera.depth, screenPos.x+u, screenPos.y+v);
-				const float3 camPos = camera.params.kinectDepthToSkeleton(screenPos.x+u, screenPos.y+v, depth);
-				float weight = spatialWeighting(length(pf - camPos));
-
-				if (weight > 0.0f) {
-					uchar4 c2 = tex2D<uchar4>(camera.colour, screenPos.x+u, screenPos.y+v);
-					weight *= colourWeighting(colordiffFloat2(c1,c2));
-
-					if (weight > 0.0f) {
-						wpos += weight* (camera.pose * camPos);
-						weights += weight;
-					}
-				}
-			//}
-		}
-	}
-
-	//wpos += (camera.pose * pos);
-
-	return weights;
-}
-
-__device__ float mlsCameraNoColour(int cam, const float3 &mPos, uchar4 c1, const float4 &norm, float3 &wpos, float h) {
-	const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam];
-
-	const float3 pf = camera.poseInverse * mPos;
-	float3 pos = make_float3(0.0f, 0.0f, 0.0f);
-	const uint2 screenPos = make_uint2(camera.params.cameraToKinectScreenInt(pf));
-	float weights = 0.0f;
-
-	//#pragma unroll
-	for (int v=-WINDOW_RADIUS; v<=WINDOW_RADIUS; ++v) {
-		for (int u=-WINDOW_RADIUS; u<=WINDOW_RADIUS; ++u) {
-			//if (screenPos.x+u < width && screenPos.y+v < height) { //on creen
-				float depth = tex2D<float>(camera.depth, screenPos.x+u, screenPos.y+v);
-				const float3 camPos = camera.params.kinectDepthToSkeleton(screenPos.x+u, screenPos.y+v, depth);
-
-				// TODO:(Nick) dot product of normals < 0 means the point
-				// should be ignored with a weight of 0 since it is facing the wrong direction
-				// May be good to simply weight using the dot product to give
-				// a stronger weight to those whose normals are closer
-
-				float weight = spatialWeighting(length(pf - camPos), h);
-
-				if (weight > 0.0f) {
-					float4 n2 = tex2D<float4>(camera.normal, screenPos.x+u, screenPos.y+v);
-					if (dot(make_float3(norm), make_float3(n2)) > 0.0f) {
-
-						uchar4 c2 = tex2D<uchar4>(camera.colour, screenPos.x+u, screenPos.y+v);
-
-						if (colourWeighting(colordiffFloat2(c1,c2)) > 0.0f) {
-							pos += weight*camPos; // (camera.pose * camPos);
-							weights += weight;
-						}
-					}
-				}
-			//}
-		}
-	}
-
-	if (weights > 0.0f) wpos += (camera.pose * (pos / weights)) * weights;
-
-	return weights;
-}
-
-__device__ float mlsCameraBest(int cam, const float3 &mPos, uchar4 c1, float3 &wpos) {
-	const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam];
-
-	const float3 pf = camera.poseInverse * mPos;
-	float3 pos = make_float3(0.0f, 0.0f, 0.0f);
-	const uint2 screenPos = make_uint2(camera.params.cameraToKinectScreenInt(pf));
-	float weights = 0.0f;
-
-	//#pragma unroll
-	for (int v=-WINDOW_RADIUS; v<=WINDOW_RADIUS; ++v) {
-		for (int u=-WINDOW_RADIUS; u<=WINDOW_RADIUS; ++u) {
-			//if (screenPos.x+u < width && screenPos.y+v < height) { //on screen
-				float depth = tex2D<float>(camera.depth, screenPos.x+u, screenPos.y+v);
-				const float3 camPos = camera.params.kinectDepthToSkeleton(screenPos.x+u, screenPos.y+v, depth);
-				float weight = spatialWeighting(length(pf - camPos));
-
-				if (weight > 0.0f) {
-					uchar4 c2 = tex2D<uchar4>(camera.colour, screenPos.x+u, screenPos.y+v);
-					weight *= colourWeighting(colordiffFloat2(c1,c2));
-
-					if (weight > weights) {
-						pos = weight* (camera.pose * camPos);
-						weights = weight;
-					}
-				}
-			//}
-		}
-	}
-
-	wpos += pos;
-	//wpos += (camera.pose * pos);
-
-	return weights;
-}
-
-__device__ float mlsCameraPoint(int cam, const float3 &mPos, uchar4 c1, float3 &wpos) {
-	const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam];
-
-	const float3 pf = camera.poseInverse * mPos;
-	float3 pos = make_float3(0.0f, 0.0f, 0.0f);
-	const uint2 screenPos = make_uint2(camera.params.cameraToKinectScreenInt(pf));
-	float weights = 0.0f;
-
-
-	//float depth = tex2D<float>(camera.depth, screenPos.x, screenPos.y);
-	const float3 worldPos = make_float3(tex2D<float4>(camera.points, screenPos.x, screenPos.y));
-	if (worldPos.z == MINF) return 0.0f;
-
-	float weight = spatialWeighting(length(mPos - worldPos));
-
-	if (weight > 0.0f) {
-		wpos += weight* (worldPos);
-		weights += weight;
-	}
-
-	return weights;
-}
-
-__global__ void mls_smooth_kernel(ftl::cuda::TextureObject<float4> output, HashParams hashParams, int numcams, int cam) {
-	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
-	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
-
-	const int width = output.width();
-	const int height = output.height();
-
-	const DepthCameraCUDA &mainCamera = c_cameras[cam];
-
-	if (x < width && y < height) {
-
-		const float depth = tex2D<float>(mainCamera.depth, x, y);
-		const uchar4 c1 = tex2D<uchar4>(mainCamera.colour, x, y);
-		const float4 norm = tex2D<float4>(mainCamera.normal, x, y);
-		//if (x == 400 && y == 200) printf("NORMX: %f\n", norm.x);
-
-		float3 wpos = make_float3(0.0f);
-		float3 wnorm = make_float3(0.0f);
-		float weights = 0.0f;
-
-		if (depth >= mainCamera.params.m_sensorDepthWorldMin && depth <= mainCamera.params.m_sensorDepthWorldMax) {
-			float3 mPos = mainCamera.pose * mainCamera.params.kinectDepthToSkeleton(x, y, depth);
-
-			if ((!(hashParams.m_flags & ftl::voxhash::kFlagClipping)) || (mPos.x > hashParams.m_minBounds.x && mPos.x < hashParams.m_maxBounds.x &&
-					mPos.y > hashParams.m_minBounds.y && mPos.y < hashParams.m_maxBounds.y &&
-					mPos.z > hashParams.m_minBounds.z && mPos.z < hashParams.m_maxBounds.z)) {
-
-				if (hashParams.m_flags & ftl::voxhash::kFlagMLS) {
-					for (uint cam2=0; cam2<numcams; ++cam2) {
-						//if (cam2 == cam) weights += mlsCameraNoColour(cam2, mPos, c1, wpos, c_hashParams.m_spatialSmoothing*0.1f); //weights += 0.5*mlsCamera(cam2, mPos, c1, wpos);
-						weights += mlsCameraNoColour(cam2, mPos, c1, norm, wpos, c_hashParams.m_spatialSmoothing); //*((cam == cam2)? 0.1f : 5.0f));
-
-						// Previous approach
-						//if (cam2 == cam) continue;
-						//weights += mlsCameraBest(cam2, mPos, c1, wpos);
-					}
-					wpos /= weights;
-				} else {
-					weights = 1000.0f;
-					wpos = mPos;
-				}
-
-				//output(x,y) = (weights >= hashParams.m_confidenceThresh) ? make_float4(wpos, 0.0f) : make_float4(MINF,MINF,MINF,MINF);
-
-				if (weights >= hashParams.m_confidenceThresh) output(x,y) = make_float4(wpos, 0.0f);
-
-				//const uint2 screenPos = make_uint2(mainCamera.params.cameraToKinectScreenInt(mainCamera.poseInverse * wpos));
-				//if (screenPos.x < output.width() && screenPos.y < output.height()) {
-				//	output(screenPos.x,screenPos.y) = (weights >= hashParams.m_confidenceThresh) ? make_float4(wpos, 0.0f) : make_float4(MINF,MINF,MINF,MINF);
-				//}
-			}
-		}
-	}
-}
-
-void ftl::cuda::mls_smooth(TextureObject<float4> &output, const HashParams &hashParams, int numcams, int cam, cudaStream_t stream) {
-	const dim3 gridSize((output.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (output.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
-	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
-
-	mls_smooth_kernel<<<gridSize, blockSize, 0, stream>>>(output, hashParams, numcams, cam);
-
-#ifdef _DEBUG
-	cudaSafeCall(cudaDeviceSynchronize());
-#endif
-}
-
-#define RESAMPLE_RADIUS 7
-
-__global__ void mls_resample_kernel(ftl::cuda::TextureObject<int> depthin, ftl::cuda::TextureObject<uchar4> colourin, ftl::cuda::TextureObject<float> depthout, HashParams hashParams, int numcams, SplatParams params) {
-	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
-	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
-
-	const int width = depthin.width();
-	const int height = depthin.height();
-
-	if (x < width && y < height) {
-
-		//const int depth = depthin.tex2D((int)x, (int)y);
-		//if (depth != 0x7FFFFFFF) {
-		//	depthout(x,y) = (float)depth / 1000.0f;
-		//	return;
-		//}
-
-		struct map_t {
-			int d;
-			int quad;
-		};
-
-		map_t mappings[5];
-		int mapidx = 0;
-
-		for (int v=-RESAMPLE_RADIUS; v<=RESAMPLE_RADIUS; ++v) {
-			for (int u=-RESAMPLE_RADIUS; u<=RESAMPLE_RADIUS; ++u) {
-
-				const int depth = depthin.tex2D((int)x+u, (int)y+v);
-				const uchar4 c1 = colourin.tex2D((int)x+u, (int)y+v);
-
-				if (depth != 0x7FFFFFFF) {
-					int i=0;
-					for (i=0; i<mapidx; ++i) {
-						if (abs(mappings[i].d - depth) < 100) {
-							if (u < 0 && v < 0) mappings[i].quad |= 0x1;
-							if (u > 0 && v < 0) mappings[i].quad |= 0x2;
-							if (u > 0 && v > 0) mappings[i].quad |= 0x4;
-							if (u < 0 && v > 0) mappings[i].quad |= 0x8;
-							break;
-						}
-					}
-					if (i == mapidx && i < 5) {
-						mappings[mapidx].d = depth;
-						mappings[mapidx].quad = 0;
-						if (u < 0 && v < 0) mappings[mapidx].quad |= 0x1;
-						if (u > 0 && v < 0) mappings[mapidx].quad |= 0x2;
-						if (u > 0 && v > 0) mappings[mapidx].quad |= 0x4;
-						if (u < 0 && v > 0) mappings[mapidx].quad |= 0x8;
-						++mapidx;
-					} else {
-						//printf("EXCEEDED\n");
-					}
-				}
-			}
-		}
-
-		int bestdepth = 1000000;
-		//int count = 0;
-		for (int i=0; i<mapidx; ++i) {
-			if (__popc(mappings[i].quad) >= 3 && mappings[i].d < bestdepth) bestdepth = mappings[i].d;
-			//if (mappings[i].quad == 15 && mappings[i].d < bestdepth) bestdepth = mappings[i].d;
-			//if (mappings[i].quad == 15) count ++;
-		}
-
-		//depthout(x,y) = (mapidx == 5) ? 3.0f : 0.0f;
-
-		if (bestdepth < 1000000) {
-			depthout(x,y) = (float)bestdepth / 1000.0f;
-		}
-	}
-}
-
-void ftl::cuda::mls_resample(const TextureObject<int> &depthin, const TextureObject<uchar4> &colourin, TextureObject<float> &depthout, const HashParams &hashParams, int numcams, const SplatParams &params, cudaStream_t stream) {
-	const dim3 gridSize((depthin.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depthin.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
-	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
-
-	mls_resample_kernel<<<gridSize, blockSize, 0, stream>>>(depthin, colourin, depthout, hashParams, numcams, params);
-
-#ifdef _DEBUG
-	cudaSafeCall(cudaDeviceSynchronize());
-#endif
-}
-
-
 /// ===== Median Filter ======
 
 #define WINDOW_SIZE 3
diff --git a/applications/reconstruct/src/depth_camera_cuda.hpp b/applications/reconstruct/src/depth_camera_cuda.hpp
index 552c2e59d44206c089fee68124ca814d3e752ad6..26bfcad73f5ae72f20cfe2dd29d0e91c62fca62b 100644
--- a/applications/reconstruct/src/depth_camera_cuda.hpp
+++ b/applications/reconstruct/src/depth_camera_cuda.hpp
@@ -10,8 +10,10 @@ namespace cuda {
 
 void clear_depth(const TextureObject<float> &depth, cudaStream_t stream);
 void clear_depth(const TextureObject<int> &depth, cudaStream_t stream);
+void clear_to_zero(const ftl::cuda::TextureObject<float> &depth, cudaStream_t stream);
 void clear_points(const ftl::cuda::TextureObject<float4> &depth, cudaStream_t stream);
 void clear_colour(const ftl::cuda::TextureObject<uchar4> &depth, cudaStream_t stream);
+void clear_colour(const ftl::cuda::TextureObject<float4> &depth, cudaStream_t stream);
 
 void median_filter(const ftl::cuda::TextureObject<int> &in, ftl::cuda::TextureObject<float> &out, cudaStream_t stream);
 
@@ -21,7 +23,7 @@ void float_to_int(const ftl::cuda::TextureObject<float> &in, ftl::cuda::TextureO
 
 void mls_smooth(TextureObject<float4> &output, const ftl::voxhash::HashParams &hashParams, int numcams, int cam, cudaStream_t stream);
 
-void mls_resample(const TextureObject<int> &depthin, const TextureObject<uchar4> &colourin, TextureObject<float> &depthout, const ftl::voxhash::HashParams &hashParams, int numcams, const ftl::render::SplatParams &params, cudaStream_t stream);
+void mls_render_depth(const TextureObject<int> &input, TextureObject<int> &output, const ftl::render::SplatParams &params, int numcams, cudaStream_t stream);
 
 void hole_fill(const TextureObject<int> &depth_in, const TextureObject<float> &depth_out, const DepthCameraParams &params, cudaStream_t stream);
diff --git a/applications/reconstruct/src/dibr.cu b/applications/reconstruct/src/dibr.cu
index 9558a0e0d2ac4b7a3978ccd48a6f6791f99a337f..acb86216a7eecd6e10f3ebf3a3b999964b56cf09 100644
--- a/applications/reconstruct/src/dibr.cu
+++ b/applications/reconstruct/src/dibr.cu
@@ -1,12 +1,20 @@
 #include "splat_render_cuda.hpp"
+#include "depth_camera_cuda.hpp"
 #include <cuda_runtime.h>
 
 #include <ftl/cuda_matrix_util.hpp>
 
 #include "splat_params.hpp"
+#include "mls_cuda.hpp"
 #include <ftl/depth_camera.hpp>
 
 #define T_PER_BLOCK 8
+#define UPSAMPLE_FACTOR 1.8f
+#define WARP_SIZE 32
+#define DEPTH_THRESHOLD 0.05f
+#define UPSAMPLE_MAX 60
+#define MAX_ITERATIONS 10
+#define SPATIAL_SMOOTHING 0.01f
 
 using ftl::cuda::TextureObject;
 using ftl::render::SplatParams;
@@ -23,55 +31,343 @@ __global__ void clearColourKernel(TextureObject<uchar4> colour) {
 	}
 }
 
+__device__ inline bool isStable(const float3 &previous, const float3 &estimate, const SplatParams &params, float d) {
+	const float psize = 2.0f * d / params.camera.fx;
+	//printf("PSIZE %f\n", psize);
+	return fabs(previous.x - estimate.x) <= psize &&
+		fabs(previous.y - estimate.y) <= psize &&
+		fabs(previous.z - estimate.z) <= psize;
+}
 
-__global__ void dibr_depthmap_kernel(
-		TextureObject<int> depth, int numcams, SplatParams params) {
+/*
+ * Pass 1: Determine depth buffer with enough accuracy for a visibility test in pass 2.
+ * These values are also used as the actual surface estimate during rendering so should
+ * at least be plane or sphere fitted if not MLS smoothed onto the actual surface.
+ */
+__global__ void dibr_visibility_kernel(TextureObject<int> depth, int cam, SplatParams params) {
+	const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam];
+
+	const int x = (blockIdx.x*blockDim.x + threadIdx.x) / WARP_SIZE;
+	const int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	const float3 worldPos = make_float3(tex2D<float4>(camera.points, x, y));
+	const float3 normal = make_float3(tex2D<float4>(camera.normal, x, y));
+	if (worldPos.x == MINF) return;
+	const float r = (camera.poseInverse * worldPos).z / camera.params.fx;
+
+	// Get virtual camera ray for splat centre and backface cull if possible
+	//const float3 rayOrigin = params.m_viewMatrixInverse * make_float3(0.0f,0.0f,0.0f);
+	//const float3 rayDir = normalize(params.m_viewMatrixInverse * params.camera.kinectDepthToSkeleton(x,y,1.0f) - rayOrigin);
+	//if (dot(rayDir, normal) > 0.0f) return;
+
+	// Find the virtual screen position of current point
+	const float3 camPos = params.m_viewMatrix * worldPos;
+	if (camPos.z < params.camera.m_sensorDepthWorldMin) return;
+	if (camPos.z > params.camera.m_sensorDepthWorldMax) return;
+	const uint2 screenPos = params.camera.cameraToKinectScreen(camPos);
+
+	const int upsample = min(UPSAMPLE_MAX, int((5.0f*r) * params.camera.fx / camPos.z));
+
+	// Not on screen so stop now...
+	if (screenPos.x + upsample < 0 || screenPos.y + upsample < 0 ||
+			screenPos.x - upsample >= depth.width() || screenPos.y - upsample >= depth.height()) return;
+
+	// TODO:(Nick) Check depth buffer and don't do anything if already hidden?
 
-	const int i = threadIdx.y*blockDim.y + threadIdx.x;
-	const int bx = blockIdx.x*blockDim.x;
-	const int by = blockIdx.y*blockDim.y;
-	const int x = bx + threadIdx.x;
-	const int y = by + threadIdx.y;
-
-	for (int j=0; j<numcams; ++j) {
-		const ftl::voxhash::DepthCameraCUDA camera = c_cameras[j];
+	// Each thread in warp takes an upsample point and updates corresponding depth buffer.
+	const int lane = threadIdx.x % WARP_SIZE;
+	for (int i=lane; i<upsample*upsample; i+=WARP_SIZE) {
+		const float u = (i % upsample) - (upsample / 2);
+		const float v = (i / upsample) - (upsample / 2);
+
+		// Make an initial estimate of the points location
+		// Use centroid depth as estimate...?
+		float3 nearest = ftl::cuda::screen_centroid<1>(camera.points, make_float2(screenPos.x+u, screenPos.y+v), make_int2(x,y), params, upsample);
+
+		// Use current points z as estimate
+		//float3 nearest = params.camera.kinectDepthToSkeleton(screenPos.x+u,screenPos.y+v,camPos.z);
+
+		// Or calculate upper and lower bounds for depth and do gradient
+		// descent until the gradient change is too small or max iter is reached
+		// and depth remains within the bounds.
+		// How to find min and max depths?
+
+		float ld = nearest.z;
+
+		// TODO: (Nick) Estimate depth using points plane, but needs better normals.
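+		// A minimal reading of the refinement below: it is a fixed-point search,
+		// where each pass evaluates the weighted neighbourhood centroid via
+		// ftl::cuda::mls_point_surface, steps the depth estimate roughly half the
+		// distance towards that surface point, and accepts the sample once the
+		// remaining move is small relative to the pixel footprint at that depth.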
+		//float t;
+		//if (ftl::cuda::intersectPlane(normal, worldPos, rayOrigin, rayDir, t)) {
+		// Plane based estimate of surface at this pixel
+		//const float3 nearest = rayOrigin + rayDir * camPos.z;
+		float3 output;
+
+		// Use MLS of camera neighbor points to get more exact estimate
+		// Iterate until pixel is stable on the surface.
+		for (int k=0; k<MAX_ITERATIONS; ++k) {
+
+			// TODO:(Nick) Should perhaps use points from all cameras?
+			// Instead of doing each camera separately...
+			// If the depth already is close then it has already been done and can skip this point
+			if (ftl::cuda::mls_point_surface<1>(camera.points, make_int2(x,y), params.m_viewMatrixInverse * nearest, output, SPATIAL_SMOOTHING) <= 0.0f) {
+				/*const unsigned int cx = screenPos.x;
+				const unsigned int cy = screenPos.y;
+				if (cx < depth.width() && cy < depth.height()) {
+					atomicMax(&depth(cx,cy), 10000.0f);
+				}*/
+				break;
+			}
+
+			//ftl::cuda::render_depth(depth, params, output);
+
+			output = params.m_viewMatrix * output;
+
+			// This is essentially the SDF function f(x), only the normal should be estimated also from the weights
+			//const float d = nearest.z + (normal.x*output.x + normal.y*output.y + normal.z*output.z);
+
+			const float d = nearest.z + copysignf(0.5f*length(output - nearest), output.z - nearest.z);
+			nearest = params.camera.kinectDepthToSkeleton(screenPos.x+u,screenPos.y+v,d);
+
+			const float2 sp = params.camera.cameraToKinectScreenFloat(output);
+
+			//if (isStable(nearest, output, params, d)) {
+			//if (fabs(sp.x - float(screenPos.x+u)) < 2.0f && fabs(sp.y - float(screenPos.y+v)) < 2.0f) {
+			if (length(output - nearest) <= 2.0f * params.camera.fx / camPos.z) {
+				const unsigned int cx = screenPos.x+u;
+				const unsigned int cy = screenPos.y+v;
+
+				if (d > params.camera.m_sensorDepthWorldMin && d < params.camera.m_sensorDepthWorldMax && cx < depth.width() && cy < depth.height()) {
+					// Transform estimated point to virtual cam space and output z
+					atomicMin(&depth(cx,cy), d * 1000.0f);
+				}
+				break;
+			}
+
+			/*if (k >= MAX_ITERATIONS-1 && length(output - nearest) <= SPATIAL_SMOOTHING) {
+				const unsigned int cx = screenPos.x+u;
+				const unsigned int cy = screenPos.y+v;
+				if (d > params.camera.m_sensorDepthWorldMin && d < params.camera.m_sensorDepthWorldMax && cx < depth.width() && cy < depth.height()) {
+					//atomicMin(&depth(cx,cy), d * 1000.0f);
+					printf("ERR = %f, %f\n", fabs(sp.x - float(screenPos.x+u)), fabs(sp.y - float(screenPos.y+v)));
+				}
+			}*/
+
+			//nearest = params.camera.kinectDepthToSkeleton(screenPos.x+u,screenPos.y+v,d); // ld + (d - ld)*0.8f
+			ld = d;
+		}
+		//}
+	}
+}
+
+// ------ Alternative for pass 1: principal surfaces ---------------------------
+
+#define NEIGHBOR_RADIUS 1
+#define MAX_NEIGHBORS ((NEIGHBOR_RADIUS*2+1)*(NEIGHBOR_RADIUS*2+1))
+
+/*
+ * Pass 1: Determine depth buffer with enough accuracy for a visibility test in pass 2.
+ * These values are also used as the actual surface estimate during rendering so should
+ * at least be plane or sphere fitted if not MLS smoothed onto the actual surface.
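+ * (Sketch of the difference from dibr_visibility_kernel above: each warp first
+ * caches its point neighbourhood in shared memory and derives a bounded depth
+ * search interval from it, instead of iterating freely per sample.)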
+ */
+ __global__ void dibr_visibility_principal_kernel(TextureObject<int> depth, int cam, SplatParams params) {
+	__shared__ float3 neighborhood_cache[2*T_PER_BLOCK][MAX_NEIGHBORS];
+	__shared__ int minimum[2*T_PER_BLOCK];
+	__shared__ int maximum[2*T_PER_BLOCK];
+
+	const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam];
+
+	const int warp = threadIdx.x / WARP_SIZE + threadIdx.y*2;
+	const int x = (blockIdx.x*blockDim.x + threadIdx.x) / WARP_SIZE;
+	const int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	const float3 worldPos = make_float3(tex2D<float4>(camera.points, x, y));
+	//const float3 normal = make_float3(tex2D<float4>(camera.normal, x, y));
+	if (worldPos.x == MINF) return;
+	const float r = (camera.poseInverse * worldPos).z / camera.params.fx;
+
+	// Get virtual camera ray for splat centre and backface cull if possible
+	//const float3 rayOrigin = params.m_viewMatrixInverse * make_float3(0.0f,0.0f,0.0f);
+	//const float3 rayDir = normalize(params.m_viewMatrixInverse * params.camera.kinectDepthToSkeleton(x,y,1.0f) - rayOrigin);
+	//if (dot(rayDir, normal) > 0.0f) return;
+
+	// Find the virtual screen position of current point
+	const float3 camPos = params.m_viewMatrix * worldPos;
+	if (camPos.z < params.camera.m_sensorDepthWorldMin) return;
+	if (camPos.z > params.camera.m_sensorDepthWorldMax) return;
+	const uint2 screenPos = params.camera.cameraToKinectScreen(camPos);
+
+	const int upsample = min(UPSAMPLE_MAX, int((4.0f*r) * params.camera.fx / camPos.z));
+
+	// Not on screen so stop now...
+	if (screenPos.x + upsample < 0 || screenPos.y + upsample < 0 ||
+			screenPos.x - upsample >= depth.width() || screenPos.y - upsample >= depth.height()) return;
+
+	// TODO:(Nick) Check depth buffer and don't do anything if already hidden?
 
-		float4 d = tex2D<float4>(camera.points, x, y);
-		if (d.z < 0.0f) continue;
-		//if (d >= params.camera.m_sensorDepthWorldMax) continue;
-
-		//const float3 worldPos = camera.pose * camera.params.kinectDepthToSkeleton(x, y, d);
+	// TODO:(Nick) Preload point neighbors and transform to eye
+	const int lane = threadIdx.x % WARP_SIZE;
+	if (lane == 0) {
+		minimum[warp] = 100000000;
+		maximum[warp] = -100000000;
+	}
+
+	__syncwarp();
+
+	for (int i=lane; i<MAX_NEIGHBORS; i+=WARP_SIZE) {
+		const int u = (i % (2*NEIGHBOR_RADIUS+1)) - NEIGHBOR_RADIUS;
+		const int v = (i / (2*NEIGHBOR_RADIUS+1)) - NEIGHBOR_RADIUS;
+		const float3 point = params.m_viewMatrix * make_float3(tex2D<float4>(camera.points, x+u, y+v));
+		neighborhood_cache[warp][i] = point;
+
+		if (length(point - camPos) <= 0.04f) {
+			atomicMin(&minimum[warp], point.z*1000.0f);
+			atomicMax(&maximum[warp], point.z*1000.0f);
+		}
+	}
+
+	__syncwarp();
+
+	const float interval = (float(maximum[warp])/1000.0f - float(minimum[warp]) / 1000.0f) / float(MAX_ITERATIONS);
+	//if (y == 200) printf("interval: %f\n", interval);
+
+	// TODO:(Nick) Find min and max depths of neighbors to estimate z bounds
+
+	// Each thread in warp takes an upsample point and updates corresponding depth buffer.
+	for (int i=lane; i<upsample*upsample; i+=WARP_SIZE) {
+		const float u = (i % upsample) - (upsample / 2);
+		const float v = (i / upsample) - (upsample / 2);
+
+		// Make an initial estimate of the points location
+		// Use centroid depth as estimate...?
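+		// (In this variant the warp's near depth bound, minimum[warp], computed
+		// above is taken as the starting estimate; the loop below then marches
+		// towards maximum[warp] in steps of `interval` until the MLS energy
+		// suggests a surface has been reached.)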
+		//float3 nearest = ftl::cuda::screen_centroid<1>(camera.points, make_float2(screenPos.x+u, screenPos.y+v), make_int2(x,y), params, upsample);
+
+		// Use current points z as estimate
+		// TODO: Use min point as estimate
+		float3 nearest = params.camera.kinectDepthToSkeleton(screenPos.x+u,screenPos.y+v,float(minimum[warp])/1000.0f);
+
+		// Or calculate upper and lower bounds for depth and do gradient
+		// descent until the gradient change is too small or max iter is reached
+		// and depth remains within the bounds.
+		// How to find min and max depths?
+
+		// TODO: (Nick) Estimate depth using points plane, but needs better normals.
+		//float t;
+		//if (ftl::cuda::intersectPlane(normal, worldPos, rayOrigin, rayDir, t)) {
+		// Plane based estimate of surface at this pixel
+		//const float3 nearest = rayOrigin + rayDir * camPos.z;
+
+		// Use MLS of camera neighbor points to get more exact estimate
+		// Iterate until pixel is stable on the surface.
+		for (int k=0; k<MAX_ITERATIONS; ++k) {
+
+			// TODO:(Nick) Should perhaps use points from all cameras?
+			// Instead of doing each camera separately...
+			// If the depth already is close then it has already been done and can skip this point
+			const float energy = ftl::cuda::mls_point_energy<MAX_NEIGHBORS>(neighborhood_cache[warp], nearest, SPATIAL_SMOOTHING);
+
+			if (energy <= 0.0f) break;
+
+			//ftl::cuda::render_depth(depth, params, output);
+
+			// This is essentially the SDF function f(x), only the normal should be estimated also from the weights
+			//const float d = nearest.z + (normal.x*output.x + normal.y*output.y + normal.z*output.z);
+
+			const float d = nearest.z;
+			nearest = params.camera.kinectDepthToSkeleton(screenPos.x+u,screenPos.y+v,d+interval);
+
+			if (energy >= 0.1f) {
+				const unsigned int cx = screenPos.x+u;
+				const unsigned int cy = screenPos.y+v;
+				if (d > params.camera.m_sensorDepthWorldMin && d < params.camera.m_sensorDepthWorldMax && cx < depth.width() && cy < depth.height()) {
+					// Transform estimated point to virtual cam space and output z
+					atomicMin(&depth(cx,cy), d * 1000.0f);
+				}
+				break;
+			}
+		}
+		//}
+	}
+}
+
+__device__ inline float4 make_float4(const uchar4 &c) {
+	return make_float4(c.x,c.y,c.z,c.w);
+}
+
+/*
+ * Pass 2: Accumulate attribute contributions if the points pass a visibility test.
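+ * (Here "visibility" means the source point projects to within DEPTH_THRESHOLD
+ * of the pass 1 depth buffer value at that pixel.)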
+ */
+__global__ void dibr_attribute_contrib_kernel(
+		TextureObject<int> depth_in,
+		TextureObject<float4> colour_out,
+		TextureObject<float4> normal_out,
+		TextureObject<float> contrib_out, int cam,
+		SplatParams params) {
 
-		const float3 worldPos = make_float3(d);
-		const float3 camPos = params.m_viewMatrix * worldPos;
-		const float2 screenPosf = params.camera.cameraToKinectScreenFloat(camPos);
-		const uint2 screenPos = make_uint2(make_int2(screenPosf));
+	const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam];
+
+	const int x = (blockIdx.x*blockDim.x + threadIdx.x) / WARP_SIZE;
+	const int y = blockIdx.y*blockDim.y + threadIdx.y;
 
-		if (camPos.z < params.camera.m_sensorDepthWorldMin) continue;
+	const float3 worldPos = make_float3(tex2D<float4>(camera.points, x, y));
+	//const float3 normal = make_float3(tex2D<float4>(camera.normal, x, y));
+	if (worldPos.x == MINF) return;
+	const float r = (camera.poseInverse * worldPos).z / camera.params.fx;
 
-		const unsigned int cx = screenPos.x;
-		const unsigned int cy = screenPos.y;
+	const float3 camPos = params.m_viewMatrix * worldPos;
+	if (camPos.z < params.camera.m_sensorDepthWorldMin) return;
+	if (camPos.z > params.camera.m_sensorDepthWorldMax) return;
+	const uint2 screenPos = params.camera.cameraToKinectScreen(camPos);
+	const int upsample = min(UPSAMPLE_MAX, int((10.0f*r) * params.camera.fx / camPos.z));
 
-		if (cx < depth.width() && cy < depth.height()) {
-			//float camd = depth_in.tex2D((int)cx,(int)cy);
-			//atomicMin(&depth(x,y), idepth);
-			//float camdiff = fabs(camPos.z-camd);
-			//if (camdiff < 0.1f) {
-				//colour_out(cx,cy) = tex2D<uchar4>(camera.colour,x,y);
-			//} else {
-				//colour_out(cx,cy) = make_uchar4(camdiff * 100, 0, 0, 255);
-			//}
+	// Not on screen so stop now...
+	if (screenPos.x + upsample < 0 || screenPos.y + upsample < 0 ||
+			screenPos.x - upsample >= depth_in.width() || screenPos.y - upsample >= depth_in.height()) return;
 
-			atomicMin(&depth(cx,cy), camPos.z * 1000.0f);
+	// Is this point near the actual surface and therefore a contributor?
+	const float d = ((float)depth_in.tex2D((int)screenPos.x, (int)screenPos.y)/1000.0f);
+	if (abs(d - camPos.z) > DEPTH_THRESHOLD) return;
+
+	// TODO:(Nick) Should just one thread load these to shared mem?
+	const float4 colour = make_float4(tex2D<uchar4>(camera.colour, x, y));
+	const float4 normal = tex2D<float4>(camera.normal, x, y);
+
+	// Each thread in warp takes an upsample point and updates corresponding depth buffer.
+	const int lane = threadIdx.x % WARP_SIZE;
+	for (int i=lane; i<upsample*upsample; i+=WARP_SIZE) {
+		const float u = (i % upsample) - (upsample / 2);
+		const float v = (i / upsample) - (upsample / 2);
+
+		// Use the depth buffer to determine this pixels 3D position in camera space
+		const float d = ((float)depth_in.tex2D(screenPos.x+u, screenPos.y+v)/1000.0f);
+		const float3 nearest = params.camera.kinectDepthToSkeleton((int)(screenPos.x+u),(int)(screenPos.y+v),d);
+
+		// What is contribution of our current point at this pixel?
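+		// (Weighted splat accumulation: each pass below adds weight * attribute
+		// into the float4 buffers and the bare weight into contrib_out, so that
+		// once every camera has run, attribute / contrib as computed in
+		// dibr_normalise_kernel is the weighted mean of all contributors.)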
+		const float weight = ftl::cuda::spatialWeighting(length(nearest - camPos), SPATIAL_SMOOTHING);
+		if (screenPos.x+u < colour_out.width() && screenPos.y+v < colour_out.height() && weight > 0.0f) {  // TODO: Use confidence threshold here
+			const float4 wcolour = colour * weight;
+			const float4 wnormal = normal * weight;
+
+			// Add this points contribution to the pixel buffer
+			atomicAdd((float*)&colour_out(screenPos.x+u, screenPos.y+v), wcolour.x);
+			atomicAdd((float*)&colour_out(screenPos.x+u, screenPos.y+v)+1, wcolour.y);
+			atomicAdd((float*)&colour_out(screenPos.x+u, screenPos.y+v)+2, wcolour.z);
+			atomicAdd((float*)&colour_out(screenPos.x+u, screenPos.y+v)+3, wcolour.w);
+			atomicAdd((float*)&normal_out(screenPos.x+u, screenPos.y+v), wnormal.x);
+			atomicAdd((float*)&normal_out(screenPos.x+u, screenPos.y+v)+1, wnormal.y);
+			atomicAdd((float*)&normal_out(screenPos.x+u, screenPos.y+v)+2, wnormal.z);
+			atomicAdd((float*)&normal_out(screenPos.x+u, screenPos.y+v)+3, wnormal.w);
+			atomicAdd(&contrib_out(screenPos.x+u, screenPos.y+v), weight);
+		}
+	}
+}
 
-__global__ void dibr_kernel(
+/*
+ * Pass 2: Accumulate attribute contributions if the points pass a visibility test.
+ */
+/*__global__ void dibr_attribute_contrib_kernel(
 		TextureObject<int> depth_in,
-		TextureObject<uchar4> colour_out, int numcams, SplatParams params) {
+		TextureObject<uchar4> colour_out,
+		TextureObject<float4> normal_out, int numcams, SplatParams params) {
 
 	const int i = threadIdx.y*blockDim.y + threadIdx.x;
 	const int bx = blockIdx.x*blockDim.x;
@@ -80,119 +376,89 @@ __global__ void dibr_kernel(
 	const int y = by + threadIdx.y;
 
 	for (int j=0; j<numcams; ++j) {
-		const ftl::voxhash::DepthCameraCUDA camera = c_cameras[j];
+		const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[j];
 
-		float4 d = tex2D<float4>(camera.points, x, y);
-		if (d.z < 0.0f) continue;
-		//if (d >= params.camera.m_sensorDepthWorldMax) continue;
+		float3 worldPos = make_float3(tex2D<float4>(camera.points, x, y));
+		float r = (camera.poseInverse * worldPos).z;
+		//if (ftl::cuda::mls_point_surface<3>(camera.points, make_int2(x,y), worldPos, 0.02f) < 0.001f) continue;
+		if (worldPos.x == MINF) continue;
 
-		//const float3 worldPos = camera.pose * camera.params.kinectDepthToSkeleton(x, y, d);
-
-		const float3 worldPos = make_float3(d);
 		const float3 camPos = params.m_viewMatrix * worldPos;
-		const float2 screenPosf = params.camera.cameraToKinectScreenFloat(camPos);
-		const uint2 screenPos = make_uint2(make_int2(screenPosf));
-
-		if (camPos.z < params.camera.m_sensorDepthWorldMin) continue;
-		const unsigned int cx = screenPos.x;
-		const unsigned int cy = screenPos.y;
 
+		// Estimate upsample factor using ratio of source depth and output depth
+		const int upsample = min(15, (int)(UPSAMPLE_FACTOR * (r / camPos.z))+1);
+		const float upfactor = 2.0f / (float)(upsample);
 
-		if (cx < colour_out.width() && cy < colour_out.height()) {
-			//float camd = depth_in.tex2D((int)cx,(int)cy);
-			//atomicMin(&depth(x,y), idepth);
-			//float camdiff = fabs(camPos.z-camd);
-			//if (camdiff < 0.1f) {
-
-			if (depth_in(cx,cy) == (int)(camPos.z * 1000.0f)) {
-				colour_out(cx,cy) = tex2D<uchar4>(camera.colour,x,y);
-				//colour_out(cx,cy) = (j==0) ? make_uchar4(20,255,0,255) : make_uchar4(20,0,255,255);
+		for (int v=0; v<upsample; ++v) {
+			for (int u=0; u<upsample; ++u) {
+				float3 point;
+				const ftl::cuda::fragment nearest = ftl::cuda::upsampled_point(camera.points, camera.normal, camera.colour,
+					make_float2((float)x-1.0f+u*upfactor,(float)y-1.0f+v*upfactor));
+				//if (ftl::cuda::mls_point_surface<3>(camera.points, make_int2(x,y), nearest, point, 0.02f) < 0.001f) continue;
+				ftl::cuda::render_fragment(depth_in, normal_out, colour_out, params, nearest);
 			}
-
-
-			//} else {
-				//colour_out(cx,cy) = make_uchar4(camdiff * 100, 0, 0, 255);
-			//}
 		}
 	}
-}
-
-__device__ inline float4 make_float4(const uchar4 &c) {
-	return make_float4(c.x,c.y,c.z,c.w);
-}
-
-__global__ void dibr_kernel_rev(
-		TextureObject<float> depth_in,
-		TextureObject<uchar4> colour_out, int numcams, SplatParams params) {
-
-	const int i = threadIdx.y*blockDim.y + threadIdx.x;
-	const int bx = blockIdx.x*blockDim.x;
-	const int by = blockIdx.y*blockDim.y;
-	const int x = bx + threadIdx.x;
-	const int y = by + threadIdx.y;
-
-	float camd = depth_in.tex2D((int)x,(int)y);
-	if (camd < 0.01f) return;
-	if (camd >= params.camera.m_sensorDepthWorldMax) return;
-
-	const float3 worldPos = params.m_viewMatrixInverse * params.camera.kinectDepthToSkeleton(x, y, camd);
-	float mindiff = 1000.0f;
-	float4 col = make_float4(0.0f,0.0f,0.0f,0.0f);
-	int count = 0;
-
-	for (int j=0; j<numcams; ++j) {
-		const ftl::voxhash::DepthCameraCUDA camera = c_cameras[j];
-
-		const float3 camPos = camera.poseInverse * worldPos;
-		const float2 screenPosf = camera.params.cameraToKinectScreenFloat(camPos);
-		const uint2 screenPos = make_uint2(make_int2(screenPosf));
-
-		if (camPos.z < params.camera.m_sensorDepthWorldMin) continue;
-		const unsigned int cx = screenPos.x;
-		const unsigned int cy = screenPos.y;
-
-		if (cx < camera.params.m_imageWidth && cy < camera.params.m_imageHeight) {
-			float d = tex2D<float>(camera.depth, (int)cx, (int)cy);
-			float camdiff = fabs(camPos.z-d);
-
-			if (camdiff < mindiff) {
-				mindiff = camdiff;
-				col += make_float4(tex2D<uchar4>(camera.colour,cx,cy));
-				++count;
-			}
-
-			//if (camdiff < 0.1f) {
-			//	colour_out(x,y) = tex2D<uchar4>(camera.colour,cx,cy);
-			//} else {
-				//colour_out(x,y) = make_uchar4(camdiff * 100, 0, 0, 255);
-			//}
-		}
-	}
-
-	if (count > 0) {
-		col = col / (float)count;
-		colour_out(x,y) = make_uchar4(col.x,col.y,col.z,255);
-	} else {
-		colour_out(x,y) = make_uchar4(76,76,76,255);
-	}
-}
+}*/
 
+__global__ void dibr_normalise_kernel(
+		TextureObject<float4> colour_in,
+		TextureObject<uchar4> colour_out,
+		TextureObject<float4> normals,
+		TextureObject<float> contribs) {
+	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
+	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
 
+	if (x < colour_in.width() && y < colour_in.height()) {
+		const float4 colour = colour_in.tex2D((int)x,(int)y);
+		const float4 normal = normals.tex2D((int)x,(int)y);
+		const float contrib = contribs.tex2D((int)x,(int)y);
 
+		if (contrib > 0.0f) {
+			colour_out(x,y) = make_uchar4(colour.x / contrib, colour.y / contrib, colour.z / contrib, 0);
+			normals(x,y) = normal / contrib;
+		}
+	}
+}
 
 void ftl::cuda::dibr(const TextureObject<int> &depth_out,
-	const TextureObject<uchar4> &colour_out, int numcams, const SplatParams &params, cudaStream_t stream) {
-
+		const TextureObject<uchar4> &colour_out,
+		const TextureObject<float4> &normal_out,
+		const TextureObject<float> &confidence_out,
+		const TextureObject<float4> &tmp_colour,
+		int numcams,
+		const SplatParams &params,
+		cudaStream_t stream) {
+
+	const dim3 sgridSize((depth_out.width() + 2 - 1)/2, (depth_out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+	const dim3 sblockSize(2*WARP_SIZE, T_PER_BLOCK);
 	const dim3 gridSize((depth_out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
 	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
 
-	clearColourKernel<<<gridSize, blockSize, 0, stream>>>(colour_out);
+	clearColourKernel<<<gridSize, blockSize, 0, stream>>>(colour_out);
+	ftl::cuda::clear_to_zero(confidence_out, stream);
+	ftl::cuda::clear_colour(tmp_colour, stream);
+	ftl::cuda::clear_colour(normal_out, stream);
 
 #ifdef _DEBUG
 	cudaSafeCall(cudaDeviceSynchronize());
 #endif
 
-	dibr_depthmap_kernel<<<gridSize, blockSize, 0, stream>>>(depth_out, numcams, params);
-	dibr_kernel<<<gridSize, blockSize, 0, stream>>>(depth_out, colour_out, numcams, params);
+	// Pass 1, merge a depth map from each camera.
+	for (int i=0; i<numcams; ++i)
+		dibr_visibility_principal_kernel<<<sgridSize, sblockSize, 0, stream>>>(depth_out, i, params);
+
+	// Pass 2, accumulate all point contributions to pixels
+	for (int i=0; i<numcams; ++i)
+		dibr_attribute_contrib_kernel<<<sgridSize, sblockSize, 0, stream>>>(depth_out, tmp_colour, normal_out, confidence_out, i, params);
+
+	// Pass 3, normalise contributions
+	dibr_normalise_kernel<<<gridSize, blockSize, 0, stream>>>(tmp_colour, colour_out, normal_out, confidence_out);
+
 	cudaSafeCall( cudaGetLastError() );
 
 #ifdef _DEBUG
@@ -200,22 +466,21 @@ void ftl::cuda::dibr(const TextureObject<int> &depth_out,
 #endif
 }
 
-void ftl::cuda::dibr(const TextureObject<float> &depth_out,
-	const TextureObject<uchar4> &colour_out, int numcams, const SplatParams &params, cudaStream_t stream) {
+void ftl::cuda::dibr_raw(const TextureObject<int> &depth_out,
+		int numcams, const SplatParams &params, cudaStream_t stream) {
 	const dim3 gridSize((depth_out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
 	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
-
-	clearColourKernel<<<gridSize, blockSize, 0, stream>>>(colour_out);
 
 #ifdef _DEBUG
 	cudaSafeCall(cudaDeviceSynchronize());
 #endif
 
-	dibr_kernel_rev<<<gridSize, blockSize, 0, stream>>>(depth_out, colour_out, numcams, params);
+	//dibr_depthmap_direct_kernel<<<gridSize, blockSize, 0, stream>>>(depth_out, numcams, params);
 	cudaSafeCall( cudaGetLastError() );
 
 #ifdef _DEBUG
 	cudaSafeCall(cudaDeviceSynchronize());
 #endif
 }
+
diff --git a/applications/reconstruct/src/mls.cu b/applications/reconstruct/src/mls.cu
new file mode 100644
index 0000000000000000000000000000000000000000..71201a017cf1ebab6e06257e9425403df50427d3
--- /dev/null
+++ b/applications/reconstruct/src/mls.cu
@@ -0,0 +1,273 @@
+#include <ftl/cuda_common.hpp>
+#include <ftl/cuda_util.hpp>
+#include <ftl/depth_camera.hpp>
+#include "depth_camera_cuda.hpp"
+
+#include "mls_cuda.hpp"
+
+#define T_PER_BLOCK 16
+#define MINF __int_as_float(0xff800000)
+
+using ftl::voxhash::DepthCameraCUDA;
+using ftl::voxhash::HashData;
+using ftl::voxhash::HashParams;
+using ftl::cuda::TextureObject;
+using ftl::render::SplatParams;
+
+extern __constant__ ftl::voxhash::DepthCameraCUDA c_cameras[MAX_CAMERAS];
+extern __constant__ HashParams c_hashParams;
+
+/// ===== MLS Smooth
+
+/*
+ * Kim, K., Chalidabhongse, T. H., Harwood, D., & Davis, L. (2005).
+ * Real-time foreground-background segmentation using codebook model.
+ * Real-Time Imaging. https://doi.org/10.1016/j.rti.2004.12.004
+ */
+ __device__ float colordiffFloat(const uchar4 &pa, const uchar4 &pb) {
+	const float x_2 = pb.x * pb.x + pb.y * pb.y + pb.z * pb.z;
+	const float v_2 = pa.x * pa.x + pa.y * pa.y + pa.z * pa.z;
+	const float xv_2 = pow(pb.x * pa.x + pb.y * pa.y + pb.z * pa.z, 2);
+	const float p_2 = xv_2 / v_2;
+	return sqrt(x_2 - p_2);
+}
+
+__device__ float colordiffFloat2(const uchar4 &pa, const uchar4 &pb) {
+	float3 delta = make_float3((float)pa.x - (float)pb.x, (float)pa.y - (float)pb.y, (float)pa.z - (float)pb.z);
+	return length(delta);
+}
+
+/*
+ * Colour weighting as suggested in:
+ * C. Kuster et al. Spatio-Temporal Geometry Fusion for Multiple Hybrid Cameras using Moving Least Squares Surfaces. 2014.
+ * c = colour distance
+ */
+ __device__ float colourWeighting(float c) {
+	const float h = c_hashParams.m_colourSmoothing;
+	if (c >= h) return 0.0f;
+	float ch = c / h;
+	ch = 1.0f - ch*ch;
+	return ch*ch*ch*ch;
+}
+
+#define WINDOW_RADIUS 5
+
+__device__ float mlsCamera(int cam, const float3 &mPos, uchar4 c1, float3 &wpos) {
+	const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam];
+
+	const float3 pf = camera.poseInverse * mPos;
+	float3 pos = make_float3(0.0f, 0.0f, 0.0f);
+	const uint2 screenPos = make_uint2(camera.params.cameraToKinectScreenInt(pf));
+	float weights = 0.0f;
+
+	//#pragma unroll
+	for (int v=-WINDOW_RADIUS; v<=WINDOW_RADIUS; ++v) {
+		for (int u=-WINDOW_RADIUS; u<=WINDOW_RADIUS; ++u) {
+			//if (screenPos.x+u < width && screenPos.y+v < height) { //on screen
+				float depth = tex2D<float>(camera.depth, screenPos.x+u, screenPos.y+v);
+				const float3 camPos = camera.params.kinectDepthToSkeleton(screenPos.x+u, screenPos.y+v, depth);
+				float weight = ftl::cuda::spatialWeighting(length(pf - camPos), c_hashParams.m_spatialSmoothing);
+
+				if (weight > 0.0f) {
+					uchar4 c2 = tex2D<uchar4>(camera.colour, screenPos.x+u, screenPos.y+v);
+					weight *= colourWeighting(colordiffFloat2(c1,c2));
+
+					if (weight > 0.0f) {
+						wpos += weight* (camera.pose * camPos);
+						weights += weight;
+					}
+				}
+			//}
+		}
+	}
+
+	//wpos += (camera.pose * pos);
+
+	return weights;
+}
+
+__device__ float mlsCameraNoColour(int cam, const float3 &mPos, uchar4 c1, const float4 &norm, float3 &wpos, float h) {
+	const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam];
+
+	const float3 pf = camera.poseInverse * mPos;
+	float3 pos = make_float3(0.0f, 0.0f, 0.0f);
+	const uint2 screenPos = make_uint2(camera.params.cameraToKinectScreenInt(pf));
+	float weights = 0.0f;
+
+	//#pragma unroll
+	for (int v=-WINDOW_RADIUS; v<=WINDOW_RADIUS; ++v) {
+		for (int u=-WINDOW_RADIUS; u<=WINDOW_RADIUS; ++u) {
+			//if (screenPos.x+u < width && screenPos.y+v < height) { //on screen
+				float depth = tex2D<float>(camera.depth, screenPos.x+u, screenPos.y+v);
+				const float3 camPos = camera.params.kinectDepthToSkeleton(screenPos.x+u, screenPos.y+v, depth);
+
+				// TODO:(Nick) dot product of normals < 0 means the point
+				// should be ignored with a weight of 0 since it is facing the wrong direction
+				// May be good to simply weight using the dot product to give
+				// a stronger weight to those whose normals are closer
+
+				float weight = ftl::cuda::spatialWeighting(length(pf - camPos), h);
+
+				if (weight > 0.0f) {
+					float4 n2 = tex2D<float4>(camera.normal, screenPos.x+u, screenPos.y+v);
+					if (dot(make_float3(norm), make_float3(n2)) > 0.0f) {
+
+						uchar4 c2 = tex2D<uchar4>(camera.colour, screenPos.x+u, screenPos.y+v);
+
+						if (colourWeighting(colordiffFloat2(c1,c2)) > 0.0f) {
+							pos += weight*camPos; // (camera.pose * camPos);
+							weights += weight;
+						}
+					}
+				}
+			//}
+		}
+	}
+
+	if (weights > 0.0f) wpos += (camera.pose * (pos / weights)) * weights;
+
+	return weights;
+}
+
+
+__global__ void mls_smooth_kernel(ftl::cuda::TextureObject<float4> output, HashParams hashParams, int numcams, int cam) {
+	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
+	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	const int width = output.width();
+	const int height = output.height();
+
+	const DepthCameraCUDA &mainCamera = c_cameras[cam];
+
+	if (x < width && y < height) {
+
+		const float depth = tex2D<float>(mainCamera.depth, x, y);
+		const uchar4 c1 = tex2D<uchar4>(mainCamera.colour, x, y);
+		const float4 norm = tex2D<float4>(mainCamera.normal, x, y);
+		//if (x == 400 && y == 200) printf("NORMX: %f\n", norm.x);
+
+		float3 wpos = make_float3(0.0f);
+		float3 wnorm = make_float3(0.0f);
+		float weights = 0.0f;
+
+		if (depth >= mainCamera.params.m_sensorDepthWorldMin && depth <= mainCamera.params.m_sensorDepthWorldMax) {
+			float3 mPos = mainCamera.pose * mainCamera.params.kinectDepthToSkeleton(x, y, depth);
+
+			if ((!(hashParams.m_flags & ftl::voxhash::kFlagClipping)) || (mPos.x > hashParams.m_minBounds.x && mPos.x < hashParams.m_maxBounds.x &&
+					mPos.y > hashParams.m_minBounds.y && mPos.y < hashParams.m_maxBounds.y &&
+					mPos.z > hashParams.m_minBounds.z && mPos.z < hashParams.m_maxBounds.z)) {
+
+				if (hashParams.m_flags & ftl::voxhash::kFlagMLS) {
+					for (uint cam2=0; cam2<numcams; ++cam2) {
+						//if (cam2 == cam) weights += mlsCameraNoColour(cam2, mPos, c1, wpos, c_hashParams.m_spatialSmoothing*0.1f); //weights += 0.5*mlsCamera(cam2, mPos, c1, wpos);
+						weights += mlsCameraNoColour(cam2, mPos, c1, norm, wpos, c_hashParams.m_spatialSmoothing); //*((cam == cam2)? 0.1f : 5.0f));
+
+						// Previous approach
+						//if (cam2 == cam) continue;
+						//weights += mlsCameraBest(cam2, mPos, c1, wpos);
+					}
+					wpos /= weights;
+				} else {
+					weights = 1000.0f;
+					wpos = mPos;
+				}
+
+				//output(x,y) = (weights >= hashParams.m_confidenceThresh) ? make_float4(wpos, 0.0f) : make_float4(MINF,MINF,MINF,MINF);
+
+				if (weights >= hashParams.m_confidenceThresh) output(x,y) = make_float4(wpos, 0.0f);
+
+				//const uint2 screenPos = make_uint2(mainCamera.params.cameraToKinectScreenInt(mainCamera.poseInverse * wpos));
+				//if (screenPos.x < output.width() && screenPos.y < output.height()) {
+				//	output(screenPos.x,screenPos.y) = (weights >= hashParams.m_confidenceThresh) ? make_float4(wpos, 0.0f) : make_float4(MINF,MINF,MINF,MINF);
+				//}
+			}
+		}
+	}
+}
+
+void ftl::cuda::mls_smooth(TextureObject<float4> &output, const HashParams &hashParams, int numcams, int cam, cudaStream_t stream) {
+	const dim3 gridSize((output.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (output.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
+
+	mls_smooth_kernel<<<gridSize, blockSize, 0, stream>>>(output, hashParams, numcams, cam);
+
+#ifdef _DEBUG
+	cudaSafeCall(cudaDeviceSynchronize());
+#endif
+}
+
+
+// ===== Render Depth using MLS ================================================
+
+#define MAX_UPSAMPLE 5
+#define SAMPLE_BUFFER ((2*MAX_UPSAMPLE+1)*(2*MAX_UPSAMPLE+1))
+#define WARP_SIZE 32
+#define BLOCK_WIDTH 4
+#define MLS_RADIUS 5
+#define MLS_WIDTH (2*MLS_RADIUS+1)
+#define MLS_SAMPLES (MLS_WIDTH*MLS_WIDTH)
+
+__global__ void mls_render_depth_kernel(const TextureObject<int> input, TextureObject<int> output, SplatParams params, int numcams) {
+	/*const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
+	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	const int width = output.width();
+	const int height = output.height();
+
+	if (x < width && y < height) {
+
+		const float depth = tex2D<float>(mainCamera.depth, x, y);
+		const uchar4 c1 = tex2D<uchar4>(mainCamera.colour, x, y);
+		const float4 norm = tex2D<float4>(mainCamera.normal, x, y);
+		//if (x == 400 && y == 200) printf("NORMX: %f\n", norm.x);
+
+		float3 wpos = make_float3(0.0f);
+		float3 wnorm = make_float3(0.0f);
+		float weights = 0.0f;
+
+		if (depth >= mainCamera.params.m_sensorDepthWorldMin && depth <= mainCamera.params.m_sensorDepthWorldMax) {
+			float3 mPos = mainCamera.pose * mainCamera.params.kinectDepthToSkeleton(x, y, depth);
+
+			if ((!(hashParams.m_flags & ftl::voxhash::kFlagClipping)) || (mPos.x > hashParams.m_minBounds.x && mPos.x < hashParams.m_maxBounds.x &&
+					mPos.y > hashParams.m_minBounds.y && mPos.y < hashParams.m_maxBounds.y &&
+					mPos.z > hashParams.m_minBounds.z && mPos.z < hashParams.m_maxBounds.z)) {
+
+				if (hashParams.m_flags & ftl::voxhash::kFlagMLS) {
+					for (uint cam2=0; cam2<numcams; ++cam2) {
+						//if (cam2 == cam) weights += mlsCameraNoColour(cam2, mPos, c1, wpos, c_hashParams.m_spatialSmoothing*0.1f); //weights += 0.5*mlsCamera(cam2, mPos, c1, wpos);
+						weights += mlsCameraNoColour(cam2, mPos, c1, norm, wpos, c_hashParams.m_spatialSmoothing); //*((cam == cam2)? 0.1f : 5.0f));
+
+						// Previous approach
+						//if (cam2 == cam) continue;
+						//weights += mlsCameraBest(cam2, mPos, c1, wpos);
+					}
+					wpos /= weights;
+				} else {
+					weights = 1000.0f;
+					wpos = mPos;
+				}
+
+				//output(x,y) = (weights >= hashParams.m_confidenceThresh) ? make_float4(wpos, 0.0f) : make_float4(MINF,MINF,MINF,MINF);
+
+				//if (weights >= hashParams.m_confidenceThresh) output(x,y) = make_float4(wpos, 0.0f);
+
+				const uint2 screenPos = make_uint2(mainCamera.params.cameraToKinectScreenInt(mainCamera.poseInverse * wpos));
+				if (screenPos.x < output.width() && screenPos.y < output.height()) {
+					output(screenPos.x,screenPos.y) = (weights >= hashParams.m_confidenceThresh) ? make_float4(wpos, 0.0f) : make_float4(MINF,MINF,MINF,MINF);
+				}
+			}
+		}
+	}*/
+}
+
+
+void ftl::cuda::mls_render_depth(const TextureObject<int> &input, TextureObject<int> &output, const SplatParams &params, int numcams, cudaStream_t stream) {
+	const dim3 gridSize((output.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (output.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
+
+	mls_render_depth_kernel<<<gridSize, blockSize, 0, stream>>>(input, output, params, numcams);
+
+#ifdef _DEBUG
+	cudaSafeCall(cudaDeviceSynchronize());
+#endif
+}
diff --git a/applications/reconstruct/src/mls_cuda.hpp b/applications/reconstruct/src/mls_cuda.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..420b3beb0a5603abbc1a34e5cfeba5b99912408d
--- /dev/null
+++ b/applications/reconstruct/src/mls_cuda.hpp
@@ -0,0 +1,377 @@
+#ifndef _FTL_MLS_CUDA_HPP_
+#define _FTL_MLS_CUDA_HPP_
+
+#include <ftl/cuda_util.hpp>
+#include <ftl/cuda_common.hpp>
+#include <ftl/cuda_matrix_util.hpp>
+#include "splat_params.hpp"
+
+__device__ inline float3 make_float3(const uchar4 &c) {
+	return make_float3((float)c.x,(float)c.y,(float)c.z);
+}
+
+__device__ inline uchar4 make_uchar4(const float3 &c) {
+	return make_uchar4(c.x,c.y,c.z,255);
+}
+
+namespace ftl {
+namespace cuda {
+
+/*
+ * Guennebaud, G.; Gross, M. Algebraic point set surfaces. ACM Transactions on Graphics Vol. 26, No. 3, Article No. 23, 2007.
+ * Used in: FusionMLS: Highly dynamic 3D reconstruction with consumer-grade RGB-D cameras
+ * r = distance between points
+ * h = smoothing parameter in meters (default 4cm)
+ */
+__device__ inline float spatialWeighting(float r, float h) {
+	if (r >= h) return 0.0f;
+	float rh = r / h;
+	rh = 1.0f - rh*rh;
+	return rh*rh*rh*rh;
+}
+
+__device__ float colourWeighting(float c);
+
+struct fragment {
+	float3 point;
+	float3 normal;
+	uchar4 colour;
+};
+
+__device__ inline float3 upsampled_point(cudaTextureObject_t pointset, const float2 &uv) {
+	float3 R = make_float3(0.0f, 0.0f, 0.0f);
+	const float3 P1 = make_float3(tex2D<float4>(pointset, int(uv.x), int(uv.y)));
+	const float D1 = 1.0f - length(uv - make_float2(int(uv.x), int(uv.y)));
+	R += D1 * P1;
+
+	const float3 P2 = make_float3(tex2D<float4>(pointset, int(uv.x), int(uv.y+1.0f)));
+	const float D2 = 1.0f - length(uv - make_float2(int(uv.x), int(uv.y+1.0f)));
+	R += D2 * P2;
+
+	const float3 P3 = make_float3(tex2D<float4>(pointset, int(uv.x+1.0f), int(uv.y)));
+	const float D3 = 1.0f - length(uv - make_float2(int(uv.x+1.0f), int(uv.y)));
+	R += D3 * P3;
+
+	const float3 P4 = make_float3(tex2D<float4>(pointset, int(uv.x+1.0f), int(uv.y+1.0f)));
+	const float D4 = 1.0f - length(uv - make_float2(int(uv.x+1.0f), int(uv.y+1.0f)));
+	R += D4 * P4;
+
+	// R is the centroid of surrounding points.
+	R /= (D1+D2+D3+D4);
+
+	// FIXME: Should not use centroid but instead sample the surface at this point
+	// Use plane estimate at point to get "centroid" and then do the spatial weighted sample?
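+	// (Note: D1..D4 above are 1 - |uv - corner| weights over the four integer
+	// neighbours, so a diagonal corner further than one pixel away contributes
+	// a slightly negative weight; clamping to zero may be the intent here.)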
+	return R;
+}
+
+__device__ inline fragment upsampled_point(cudaTextureObject_t pointset,
+		cudaTextureObject_t normals, cudaTextureObject_t colours, const float2 &uv) {
+	float3 R = make_float3(0.0f, 0.0f, 0.0f);
+	float3 N = make_float3(0.0f, 0.0f, 0.0f);
+	float3 C = make_float3(0.0f, 0.0f, 0.0f);
+
+	// TODO:(Nick) Don't upsample points if distance is too great
+
+	const float3 P1 = make_float3(tex2D<float4>(pointset, int(uv.x), int(uv.y)));
+	const float D1 = 1.0f - length(uv - make_float2(int(uv.x), int(uv.y)));
+	R += D1 * P1;
+	N += D1 * make_float3(tex2D<float4>(normals, int(uv.x), int(uv.y)));
+	C += D1 * make_float3(tex2D<uchar4>(colours, int(uv.x), int(uv.y)));
+
+	const float3 P2 = make_float3(tex2D<float4>(pointset, int(uv.x), int(uv.y+1.0f)));
+	const float D2 = 1.0f - length(uv - make_float2(int(uv.x), int(uv.y+1.0f)));
+	R += D2 * P2;
+	N += D2 * make_float3(tex2D<float4>(normals, int(uv.x), int(uv.y+1.0f)));
+	C += D2 * make_float3(tex2D<uchar4>(colours, int(uv.x), int(uv.y+1.0f)));
+
+	const float3 P3 = make_float3(tex2D<float4>(pointset, int(uv.x+1.0f), int(uv.y)));
+	const float D3 = 1.0f - length(uv - make_float2(int(uv.x+1.0f), int(uv.y)));
+	R += D3 * P3;
+	N += D3 * make_float3(tex2D<float4>(normals, int(uv.x+1.0f), int(uv.y)));
+	C += D3 * make_float3(tex2D<uchar4>(colours, int(uv.x+1.0f), int(uv.y)));
+
+	const float3 P4 = make_float3(tex2D<float4>(pointset, int(uv.x+1.0f), int(uv.y+1.0f)));
+	const float D4 = 1.0f - length(uv - make_float2(int(uv.x+1.0f), int(uv.y+1.0f)));
+	R += D4 * P4;
+	N += D4 * make_float3(tex2D<float4>(normals, int(uv.x+1.0f), int(uv.y+1.0f)));
+	C += D4 * make_float3(tex2D<uchar4>(colours, int(uv.x+1.0f), int(uv.y+1.0f)));
+
+	return {R / (D1+D2+D3+D4), N / (D1+D2+D3+D4), make_uchar4(C / (D1+D2+D3+D4))};
+}
+
+__device__ inline void render_depth(ftl::cuda::TextureObject<int> &depth, ftl::render::SplatParams &params, const float3 &worldPos) {
+	const float3 camPos = params.m_viewMatrix * worldPos;
+	const float d = camPos.z;
+	if (d < params.camera.m_sensorDepthWorldMin) return;
+
+	const float2 screenPosf = params.camera.cameraToKinectScreenFloat(camPos);
+	const uint2 screenPos = make_uint2(make_int2(screenPosf));
+
+	const unsigned int cx = screenPos.x;
+	const unsigned int cy = screenPos.y;
+
+	if (cx < depth.width() && cy < depth.height()) {
+		atomicMin(&depth(cx,cy), d * 1000.0f);
+	}
+}
+
+__device__ inline void render_fragment(
+		ftl::cuda::TextureObject<int> &depth_in,
+		ftl::cuda::TextureObject<float4> &normal_out,
+		ftl::cuda::TextureObject<uchar4> &colour_out,
+		ftl::render::SplatParams &params, const fragment &frag) {
+	const float3 camPos = params.m_viewMatrix * frag.point;
+	const float d = camPos.z;
+	if (d < params.camera.m_sensorDepthWorldMin) return;
+
+	const float2 screenPosf = params.camera.cameraToKinectScreenFloat(camPos);
+	const uint2 screenPos = make_uint2(make_int2(screenPosf));
+
+	const unsigned int cx = screenPos.x;
+	const unsigned int cy = screenPos.y;
+
+	if (cx < depth_in.width() && cy < depth_in.height()) {
+		if (depth_in(cx,cy) == (int)(d * 1000.0f)) {
+			colour_out(cx,cy) = frag.colour;
+			normal_out(cx,cy) = make_float4(frag.normal, 0.0f);
+		}
+	}
+}
+
+/**
+ * Estimate the point set surface location near to a given point.
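+ * (Screen-space variant: each neighbouring point is weighted by the distance
+ * between its projected position and the requested sub-pixel location suv.)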
+ */ +template <int R> +__device__ float3 screen_centroid( + cudaTextureObject_t pointset, + const float2 &suv, + const int2 &uv, + const ftl::render::SplatParams &params, + float smoothing) { + + float3 pos = make_float3(0.0f, 0.0f, 0.0f); + float weights = 0.0f; + + //#pragma unroll + for (int v=-R; v<=R; ++v) { + for (int u=-R; u<=R; ++u) { + //if (uv.x+u >= 0 && uv.x+u < pointset.width() && uv.y+v >= 0 && uv.y+v < pointset.height()) { + const float3 samplePoint = params.m_viewMatrix * make_float3(tex2D<float4>(pointset, uv.x+u, uv.y+v)); + const float weight = ftl::cuda::spatialWeighting(length(suv - params.camera.cameraToKinectScreenFloat(samplePoint)), smoothing); + pos += weight*samplePoint; + weights += weight; + //} + } + } + + if (weights > 0.0f) pos = pos / weights; + return pos; +} + +/** + * Estimate a point set surface point from an existing point in the set. + */ +template <int R> +__device__ float mls_point_surface( + cudaTextureObject_t pointset, + const int2 &uv, + float3 &estPoint, + float smoothing) { + + float3 pos = make_float3(0.0f, 0.0f, 0.0f); + float weights = 0.0f; + const float3 nearPoint = make_float3(tex2D<float4>(pointset, uv.x, uv.y)); + + //#pragma unroll + for (int v=-R; v<=R; ++v) { + for (int u=-R; u<=R; ++u) { + //if (uv.x+u >= 0 && uv.x+u < pointset.width() && uv.y+v >= 0 && uv.y+v < pointset.height()) { + const float3 samplePoint = make_float3(tex2D<float4>(pointset, uv.x+u, uv.y+v)); + const float weight = ftl::cuda::spatialWeighting(length(nearPoint - samplePoint), smoothing); + pos += weight*samplePoint; + weights += weight; + //} + } + } + + if (weights > 0.0f) estPoint = pos / weights; + return weights; +} + +/** + * Estimate the point set surface location near a given point. + */ +template <int R> +__device__ float mls_point_surface( + cudaTextureObject_t pointset, + const int2 &uv, + const float3 &nearPoint, + float3 &estPoint, + float smoothing) { + + float3 pos = make_float3(0.0f, 0.0f, 0.0f); + float weights = 0.0f; + + //#pragma unroll + for (int v=-R; v<=R; ++v) { + for (int u=-R; u<=R; ++u) { + //if (uv.x+u >= 0 && uv.x+u < pointset.width() && uv.y+v >= 0 && uv.y+v < pointset.height()) { + const float3 samplePoint = make_float3(tex2D<float4>(pointset, uv.x+u, uv.y+v)); + const float weight = ftl::cuda::spatialWeighting(length(nearPoint - samplePoint), smoothing); + pos += weight*samplePoint; + weights += weight; + //} + } + } + + if (weights > 0.0f) estPoint = pos / weights; + return weights; +} + +/** + * Calculate the point sample energy. + */ +template <int R> +__device__ float mls_point_energy( + cudaTextureObject_t pointset, + const int2 &uv, + const float3 &nearPoint, + float smoothing) { + + float weights = 0.0f; + + //#pragma unroll + for (int v=-R; v<=R; ++v) { + for (int u=-R; u<=R; ++u) { + //if (uv.x+u >= 0 && uv.x+u < pointset.width() && uv.y+v >= 0 && uv.y+v < pointset.height()) { + const float3 samplePoint = make_float3(tex2D<float4>(pointset, uv.x+u, uv.y+v)); + const float weight = ftl::cuda::spatialWeighting(length(nearPoint - samplePoint), smoothing); + weights += weight; + //} + } + } + + return weights; +} + +/** + * Calculate the point sample energy.
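+ * This overload takes a fixed-size array of M points rather than a texture. + * The energy is simply the sum of spatial weights around nearPoint, so a + * larger value indicates denser local support for the surface estimate.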
+ */ +template <int M> +__device__ float mls_point_energy( + const float3 (&pointset)[M], + const float3 &nearPoint, + float smoothing) { + + float weights = 0.0f; + + //#pragma unroll + for (int i=0; i<M; ++i) { + const float3 samplePoint = pointset[i]; + const float weight = ftl::cuda::spatialWeighting(length(nearPoint - samplePoint), smoothing); + weights += weight; + } + + return weights; +} + +/** + * Estimate a point set surface location near an existing point in the set, + * and also return an estimate of the normal and colour of that point. + */ +template <int R> +__device__ float mls_point_surface( + cudaTextureObject_t pointset, + cudaTextureObject_t normalset, + cudaTextureObject_t colourset, + const int2 &uv, + float3 &estPoint, + float3 &estNormal, + uchar4 &estColour, + float smoothing) { + + float3 pos = make_float3(0.0f, 0.0f, 0.0f); + float3 normal = make_float3(0.0f, 0.0f, 0.0f); + float3 colour = make_float3(0.0f, 0.0f, 0.0f); + float weights = 0.0f; + const float3 nearPoint = make_float3(tex2D<float4>(pointset, uv.x, uv.y)); + + //#pragma unroll + for (int v=-R; v<=R; ++v) { + for (int u=-R; u<=R; ++u) { + //if (uv.x+u >= 0 && uv.x+u < pointset.width() && uv.y+v >= 0 && uv.y+v < pointset.height()) { + const float3 samplePoint = make_float3(tex2D<float4>(pointset, uv.x+u, uv.y+v)); + const float weight = spatialWeighting(length(nearPoint - samplePoint), smoothing); + + if (weight > 0.0f) { + pos += weight*samplePoint; + weights += weight; + + normal += weight * make_float3(tex2D<float4>(normalset, uv.x+u, uv.y+v)); + const uchar4 c = tex2D<uchar4>(colourset, uv.x+u, uv.y+v); + colour += weight * make_float3(c.x, c.y, c.z); + } + //} + } + } + + if (weights > 0.0f) { + estPoint = pos / weights; + estNormal = normal / weights; + estColour = make_uchar4(colour.x / weights, colour.y / weights, colour.z / weights, 255); + } + return weights; +} + +/** + * Estimate a point set surface location near a given point, and also return + * an estimate of the normal and colour of that point.
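+ * Unlike the overload above, the query point is supplied explicitly rather + * than being read from the pointset texture at uv.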
+ */ +template <int R> +__device__ float mls_point_surface( + cudaTextureObject_t pointset, + cudaTextureObject_t normalset, + cudaTextureObject_t colourset, + const int2 &uv, + const float3 &nearPoint, + float3 &estPoint, + float3 &estNormal, + uchar4 &estColour, + float smoothing) { + + float3 pos = make_float3(0.0f, 0.0f, 0.0f); + float3 normal = make_float3(0.0f, 0.0f, 0.0f); + float3 colour = make_float3(0.0f, 0.0f, 0.0f); + float weights = 0.0f; + + //#pragma unroll + for (int v=-R; v<=R; ++v) { + for (int u=-R; u<=R; ++u) { + //if (uv.x+u >= 0 && uv.x+u < pointset.width() && uv.y+v >= 0 && uv.y+v < pointset.height()) { + const float3 samplePoint = make_float3(tex2D<float4>(pointset, uv.x+u, uv.y+v)); + const float weight = spatialWeighting(length(nearPoint - samplePoint), smoothing); + + if (weight > 0.0f) { + pos += weight*samplePoint; + weights += weight; + + normal += weight * make_float3(tex2D<float4>(normalset, uv.x+u, uv.y+v)); + const uchar4 c = tex2D<uchar4>(colourset, uv.x+u, uv.y+v); + colour += weight * make_float3(c.x, c.y, c.z); + } + //} + } + } + + if (weights > 0.0f) { + estPoint = pos / weights; + estNormal = normal / weights; + estColour = make_uchar4(colour.x / weights, colour.y / weights, colour.z / weights, 255); + } + return weights; +} + +} +} + +#endif // _FTL_MLS_CUDA_HPP_ diff --git a/applications/reconstruct/src/splat_params.hpp b/applications/reconstruct/src/splat_params.hpp index e38e4447286bf6c70f6c753deed35154b6aafd8a..cb2d3febda3364a3407264711620a25f9dc116a1 100644 --- a/applications/reconstruct/src/splat_params.hpp +++ b/applications/reconstruct/src/splat_params.hpp @@ -9,6 +9,7 @@ namespace ftl { namespace render { static const uint kShowBlockBorders = 0x0001; +static const uint kNoSplatting = 0x0002; struct __align__(16) SplatParams { float4x4 m_viewMatrix; @@ -16,6 +17,7 @@ struct __align__(16) SplatParams { uint m_flags; float voxelSize; + float depthThreshold; DepthCameraParams camera; }; diff --git a/applications/reconstruct/src/splat_render.cpp b/applications/reconstruct/src/splat_render.cpp index 92d70a1c50d06e985fcb0faa1ef8f0bd5b961f86..1f7a18f66af2a0e6cd30fac3e4c139355028f6db 100644 --- a/applications/reconstruct/src/splat_render.cpp +++ b/applications/reconstruct/src/splat_render.cpp @@ -24,9 +24,18 @@ void Splatter::render(ftl::rgbd::Source *src, cudaStream_t stream) { if ((unsigned int)depth1_.width() != camera.width || (unsigned int)depth1_.height() != camera.height) { depth1_ = ftl::cuda::TextureObject<int>(camera.width, camera.height); } + if ((unsigned int)depth3_.width() != camera.width || (unsigned int)depth3_.height() != camera.height) { + depth3_ = ftl::cuda::TextureObject<int>(camera.width, camera.height); + } if ((unsigned int)colour1_.width() != camera.width || (unsigned int)colour1_.height() != camera.height) { colour1_ = ftl::cuda::TextureObject<uchar4>(camera.width, camera.height); } + if ((unsigned int)colour_tmp_.width() != camera.width || (unsigned int)colour_tmp_.height() != camera.height) { + colour_tmp_ = ftl::cuda::TextureObject<float4>(camera.width, camera.height); + } + if ((unsigned int)normal1_.width() != camera.width || (unsigned int)normal1_.height() != camera.height) { + normal1_ = ftl::cuda::TextureObject<float4>(camera.width, camera.height); + } if ((unsigned int)depth2_.width() != camera.width || (unsigned int)depth2_.height() != camera.height) { depth2_ = ftl::cuda::TextureObject<float>(camera.width, camera.height); } @@ -56,30 +65,31 @@ void Splatter::render(ftl::rgbd::Source *src, cudaStream_t stream) { 
if (scene_->value("voxels", false)) { // TODO:(Nick) Stereo for voxel version ftl::cuda::isosurface_point_image(scene_->getHashData(), depth1_, params, stream); - ftl::cuda::splat_points(depth1_, depth2_, params, stream); - ftl::cuda::dibr(depth2_, colour1_, scene_->cameraCount(), params, stream); + //ftl::cuda::splat_points(depth1_, depth2_, params, stream); + //ftl::cuda::dibr(depth2_, colour1_, scene_->cameraCount(), params, stream); src->writeFrames(colour1_, depth2_, stream); } else { - //ftl::cuda::clear_colour(colour1_, stream); ftl::cuda::clear_depth(depth1_, stream); + ftl::cuda::clear_depth(depth3_, stream); ftl::cuda::clear_depth(depth2_, stream); - ftl::cuda::dibr(depth1_, colour1_, scene_->cameraCount(), params, stream); - //ftl::cuda::hole_fill(depth1_, depth2_, params.camera, stream); + ftl::cuda::clear_colour(colour2_, stream); + ftl::cuda::dibr(depth1_, colour1_, normal1_, depth2_, colour_tmp_, scene_->cameraCount(), params, stream); - // Splat the depth from first DIBR to expand the points - ftl::cuda::splat_points(depth1_, depth2_, params, stream); + // Step 1: Put all points into virtual view to gather them + //ftl::cuda::dibr_raw(depth1_, scene_->cameraCount(), params, stream); - // Alternative to above... - //ftl::cuda::mls_resample(depth1_, colour1_, depth2_, scene_->getHashParams(), scene_->cameraCount(), params, stream); - - - // Use reverse sampling to obtain more colour details - // Should use nearest neighbor point depths to verify which camera to use - //ftl::cuda::dibr(depth2_, colour1_, scene_->cameraCount(), params, stream); + // Step 2: For each point, use a warp to do MLS and up sample + //ftl::cuda::mls_render_depth(depth1_, depth3_, params, scene_->cameraCount(), stream); if (src->getChannel() == ftl::rgbd::kChanDepth) { - ftl::cuda::int_to_float(depth1_, depth2_, 1.0f / 1000.0f, stream); - src->writeFrames(colour1_, depth2_, stream); + //ftl::cuda::int_to_float(depth1_, depth2_, 1.0f / 1000.0f, stream); + if (src->value("splatting", false)) { + //ftl::cuda::splat_points(depth1_, colour1_, normal1_, depth2_, colour2_, params, stream); + src->writeFrames(colour2_, depth2_, stream); + } else { + ftl::cuda::int_to_float(depth1_, depth2_, 1.0f / 1000.0f, stream); + src->writeFrames(colour1_, depth2_, stream); + } } else if (src->getChannel() == ftl::rgbd::kChanRight) { // Adjust pose to right eye position Eigen::Affine3f transform(Eigen::Translation3f(camera.baseline,0.0f,0.0f)); @@ -88,10 +98,16 @@ void Splatter::render(ftl::rgbd::Source *src, cudaStream_t stream) { params.m_viewMatrixInverse = MatrixConversion::toCUDA(matrix); ftl::cuda::clear_depth(depth1_, stream); - ftl::cuda::dibr(depth1_, colour2_, scene_->cameraCount(), params, stream); + ftl::cuda::dibr(depth1_, colour1_, normal1_, depth2_, colour_tmp_, scene_->cameraCount(), params, stream); src->writeFrames(colour1_, colour2_, stream); } else { - src->writeFrames(colour1_, depth2_, stream); + if (src->value("splatting", false)) { + //ftl::cuda::splat_points(depth1_, colour1_, normal1_, depth2_, colour2_, params, stream); + src->writeFrames(colour2_, depth2_, stream); + } else { + ftl::cuda::int_to_float(depth1_, depth2_, 1.0f / 1000.0f, stream); + src->writeFrames(colour1_, depth2_, stream); + } } } diff --git a/applications/reconstruct/src/splat_render.hpp b/applications/reconstruct/src/splat_render.hpp index 7737bb5d903d725a55b81d75812b99b52e7ace66..7511e859ff6471dcba18d5aee5518f55045eef9b 100644 --- a/applications/reconstruct/src/splat_render.hpp +++ 
b/applications/reconstruct/src/splat_render.hpp @@ -33,9 +33,12 @@ class Splatter { private: int device_; ftl::cuda::TextureObject<int> depth1_; + ftl::cuda::TextureObject<int> depth3_; ftl::cuda::TextureObject<uchar4> colour1_; + ftl::cuda::TextureObject<float4> colour_tmp_; ftl::cuda::TextureObject<float> depth2_; ftl::cuda::TextureObject<uchar4> colour2_; + ftl::cuda::TextureObject<float4> normal1_; SplatParams params_; ftl::voxhash::SceneRep *scene_; }; diff --git a/applications/reconstruct/src/splat_render_cuda.hpp b/applications/reconstruct/src/splat_render_cuda.hpp index d8fd9cf55649280c20805c8d4f8b9f7b758b2223..f8cbdbb6bd87f7ea9e2275e000ad75de87e9dd11 100644 --- a/applications/reconstruct/src/splat_render_cuda.hpp +++ b/applications/reconstruct/src/splat_render_cuda.hpp @@ -10,6 +10,80 @@ namespace ftl { namespace cuda { +__device__ inline bool intersectPlane(const float3 &n, const float3 &p0, const float3 &l0, const float3 &l, float &t) { + // assuming vectors are all normalized + float denom = dot(n, l); + if (denom > 1e-6) { + t = dot(p0 - l0, n) / denom; + return (t >= 0); + } + + return false; +} + +__device__ inline bool intersectPlane(const float3 &n, const float3 &p0, const float3 &l, float &t) { + // assuming vectors are all normalized + float denom = dot(n, l); + if (denom > 1e-6) { + t = dot(p0, n) / denom; + return (t >= 0); + } + return false; +} + +__device__ inline bool intersectDisk(const float3 &n, const float3 &p0, float radius, const float3 &l0, const float3 &l) { + float t = 0; + if (intersectPlane(n, p0, l0, l, t)) { + float3 p = l0 + l * t; + float3 v = p - p0; + float d2 = dot(v, v); + return (sqrt(d2) <= radius); + // or you can use the following optimisation (and precompute radius^2) + // return d2 <= radius2; // where radius2 = radius * radius + } + return false; +} + +/** + * Get the radius of a ray intersection with a disk. + * @param n Normalised normal of disk. + * @param p0 Centre of disk in camera space + * @param l0 Origin of ray in camera space + * @param l Normalised ray direction in camera space + * @return Radius from centre of disk where intersection occurred. + */ +__device__ inline float intersectDistance(const float3 &n, const float3 &p0, const float3 &l0, const float3 &l) { + float t = 0; + if (intersectPlane(n, p0, l0, l, t)) { + const float3 p = l0 + l * t; + const float3 v = p - p0; + const float d2 = dot(v, v); + return sqrt(d2); + } + return PINF; +} + +/** + * Get the radius of a ray intersection with a disk, for a ray from the origin. + * @param n Normalised normal of disk. + * @param p0 Centre of disk in camera space + * @param l Normalised ray direction in camera space + * @return Radius from centre of disk where intersection occurred. + */ +__device__ inline float intersectDistance(const float3 &n, const float3 &p0, const float3 &l) { + float t = 0; + if (intersectPlane(n, p0, l, t)) { + const float3 p = l * t; + const float3 v = p - p0; + const float d2 = dot(v, v); + return sqrt(d2); + } + return PINF; +} + /** * NOTE: Not strictly isosurface currently since it includes the internals * of objects up to at most truncation depth.
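+// The intersectPlane/intersectDistance helpers above follow the standard +// ray/plane recipe: with unit normal n, plane point p0, ray origin l0 and +// unit direction l, the hit parameter is t = dot(p0 - l0, n) / dot(n, l), +// and intersectDistance() reports the radial offset |l0 + t*l - p0| of that +// hit from the disk centre, or PINF on a miss.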
@@ -26,11 +100,22 @@ void isosurface_point_image(const ftl::voxhash::HashData& hashData, // TODO: isosurface_point_cloud void splat_points(const ftl::cuda::TextureObject<int> &depth_in, + const ftl::cuda::TextureObject<uchar4> &colour_in, + const ftl::cuda::TextureObject<float4> &normal_in, const ftl::cuda::TextureObject<float> &depth_out, - const ftl::render::SplatParams &params, cudaStream_t stream); + const ftl::cuda::TextureObject<uchar4> &colour_out, const ftl::render::SplatParams &params, cudaStream_t stream); void dibr(const ftl::cuda::TextureObject<int> &depth_out, - const ftl::cuda::TextureObject<uchar4> &colour_out, int numcams, + const ftl::cuda::TextureObject<uchar4> &colour_out, + const ftl::cuda::TextureObject<float4> &normal_out, + const ftl::cuda::TextureObject<float> &confidence_out, + const ftl::cuda::TextureObject<float4> &tmp_colour, int numcams, + const ftl::render::SplatParams &params, cudaStream_t stream); + +/** + * Directly render input depth maps to virtual view with clipping. + */ +void dibr_raw(const ftl::cuda::TextureObject<int> &depth_out, int numcams, + const ftl::render::SplatParams &params, cudaStream_t stream); void dibr(const ftl::cuda::TextureObject<float> &depth_out, diff --git a/applications/reconstruct/src/voxel_hash.cpp b/applications/reconstruct/src/voxel_hash.cpp index 14cb75078652001275404e87c09f77f0b9d19385..6f929c746d66cae5c382bf15b2d25e26f6b46702 100644 --- a/applications/reconstruct/src/voxel_hash.cpp +++ b/applications/reconstruct/src/voxel_hash.cpp @@ -1,4 +1,5 @@ #include <ftl/voxel_hash.hpp> +#include <loguru.hpp> using ftl::voxhash::HashData; using ftl::voxhash::HashParams; diff --git a/applications/reconstruct/src/splat_render.cu b/applications/reconstruct/src/voxel_render.cu similarity index 65% rename from applications/reconstruct/src/splat_render.cu rename to applications/reconstruct/src/voxel_render.cu index 3addf108dcf89ea8f3069e3ba0daea59e67ae918..46eb6f57c031b0541add1dcd982a9c6d1e690140 100644 --- a/applications/reconstruct/src/splat_render.cu +++ b/applications/reconstruct/src/voxel_render.cu @@ -258,143 +258,4 @@ void ftl::cuda::isosurface_point_image(const ftl::voxhash::HashData& hashData, #endif } -// ---- Pass 2: Expand the point splats ---------------------------------------- - -#define SPLAT_RADIUS 7 -#define SPLAT_BOUNDS (2*SPLAT_RADIUS+T_PER_BLOCK+1) -#define SPLAT_BUFFER_SIZE (SPLAT_BOUNDS*SPLAT_BOUNDS) -#define MAX_VALID 100 - -__device__ float distance2(float3 a, float3 b) { - const float x = a.x-b.x; - const float y = a.y-b.y; - const float z = a.z-b.z; - return x*x+y*y+z*z; -} - -__global__ void splatting_kernel( - TextureObject<int> depth_in, - TextureObject<float> depth_out, SplatParams params) { - // Read an NxN region and - // - interpolate a depth value for this pixel - // - interpolate an rgb value for this pixel - // Must respect depth discontinuities.
- // How much influence a given neighbour has depends on its depth value - - __shared__ float3 positions[SPLAT_BUFFER_SIZE]; - - const int i = threadIdx.y*blockDim.y + threadIdx.x; - const int bx = blockIdx.x*blockDim.x; - const int by = blockIdx.y*blockDim.y; - const int x = bx + threadIdx.x; - const int y = by + threadIdx.y; - - // const float camMinDepth = params.camera.m_sensorDepthWorldMin; - // const float camMaxDepth = params.camera.m_sensorDepthWorldMax; - - for (int j=i; j<SPLAT_BUFFER_SIZE; j+=T_PER_BLOCK) { - const unsigned int sx = (j % SPLAT_BOUNDS)+bx-SPLAT_RADIUS; - const unsigned int sy = (j / SPLAT_BOUNDS)+by-SPLAT_RADIUS; - if (sx >= depth_in.width() || sy >= depth_in.height()) { - positions[j] = make_float3(1000.0f,1000.0f, 1000.0f); - } else { - positions[j] = params.camera.kinectDepthToSkeleton(sx, sy, (float)depth_in.tex2D((int)sx,(int)sy) / 1000.0f); - } - } - __syncthreads(); - - if (x >= depth_in.width() || y >= depth_in.height()) return; - - const float voxelSquared = params.voxelSize*params.voxelSize; - float mindepth = 1000.0f; - int minidx = -1; - // float3 minpos; - - //float3 validPos[MAX_VALID]; - int validIndices[MAX_VALID]; - int validix = 0; - - for (int v=-SPLAT_RADIUS; v<=SPLAT_RADIUS; ++v) { - for (int u=-SPLAT_RADIUS; u<=SPLAT_RADIUS; ++u) { - //const int idx = (threadIdx.y+v)*SPLAT_BOUNDS+threadIdx.x+u; - const int idx = (threadIdx.y+v+SPLAT_RADIUS)*SPLAT_BOUNDS+threadIdx.x+u+SPLAT_RADIUS; - - float3 posp = positions[idx]; - const float d = posp.z; - //if (d < camMinDepth || d > camMaxDepth) continue; - - float3 pos = params.camera.kinectDepthToSkeleton(x, y, d); - float dist = distance2(pos, posp); - - if (dist < voxelSquared) { - // Valid so check for minimum - //validPos[validix] = pos; - //validIndices[validix++] = idx; - if (d < mindepth) { - mindepth = d; - minidx = idx; - // minpos = pos; - } - } - } - } - - if (minidx == -1) { - depth_out(x,y) = 0.0f; - //colour_out(x,y) = make_uchar4(76,76,82,255); - return; - } - - //float3 colour = make_float3(0.0f, 0.0f, 0.0f); - float depth = 0.0f; - float contrib = 0.0f; - float3 pos = params.camera.kinectDepthToSkeleton(x, y, mindepth); // TODO:(Nick) Mindepth assumption is poor choice. 
- - //for (int j=0; j<validix; ++j) { - const int idx = minidx; //validIndices[j]; - float3 posp = positions[idx]; - //float3 pos = params.camera.kinectDepthToSkeleton(x, y, posp.z); - float3 delta = (posp - pos) / 2*params.voxelSize; - float dist = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; - - // It contributes to pixel - if (dist < 1.0f && fabs(posp.z - mindepth) < 2*params.voxelSize) { - const unsigned int sx = (idx % SPLAT_BOUNDS)+bx-SPLAT_RADIUS; - const unsigned int sy = (idx / SPLAT_BOUNDS)+by-SPLAT_RADIUS; - - // Fast and simple trilinear interpolation - float c = fabs((1.0f - delta.x) * (1.0f - delta.y) * (1.0f - delta.z)); - //uchar4 col = colour_in.tex2D((int)sx, (int)sy); - //colour.x += col.x*c; - //colour.y += col.y*c; - //colour.z += col.z*c; - contrib += c; - depth += posp.z * c; - } - //} - - // Normalise - //colour.x /= contrib; - //colour.y /= contrib; - //colour.z /= contrib; - depth /= contrib; - - depth_out(x,y) = depth; - //colour_out(x,y) = make_uchar4(colour.x, colour.y, colour.z, 255); -} - -void ftl::cuda::splat_points(const TextureObject<int> &depth_in, - const TextureObject<float> &depth_out, const SplatParams &params, cudaStream_t stream) -{ - - const dim3 gridSize((depth_in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); - const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); - - splatting_kernel<<<gridSize, blockSize, 0, stream>>>(depth_in, depth_out, params); - cudaSafeCall( cudaGetLastError() ); - -#ifdef _DEBUG - cudaSafeCall(cudaDeviceSynchronize()); -#endif -} diff --git a/components/common/cpp/include/ftl/configurable.hpp b/components/common/cpp/include/ftl/configurable.hpp index 5bae67b4a9972d5f69d0947871d5be259df36be3..94a080eaed480fa2e57b113e9c8c3d7bc04388bd 100644 --- a/components/common/cpp/include/ftl/configurable.hpp +++ b/components/common/cpp/include/ftl/configurable.hpp @@ -68,7 +68,9 @@ class Configurable { template <typename T> T value(const std::string &name, T def) { auto r = get<T>(name); - return (r) ? *r : def; + if (r) return *r; + (*config_)[name] = def; + return def; } /** diff --git a/components/net/cpp/src/universe.cpp b/components/net/cpp/src/universe.cpp index 8c50709e2d6253f73849f5560c72eee9a6ff5b83..402c5bed22cc394fd08251f2d2625983b89ed48e 100644 --- a/components/net/cpp/src/universe.cpp +++ b/components/net/cpp/src/universe.cpp @@ -138,9 +138,7 @@ int Universe::_setDescriptors() { n = s->_socket(); } - //if (s->isWaiting()) { - FD_SET(s->_socket(), &sfdread_); - //} + FD_SET(s->_socket(), &sfdread_); FD_SET(s->_socket(), &sfderror_); } } @@ -154,17 +152,7 @@ void Universe::_installBindings(Peer *p) { } void Universe::_installBindings() { - /*bind("__subscribe__", [this](const UUID &id, const string &uri) -> bool { - LOG(INFO) << "Subscription to " << uri << " by " << id.to_string(); - unique_lock<shared_mutex> lk(net_mutex_); - subscribers_[ftl::URI(uri).to_string()].push_back(id); - return true; - }); - - bind("__owner__", [this](const std::string &res) -> optional<UUID> { - if (owned_.count(res) > 0) return this_peer; - else return {}; - });*/ + } // Note: should be called inside a net lock @@ -173,6 +161,8 @@ void Universe::_cleanupPeers() { if (ftl::pool.n_idle() == ftl::pool.size()) { if (garbage_.size() > 0) LOG(INFO) << "Garbage collection"; while (garbage_.size() > 0) { + // FIXME: There is possibly still something with a peer pointer + // that is causing this to throw an exception sometimes?
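+ // Deletion is deferred until every pool thread is idle, the intent being + // that no worker can still be inside a callback on the Peer being deleted.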
delete garbage_.front(); garbage_.pop_front(); } @@ -287,8 +277,8 @@ void Universe::_run() { continue; } - // CHECK Could this mutex be the problem!? { + // TODO:(Nick) Shared lock unless connection is made UNIQUE_LOCK(net_mutex_,lk); //If connection request is waiting @@ -304,7 +294,7 @@ void Universe::_run() { if (csock != INVALID_SOCKET) { auto p = new Peer(csock, this, &disp_); peers_.push_back(p); - _installBindings(p); + //_installBindings(p); } } } diff --git a/components/rgbd-sources/include/ftl/rgbd/streamer.hpp b/components/rgbd-sources/include/ftl/rgbd/streamer.hpp index 0143bc0b0a030e509629e5bb6185dd553d1c9683..d2beacb2105c38dfa7d3c8317f2e6512cb07c570 100644 --- a/components/rgbd-sources/include/ftl/rgbd/streamer.hpp +++ b/components/rgbd-sources/include/ftl/rgbd/streamer.hpp @@ -29,7 +29,9 @@ struct StreamClient { static const unsigned int kGrabbed = 0x1; static const unsigned int kRGB = 0x2; -static const unsigned int kDepth = 0x4; +static const unsigned int kDepth = 0x4; + +static const unsigned int kFrameDropLimit = 5; struct StreamSource { ftl::rgbd::Source *src; @@ -119,12 +121,19 @@ class Streamer : public ftl::Configurable { int64_t last_frame_; int64_t frame_no_; + int64_t mspf_; + float actual_fps_; + //int64_t last_dropped_; + //int drop_count_; + void _schedule(); + void _schedule(detail::StreamSource *); void _swap(detail::StreamSource *); void _addClient(const std::string &source, int N, int rate, const ftl::UUID &peer, const std::string &dest); void _encodeAndTransmit(detail::StreamSource *src, int chunk); void _encodeChannel1(const cv::Mat &in, std::vector<unsigned char> &out, unsigned int b); bool _encodeChannel2(const cv::Mat &in, std::vector<unsigned char> &out, ftl::rgbd::channel_t c, unsigned int b); + void _decideFrameRate(int64_t framesdropped, int64_t msremainder); }; } diff --git a/components/rgbd-sources/src/calibrate.cpp b/components/rgbd-sources/src/calibrate.cpp index acc580e2aefdd63a3531e168e02b69c06aa855ca..c31fea5c5c2b7885b450a0ebcd51f6e0540dd67f 100644 --- a/components/rgbd-sources/src/calibrate.cpp +++ b/components/rgbd-sources/src/calibrate.cpp @@ -159,7 +159,6 @@ void Calibrate::_updateIntrinsics() { // TODO: pass parameters? Mat R1, R2, P1, P2; - std::pair<Mat, Mat> map1, map2; ftl::rgbd::Camera params(); if (this->value("use_intrinsics", true)) { @@ -182,14 +181,14 @@ void Calibrate::_updateIntrinsics() { C_l_ = P1; C_r_ = P2; - initUndistortRectifyMap(M1_, D1_, R1, P1, img_size_, CV_32FC1, map1.first, map2.first); - initUndistortRectifyMap(M2_, D2_, R2, P2, img_size_, CV_32FC1, map1.second, map2.second); + initUndistortRectifyMap(M1_, D1_, R1, P1, img_size_, CV_32FC1, map1_.first, map2_.first); + initUndistortRectifyMap(M2_, D2_, R2, P2, img_size_, CV_32FC1, map1_.second, map2_.second); // CHECK Is this thread safe!!!! 
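+ // Note: the blocking upload() calls below copy the freshly computed maps to + // the GPU, so a rectifyStereo() call running concurrently on another thread + // could read map1_gpu_/map2_gpu_ mid-update.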
- map1_gpu_.first.upload(map1.first); - map1_gpu_.second.upload(map1.second); - map2_gpu_.first.upload(map2.first); - map2_gpu_.second.upload(map2.second); + map1_gpu_.first.upload(map1_.first); + map1_gpu_.second.upload(map1_.second); + map2_gpu_.first.upload(map2_.first); + map2_gpu_.second.upload(map2_.second); } void Calibrate::rectifyStereo(GpuMat &l, GpuMat &r, Stream &stream) { @@ -204,6 +203,21 @@ void Calibrate::rectifyStereo(GpuMat &l, GpuMat &r, Stream &stream) { r = r_tmp; } +void Calibrate::rectifyStereo(cv::Mat &l, cv::Mat &r) { + // cv::cuda::remap() cannot use the same Mat for input and output + + cv::remap(l, l, map1_.first, map2_.first, cv::INTER_LINEAR, 0, cv::Scalar()); + cv::remap(r, r, map1_.second, map2_.second, cv::INTER_LINEAR, 0, cv::Scalar()); + + /*GpuMat l_tmp(l.size(), l.type()); + GpuMat r_tmp(r.size(), r.type()); + cv::cuda::remap(l, l_tmp, map1_gpu_.first, map2_gpu_.first, cv::INTER_LINEAR, 0, cv::Scalar(), stream); + cv::cuda::remap(r, r_tmp, map1_gpu_.second, map2_gpu_.second, cv::INTER_LINEAR, 0, cv::Scalar(), stream); + stream.waitForCompletion(); + l = l_tmp; + r = r_tmp;*/ +} + bool Calibrate::isCalibrated() { return calibrated_; } \ No newline at end of file diff --git a/components/rgbd-sources/src/calibrate.hpp b/components/rgbd-sources/src/calibrate.hpp index c5f4786831edb77435a4d205702e7909517829a9..7f6b262c1e7afcd2852f65a6d62333b5389f7615 100644 --- a/components/rgbd-sources/src/calibrate.hpp +++ b/components/rgbd-sources/src/calibrate.hpp @@ -40,6 +40,11 @@ class Calibrate : public ftl::Configurable { */ void rectifyStereo(cv::cuda::GpuMat &l, cv::cuda::GpuMat &r, cv::cuda::Stream &stream); + /** + * Rectify and remove distortions from images l and r using cv::remap() + */ + void rectifyStereo(cv::Mat &l, cv::Mat &r); + bool isCalibrated(); void updateCalibration(const ftl::rgbd::Camera &p); @@ -60,6 +65,8 @@ private: private: bool calibrated_; + std::pair<cv::Mat, cv::Mat> map1_; + std::pair<cv::Mat, cv::Mat> map2_; std::pair<cv::cuda::GpuMat, cv::cuda::GpuMat> map1_gpu_; std::pair<cv::cuda::GpuMat, cv::cuda::GpuMat> map2_gpu_; diff --git a/components/rgbd-sources/src/local.cpp b/components/rgbd-sources/src/local.cpp index 410cb9f96c0a1c1ca48cadd52300e25b260c3aad..cf5521352d4689798cfc12ada0787f27db252c89 100644 --- a/components/rgbd-sources/src/local.cpp +++ b/components/rgbd-sources/src/local.cpp @@ -9,11 +9,13 @@ #include <thread> #include "local.hpp" +#include "calibrate.hpp" #include <opencv2/core.hpp> #include <opencv2/opencv.hpp> #include <opencv2/xphoto.hpp> using ftl::rgbd::detail::LocalSource; +using ftl::rgbd::detail::Calibrate; using cv::Mat; using cv::VideoCapture; using cv::Rect; @@ -27,28 +29,15 @@ using std::this_thread::sleep_for; LocalSource::LocalSource(nlohmann::json &config) : Configurable(config), timestamp_(0.0) { - REQUIRED({ - {"flip","Switch left and right views","boolean"}, - {"flip_vert","Rotate image 180 degrees","boolean"}, - {"nostereo","Force single camera mode","boolean"}, - {"width","Pixel width of camera source","number"}, - {"height","Pixel height of camera source","number"}, - {"max_fps","Maximum frames per second","number"}, - {"scale","Change the input image or video scaling","number"} - }); - - flip_ = value("flip", false); - flip_v_ = value("flip_vert", false); nostereo_ = value("nostereo", false); - downsize_ = value("scale", 1.0f); // Use cameras camera_a_ = new VideoCapture; LOG(INFO) << "Cameras check... "; - camera_a_->open((flip_) ?
1 : 0); + camera_a_->open(0); if (!nostereo_) { - camera_b_ = new VideoCapture((flip_) ? 0 : 1); + camera_b_ = new VideoCapture(1); } else { camera_b_ = nullptr; } @@ -82,26 +71,18 @@ LocalSource::LocalSource(nlohmann::json &config) stereo_ = true; } - tps_ = 1.0 / value("max_fps", 25.0); + // Allocate page locked host memory for fast GPU transfer + left_hm_ = cv::cuda::HostMem(height_, width_, CV_8UC3); + right_hm_ = cv::cuda::HostMem(height_, width_, CV_8UC3); } LocalSource::LocalSource(nlohmann::json &config, const string &vid) : Configurable(config), timestamp_(0.0) { - REQUIRED({ - {"flip","Switch left and right views","boolean"}, - {"flip_vert","Rotate image 180 degrees","boolean"}, - {"nostereo","Force single camera mode","boolean"}, - {"width","Pixel width of camera source","number"}, - {"height","Pixel height of camera source","number"}, - {"max_fps","Maximum frames per second","number"}, - {"scale","Change the input image or video scaling","number"} - }); - - flip_ = value("flip", false); - flip_v_ = value("flip_vert", false); + //flip_ = value("flip", false); + //flip_v_ = value("flip_vert", false); nostereo_ = value("nostereo", false); - downsize_ = value("scale", 1.0f); + //downsize_ = value("scale", 1.0f); if (vid == "") { LOG(FATAL) << "No video file specified"; @@ -138,10 +119,14 @@ LocalSource::LocalSource(nlohmann::json &config, const string &vid) stereo_ = false; } - tps_ = 1.0 / value("max_fps", 25.0); + // Allocate page locked host memory for fast GPU transfer + left_hm_ = cv::cuda::HostMem(height_, width_, CV_8UC3); + right_hm_ = cv::cuda::HostMem(height_, width_, CV_8UC3); + + //tps_ = 1.0 / value("max_fps", 25.0); } -bool LocalSource::left(cv::Mat &l) { +/*bool LocalSource::left(cv::Mat &l) { if (!camera_a_) return false; if (!camera_a_->grab()) { @@ -174,9 +159,9 @@ bool LocalSource::left(cv::Mat &l) { } return true; -} +}*/ -bool LocalSource::right(cv::Mat &r) { +/*bool LocalSource::right(cv::Mat &r) { if (!camera_a_->grab()) { LOG(ERROR) << "Unable to grab from camera A"; return false; @@ -212,10 +197,15 @@ bool LocalSource::right(cv::Mat &r) { } return true; -} +}*/ -bool LocalSource::get(cv::cuda::GpuMat &l_out, cv::cuda::GpuMat &r_out, cv::cuda::Stream &stream) { +bool LocalSource::get(cv::cuda::GpuMat &l_out, cv::cuda::GpuMat &r_out, Calibrate *c, cv::cuda::Stream &stream) { Mat l, r; + + // Use page locked memory + l = left_hm_.createMatHeader(); + r = right_hm_.createMatHeader(); + if (!camera_a_) return false; if (!camera_a_->grab()) { @@ -239,7 +229,7 @@ bool LocalSource::get(cv::cuda::GpuMat &l_out, cv::cuda::GpuMat &r_out, cv::cuda timestamp_ = timestamp; if (camera_b_ || !stereo_) { - if (!camera_a_->retrieve(left_)) { + if (!camera_a_->retrieve(l)) { LOG(ERROR) << "Unable to read frame from camera A"; return false; } @@ -255,23 +245,23 @@ bool LocalSource::get(cv::cuda::GpuMat &l_out, cv::cuda::GpuMat &r_out, cv::cuda } int resx = frame.cols / 2; - if (flip_) { - r = Mat(frame, Rect(0, 0, resx, frame.rows)); - left_ = Mat(frame, Rect(resx, 0, frame.cols-resx, frame.rows)); - } else { - left_ = Mat(frame, Rect(0, 0, resx, frame.rows)); + //if (flip_) { + // r = Mat(frame, Rect(0, 0, resx, frame.rows)); + // l = Mat(frame, Rect(resx, 0, frame.cols-resx, frame.rows)); + //} else { + l = Mat(frame, Rect(0, 0, resx, frame.rows)); r = Mat(frame, Rect(resx, 0, frame.cols-resx, frame.rows)); - } + //} } - if (downsize_ != 1.0f) { + /*if (downsize_ != 1.0f) { // cv::cuda::resize() cv::resize(left_, left_, cv::Size((int)(left_.cols * downsize_), 
(int)(left_.rows * downsize_)), 0, 0, cv::INTER_LINEAR); cv::resize(r, r, cv::Size((int)(r.cols * downsize_), (int)(r.rows * downsize_)), 0, 0, cv::INTER_LINEAR); - } + }*/ // Note: this seems to be too slow on CPU... /*cv::Ptr<cv::xphoto::WhiteBalancer> wb; @@ -279,15 +269,17 @@ bool LocalSource::get(cv::cuda::GpuMat &l_out, cv::cuda::GpuMat &r_out, cv::cuda wb->balanceWhite(l, l); wb->balanceWhite(r, r);*/ - if (flip_v_) { + /*if (flip_v_) { Mat tl, tr; cv::flip(left_, tl, 0); cv::flip(r, tr, 0); left_ = tl; r = tr; - } + }*/ + + c->rectifyStereo(l, r); - l_out.upload(left_, stream); + l_out.upload(l, stream); r_out.upload(r, stream); return true; diff --git a/components/rgbd-sources/src/local.hpp b/components/rgbd-sources/src/local.hpp index e3fcb91bd585d8090cfb50a8ee83bf49dd78f98d..4fd71c5eede52acc480f8915c3370046aaccfd76 100644 --- a/components/rgbd-sources/src/local.hpp +++ b/components/rgbd-sources/src/local.hpp @@ -15,19 +15,19 @@ namespace ftl { namespace rgbd { namespace detail { +class Calibrate; + class LocalSource : public Configurable { public: explicit LocalSource(nlohmann::json &config); LocalSource(nlohmann::json &config, const std::string &vid); - bool left(cv::Mat &m); - bool right(cv::Mat &m); - bool get(cv::cuda::GpuMat &l, cv::cuda::GpuMat &r, cv::cuda::Stream &stream); + //bool left(cv::Mat &m); + //bool right(cv::Mat &m); + bool get(cv::cuda::GpuMat &l, cv::cuda::GpuMat &r, Calibrate *c, cv::cuda::Stream &stream); unsigned int width() const { return width_; } unsigned int height() const { return height_; } - - cv::Mat &cachedLeft() { return left_; } //void setFramerate(float fps); //float getFramerate() const; @@ -38,18 +38,20 @@ class LocalSource : public Configurable { private: double timestamp_; - double tps_; + //double tps_; bool stereo_; //float fps_; - bool flip_; - bool flip_v_; + //bool flip_; + //bool flip_v_; bool nostereo_; - float downsize_; + //float downsize_; cv::VideoCapture *camera_a_; cv::VideoCapture *camera_b_; unsigned int width_; unsigned int height_; - cv::Mat left_; + + cv::cuda::HostMem left_hm_; + cv::cuda::HostMem right_hm_; }; } diff --git a/components/rgbd-sources/src/net.cpp b/components/rgbd-sources/src/net.cpp index 135395c57a1bc1f9983308719e88f7f49d5df8cf..12a76cc40d38f876f11cbc4919cf7bcb13a4bee5 100644 --- a/components/rgbd-sources/src/net.cpp +++ b/components/rgbd-sources/src/net.cpp @@ -161,7 +161,7 @@ void NetSource::_recvChunk(int64_t frame, int chunk, bool delta, const vector<un depth_ = d_depth_; d_depth_ = tmp; - timestamp_ = current_frame_*40; // FIXME: Don't hardcode 40ms + timestamp_ = current_frame_; current_frame_ = frame; } @@ -277,13 +277,6 @@ void NetSource::_updateURI() { N_ = 0; - // Initiate stream with request for first 10 frames - //try { - // host_->getNet()->send(peer_, "get_stream", *uri, N_, 0, host_->getNet()->id(), *uri); - //} catch(...) 
{ - // LOG(ERROR) << "Could not connect to stream " << *uri; - //} - // Update chunk details chunks_dim_ = ftl::rgbd::kChunkDim; chunk_width_ = params_.width / chunks_dim_; diff --git a/components/rgbd-sources/src/stereovideo.cpp b/components/rgbd-sources/src/stereovideo.cpp index 0d20780c275a43a2338fd9d81a6a98f13619c3a2..d83fdb194a426659b33166aef64d30d367653a0e 100644 --- a/components/rgbd-sources/src/stereovideo.cpp +++ b/components/rgbd-sources/src/stereovideo.cpp @@ -154,7 +154,7 @@ static void disparityToDepth(const cv::cuda::GpuMat &disparity, cv::cuda::GpuMat } bool StereoVideoSource::capture() { - lsrc_->get(cap_left_, cap_right_, stream2_); + lsrc_->get(cap_left_, cap_right_, calib_, stream2_); stream2_.waitForCompletion(); return true; } @@ -178,7 +178,7 @@ bool StereoVideoSource::compute(int n, int b) { //lsrc_->get(left_, right_, stream_); if (depth_tmp_.empty()) depth_tmp_ = cv::cuda::GpuMat(left_.size(), CV_32FC1); if (disp_tmp_.empty()) disp_tmp_ = cv::cuda::GpuMat(left_.size(), CV_32FC1); - calib_->rectifyStereo(left_, right_, stream_); + //calib_->rectifyStereo(left_, right_, stream_); disp_->compute(left_, right_, disp_tmp_, stream_); ftl::cuda::disparity_to_depth(disp_tmp_, depth_tmp_, params_, stream_); left_.download(rgb_, stream_); @@ -188,13 +188,13 @@ bool StereoVideoSource::compute(int n, int b) { stream_.waitForCompletion(); // TODO:(Nick) Move to getFrames } else if (chan == ftl::rgbd::kChanRight) { //lsrc_->get(left_, right_, stream_); - calib_->rectifyStereo(left_, right_, stream_); + //calib_->rectifyStereo(left_, right_, stream_); left_.download(rgb_, stream_); right_.download(depth_, stream_); stream_.waitForCompletion(); // TODO:(Nick) Move to getFrames } else { //lsrc_->get(left_, right_, stream_); - calib_->rectifyStereo(left_, right_, stream_); + //calib_->rectifyStereo(left_, right_, stream_); //rgb_ = lsrc_->cachedLeft(); left_.download(rgb_, stream_); stream_.waitForCompletion(); // TODO:(Nick) Move to getFrames diff --git a/components/rgbd-sources/src/streamer.cpp b/components/rgbd-sources/src/streamer.cpp index 3fd69afe3fd6568f16ed9a9c9b489fde36bdc64a..ab03f2a6527163d3413e7a1d2ada8aed9803f07f 100644 --- a/components/rgbd-sources/src/streamer.cpp +++ b/components/rgbd-sources/src/streamer.cpp @@ -4,6 +4,7 @@ #include <thread> #include <chrono> #include <tuple> +#include <algorithm> #include "bitrate_settings.hpp" @@ -31,6 +32,9 @@ Streamer::Streamer(nlohmann::json &config, Universe *net) net_ = net; time_peer_ = ftl::UUID(0); clock_adjust_ = 0; + mspf_ = 1000 / value("fps", 25); + //last_dropped_ = 0; + //drop_count_ = 0; compress_level_ = value("compression", 1); @@ -199,42 +203,43 @@ void Streamer::remove(const std::string &) { } +void Streamer::_decideFrameRate(int64_t framesdropped, int64_t msremainder) { + actual_fps_ = 1000.0f / (float)((framesdropped+1)*mspf_+(msremainder)); + LOG(INFO) << "Actual FPS = " << actual_fps_; + + /*if (framesdropped > 0) { + // If N consecutive frames are dropped, work out new rate + if (last_dropped_/mspf_ >= last_frame_/mspf_ - 2*framesdropped) drop_count_++; + else drop_count_ = 0; + + last_dropped_ = last_frame_+mspf_; + + if (drop_count_ >= ftl::rgbd::detail::kFrameDropLimit) { + drop_count_ = 0; + + const int64_t actualmspf = std::min((int64_t)1000, framesdropped*mspf_+(mspf_ - msremainder)); + + LOG(WARNING) << "Suggest FPS @ " << (1000 / actualmspf); + //mspf_ = actualmspf; + + // Also notify all clients of change + } + } else { + // Perhaps we can boost framerate? 
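+ // Here framesdropped == 0, so actualmspf reduces to mspf_ - msremainder: + // the time this frame actually took, i.e. the higher rate we could sustain.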
+ const int64_t actualmspf = std::min((int64_t)1000, framesdropped*mspf_+(mspf_ - msremainder)); + LOG(INFO) << "Boost framerate: " << (1000 / actualmspf); + //mspf_ = actualmspf; + }*/ +} + void Streamer::stop() { active_ = false; wait(); } void Streamer::poll() { - //double wait = 1.0f / 25.0f; // TODO:(Nick) Should be in config - //auto start = std::chrono::high_resolution_clock::now(); - //int64_t now = std::chrono::time_point_cast<std::chrono::milliseconds>(start).time_since_epoch().count()+clock_adjust_; - - //int64_t msdelay = 40 - (now % 40); - //while (msdelay >= 20) { - // sleep_for(milliseconds(10)); - // now = std::chrono::time_point_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now()).time_since_epoch().count()+clock_adjust_; - // msdelay = 40 - (now % 40); - //} - //LOG(INFO) << "Required Delay: " << (now / 40) << " = " << msdelay; - // Create frame jobs at correct FPS interval _schedule(); - //std::function<void(int)> j = ftl::pool.pop(); - //if (j) j(-1); - - //std::chrono::duration<double> elapsed = - // std::chrono::high_resolution_clock::now() - start; - - //if (elapsed.count() >= wait) { - //LOG(WARNING) << "Frame rate below optimal @ " << (1.0f / elapsed.count()); - //} else { - //LOG(INFO) << "Frame rate @ " << (1.0f / elapsed.count()); - // Otherwise, wait until next frame should start. - // FIXME:(Nick) Is this accurate enough? Almost certainly not - // TODO:(Nick) Synchronise by time corrections and use of fixed time points - // but this only works if framerate can be achieved. - //sleep_for(milliseconds((long long)((wait - elapsed.count()) * 1000.0f))); - //} } void Streamer::run(bool block) { @@ -304,131 +309,138 @@ void Streamer::wait() { frame_no_ = last_frame_; } -void Streamer::_schedule() { - wait(); - //std::mutex job_mtx; - //std::condition_variable job_cv; - //int jobs = 0; +void Streamer::_schedule(StreamSource *src) { + // There will be two jobs for this source... + //UNIQUE_LOCK(job_mtx_,lk); + jobs_ += 2 + kChunkCount; + //lk.unlock(); - //auto now = std::chrono::time_point_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now()).time_since_epoch().count()+clock_adjust_; - //LOG(INFO) << "Frame time = " << (now-(last_frame_*40)) << "ms"; + //StreamSource *src = sources_[uri]; + if (src == nullptr || src->jobs != 0) return; + src->jobs = 2 + kChunkCount; - // Prevent new clients during processing. - SHARED_LOCK(mutex_,slk); + // Grab / capture job + ftl::pool.push([this,src](int id) { + //auto start = std::chrono::high_resolution_clock::now(); - for (auto s : sources_) { - string uri = s.first; - - // No point in doing work if no clients - if (s.second->clientCount == 0) { - continue; - } - - // There will be two jobs for this source... 
- //UNIQUE_LOCK(job_mtx_,lk); - jobs_ += 2 + kChunkCount; - //lk.unlock(); + auto start = std::chrono::high_resolution_clock::now(); + int64_t now = std::chrono::time_point_cast<std::chrono::milliseconds>(start).time_since_epoch().count()+clock_adjust_; + int64_t target = now / mspf_; + int64_t msdelay = mspf_ - (now % mspf_); - StreamSource *src = sources_[uri]; - if (src == nullptr || src->jobs != 0) continue; - src->jobs = 2 + kChunkCount; + if (target != last_frame_ && msdelay != mspf_) LOG(WARNING) << "Frame " << "(" << (target-last_frame_) << ") dropped by " << (now%mspf_) << "ms"; - // Grab / capture job - ftl::pool.push([this,src](int id) { - //auto start = std::chrono::high_resolution_clock::now(); - - auto start = std::chrono::high_resolution_clock::now(); - int64_t now = std::chrono::time_point_cast<std::chrono::milliseconds>(start).time_since_epoch().count()+clock_adjust_; - int64_t target = now / 40; - - // TODO:(Nick) A now%40 == 0 should be accepted - if (target != last_frame_) LOG(WARNING) << "Frame " << "(" << (target-last_frame_) << ") dropped by " << (now%40) << "ms"; + // Use sleep_for for larger delays + + //LOG(INFO) << "Required Delay: " << (now / 40) << " = " << msdelay; + while (msdelay >= 20 && msdelay < mspf_) { + sleep_for(milliseconds(10)); + now = std::chrono::time_point_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now()).time_since_epoch().count()+clock_adjust_; + msdelay = mspf_ - (now % mspf_); + } - // Use sleep_for for larger delays - int64_t msdelay = 40 - (now % 40); - //LOG(INFO) << "Required Delay: " << (now / 40) << " = " << msdelay; - while (msdelay >= 20) { - sleep_for(milliseconds(10)); - now = std::chrono::time_point_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now()).time_since_epoch().count()+clock_adjust_; - msdelay = 40 - (now % 40); - } + // Spin loop until exact grab time + //LOG(INFO) << "Spin Delay: " << (now / 40) << " = " << (40 - (now%40)); - // Spin loop until exact grab time - //LOG(INFO) << "Spin Delay: " << (now / 40) << " = " << (40 - (now%40)); - target = now / 40; - while ((now/40) == target) { + if (msdelay != mspf_) { + target = now / mspf_; + while ((now/mspf_) == target) { _mm_pause(); // SSE2 nano pause intrinsic now = std::chrono::time_point_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now()).time_since_epoch().count()+clock_adjust_; }; - last_frame_ = now/40; + } + last_frame_ = now/mspf_; - try { - src->src->capture(); - } catch (std::exception &ex) { - LOG(ERROR) << "Exception when grabbing frame"; - LOG(ERROR) << ex.what(); - } - catch (...) { - LOG(ERROR) << "Unknown exception when grabbing frame"; - } + try { + src->src->capture(); + } catch (std::exception &ex) { + LOG(ERROR) << "Exception when grabbing frame"; + LOG(ERROR) << ex.what(); + } + catch (...) 
{ + LOG(ERROR) << "Unknown exception when grabbing frame"; + } - //now = std::chrono::time_point_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now()).time_since_epoch().count()+clock_adjust_; - //if (now%40 > 0) LOG(INFO) << "Grab in: " << (now%40) << "ms"; + //now = std::chrono::time_point_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now()).time_since_epoch().count()+clock_adjust_; + //if (now%40 > 0) LOG(INFO) << "Grab in: " << (now%40) << "ms"; - //std::chrono::duration<double> elapsed = - // std::chrono::high_resolution_clock::now() - start; - //LOG(INFO) << "Grab in " << elapsed.count() << "s"; + //std::chrono::duration<double> elapsed = + // std::chrono::high_resolution_clock::now() - start; + //LOG(INFO) << "Grab in " << elapsed.count() << "s"; - src->jobs--; - _swap(src); + src->jobs--; + _swap(src); - // Mark job as finished - std::unique_lock<std::mutex> lk(job_mtx_); - --jobs_; - job_cv_.notify_one(); - }); + // Mark job as finished + std::unique_lock<std::mutex> lk(job_mtx_); + --jobs_; + job_cv_.notify_one(); + }); + + // Compute job + ftl::pool.push([this,src](int id) { + try { + src->src->compute(); + } catch (std::exception &ex) { + LOG(ERROR) << "Exception when computing frame"; + LOG(ERROR) << ex.what(); + } + catch (...) { + LOG(ERROR) << "Unknown exception when computing frame"; + } + + src->jobs--; + _swap(src); + + // Mark job as finished + std::unique_lock<std::mutex> lk(job_mtx_); + --jobs_; + job_cv_.notify_one(); + }); - // Compute job - ftl::pool.push([this,src](int id) { + // Create jobs for each chunk + for (int i=0; i<kChunkCount; ++i) { + // Add chunk job to thread pool + ftl::pool.push([this,src,i](int id) { + int chunk = i; try { - src->src->compute(); - } catch (std::exception &ex) { - LOG(ERROR) << "Exception when computing frame"; - LOG(ERROR) << ex.what(); + if (!src->rgb.empty() && (src->src->getChannel() == ftl::rgbd::kChanNone || !src->depth.empty())) { + _encodeAndTransmit(src, chunk); } - catch (...) { - LOG(ERROR) << "Unknown exception when computing frame"; + } catch(...) { + LOG(ERROR) << "Encode Exception: " << chunk; } src->jobs--; _swap(src); - - // Mark job as finished std::unique_lock<std::mutex> lk(job_mtx_); --jobs_; job_cv_.notify_one(); }); + } +} - // Create jobs for each chunk - for (int i=0; i<kChunkCount; ++i) { - // Add chunk job to thread pool - ftl::pool.push([this,src,i](int id) { - int chunk = i; - try { - if (!src->rgb.empty() && (src->src->getChannel() == ftl::rgbd::kChanNone || !src->depth.empty())) { - _encodeAndTransmit(src, chunk); - } - } catch(...) { - LOG(ERROR) << "Encode Exception: " << chunk; - } +void Streamer::_schedule() { + wait(); + //std::mutex job_mtx; + //std::condition_variable job_cv; + //int jobs = 0; - src->jobs--; - _swap(src); - std::unique_lock<std::mutex> lk(job_mtx_); - --jobs_; - job_cv_.notify_one(); - }); + //auto now = std::chrono::time_point_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now()).time_since_epoch().count()+clock_adjust_; + //LOG(INFO) << "Frame time = " << (now-(last_frame_*40)) << "ms"; + + // Prevent new clients during processing. 
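+ // A shared lock suffices for the read-only iteration over sources_ below; + // writers are assumed to take a unique lock on mutex_.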
+ SHARED_LOCK(mutex_,slk); + + for (auto s : sources_) { + string uri = s.first; + + // No point in doing work if no clients + if (s.second->clientCount == 0) { + continue; } + + _schedule(s.second); } } @@ -490,8 +502,8 @@ void Streamer::_encodeAndTransmit(StreamSource *src, int chunk) { auto c = src->clients[b].begin(); while (c != src->clients[b].end()) { try { - // TODO:(Nick) Send pose and timestamp - if (!net_->send((*c).peerid, (*c).uri, frame_no_, chunk, delta, rgb_buf, d_buf)) { + // TODO:(Nick) Send pose + if (!net_->send((*c).peerid, (*c).uri, frame_no_*mspf_, chunk, delta, rgb_buf, d_buf)) { // Send failed so mark as client stream completed (*c).txcount = (*c).txmax; } else {