Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found
Select Git revision
  • bug/335/nettimeadjust
  • bug/feedrecord
  • bug/mlsdestruct2
  • bug/optflow-disparity
  • calibration
  • censuseval
  • cherry-pick-75b9f6b3
  • chromatest
  • enhancement/235
  • exp/327/colourcor
  • exp/candidatemask
  • exp/labcolours
  • exp/multires-sgm
  • exp/triangleperf-2
  • exp/triangleperf3
  • feature/134/usegroup-nb
  • feature/231/segment
  • feature/274/lossyfilter
  • feature/329/dualoptflow-doffs
  • feature/330/audiocompress
  • feature/375/fullres-fstream
  • feature/SKR
  • feature/aruco
  • feature/autocalibration
  • feature/ceres
  • feature/compiletime
  • feature/corr_smooth
  • feature/depth-touch
  • feature/disconflow
  • feature/fixedres
  • feature/gui2-nanogui-mitsuba
  • feature/gui2-pch
  • feature/multiplexer-pose
  • feature/poses
  • feature/python
  • feature/sdk-python
  • feature/sgm-experimental
  • feature/stereocalib
  • feature/use-new-frame
  • feature/use10bit
  • feature/vr
  • feature/warpcorr-costing
  • feature/web-service/camMover
  • feature/web-service/configurations
  • feature/web-service/vanillaClient
  • feature/websocket-pose
  • guirefactor
  • master
  • v0.0.1
  • v0.0.2
  • v0.0.3
  • v0.0.4
  • v0.0.5
  • v0.0.6
54 results

Target

Select target project
  • nicolaspope/ftl
1 result
Select Git revision
  • bug/335/nettimeadjust
  • bug/feedrecord
  • bug/mlsdestruct2
  • bug/optflow-disparity
  • calibration
  • censuseval
  • cherry-pick-75b9f6b3
  • chromatest
  • enhancement/235
  • exp/327/colourcor
  • exp/candidatemask
  • exp/labcolours
  • exp/multires-sgm
  • exp/triangleperf-2
  • exp/triangleperf3
  • feature/134/usegroup-nb
  • feature/231/segment
  • feature/274/lossyfilter
  • feature/329/dualoptflow-doffs
  • feature/330/audiocompress
  • feature/375/fullres-fstream
  • feature/SKR
  • feature/aruco
  • feature/autocalibration
  • feature/ceres
  • feature/compiletime
  • feature/corr_smooth
  • feature/depth-touch
  • feature/disconflow
  • feature/fixedres
  • feature/gui2-nanogui-mitsuba
  • feature/gui2-pch
  • feature/multiplexer-pose
  • feature/poses
  • feature/python
  • feature/sdk-python
  • feature/sgm-experimental
  • feature/stereocalib
  • feature/use-new-frame
  • feature/use10bit
  • feature/vr
  • feature/warpcorr-costing
  • feature/web-service/camMover
  • feature/web-service/configurations
  • feature/web-service/vanillaClient
  • feature/websocket-pose
  • guirefactor
  • master
  • v0.0.1
  • v0.0.2
  • v0.0.3
  • v0.0.4
  • v0.0.5
  • v0.0.6
54 results
Show changes
Commits on Source (11)
Showing
with 7147 additions and 60 deletions
......@@ -14,15 +14,14 @@ stages:
# paths:
# - build/
docker:
linux:
stage: all
tags:
- docker
image: ubuntu:18.04
before_script:
- export DEBIAN_FRONTEND=noninteractive
- apt-get update -qq && apt-get install -y -qq g++ cmake git
- apt-get install -y -qq libopencv-dev libgoogle-glog-dev liburiparser-dev libreadline-dev libmsgpack-dev uuid-dev libpcl-dev
- linux
# before_script:
# - export DEBIAN_FRONTEND=noninteractive
# - apt-get update -qq && apt-get install -y -qq g++ cmake git
# - apt-get install -y -qq libopencv-dev libgoogle-glog-dev liburiparser-dev libreadline-dev libmsgpack-dev uuid-dev libpcl-dev
script:
- mkdir build
- cd build
......
# Need to include staged files and libs
include_directories(${PROJECT_SOURCE_DIR}/applications/reconstruct/include)
#include_directories(${PROJECT_SOURCE_DIR}/reconstruct/include)
#include_directories(${PROJECT_BINARY_DIR})
set(REPSRC
src/main.cpp
src/scene_rep_hash_sdf.cu
src/ray_cast_sdf.cu
src/camera_util.cu
src/ray_cast_sdf.cpp
src/registration.cpp
)
......@@ -11,6 +15,15 @@ add_executable(ftl-reconstruct ${REPSRC})
add_dependencies(ftl-reconstruct ftlnet)
add_dependencies(ftl-reconstruct ftlcommon)
target_include_directories(ftl-reconstruct PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
$<INSTALL_INTERFACE:include>
PRIVATE src)
if (CUDA_FOUND)
set_property(TARGET ftl-reconstruct PROPERTY CUDA_SEPARABLE_COMPILATION ON)
endif()
#target_include_directories(cv-node PUBLIC ${PROJECT_SOURCE_DIR}/include)
target_link_libraries(ftl-reconstruct ftlcommon ftlrgbd Threads::Threads ${OpenCV_LIBS} glog::glog ftlnet ftlrender)
......
This diff is collapsed.
/*
* Copyright 1993-2009 NVIDIA Corporation. All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation and
* any modifications thereto. Any use, reproduction, disclosure, or distribution
* of this software and related documentation without an express license
* agreement from NVIDIA Corporation is strictly prohibited.
*
*/
/*
This file implements common mathematical operations on vector types
(float3, float4 etc.) since these are not provided as standard by CUDA.
The syntax is modelled on the Cg standard library.
*/
#ifndef _FTL_CUDA_OPERATORS_HPP_
#define _FTL_CUDA_OPERATORS_HPP_
#include <cuda_runtime.h>
////////////////////////////////////////////////////////////////////////////////
typedef unsigned int uint;
typedef unsigned short ushort;
#ifndef __CUDACC__
#include <math.h>
// Host-side fallbacks for functions nvcc provides in device code.
// Each keeps exactly the original select expression (relevant when an
// argument is NaN: fminf(a,b) returns b whenever a<b is false).
inline float fminf(float a, float b)
{
	if (a < b) return a;
	return b;
}

inline float fmaxf(float a, float b)
{
	if (a > b) return a;
	return b;
}

inline int max(int a, int b)
{
	if (a > b) return a;
	return b;
}

inline int min(int a, int b)
{
	if (a < b) return a;
	return b;
}
// Reciprocal square root fallback for host builds (this whole group is
// only compiled outside nvcc — see the surrounding #ifndef __CUDACC__).
inline float rsqrtf(float x)
{
	return 1.0f / sqrtf(x);
}
#endif
// float functions
////////////////////////////////////////////////////////////////////////////////
// lerp: linear interpolation, a at t=0, b at t=1 (t is not clamped)
inline __device__ __host__ float lerp(float a, float b, float t)
{
	return a + t*(b-a);
}
// clamp f to the inclusive range [a, b]
inline __device__ __host__ float clamp(float f, float a, float b)
{
	return fmaxf(a, fminf(f, b));
}
// sign: -1 for negative, +1 for positive, 0 for zero (and for NaN,
// since both comparisons are false)
inline __device__ __host__ int sign(float x) {
	int t = x<0 ? -1 : 0;
	return x > 0 ? 1 : t;
}
// int2 functions
////////////////////////////////////////////////////////////////////////////////
// conversion: float components are truncated toward zero
inline __host__ __device__ int2 make_int2(float2 f)
{
	int2 t; t.x = static_cast<int>(f.x); t.y = static_cast<int>(f.y); return t;
}
// conversion: negative ints wrap to large unsigned values
inline __host__ __device__ uint2 make_uint2(int2 i)
{
	uint2 t; t.x = static_cast<uint>(i.x); t.y = static_cast<uint>(i.y); return t;
}
// negate
// FIX: take the operand by const reference — the original non-const
// reference rejected const lvalues and temporaries; the float3 overload
// already used const, so this also restores consistency.
inline __host__ __device__ int2 operator-(const int2 &a)
{
	return make_int2(-a.x, -a.y);
}
// addition (component-wise)
inline __host__ __device__ int2 operator+(int2 a, int2 b)
{
	return make_int2(a.x + b.x, a.y + b.y);
}
inline __host__ __device__ void operator+=(int2 &a, int2 b)
{
	a.x += b.x; a.y += b.y;
}
// subtract
inline __host__ __device__ int2 operator-(int2 a, int2 b)
{
	return make_int2(a.x - b.x, a.y - b.y);
}
inline __host__ __device__ void operator-=(int2 &a, int2 b)
{
	a.x -= b.x; a.y -= b.y;
}
// multiply (component-wise, or by a scalar applied to both components)
inline __host__ __device__ int2 operator*(int2 a, int2 b)
{
	return make_int2(a.x * b.x, a.y * b.y);
}
inline __host__ __device__ int2 operator*(int2 a, int s)
{
	return make_int2(a.x * s, a.y * s);
}
inline __host__ __device__ int2 operator*(int s, int2 a)
{
	return make_int2(a.x * s, a.y * s);
}
inline __host__ __device__ void operator*=(int2 &a, int s)
{
	a.x *= s; a.y *= s;
}
// float2 functions
////////////////////////////////////////////////////////////////////////////////
// negate
// FIX: take the operand by const reference — the original non-const
// reference rejected const lvalues and temporaries (the float3 overload
// was already const-correct).
inline __host__ __device__ float2 operator-(const float2 &a)
{
	return make_float2(-a.x, -a.y);
}
// addition (component-wise)
inline __host__ __device__ float2 operator+(float2 a, float2 b)
{
	return make_float2(a.x + b.x, a.y + b.y);
}
inline __host__ __device__ void operator+=(float2 &a, float2 b)
{
	a.x += b.x; a.y += b.y;
}
// subtract
inline __host__ __device__ float2 operator-(float2 a, float2 b)
{
	return make_float2(a.x - b.x, a.y - b.y);
}
inline __host__ __device__ void operator-=(float2 &a, float2 b)
{
	a.x -= b.x; a.y -= b.y;
}
// multiply (component-wise, or by a scalar)
inline __host__ __device__ float2 operator*(float2 a, float2 b)
{
	return make_float2(a.x * b.x, a.y * b.y);
}
inline __host__ __device__ float2 operator*(float2 a, float s)
{
	return make_float2(a.x * s, a.y * s);
}
inline __host__ __device__ float2 operator*(float s, float2 a)
{
	return make_float2(a.x * s, a.y * s);
}
inline __host__ __device__ void operator*=(float2 &a, float s)
{
	a.x *= s; a.y *= s;
}
// divide; the scalar divisor is inverted once so only a single division
// is issued
inline __host__ __device__ float2 operator/(float2 a, float2 b)
{
	return make_float2(a.x / b.x, a.y / b.y);
}
inline __host__ __device__ float2 operator/(float2 a, float s)
{
	float inv = 1.0f / s;
	return a * inv;
}
// scalar / vector, component-wise: (s/a.x, s/a.y).
// BUG FIX: the original computed `a * (1.0f/s)` — i.e. a/s, identical to
// the vector/scalar overload — instead of s/a. This is a long-standing
// cutil_math.h defect, corrected in NVIDIA's helper_math.h.
inline __host__ __device__ float2 operator/(float s, float2 a)
{
	return make_float2(s / a.x, s / a.y);
}
// in-place divide by scalar (single division via reciprocal)
inline __host__ __device__ void operator/=(float2 &a, float s)
{
	float inv = 1.0f / s;
	a *= inv;
}
// lerp: linear interpolation, a at t=0, b at t=1 (t is not clamped)
inline __device__ __host__ float2 lerp(float2 a, float2 b, float t)
{
	return a + t*(b-a);
}
// clamp each component to [a, b]
inline __device__ __host__ float2 clamp(float2 v, float a, float b)
{
	return make_float2(clamp(v.x, a, b), clamp(v.y, a, b));
}
inline __device__ __host__ float2 clamp(float2 v, float2 a, float2 b)
{
	return make_float2(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y));
}
// dot product
inline __host__ __device__ float dot(float2 a, float2 b)
{
	return a.x * b.x + a.y * b.y;
}
// length (Euclidean norm)
inline __host__ __device__ float length(float2 v)
{
	return sqrtf(dot(v, v));
}
// normalize; NOTE: a zero-length input divides by zero and yields
// non-finite components
inline __host__ __device__ float2 normalize(float2 v)
{
	float invLen = rsqrtf(dot(v, v));
	return v * invLen;
}
// floor, component-wise
inline __host__ __device__ float2 floor(const float2 v)
{
	return make_float2(floor(v.x), floor(v.y));
}
// reflect i about normal n (n is assumed unit-length — not verified here)
inline __host__ __device__ float2 reflect(float2 i, float2 n)
{
	return i - 2.0f * n * dot(n,i);
}
// absolute value, component-wise
inline __host__ __device__ float2 fabs(float2 v)
{
	return make_float2(fabs(v.x), fabs(v.y));
}
// per-component sign: -1, 0 or 1
inline __device__ __host__ int2 sign(float2 f) {
	return make_int2(sign(f.x), sign(f.y));
}
// float3 functions
////////////////////////////////////////////////////////////////////////////////
// conversions
inline __host__ __device__ float3 make_float3(int3 i)
{
	float3 t; t.x = static_cast<float>(i.x); t.y = static_cast<float>(i.y); t.z = static_cast<float>(i.z); return t;
}
// drops the w component
inline __host__ __device__ float3 make_float3(float4 f)
{
	return make_float3(f.x,f.y,f.z);
}
inline __host__ __device__ float3 make_float3(uchar3 c)
{
	return make_float3(static_cast<float>(c.x), static_cast<float>(c.y), static_cast<float>(c.z));
}
// NOTE(review): no clamping — components outside the unsigned char
// range are undefined behaviour on the cast
inline __host__ __device__ uchar3 make_uchar3(float3 f)
{
	return make_uchar3(static_cast<unsigned char>(f.x), static_cast<unsigned char>(f.y), static_cast<unsigned char>(f.z));
}
// broadcast a scalar to all three components
inline __host__ __device__ float3 make_float3(float f)
{
	return make_float3(f,f,f);
}
// negate
inline __host__ __device__ float3 operator-(const float3 &a)
{
	return make_float3(-a.x, -a.y, -a.z);
}
// min, component-wise
static __inline__ __host__ __device__ float3 fminf(float3 a, float3 b)
{
	return make_float3(fminf(a.x,b.x), fminf(a.y,b.y), fminf(a.z,b.z));
}
// max, component-wise
static __inline__ __host__ __device__ float3 fmaxf(float3 a, float3 b)
{
	return make_float3(fmaxf(a.x,b.x), fmaxf(a.y,b.y), fmaxf(a.z,b.z));
}
// addition (component-wise, or broadcast scalar)
inline __host__ __device__ float3 operator+(float3 a, float3 b)
{
	return make_float3(a.x + b.x, a.y + b.y, a.z + b.z);
}
inline __host__ __device__ float3 operator+(float3 a, float b)
{
	return make_float3(a.x + b, a.y + b, a.z + b);
}
inline __host__ __device__ void operator+=(float3 &a, float3 b)
{
	a.x += b.x; a.y += b.y; a.z += b.z;
}
// subtract
inline __host__ __device__ float3 operator-(float3 a, float3 b)
{
	return make_float3(a.x - b.x, a.y - b.y, a.z - b.z);
}
inline __host__ __device__ float3 operator-(float3 a, float b)
{
	return make_float3(a.x - b, a.y - b, a.z - b);
}
inline __host__ __device__ void operator-=(float3 &a, float3 b)
{
	a.x -= b.x; a.y -= b.y; a.z -= b.z;
}
// multiply (component-wise, or by a scalar)
inline __host__ __device__ float3 operator*(float3 a, float3 b)
{
	return make_float3(a.x * b.x, a.y * b.y, a.z * b.z);
}
inline __host__ __device__ float3 operator*(float3 a, float s)
{
	return make_float3(a.x * s, a.y * s, a.z * s);
}
inline __host__ __device__ float3 operator*(float s, float3 a)
{
	return make_float3(a.x * s, a.y * s, a.z * s);
}
inline __host__ __device__ void operator*=(float3 &a, float s)
{
	a.x *= s; a.y *= s; a.z *= s;
}
// divide; the scalar divisor is inverted once so only a single division
// is issued
inline __host__ __device__ float3 operator/(float3 a, float3 b)
{
	return make_float3(a.x / b.x, a.y / b.y, a.z / b.z);
}
inline __host__ __device__ float3 operator/(float3 a, float s)
{
	float inv = 1.0f / s;
	return a * inv;
}
// scalar / vector, component-wise: (s/a.x, s/a.y, s/a.z).
// BUG FIX: the original computed `a * (1.0f/s)` — i.e. a/s, identical to
// the vector/scalar overload — instead of s/a (legacy cutil_math.h bug,
// corrected in NVIDIA's helper_math.h).
inline __host__ __device__ float3 operator/(float s, float3 a)
{
	return make_float3(s / a.x, s / a.y, s / a.z);
}
// in-place divide by scalar (single division via reciprocal)
inline __host__ __device__ void operator/=(float3 &a, float s)
{
	float inv = 1.0f / s;
	a *= inv;
}
// lerp: linear interpolation, a at t=0, b at t=1 (t is not clamped)
inline __device__ __host__ float3 lerp(float3 a, float3 b, float t)
{
	return a + t*(b-a);
}
// clamp each component to [a, b]
inline __device__ __host__ float3 clamp(float3 v, float a, float b)
{
	return make_float3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b));
}
inline __device__ __host__ float3 clamp(float3 v, float3 a, float3 b)
{
	return make_float3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z));
}
// dot product
inline __host__ __device__ float dot(float3 a, float3 b)
{
	return a.x * b.x + a.y * b.y + a.z * b.z;
}
// cross product (right-handed)
inline __host__ __device__ float3 cross(float3 a, float3 b)
{
	return make_float3(a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x);
}
// length (Euclidean norm)
inline __host__ __device__ float length(float3 v)
{
	return sqrtf(dot(v, v));
}
// normalize; NOTE: a zero-length input divides by zero and yields
// non-finite components
inline __host__ __device__ float3 normalize(float3 v)
{
	float invLen = rsqrtf(dot(v, v));
	return v * invLen;
}
// floor, component-wise
inline __host__ __device__ float3 floor(const float3 v)
{
	return make_float3(floor(v.x), floor(v.y), floor(v.z));
}
// reflect i about normal n (n is assumed unit-length — not verified here)
inline __host__ __device__ float3 reflect(float3 i, float3 n)
{
	return i - 2.0f * n * dot(n,i);
}
// absolute value, component-wise
inline __host__ __device__ float3 fabs(float3 v)
{
	return make_float3(fabs(v.x), fabs(v.y), fabs(v.z));
}
// per-component sign: -1, 0 or 1
inline __device__ __host__ int3 sign(float3 f) {
	return make_int3(sign(f.x), sign(f.y), sign(f.z));
}
// float4 functions
////////////////////////////////////////////////////////////////////////////////
// broadcast a scalar to all four components
inline __host__ __device__ float4 make_float4(float a)
{
	return make_float4(a,a,a,a);
}
// extend a float3 with an explicit w component
inline __host__ __device__ float4 make_float4(float3 f, float a)
{
	return make_float4(f.x,f.y,f.z,a);
}
// negate
// FIX: take the operand by const reference — the original non-const
// reference rejected const lvalues and temporaries (the float3 overload
// was already const-correct).
inline __host__ __device__ float4 operator-(const float4 &a)
{
	return make_float4(-a.x, -a.y, -a.z, -a.w);
}
// min, component-wise
static __inline__ __host__ __device__ float4 fminf(float4 a, float4 b)
{
	return make_float4(fminf(a.x,b.x), fminf(a.y,b.y), fminf(a.z,b.z), fminf(a.w,b.w));
}
// max, component-wise
static __inline__ __host__ __device__ float4 fmaxf(float4 a, float4 b)
{
	return make_float4(fmaxf(a.x,b.x), fmaxf(a.y,b.y), fmaxf(a.z,b.z), fmaxf(a.w,b.w));
}
// addition (component-wise)
inline __host__ __device__ float4 operator+(float4 a, float4 b)
{
	return make_float4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
}
inline __host__ __device__ void operator+=(float4 &a, float4 b)
{
	a.x += b.x; a.y += b.y; a.z += b.z; a.w += b.w;
}
// subtract
inline __host__ __device__ float4 operator-(float4 a, float4 b)
{
	return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
}
inline __host__ __device__ void operator-=(float4 &a, float4 b)
{
	a.x -= b.x; a.y -= b.y; a.z -= b.z; a.w -= b.w;
}
// multiply (component-wise, or by a scalar)
// CONSISTENCY FIX: added the component-wise vector*vector overload that
// float2 and float3 already provide; float4 previously had only the
// scalar forms. Existing callers are unaffected.
inline __host__ __device__ float4 operator*(float4 a, float4 b)
{
	return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
}
inline __host__ __device__ float4 operator*(float4 a, float s)
{
	return make_float4(a.x * s, a.y * s, a.z * s, a.w * s);
}
inline __host__ __device__ float4 operator*(float s, float4 a)
{
	return make_float4(a.x * s, a.y * s, a.z * s, a.w * s);
}
inline __host__ __device__ void operator*=(float4 &a, float s)
{
	a.x *= s; a.y *= s; a.z *= s; a.w *= s;
}
// divide; the scalar divisor is inverted once so only a single division
// is issued
inline __host__ __device__ float4 operator/(float4 a, float4 b)
{
	return make_float4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w);
}
inline __host__ __device__ float4 operator/(float4 a, float s)
{
	float inv = 1.0f / s;
	return a * inv;
}
// scalar / vector, component-wise: (s/a.x, s/a.y, s/a.z, s/a.w).
// BUG FIX: the original computed `a * (1.0f/s)` — i.e. a/s, identical to
// the vector/scalar overload — instead of s/a (legacy cutil_math.h bug,
// corrected in NVIDIA's helper_math.h).
inline __host__ __device__ float4 operator/(float s, float4 a)
{
	return make_float4(s / a.x, s / a.y, s / a.z, s / a.w);
}
// in-place divide by scalar (single division via reciprocal)
inline __host__ __device__ void operator/=(float4 &a, float s)
{
	float inv = 1.0f / s;
	a *= inv;
}
// lerp: linear interpolation, a at t=0, b at t=1 (t is not clamped)
inline __device__ __host__ float4 lerp(float4 a, float4 b, float t)
{
	return a + t*(b-a);
}
// clamp each component to [a, b]
inline __device__ __host__ float4 clamp(float4 v, float a, float b)
{
	return make_float4(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b), clamp(v.w, a, b));
}
inline __device__ __host__ float4 clamp(float4 v, float4 a, float4 b)
{
	return make_float4(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z), clamp(v.w, a.w, b.w));
}
// dot product
inline __host__ __device__ float dot(float4 a, float4 b)
{
	return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
}
// length (Euclidean norm)
inline __host__ __device__ float length(float4 r)
{
	return sqrtf(dot(r, r));
}
// normalize; NOTE: a zero-length input divides by zero and yields
// non-finite components
inline __host__ __device__ float4 normalize(float4 v)
{
	float invLen = rsqrtf(dot(v, v));
	return v * invLen;
}
// floor, component-wise
inline __host__ __device__ float4 floor(const float4 v)
{
	return make_float4(floor(v.x), floor(v.y), floor(v.z), floor(v.w));
}
// absolute value, component-wise
inline __host__ __device__ float4 fabs(float4 v)
{
	return make_float4(fabs(v.x), fabs(v.y), fabs(v.z), fabs(v.w));
}
}
// int3 functions
////////////////////////////////////////////////////////////////////////////////
// conversion: float components are truncated toward zero
inline __host__ __device__ int3 make_int3(float3 f)
{
	int3 t; t.x = static_cast<int>(f.x); t.y = static_cast<int>(f.y); t.z = static_cast<int>(f.z); return t;
}
inline __host__ __device__ int3 make_int3(uint3 i)
{
	int3 t; t.x = static_cast<int>(i.x); t.y = static_cast<int>(i.y); t.z = static_cast<int>(i.z); return t;
}
// broadcast a scalar to all three components
inline __host__ __device__ int3 make_int3(int i)
{
	int3 t; t.x = i; t.y = i; t.z = i; return t;
}
// negate
// FIX: take the operand by const reference — the original non-const
// reference rejected const lvalues and temporaries (the float3 overload
// was already const-correct).
inline __host__ __device__ int3 operator-(const int3 &a)
{
	return make_int3(-a.x, -a.y, -a.z);
}
// min, component-wise
inline __host__ __device__ int3 min(int3 a, int3 b)
{
	return make_int3(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z));
}
// max, component-wise
inline __host__ __device__ int3 max(int3 a, int3 b)
{
	return make_int3(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z));
}
// addition (component-wise)
inline __host__ __device__ int3 operator+(int3 a, int3 b)
{
	return make_int3(a.x + b.x, a.y + b.y, a.z + b.z);
}
inline __host__ __device__ void operator+=(int3 &a, int3 b)
{
	a.x += b.x; a.y += b.y; a.z += b.z;
}
// subtract
inline __host__ __device__ int3 operator-(int3 a, int3 b)
{
	return make_int3(a.x - b.x, a.y - b.y, a.z - b.z);
}
inline __host__ __device__ void operator-=(int3 &a, int3 b)
{
	a.x -= b.x; a.y -= b.y; a.z -= b.z;
}
// multiply (component-wise, or by a scalar)
inline __host__ __device__ int3 operator*(int3 a, int3 b)
{
	return make_int3(a.x * b.x, a.y * b.y, a.z * b.z);
}
inline __host__ __device__ int3 operator*(int3 a, int s)
{
	return make_int3(a.x * s, a.y * s, a.z * s);
}
inline __host__ __device__ int3 operator*(int s, int3 a)
{
	return make_int3(a.x * s, a.y * s, a.z * s);
}
inline __host__ __device__ void operator*=(int3 &a, int s)
{
	a.x *= s; a.y *= s; a.z *= s;
}
// divide (integer division; truncates toward zero)
inline __host__ __device__ int3 operator/(int3 a, int3 b)
{
	return make_int3(a.x / b.x, a.y / b.y, a.z / b.z);
}
inline __host__ __device__ int3 operator/(int3 a, int s)
{
	return make_int3(a.x / s, a.y / s, a.z / s);
}
// scalar / vector, component-wise: (s/a.x, s/a.y, s/a.z).
// BUG FIX: the original returned a/s (identical to the vector/scalar
// overload) instead of s/a — same legacy cutil_math.h defect as the
// float2/float3/float4 scalar-division overloads.
inline __host__ __device__ int3 operator/(int s, int3 a)
{
	return make_int3(s / a.x, s / a.y, s / a.z);
}
// in-place divide by scalar (integer division)
inline __host__ __device__ void operator/=(int3 &a, int s)
{
	a.x /= s; a.y /= s; a.z /= s;
}
// clamp f to the inclusive range [a, b]
inline __device__ __host__ int clamp(int f, int a, int b)
{
	return max(a, min(f, b));
}
// clamp each component to [a, b]
inline __device__ __host__ int3 clamp(int3 v, int a, int b)
{
	return make_int3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b));
}
inline __device__ __host__ int3 clamp(int3 v, int3 a, int3 b)
{
	return make_int3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z));
}
}
// uint3 functions (unsigned arithmetic: subtraction wraps modulo 2^32)
////////////////////////////////////////////////////////////////////////////////
// min, component-wise
inline __host__ __device__ uint3 min(uint3 a, uint3 b)
{
	return make_uint3(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z));
}
// max, component-wise
inline __host__ __device__ uint3 max(uint3 a, uint3 b)
{
	return make_uint3(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z));
}
// addition (component-wise)
inline __host__ __device__ uint3 operator+(uint3 a, uint3 b)
{
	return make_uint3(a.x + b.x, a.y + b.y, a.z + b.z);
}
inline __host__ __device__ void operator+=(uint3 &a, uint3 b)
{
	a.x += b.x; a.y += b.y; a.z += b.z;
}
// subtract
inline __host__ __device__ uint3 operator-(uint3 a, uint3 b)
{
	return make_uint3(a.x - b.x, a.y - b.y, a.z - b.z);
}
inline __host__ __device__ void operator-=(uint3 &a, uint3 b)
{
	a.x -= b.x; a.y -= b.y; a.z -= b.z;
}
// multiply (component-wise, or by a scalar)
inline __host__ __device__ uint3 operator*(uint3 a, uint3 b)
{
	return make_uint3(a.x * b.x, a.y * b.y, a.z * b.z);
}
inline __host__ __device__ uint3 operator*(uint3 a, uint s)
{
	return make_uint3(a.x * s, a.y * s, a.z * s);
}
inline __host__ __device__ uint3 operator*(uint s, uint3 a)
{
	return make_uint3(a.x * s, a.y * s, a.z * s);
}
inline __host__ __device__ void operator*=(uint3 &a, uint s)
{
	a.x *= s; a.y *= s; a.z *= s;
}
// divide (integer division)
inline __host__ __device__ uint3 operator/(uint3 a, uint3 b)
{
	return make_uint3(a.x / b.x, a.y / b.y, a.z / b.z);
}
inline __host__ __device__ uint3 operator/(uint3 a, uint s)
{
	return make_uint3(a.x / s, a.y / s, a.z / s);
}
// scalar / vector, component-wise: (s/a.x, s/a.y, s/a.z).
// BUG FIX: the original returned a/s (identical to the vector/scalar
// overload) instead of s/a — same legacy cutil_math.h defect as the
// other scalar-division overloads in this header.
inline __host__ __device__ uint3 operator/(uint s, uint3 a)
{
	return make_uint3(s / a.x, s / a.y, s / a.z);
}
// in-place divide by scalar (integer division)
inline __host__ __device__ void operator/=(uint3 &a, uint s)
{
	a.x /= s; a.y /= s; a.z /= s;
}
// clamp f to the inclusive range [a, b]
inline __device__ __host__ uint clamp(uint f, uint a, uint b)
{
	return max(a, min(f, b));
}
// clamp each component to [a, b]
inline __device__ __host__ uint3 clamp(uint3 v, uint a, uint b)
{
	return make_uint3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b));
}
inline __device__ __host__ uint3 clamp(uint3 v, uint3 a, uint3 b)
{
	return make_uint3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z));
}
#endif // _FTL_CUDA_OPERATORS_HPP_
#pragma once
#ifndef _CUDA_UTIL_
#define _CUDA_UTIL_
#undef max
#undef min
#include <cuda_runtime.h>
#include <ftl/cuda_operators.hpp>
// Enable run time assertion checking in kernel code.
// FIX: wrapped in do { } while(0) so the macro behaves as a single
// statement — the original bare `if` miscompiled (or silently attached
// the wrong branch) when used in an unbraced if/else.
#define cudaAssert(condition) do { if (!(condition)) { printf("ASSERT: %s %s\n", #condition, __FILE__); } } while (0)
//#define cudaAssert(condition)

#if defined(__CUDA_ARCH__)
// FIX: a #pragma directive cannot be produced by macro expansion, so the
// original replacement text `#pragma unroll` was ill-formed at every use
// site; the standard _Pragma operator is the correct spelling.
#define __CONDITIONAL_UNROLL__ _Pragma("unroll")
#else
#define __CONDITIONAL_UNROLL__
#endif
#endif
// From: https://github.com/niessner/VoxelHashing/blob/master/DepthSensingCUDA/Source/DepthCameraUtil.h
#pragma once
//#include <cutil_inline.h>
//#include <cutil_math.h>
#include <vector_types.h>
#include <cuda_runtime.h>
#include <ftl/cuda_matrix_util.hpp>
#include <ftl/cuda_common.hpp>
#include <ftl/depth_camera_params.hpp>
extern "C" void updateConstantDepthCameraParams(const DepthCameraParams& params);
extern __constant__ DepthCameraParams c_depthCameraParams;
// Per-camera GPU state: depth/colour GpuMats plus texture objects, and
// device-side helpers projecting between camera space and the depth
// image. Intrinsics are read from the __constant__ copy
// c_depthCameraParams (pushed via updateParams()).
//
// NOTE(review): intentionally shallow-copyable (raw pointers, no Rule of
// Five) — presumably so it can be passed by value into CUDA kernels;
// ownership is manual via alloc()/free(), not RAII. Verify no host code
// copies an instance and calls free() twice on distinct copies.
struct DepthCameraData {

	///////////////
	// Host part //
	///////////////

	__device__ __host__
	DepthCameraData() {
		depth_mat_ = nullptr;
		colour_mat_ = nullptr;
		depth_tex_ = nullptr;
		colour_tex_ = nullptr;
		depth_obj_ = 0;   // FIX: texture handles were previously left uninitialized
		colour_obj_ = 0;
	}

	// Allocate GPU buffers and texture objects for the given image size.
	// NOTE(review): no resize support — free() must be called before a
	// second alloc(), otherwise the previous buffers leak.
	__host__
	void alloc(const DepthCameraParams& params) { //! todo resizing???
		std::cout << "Create texture objects: " << params.m_imageWidth << "," << params.m_imageHeight << std::endl;
		depth_mat_ = new cv::cuda::GpuMat(params.m_imageHeight, params.m_imageWidth, CV_32FC1);
		colour_mat_ = new cv::cuda::GpuMat(params.m_imageHeight, params.m_imageWidth, CV_8UC4);
		depth_tex_ = new ftl::cuda::TextureObject<float>((cv::cuda::PtrStepSz<float>)*depth_mat_);
		colour_tex_ = new ftl::cuda::TextureObject<uchar4>((cv::cuda::PtrStepSz<uchar4>)*colour_mat_);
		depth_obj_ = depth_tex_->cudaTexture();
		colour_obj_ = colour_tex_->cudaTexture();
	}

	// Push new intrinsics into GPU constant memory.
	__host__
	void updateParams(const DepthCameraParams& params) {
		updateConstantDepthCameraParams(params);
	}

	// Upload the latest host frames into the GPU buffers.
	__host__
	void updateData(const cv::Mat &depth, const cv::Mat &rgb) {
		depth_mat_->upload(depth);
		colour_mat_->upload(rgb);
	}

	// Release all GPU resources.
	// FIX: pointers are nulled afterwards so free() is idempotent and a
	// later alloc() starts clean — the original left dangling pointers,
	// risking a double delete on a second call.
	__host__
	void free() {
		delete depth_mat_;   // delete on nullptr is a no-op
		delete colour_mat_;
		delete depth_tex_;
		delete colour_tex_;
		depth_mat_ = nullptr;
		colour_mat_ = nullptr;
		depth_tex_ = nullptr;
		colour_tex_ = nullptr;
	}

	/////////////////
	// Device part //
	/////////////////

	static inline const DepthCameraParams& params() {
		return c_depthCameraParams;
	}

	///////////////////////////////////////////////////////////////
	// Camera to Screen
	///////////////////////////////////////////////////////////////

	// Pinhole projection of a camera-space point onto the image plane.
	__device__
	static inline float2 cameraToKinectScreenFloat(const float3& pos) {
		return make_float2(
			pos.x*c_depthCameraParams.fx/pos.z + c_depthCameraParams.mx,
			pos.y*c_depthCameraParams.fy/pos.z + c_depthCameraParams.my);
	}

	// As above, rounded to the nearest pixel.
	__device__
	static inline int2 cameraToKinectScreenInt(const float3& pos) {
		float2 pImage = cameraToKinectScreenFloat(pos);
		return make_int2(pImage + make_float2(0.5f, 0.5f));
	}

	__device__
	static inline uint2 cameraToKinectScreen(const float3& pos) {
		int2 p = cameraToKinectScreenInt(pos);
		return make_uint2(p.x, p.y);
	}

	// Normalize a metric depth into [0,1] over the sensor's min/max range.
	__device__
	static inline float cameraToKinectProjZ(float z) {
		return (z - c_depthCameraParams.m_sensorDepthWorldMin)/(c_depthCameraParams.m_sensorDepthWorldMax - c_depthCameraParams.m_sensorDepthWorldMin);
	}

	// Project into NDC-style coordinates: x in [-1,1], y in [-1,1] with
	// the y axis flipped relative to screen space, z in [0,1].
	__device__
	static inline float3 cameraToKinectProj(const float3& pos) {
		float2 proj = cameraToKinectScreenFloat(pos);
		float3 pImage = make_float3(proj.x, proj.y, pos.z);
		pImage.x = (2.0f*pImage.x - (c_depthCameraParams.m_imageWidth- 1.0f))/(c_depthCameraParams.m_imageWidth- 1.0f);
		pImage.y = ((c_depthCameraParams.m_imageHeight-1.0f) - 2.0f*pImage.y)/(c_depthCameraParams.m_imageHeight-1.0f);
		pImage.z = cameraToKinectProjZ(pImage.z);
		return pImage;
	}

	///////////////////////////////////////////////////////////////
	// Screen to Camera (depth in meters)
	///////////////////////////////////////////////////////////////

	// Back-project pixel (ux,uy) at metric depth into camera space.
	__device__
	static inline float3 kinectDepthToSkeleton(uint ux, uint uy, float depth) {
		const float x = ((float)ux-c_depthCameraParams.mx) / c_depthCameraParams.fx;
		const float y = ((float)uy-c_depthCameraParams.my) / c_depthCameraParams.fy;
		return make_float3(depth*x, depth*y, depth);
	}

	///////////////////////////////////////////////////////////////
	// RenderScreen to Camera -- ATTENTION ASSUMES [1,0]-Z range!!!!
	///////////////////////////////////////////////////////////////

	// Map a normalized z in [0,1] back to metric depth.
	__device__
	static inline float kinectProjToCameraZ(float z) {
		return z * (c_depthCameraParams.m_sensorDepthWorldMax - c_depthCameraParams.m_sensorDepthWorldMin) + c_depthCameraParams.m_sensorDepthWorldMin;
	}

	// z has to be in [0, 1]
	__device__
	static inline float3 kinectProjToCamera(uint ux, uint uy, float z) {
		float fSkeletonZ = kinectProjToCameraZ(z);
		return kinectDepthToSkeleton(ux, uy, fSkeletonZ);
	}

	// Conservative frustum test: the projected point is shrunk by 5%
	// before testing against the unit cube.
	__device__
	static inline bool isInCameraFrustumApprox(const float4x4& viewMatrixInverse, const float3& pos) {
		float3 pCamera = viewMatrixInverse * pos;
		float3 pProj = cameraToKinectProj(pCamera);
		pProj *= 0.95f;	// FIX: float literal avoids an implicit double conversion
		return !(pProj.x < -1.0f || pProj.x > 1.0f || pProj.y < -1.0f || pProj.y > 1.0f || pProj.z < 0.0f || pProj.z > 1.0f);
	}

	cv::cuda::GpuMat *depth_mat_;                   // owned; depth frame (CV_32FC1)
	cv::cuda::GpuMat *colour_mat_;                  // owned; colour frame (CV_8UC4)
	ftl::cuda::TextureObject<float> *depth_tex_;    // owned; texture view of depth_mat_
	ftl::cuda::TextureObject<uchar4> *colour_tex_;  // owned; texture view of colour_mat_
	cudaTextureObject_t depth_obj_;                 // cached handle from depth_tex_
	cudaTextureObject_t colour_obj_;                // cached handle from colour_tex_
};
// From: https://github.com/niessner/VoxelHashing/blob/master/DepthSensingCUDA/Source/CUDADepthCameraParams.h
//#include <cutil_inline.h>
//#include <cutil_math.h>
#include <vector_types.h>
#include <cuda_runtime.h>
#include <ftl/cuda_matrix_util.hpp>
#include <ftl/camera_params.hpp>
// Pinhole depth-camera intrinsics, mirrored into GPU __constant__
// memory (c_depthCameraParams). 16-byte aligned so host and device
// agree on layout.
struct __align__(16) DepthCameraParams {
	float fx;	// focal length x (pixels)
	float fy;	// focal length y (pixels)
	float mx;	// principal point x (pixels)
	float my;	// principal point y (pixels)

	unsigned short m_imageWidth;	// image size in pixels
	unsigned short m_imageHeight;
	unsigned int flags;	// NOTE(review): semantics not visible here — verify at call sites

	float m_sensorDepthWorldMin;	// valid metric depth range [min, max];
	float m_sensorDepthWorldMax;	// used to normalize depth to [0,1]
};
// From: https://github.com/niessner/VoxelHashing/blob/master/DepthSensingCUDA/Source/MatrixConversion.h
#pragma once
#include <eigen3/Eigen/Eigen>
// #include "d3dx9math.h"
#include <ftl/cuda_matrix_util.hpp>
// Conversions between Eigen and the project's CUDA float4x4 type, plus
// homogeneous-coordinate helpers.
// CLEANUP: removed ~100 lines of dead, commented-out legacy conversion
// code (D3DX / mlib vec types and a disabled eulerAngles routine) that
// obscured the four live functions; no live code changed.
namespace MatrixConversion
{
	// Homogeneous lift: (x,y,z) -> (x,y,z,1).
	static Eigen::Vector4f VecH(const Eigen::Vector3f& v)
	{
		return Eigen::Vector4f(v[0], v[1], v[2], 1.0);
	}

	// Homogeneous divide: (x,y,z,w) -> (x/w, y/w, z/w).
	// NOTE(review): no guard against w == 0 — callers must ensure w != 0.
	static Eigen::Vector3f VecDH(const Eigen::Vector4f& v)
	{
		return Eigen::Vector3f(v[0]/v[3], v[1]/v[3], v[2]/v[3]);
	}

	// Eigen matrices are column-major by default; the transpose here
	// presumably converts to float4x4's row-major layout — verify against
	// cuda_matrix_util.hpp.
	static float4x4 toCUDA(const Eigen::Matrix4f& mat) {
		return float4x4(mat.data()).getTranspose();
	}

	static Eigen::Matrix4f toEigen(const float4x4& m) {
		return Eigen::Matrix4f(m.ptr()).transpose();
	}
}
#include <ftl/cuda_util.hpp>
#include <ftl/cuda_matrix_util.hpp>
// Parameters for one SDF ray cast pass. Aligned to 16 bytes so the struct can
// be copied verbatim into CUDA constant memory (see c_rayCastParams).
struct __align__(16) RayCastParams {
	float4x4 m_viewMatrix;            // world -> view transform (used to bring normals into view space)
	float4x4 m_viewMatrixInverse;     // inverse of m_viewMatrix
	float4x4 m_intrinsics;            // camera projection; presumably filled in before rendering -- TODO confirm producer
	float4x4 m_intrinsicsInverse;

	unsigned int m_width;             // output image width in pixels
	unsigned int m_height;            // output image height in pixels

	unsigned int m_numOccupiedSDFBlocks;  // occupied block count -- TODO confirm where this is set
	unsigned int m_maxNumVertices;        // vertex budget (hashNumSDFBlocks * 6, see parametersFromConfig)
	int m_splatMinimum;                   // ray interval splatting mode flag -- TODO confirm semantics

	float m_minDepth;                 // near limit for ray marching
	float m_maxDepth;                 // far limit for ray marching
	float m_rayIncrement;             // marching step size along the ray
	float m_thresSampleDist;          // max |sdf| gap between consecutive samples to accept a hit
	float m_thresDist;                // max |sdf| at the crossing to accept a hit
	bool m_useGradients;              // if true, also output normals from the SDF gradient

	uint dummy0;                      // explicit padding for the 16-byte alignment
};
#pragma once
#include <ftl/configurable.hpp>
#include <ftl/matrix_conversion.hpp>
#include <ftl/cuda_matrix_util.hpp>
#include <ftl/depth_camera.hpp>
#include <ftl/ray_cast_util.hpp>
#include <nlohmann/json.hpp>
// #include "DX11RayIntervalSplatting.h"
/**
 * Ray caster that renders an SDF voxel hash volume (depth, colour and
 * optionally normals) into per-pixel buffers.  Configured from JSON; see
 * parametersFromConfig for the required keys.
 */
class CUDARayCastSDF : public ftl::Configurable
{
public:
	// Throws (nlohmann::json exceptions) if a required config key is missing
	// or has the wrong type.
	CUDARayCastSDF(nlohmann::json& config) : ftl::Configurable(config) {
		create(parametersFromConfig(config));
		hash_render_ = config.value("hash_renderer", false);
	}

	~CUDARayCastSDF(void) {
		destroy();
	}

	/**
	 * Build ray cast parameters from the JSON configuration.
	 *
	 * The struct is zero-initialised first: RayCastParams has no default
	 * member initialisers, so fields not assigned below (view matrices,
	 * m_numOccupiedSDFBlocks, m_splatMinimum, ...) would otherwise hold
	 * indeterminate values.  Intrinsics start from identity rather than a
	 * default-constructed Eigen::Matrix4f, whose coefficients are
	 * uninitialised.
	 */
	static RayCastParams parametersFromConfig(const nlohmann::json& gas) {
		RayCastParams params = {};
		params.m_width = gas["adapterWidth"].get<unsigned int>();
		params.m_height = gas["adapterHeight"].get<unsigned int>();
		params.m_intrinsics = MatrixConversion::toCUDA(Eigen::Matrix4f::Identity());
		params.m_intrinsicsInverse = MatrixConversion::toCUDA(Eigen::Matrix4f::Identity());
		params.m_minDepth = gas["sensorDepthMin"].get<float>();
		params.m_maxDepth = gas["sensorDepthMax"].get<float>();
		// Step size and hit thresholds all scale with the SDF truncation band.
		params.m_rayIncrement = gas["SDFRayIncrementFactor"].get<float>() * gas["SDFTruncation"].get<float>();
		params.m_thresSampleDist = gas["SDFRayThresSampleDistFactor"].get<float>() * params.m_rayIncrement;
		params.m_thresDist = gas["SDFRayThresDistFactor"].get<float>() * params.m_rayIncrement;
		params.m_useGradients = gas["SDFUseGradients"].get<bool>();
		params.m_maxNumVertices = gas["hashNumSDFBlocks"].get<unsigned int>() * 6;
		return params;
	}

	// Render the hash volume from the given camera pose into m_data.
	void render(const ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const DepthCameraData& cameraData, const Eigen::Matrix4f& lastRigidTransform);

	const RayCastData& getRayCastData(void) {
		return m_data;
	}
	const RayCastParams& getRayCastParams() const {
		return m_params;
	}

	// debugging
	void convertToCameraSpace(const DepthCameraData& cameraData);

private:
	void create(const RayCastParams& params);
	void destroy(void);

	void rayIntervalSplatting(const ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const DepthCameraData& cameraData, const Eigen::Matrix4f& lastRigidTransform); // rasterize

	RayCastParams m_params;   // immutable after construction except by render path
	RayCastData m_data;       // device output buffers
	bool hash_render_;        // "hash_renderer" config flag (default false)

	// DX11RayIntervalSplatting m_rayIntervalSplatting;
};
#pragma once
#include <ftl/cuda_matrix_util.hpp>
#include <ftl/depth_camera.hpp>
#include <ftl/voxel_hash.hpp>
#include <ftl/ray_cast_params.hpp>
// One sample taken while marching a ray through the SDF (see
// RayCastData::traverseCoarseGridSimpleSampleAll).
struct RayCastSample
{
	float sdf;    // interpolated signed distance at the sample point
	float alpha;  // ray parameter (distance along the ray) of the sample
	uint weight;  // 0 means the sample was invalid (interpolation failed)
	//uint3 color;
};
#ifndef MINF
#define MINF asfloat(0xff800000)
#endif
extern __constant__ RayCastParams c_rayCastParams;
extern "C" void updateConstantRayCastParams(const RayCastParams& params);
// Working data for SDF ray casting.  The host side owns per-pixel device
// output buffers (depth, 3D points, normals, colours); the device side
// implements the ray marching over the voxel hash with trilinear SDF
// interpolation.
struct RayCastData {

	///////////////
	// Host part //
	///////////////

	// All buffers start null; allocate() must be called before rendering.
	__device__ __host__
	RayCastData() {
		d_depth = NULL;
		d_depth3 = NULL;
		d_normals = NULL;
		d_colors = NULL;

		//d_vertexBuffer = NULL;
		point_cloud_ = nullptr;

		d_rayIntervalSplatMinArray = NULL;
		d_rayIntervalSplatMaxArray = NULL;
	}

#ifndef __CUDACC__
	// Allocate one element per output pixel for each device buffer.
	__host__
	void allocate(const RayCastParams& params) {
		//point_cloud_ = new cv::cuda::GpuMat(params.m_width*params.m_height, 1, CV_32FC3);
		cudaSafeCall(cudaMalloc(&d_depth, sizeof(float) * params.m_width * params.m_height));
		cudaSafeCall(cudaMalloc(&d_depth3, sizeof(float3) * params.m_width * params.m_height));
		cudaSafeCall(cudaMalloc(&d_normals, sizeof(float4) * params.m_width * params.m_height));
		cudaSafeCall(cudaMalloc(&d_colors, sizeof(uchar3) * params.m_width * params.m_height));

		//cudaSafeCall(cudaMalloc(&point_cloud_, sizeof(float3) * params.m_width * params.m_height));
		//printf("Allocate ray cast data: %lld \n", (unsigned long long)point_cloud_);
	}

	// Mirror `params` into the CUDA constant c_rayCastParams used by kernels.
	__host__
	void updateParams(const RayCastParams& params) {
		updateConstantRayCastParams(params);
	}

	// Copy results to host.  NOTE: only the colour buffer is actually copied;
	// the points copy is commented out, so `points` is left untouched despite
	// the signature.
	__host__ void download(float3 *points, uchar3 *colours, const RayCastParams& params) const {
		//printf("Download: %d,%d\n", params.m_width, params.m_height);
		//cudaSafeCall(cudaMemcpy(points, d_depth3, sizeof(float3) * params.m_width * params.m_height, cudaMemcpyDeviceToHost));
		cudaSafeCall(cudaMemcpy(colours, d_colors, sizeof(uchar3) * params.m_width * params.m_height, cudaMemcpyDeviceToHost));
	}

	// Release the buffers created by allocate().
	__host__
	void free() {
		//cudaFree(point_cloud_);
		cudaFree(d_depth);
		cudaFree(d_depth3);
		cudaFree(d_normals);
		cudaFree(d_colors);
	}
#endif

	/////////////////
	// Device part //
	/////////////////
#ifdef __CUDACC__

	__device__
	const RayCastParams& params() const {
		return c_rayCastParams;
	}

	// Fractional part of val (val - floor(val)).
	__device__
	float frac(float val) const {
		return (val - floorf(val));
	}
	__device__
	float3 frac(const float3& val) const {
		return make_float3(frac(val.x), frac(val.y), frac(val.z));
	}

	// Trilinearly interpolate the SDF value and colour at world position `pos`
	// from the 8 surrounding voxels.  Returns false (leaving `dist`/`color`
	// partially written) as soon as any of the 8 voxels has zero weight,
	// i.e. carries no data.
	__device__
	bool trilinearInterpolationSimpleFastFast(const ftl::voxhash::HashData& hash, const float3& pos, float& dist, uchar3& color) const {
		const float oSet = c_hashParams.m_virtualVoxelSize;
		const float3 posDual = pos-make_float3(oSet/2.0f, oSet/2.0f, oSet/2.0f);
		float3 weight = frac(hash.worldToVirtualVoxelPosFloat(pos));

		dist = 0.0f;
		float3 colorFloat = make_float3(0.0f, 0.0f, 0.0f);
		// Accumulate the 8 corner contributions, each weighted by its
		// trilinear coefficient.
		ftl::voxhash::Voxel v = hash.getVoxel(posDual+make_float3(0.0f, 0.0f, 0.0f)); if(v.weight == 0) return false; float3 vColor = make_float3(v.color.x, v.color.y, v.color.z); dist+= (1.0f-weight.x)*(1.0f-weight.y)*(1.0f-weight.z)*v.sdf; colorFloat+= (1.0f-weight.x)*(1.0f-weight.y)*(1.0f-weight.z)*vColor;
		v = hash.getVoxel(posDual+make_float3(oSet, 0.0f, 0.0f)); if(v.weight == 0) return false; vColor = make_float3(v.color.x, v.color.y, v.color.z); dist+= weight.x *(1.0f-weight.y)*(1.0f-weight.z)*v.sdf; colorFloat+= weight.x *(1.0f-weight.y)*(1.0f-weight.z)*vColor;
		v = hash.getVoxel(posDual+make_float3(0.0f, oSet, 0.0f)); if(v.weight == 0) return false; vColor = make_float3(v.color.x, v.color.y, v.color.z); dist+= (1.0f-weight.x)* weight.y *(1.0f-weight.z)*v.sdf; colorFloat+= (1.0f-weight.x)* weight.y *(1.0f-weight.z)*vColor;
		v = hash.getVoxel(posDual+make_float3(0.0f, 0.0f, oSet)); if(v.weight == 0) return false; vColor = make_float3(v.color.x, v.color.y, v.color.z); dist+= (1.0f-weight.x)*(1.0f-weight.y)* weight.z *v.sdf; colorFloat+= (1.0f-weight.x)*(1.0f-weight.y)* weight.z *vColor;
		v = hash.getVoxel(posDual+make_float3(oSet, oSet, 0.0f)); if(v.weight == 0) return false; vColor = make_float3(v.color.x, v.color.y, v.color.z); dist+= weight.x * weight.y *(1.0f-weight.z)*v.sdf; colorFloat+= weight.x * weight.y *(1.0f-weight.z)*vColor;
		v = hash.getVoxel(posDual+make_float3(0.0f, oSet, oSet)); if(v.weight == 0) return false; vColor = make_float3(v.color.x, v.color.y, v.color.z); dist+= (1.0f-weight.x)* weight.y * weight.z *v.sdf; colorFloat+= (1.0f-weight.x)* weight.y * weight.z *vColor;
		v = hash.getVoxel(posDual+make_float3(oSet, 0.0f, oSet)); if(v.weight == 0) return false; vColor = make_float3(v.color.x, v.color.y, v.color.z); dist+= weight.x *(1.0f-weight.y)* weight.z *v.sdf; colorFloat+= weight.x *(1.0f-weight.y)* weight.z *vColor;
		v = hash.getVoxel(posDual+make_float3(oSet, oSet, oSet)); if(v.weight == 0) return false; vColor = make_float3(v.color.x, v.color.y, v.color.z); dist+= weight.x * weight.y * weight.z *v.sdf; colorFloat+= weight.x * weight.y * weight.z *vColor;
		color = make_uchar3(colorFloat.x, colorFloat.y, colorFloat.z);//v.color;

		return true;
	}

	//__device__
	//bool trilinearInterpolationSimpleFastFast(const HashData& hash, const float3& pos, float& dist, uchar3& color) const {
	//	const float oSet = c_hashParams.m_virtualVoxelSize;
	//	const float3 posDual = pos-make_float3(oSet/2.0f, oSet/2.0f, oSet/2.0f);
	//	float3 weight = frac(hash.worldToVirtualVoxelPosFloat(pos));

	//	dist = 0.0f;
	//	Voxel v = hash.getVoxel(posDual+make_float3(0.0f, 0.0f, 0.0f)); if(v.weight == 0) return false; dist+= (1.0f-weight.x)*(1.0f-weight.y)*(1.0f-weight.z)*v.sdf;
	//	v = hash.getVoxel(posDual+make_float3(oSet, 0.0f, 0.0f)); if(v.weight == 0) return false; dist+= weight.x *(1.0f-weight.y)*(1.0f-weight.z)*v.sdf;
	//	v = hash.getVoxel(posDual+make_float3(0.0f, oSet, 0.0f)); if(v.weight == 0) return false; dist+= (1.0f-weight.x)* weight.y *(1.0f-weight.z)*v.sdf;
	//	v = hash.getVoxel(posDual+make_float3(0.0f, 0.0f, oSet)); if(v.weight == 0) return false; dist+= (1.0f-weight.x)*(1.0f-weight.y)* weight.z *v.sdf;
	//	v = hash.getVoxel(posDual+make_float3(oSet, oSet, 0.0f)); if(v.weight == 0) return false; dist+= weight.x * weight.y *(1.0f-weight.z)*v.sdf;
	//	v = hash.getVoxel(posDual+make_float3(0.0f, oSet, oSet)); if(v.weight == 0) return false; dist+= (1.0f-weight.x)* weight.y * weight.z *v.sdf;
	//	v = hash.getVoxel(posDual+make_float3(oSet, 0.0f, oSet)); if(v.weight == 0) return false; dist+= weight.x *(1.0f-weight.y)* weight.z *v.sdf;
	//	v = hash.getVoxel(posDual+make_float3(oSet, oSet, oSet)); if(v.weight == 0) return false; dist+= weight.x * weight.y * weight.z *v.sdf;
	//	color = v.color;

	//	return true;
	//}

	// Linear zero crossing between ray parameters tNear/tFar with SDF values
	// dNear/dFar (assumes dNear and dFar bracket a sign change).
	__device__
	float findIntersectionLinear(float tNear, float tFar, float dNear, float dFar) const
	{
		return tNear+(dNear/(dNear-dFar))*(tFar-tNear);
	}

	static const unsigned int nIterationsBisection = 3;

	// d0 near, d1 far
	// Refine the surface crossing between ray parameters r0 and r1 (SDF values
	// d0 and d1) with nIterationsBisection rounds of false-position.  Outputs
	// the crossing parameter in `alpha` and the interpolated colour there;
	// returns false if the SDF cannot be sampled at an intermediate point.
	__device__
	bool findIntersectionBisection(const ftl::voxhash::HashData& hash, const float3& worldCamPos, const float3& worldDir, float d0, float r0, float d1, float r1, float& alpha, uchar3& color) const
	{
		float a = r0; float aDist = d0;
		float b = r1; float bDist = d1;
		float c = 0.0f;

#pragma unroll 1
		for(uint i = 0; i<nIterationsBisection; i++)
		{
			c = findIntersectionLinear(a, b, aDist, bDist);

			float cDist;
			if(!trilinearInterpolationSimpleFastFast(hash, worldCamPos+c*worldDir, cDist, color)) return false;

			// Keep the sub-interval that still brackets the sign change.
			if(aDist*cDist > 0.0) { a = c; aDist = cDist; }
			else { b = c; bDist = cDist; }
		}

		alpha = c;

		return true;
	}

	// Central-difference gradient of the SDF at `pos`, negated and normalised
	// (zero vector if the gradient vanishes).  Note: failures of the six
	// interpolation calls are ignored here -- their return values are unchecked.
	__device__
	float3 gradientForPoint(const ftl::voxhash::HashData& hash, const float3& pos) const
	{
		const float voxelSize = c_hashParams.m_virtualVoxelSize;
		float3 offset = make_float3(voxelSize, voxelSize, voxelSize);

		float distp00; uchar3 colorp00; trilinearInterpolationSimpleFastFast(hash, pos-make_float3(0.5f*offset.x, 0.0f, 0.0f), distp00, colorp00);
		float dist0p0; uchar3 color0p0; trilinearInterpolationSimpleFastFast(hash, pos-make_float3(0.0f, 0.5f*offset.y, 0.0f), dist0p0, color0p0);
		float dist00p; uchar3 color00p; trilinearInterpolationSimpleFastFast(hash, pos-make_float3(0.0f, 0.0f, 0.5f*offset.z), dist00p, color00p);

		float dist100; uchar3 color100; trilinearInterpolationSimpleFastFast(hash, pos+make_float3(0.5f*offset.x, 0.0f, 0.0f), dist100, color100);
		float dist010; uchar3 color010; trilinearInterpolationSimpleFastFast(hash, pos+make_float3(0.0f, 0.5f*offset.y, 0.0f), dist010, color010);
		float dist001; uchar3 color001; trilinearInterpolationSimpleFastFast(hash, pos+make_float3(0.0f, 0.0f, 0.5f*offset.z), dist001, color001);

		float3 grad = make_float3((distp00-dist100)/offset.x, (dist0p0-dist010)/offset.y, (dist00p-dist001)/offset.z);

		float l = length(grad);
		if(l == 0.0f) {
			return make_float3(0.0f, 0.0f, 0.0f);
		}

		return -grad/l;
	}

	// March the ray for output pixel dTid from minInterval to maxInterval
	// (clamped to [m_minDepth, m_maxDepth]).  When consecutive valid samples
	// change SDF sign (+ to -) and the bisection hit passes both distance
	// thresholds, write depth, 3D point, colour and (optionally) normal for
	// that pixel and stop.
	__device__
	void traverseCoarseGridSimpleSampleAll(const ftl::voxhash::HashData& hash, const DepthCameraData& cameraData, const float3& worldCamPos, const float3& worldDir, const float3& camDir, const int3& dTid, float minInterval, float maxInterval) const
	{
		const RayCastParams& rayCastParams = c_rayCastParams;

		// Last Sample
		RayCastSample lastSample; lastSample.sdf = 0.0f; lastSample.alpha = 0.0f; lastSample.weight = 0; // lastSample.color = int3(0, 0, 0);
		const float depthToRayLength = 1.0f/camDir.z; // scale factor to convert from depth to ray length

		float rayCurrent = depthToRayLength * max(rayCastParams.m_minDepth, minInterval);	// Convert depth to raylength
		float rayEnd = depthToRayLength * min(rayCastParams.m_maxDepth, maxInterval);		// Convert depth to raylength
		//float rayCurrent = depthToRayLength * rayCastParams.m_minDepth;	// Convert depth to raylength
		//float rayEnd = depthToRayLength * rayCastParams.m_maxDepth;		// Convert depth to raylength

		//printf("MINMAX: %f,%f\n", minInterval, maxInterval);

#pragma unroll 1
		while(rayCurrent < rayEnd)
		{
			float3 currentPosWorld = worldCamPos+rayCurrent*worldDir;
			float dist;	uchar3 color;

			//printf("RAY LOOP: %f,%f,%f\n", currentPosWorld.x, currentPosWorld.y, currentPosWorld.z);

			if(trilinearInterpolationSimpleFastFast(hash, currentPosWorld, dist, color))
			{
				if(lastSample.weight > 0 && lastSample.sdf > 0.0f && dist < 0.0f) // current sample is always valid here
				{
					// Zero crossing between the previous and current sample:
					// refine it and check it is a confident surface hit.
					float alpha; // = findIntersectionLinear(lastSample.alpha, rayCurrent, lastSample.sdf, dist);
					uchar3 color2;
					bool b = findIntersectionBisection(hash, worldCamPos, worldDir, lastSample.sdf, lastSample.alpha, dist, rayCurrent, alpha, color2);

					float3 currentIso = worldCamPos+alpha*worldDir;
					if(b && abs(lastSample.sdf - dist) < rayCastParams.m_thresSampleDist)
					{
						if(abs(dist) < rayCastParams.m_thresDist)
						{
							float depth = alpha / depthToRayLength; // Convert ray length to depth depthToRayLength

							d_depth[dTid.y*rayCastParams.m_width+dTid.x] = depth;
							d_depth3[dTid.y*rayCastParams.m_width+dTid.x] = cameraData.kinectDepthToSkeleton(dTid.x, dTid.y, depth);
							d_colors[dTid.y*rayCastParams.m_width+dTid.x] = make_uchar3(color2.x, color2.y, color2.z);

							if(rayCastParams.m_useGradients)
							{
								float3 normal = -gradientForPoint(hash, currentIso);
								float4 n = rayCastParams.m_viewMatrix * make_float4(normal, 0.0f);
								d_normals[dTid.y*rayCastParams.m_width+dTid.x] = make_float4(n.x, n.y, n.z, 1.0f);
							}

							return;
						}
					}
				}

				lastSample.sdf = dist;
				lastSample.alpha = rayCurrent;
				// lastSample.color = color;
				lastSample.weight = 1;
				rayCurrent += rayCastParams.m_rayIncrement;
			} else {
				// No data at this position: invalidate the last sample so a
				// crossing is never computed across an unobserved gap.
				lastSample.weight = 0;
				rayCurrent += rayCastParams.m_rayIncrement;
			}
		}
	}

#endif // __CUDACC__

	// d_depth is aliased as int* -- presumably so kernels can use integer
	// (e.g. atomic) operations on the same buffer; TODO confirm usage.
	union {
		float* d_depth;
		int* d_depth_i;
	};
	float3* d_depth3;   // back-projected 3D point per pixel
	float4* d_normals;  // per-pixel normal (w set to 1 when written)
	uchar3* d_colors;   // per-pixel colour

	//float4* d_vertexBuffer; // ray interval splatting triangles, mapped from directx (memory lives there)
	float3 *point_cloud_;   // unused here; its allocation is commented out

	cudaArray* d_rayIntervalSplatMinArray;
	cudaArray* d_rayIntervalSplatMaxArray;
};
\ No newline at end of file
// From: https://github.com/niessner/VoxelHashing/blob/master/DepthSensingCUDA/Source/CUDASceneRepHashSDF.h
#pragma once
#include <cuda_runtime.h>
#include <ftl/configurable.hpp>
#include <ftl/matrix_conversion.hpp>
#include <ftl/voxel_hash.hpp>
#include <ftl/depth_camera.hpp>
#include <unordered_set>
//#include "CUDAScan.h"
// #include "CUDATimer.h"
// #include "GlobalAppState.h"
// #include "TimingLog.h"
#define SAFE_DELETE_ARRAY(a) { delete [] (a); (a) = NULL; }
extern "C" void resetCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams);
extern "C" void resetHashBucketMutexCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams);
extern "C" void allocCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, const unsigned int* d_bitMask);
extern "C" void fillDecisionArrayCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const DepthCameraData& depthCameraData);
extern "C" void compactifyHashCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams);
extern "C" unsigned int compactifyHashAllInOneCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams);
extern "C" void integrateDepthMapCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams, const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams);
extern "C" void bindInputDepthColorTextures(const DepthCameraData& depthCameraData);
extern "C" void starveVoxelsKernelCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams);
extern "C" void garbageCollectIdentifyCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams);
extern "C" void garbageCollectFreeCUDA(ftl::voxhash::HashData& hashData, const ftl::voxhash::HashParams& hashParams);
namespace ftl {
namespace voxhash {
/**
 * GPU voxel hash SDF scene representation (after Niessner et al.,
 * VoxelHashing).  Owns the hash data structure on the GPU and integrates
 * incoming depth frames into it.
 */
class SceneRep : public ftl::Configurable {
	public:
	// Validates required config keys, allocates the GPU hash and resets it.
	SceneRep(nlohmann::json &config) : Configurable(config) {
		REQUIRED({
			{"hashNumBuckets", "Desired hash entries divide bucket size", "number"},
			{"hashMaxCollisionLinkedListSize", "", "number"},
			{"hashNumSDFBlocks", "", "number"},
			{"SDFVoxelSize", "Size in meters of one voxel", "number"},
			{"SDFMaxIntegrationDistance", "", "number"},
			{"SDFTruncation", "Base error size", "number"},
			{"SDFTruncationScale", "Error size scale with depth", "number"},
			{"SDFIntegrationWeightSample", "", "number"},
			{"SDFIntegrationWeightMax", "", "number"}
		});
		create(parametersFromConfig(config));
	}
	~SceneRep() {
		destroy();
	}

	// Build HashParams from the JSON configuration.
	static HashParams parametersFromConfig(nlohmann::json &config) {
		HashParams params;
		// First camera view is set to identity pose to be at the centre of
		// the virtual coordinate space.
		params.m_rigidTransform.setIdentity();
		params.m_rigidTransformInverse.setIdentity();
		params.m_hashNumBuckets = config["hashNumBuckets"];
		params.m_hashBucketSize = HASH_BUCKET_SIZE;
		params.m_hashMaxCollisionLinkedListSize = config["hashMaxCollisionLinkedListSize"];
		params.m_SDFBlockSize = SDF_BLOCK_SIZE;
		params.m_numSDFBlocks = config["hashNumSDFBlocks"];
		params.m_virtualVoxelSize = config["SDFVoxelSize"];
		params.m_maxIntegrationDistance = config["SDFMaxIntegrationDistance"];
		params.m_truncation = config["SDFTruncation"];
		params.m_truncScale = config["SDFTruncationScale"];
		params.m_integrationWeightSample = config["SDFIntegrationWeightSample"];
		params.m_integrationWeightMax = config["SDFIntegrationWeightMax"];

		// Note (Nick): We are not streaming voxels in/out of GPU
		//params.m_streamingVoxelExtents = MatrixConversion::toCUDA(gas.s_streamingVoxelExtents);
		//params.m_streamingGridDimensions = MatrixConversion::toCUDA(gas.s_streamingGridDimensions);
		//params.m_streamingMinGridPos = MatrixConversion::toCUDA(gas.s_streamingMinGridPos);
		//params.m_streamingInitialChunkListSize = gas.s_streamingInitialChunkListSize;

		return params;
	}

	void bindDepthCameraTextures(const DepthCameraData& depthCameraData) {
		//bindInputDepthColorTextures(depthCameraData);
	}

	/**
	 * Integrate one depth frame into the volume: update pose, allocate hash
	 * blocks touched by the depth map, compactify, fuse the depth data and
	 * garbage collect.
	 *
	 * Note: lastRigidTransform appears to be the estimated camera pose.
	 * Note: bitMask can be nullptr if not streaming out voxels from GPU
	 */
	void integrate(const Eigen::Matrix4f& lastRigidTransform, const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, unsigned int* d_bitMask) {
		setLastRigidTransform(lastRigidTransform);

		//make the rigid transform available on the GPU
		m_hashData.updateParams(m_hashParams);

		//allocate all hash blocks which are corresponding to depth map entries
		alloc(depthCameraData, depthCameraParams, d_bitMask);

		//generate a linear hash array with only occupied entries
		compactifyHashEntries(depthCameraData);

		//volumetrically integrate the depth data into the depth SDFBlocks
		integrateDepthMap(depthCameraData, depthCameraParams);

		garbageCollect(depthCameraData);

		m_numIntegratedFrames++;
	}

	// Store the camera pose (and its cached inverse) in the hash params.
	void setLastRigidTransform(const Eigen::Matrix4f& lastRigidTransform) {
		m_hashParams.m_rigidTransform = MatrixConversion::toCUDA(lastRigidTransform);
		m_hashParams.m_rigidTransformInverse = m_hashParams.m_rigidTransform.getInverse();
	}

	void setLastRigidTransformAndCompactify(const Eigen::Matrix4f& lastRigidTransform, const DepthCameraData& depthCameraData) {
		setLastRigidTransform(lastRigidTransform);
		compactifyHashEntries(depthCameraData);
	}

	const Eigen::Matrix4f getLastRigidTransform() const {
		return MatrixConversion::toEigen(m_hashParams.m_rigidTransform);
	}

	/* Nick: To reduce weights between frames */
	void nextFrame() {
		starveVoxelsKernelCUDA(m_hashData, m_hashParams);
		m_numIntegratedFrames = 0;
	}

	//! resets the hash to the initial state (i.e., clears all data)
	void reset() {
		m_numIntegratedFrames = 0;

		m_hashParams.m_rigidTransform.setIdentity();
		m_hashParams.m_rigidTransformInverse.setIdentity();
		m_hashParams.m_numOccupiedBlocks = 0;
		m_hashData.updateParams(m_hashParams);
		resetCUDA(m_hashData, m_hashParams);
	}

	ftl::voxhash::HashData& getHashData() {
		return m_hashData;
	}

	const HashParams& getHashParams() const {
		return m_hashParams;
	}

	//! debug only!
	unsigned int getHeapFreeCount() {
		unsigned int count;
		cudaSafeCall(cudaMemcpy(&count, m_hashData.d_heapCounter, sizeof(unsigned int), cudaMemcpyDeviceToHost));
		return count+1;	//there is one more free than the address suggests (0 would be also a valid address)
	}

	//! debug only!
	// Downloads the hash and heap and cross-checks them for duplicates,
	// double allocation and leaks; throws std::runtime_error on corruption.
	void debugHash() {
		// Use std::vector (not raw new[]) so the host copies are released
		// even when one of the consistency checks below throws.
		std::vector<HashEntry> hashCPU(m_hashParams.m_hashBucketSize*m_hashParams.m_hashNumBuckets);
		std::vector<unsigned int> heapCPU(m_hashParams.m_numSDFBlocks);
		unsigned int heapCounterCPU;

		cudaSafeCall(cudaMemcpy(&heapCounterCPU, m_hashData.d_heapCounter, sizeof(unsigned int), cudaMemcpyDeviceToHost));
		heapCounterCPU++;	//points to the first free entry: number of blocks is one more

		cudaSafeCall(cudaMemcpy(heapCPU.data(), m_hashData.d_heap, sizeof(unsigned int)*m_hashParams.m_numSDFBlocks, cudaMemcpyDeviceToHost));
		cudaSafeCall(cudaMemcpy(hashCPU.data(), m_hashData.d_hash, sizeof(HashEntry)*m_hashParams.m_hashBucketSize*m_hashParams.m_hashNumBuckets, cudaMemcpyDeviceToHost));

		//Check for duplicates
		class myint3Voxel {
		public:
			myint3Voxel() {}
			~myint3Voxel() {}
			bool operator<(const myint3Voxel& other) const {
				if (x == other.x) {
					if (y == other.y) {
						return z < other.z;
					}
					return y < other.y;
				}
				return x < other.x;
			}

			bool operator==(const myint3Voxel& other) const {
				return x == other.x && y == other.y && z == other.z;
			}

			int x,y,z, i;
			int offset;
			int ptr;
		};

		std::unordered_set<unsigned int> pointersFreeHash;
		std::vector<unsigned int> pointersFreeVec(m_hashParams.m_numSDFBlocks, 0);
		for (unsigned int i = 0; i < heapCounterCPU; i++) {
			pointersFreeHash.insert(heapCPU[i]);
			pointersFreeVec[heapCPU[i]] = FREE_ENTRY;
		}
		if (pointersFreeHash.size() != heapCounterCPU) {
			throw std::runtime_error("ERROR: duplicate free pointers in heap array");
		}

		unsigned int numOccupied = 0;
		unsigned int numMinusOne = 0;
		unsigned int listOverallFound = 0;

		std::list<myint3Voxel> l;
		//std::vector<myint3Voxel> v;

		for (unsigned int i = 0; i < m_hashParams.m_hashBucketSize*m_hashParams.m_hashNumBuckets; i++) {
			if (hashCPU[i].ptr == -1) {
				numMinusOne++;
			}

			if (hashCPU[i].ptr != -2) {
				numOccupied++;	// != FREE_ENTRY
				myint3Voxel a;
				a.x = hashCPU[i].pos.x;
				a.y = hashCPU[i].pos.y;
				a.z = hashCPU[i].pos.z;
				l.push_back(a);
				//v.push_back(a);

				unsigned int linearBlockSize = m_hashParams.m_SDFBlockSize*m_hashParams.m_SDFBlockSize*m_hashParams.m_SDFBlockSize;
				if (pointersFreeHash.find(hashCPU[i].ptr / linearBlockSize) != pointersFreeHash.end()) {
					throw std::runtime_error("ERROR: ptr is on free heap, but also marked as an allocated entry");
				}
				pointersFreeVec[hashCPU[i].ptr / linearBlockSize] = LOCK_ENTRY;
			}
		}

		unsigned int numHeapFree = 0;
		unsigned int numHeapOccupied = 0;
		for (unsigned int i = 0; i < m_hashParams.m_numSDFBlocks; i++) {
			if		(pointersFreeVec[i] == FREE_ENTRY) numHeapFree++;
			else if (pointersFreeVec[i] == LOCK_ENTRY) numHeapOccupied++;
			else {
				throw std::runtime_error("memory leak detected: neither free nor allocated");
			}
		}
		if (numHeapFree + numHeapOccupied == m_hashParams.m_numSDFBlocks) std::cout << "HEAP OK!" << std::endl;
		else throw std::runtime_error("HEAP CORRUPTED");

		l.sort();
		size_t sizeBefore = l.size();
		l.unique();
		size_t sizeAfter = l.size();

		std::cout << "diff: " << sizeBefore - sizeAfter << std::endl;
		std::cout << "minOne: " << numMinusOne << std::endl;
		std::cout << "numOccupied: " << numOccupied << "\t numFree: " << getHeapFreeCount() << std::endl;
		std::cout << "numOccupied + free: " << numOccupied + getHeapFreeCount() << std::endl;
		std::cout << "numInFrustum: " << m_hashParams.m_numOccupiedBlocks << std::endl;

		//getchar();
	}

	private:

	// Allocate GPU hash storage and clear it.
	void create(const HashParams& params) {
		m_hashParams = params;
		m_hashData.allocate(m_hashParams);

		reset();
	}

	void destroy() {
		m_hashData.free();
	}

	// Allocate hash blocks for every depth sample; loops until the free-block
	// count stops changing so all visible blocks end up allocated.
	void alloc(const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams, const unsigned int* d_bitMask) {
		//Start Timing
		//if (GlobalAppState::get().s_timingsDetailledEnabled) { cutilSafeCall(cudaDeviceSynchronize()); m_timer.start(); }

		// NOTE (nick): We might want this later...
		if (true) {
			//allocate until all blocks are allocated
			unsigned int prevFree = getHeapFreeCount();
			while (1) {
				resetHashBucketMutexCUDA(m_hashData, m_hashParams);
				allocCUDA(m_hashData, m_hashParams, depthCameraData, depthCameraParams, d_bitMask);

				unsigned int currFree = getHeapFreeCount();

				if (prevFree != currFree) {
					prevFree = currFree;
				}
				else {
					break;
				}
			}
		}
		else {
			//this version is faster, but it doesn't guarantee that all blocks are allocated (staggers alloc to the next frame)
			resetHashBucketMutexCUDA(m_hashData, m_hashParams);
			allocCUDA(m_hashData, m_hashParams, depthCameraData, depthCameraParams, d_bitMask);
		}

		// Stop Timing
		//if(GlobalAppState::get().s_timingsDetailledEnabled) { cutilSafeCall(cudaDeviceSynchronize()); m_timer.stop(); TimingLog::totalTimeAlloc += m_timer.getElapsedTimeMS(); TimingLog::countTimeAlloc++; }
	}

	// Build the compacted list of occupied hash entries used by integration.
	void compactifyHashEntries(const DepthCameraData& depthCameraData) {
		//Start Timing
		//if(GlobalAppState::get().s_timingsDetailledEnabled) { cutilSafeCall(cudaDeviceSynchronize()); m_timer.start(); }

		//CUDATimer t;

		//t.startEvent("fillDecisionArray");
		//fillDecisionArrayCUDA(m_hashData, m_hashParams, depthCameraData);
		//t.endEvent();

		//t.startEvent("prefixSum");
		//m_hashParams.m_numOccupiedBlocks =
		//	m_cudaScan.prefixSum(
		//		m_hashParams.m_hashNumBuckets*m_hashParams.m_hashBucketSize,
		//		m_hashData.d_hashDecision,
		//		m_hashData.d_hashDecisionPrefix);
		//t.endEvent();

		//t.startEvent("compactifyHash");
		//m_hashData.updateParams(m_hashParams);	//make sure numOccupiedBlocks is updated on the GPU
		//compactifyHashCUDA(m_hashData, m_hashParams);
		//t.endEvent();

		//t.startEvent("compactifyAllInOne");
		m_hashParams.m_numOccupiedBlocks = compactifyHashAllInOneCUDA(m_hashData, m_hashParams);		//this version uses atomics over prefix sums, which has a much better performance
		std::cout << "Occ blocks = " << m_hashParams.m_numOccupiedBlocks << std::endl;
		m_hashData.updateParams(m_hashParams);	//make sure numOccupiedBlocks is updated on the GPU
		//t.endEvent();
		//t.evaluate();

		// Stop Timing
		//if(GlobalAppState::get().s_timingsDetailledEnabled) { cutilSafeCall(cudaDeviceSynchronize()); m_timer.stop(); TimingLog::totalTimeCompactifyHash += m_timer.getElapsedTimeMS(); TimingLog::countTimeCompactifyHash++; }

		//std::cout << "numOccupiedBlocks: " << m_hashParams.m_numOccupiedBlocks << std::endl;
	}

	void integrateDepthMap(const DepthCameraData& depthCameraData, const DepthCameraParams& depthCameraParams) {
		//Start Timing
		//if(GlobalAppState::get().s_timingsDetailledEnabled) { cutilSafeCall(cudaDeviceSynchronize()); m_timer.start(); }

		integrateDepthMapCUDA(m_hashData, m_hashParams, depthCameraData, depthCameraParams);

		// Stop Timing
		//if(GlobalAppState::get().s_timingsDetailledEnabled) { cutilSafeCall(cudaDeviceSynchronize()); m_timer.stop(); TimingLog::totalTimeIntegrate += m_timer.getElapsedTimeMS(); TimingLog::countTimeIntegrate++; }
	}

	void garbageCollect(const DepthCameraData& depthCameraData) {
		//only perform if enabled by global app state
		//if (GlobalAppState::get().s_garbageCollectionEnabled) {
			garbageCollectIdentifyCUDA(m_hashData, m_hashParams);
			resetHashBucketMutexCUDA(m_hashData, m_hashParams);	//needed if linked lists are enabled -> for memeory deletion
			garbageCollectFreeCUDA(m_hashData, m_hashParams);
		//}
	}

	HashParams		m_hashParams;           // current hash configuration + camera pose
	ftl::voxhash::HashData	m_hashData;     // GPU-side hash structure

	//CUDAScan		m_cudaScan;
	unsigned int	m_numIntegratedFrames;	//used for garbage collect

	// static Timer m_timer;
};
}; // namespace voxhash
}; // namespace ftl
This diff is collapsed.
// From: https://github.com/niessner/VoxelHashing/blob/master/DepthSensingCUDA/Source/CUDAHashParams.h
#pragma once
//#include <cutil_inline.h>
//#include <cutil_math.h>
#include <vector_types.h>
#include <cuda_runtime.h>
#include <ftl/cuda_matrix_util.hpp>
namespace ftl {
namespace voxhash {
//TODO might have to be split into static and dynamics
// Parameters of the GPU voxel hash (after Niessner et al.).  Aligned to 16
// bytes so the struct can be copied directly into CUDA constant memory.
struct __align__(16) HashParams {
	HashParams() {
	}

	float4x4		m_rigidTransform;        // current camera pose (set via SceneRep::setLastRigidTransform)
	float4x4		m_rigidTransformInverse; // cached inverse of m_rigidTransform

	unsigned int	m_hashNumBuckets;                 // number of hash buckets
	unsigned int	m_hashBucketSize;                 // entries per bucket (HASH_BUCKET_SIZE)
	unsigned int	m_hashMaxCollisionLinkedListSize; // cap on collision-list traversal length
	unsigned int	m_numSDFBlocks;                   // total allocatable SDF blocks (heap size)
	int				m_SDFBlockSize;                   // voxels per block edge (SDF_BLOCK_SIZE)
	float			m_virtualVoxelSize;               // voxel edge length in metres ("SDFVoxelSize")
	unsigned int	m_numOccupiedBlocks;	//occupied blocks in the viewing frustum

	float			m_maxIntegrationDistance;  // depth beyond this is not integrated
	float			m_truncScale;              // truncation scale with depth ("Error size scale with depth")
	float			m_truncation;              // base truncation band ("Base error size")
	unsigned int	m_integrationWeightSample; // weight added per depth observation
	unsigned int	m_integrationWeightMax;    // cap on accumulated voxel weight

	// Streaming fields are unused in this code base -- voxels are not paged
	// in/out of the GPU (see note in SceneRep::parametersFromConfig).
	float3			m_streamingVoxelExtents;
	int3			m_streamingGridDimensions;
	int3			m_streamingMinGridPos;
	unsigned int	m_streamingInitialChunkListSize;
	uint2			m_dummy;                   // explicit padding
};
} // namespace voxhash
} // namespace ftl
This diff is collapsed.
......@@ -7,6 +7,9 @@
#include <glog/logging.h>
#include <ftl/config.h>
#include <ftl/configuration.hpp>
#include <ftl/depth_camera.hpp>
#include <ftl/scene_rep_hash_sdf.hpp>
#include <ftl/ray_cast_sdf.hpp>
#include <ftl/rgbd.hpp>
// #include <zlib.h>
......@@ -67,6 +70,7 @@ using cv::Mat;
namespace ftl {
namespace rgbd {
PointCloud<PointXYZRGB>::Ptr createPointCloud(RGBDSource *src);
PointCloud<PointXYZRGB>::Ptr createPointCloud(RGBDSource *src, const cv::Point3_<uchar> &col);
}
}
......@@ -87,14 +91,14 @@ PointCloud<PointXYZRGB>::Ptr ftl::rgbd::createPointCloud(RGBDSource *src) {
for(int i=0;i<rgb.rows;i++) {
const float *sptr = depth.ptr<float>(i);
for(int j=0;j<rgb.cols;j++) {
float d = sptr[j] * 1000.0f;
float d = sptr[j]; // * 1000.0f;
pcl::PointXYZRGB point;
point.x = (((double)j + CX) / FX) * d;
point.y = (((double)i + CY) / FY) * d;
point.z = d;
if (point.x == INFINITY || point.y == INFINITY || point.z > 20000.0f || point.z < 0.04f) {
if (point.x == INFINITY || point.y == INFINITY || point.z > 20.0f || point.z < 0.04f) {
point.x = 0.0f; point.y = 0.0f; point.z = 0.0f;
}
......@@ -109,6 +113,44 @@ PointCloud<PointXYZRGB>::Ptr ftl::rgbd::createPointCloud(RGBDSource *src) {
return point_cloud_ptr;
}
/**
 * Build a PCL point cloud from an RGB-D source, colouring every point with the
 * single override colour `prgb` instead of the camera image.
 *
 * Invalid samples (non-finite, depth > 20 m or < 0.04 m) are collapsed to the
 * origin rather than dropped, so the cloud keeps one point per pixel.
 *
 * @param src  RGB-D source providing intrinsics and the current frames.
 * @param prgb Override colour; packed as z<<16 | y<<8 | x (b,g,r per OpenCV
 *             channel order -- TODO confirm against callers).
 */
PointCloud<PointXYZRGB>::Ptr ftl::rgbd::createPointCloud(RGBDSource *src, const cv::Point3_<uchar> &prgb) {
	const double CX = src->getParameters().cx;
	const double CY = src->getParameters().cy;
	const double FX = src->getParameters().fx;
	const double FY = src->getParameters().fy;

	cv::Mat rgb;
	cv::Mat depth;
	src->getRGBD(rgb, depth);

	pcl::PointCloud<pcl::PointXYZRGB>::Ptr point_cloud_ptr(new pcl::PointCloud<pcl::PointXYZRGB>);
	point_cloud_ptr->width = rgb.cols * rgb.rows;
	point_cloud_ptr->height = 1;

	// Pack the override colour once -- it is loop-invariant.  (Renamed from
	// `rgb`, which shadowed the cv::Mat of the same name above.)
	const uint32_t packed_rgb = (static_cast<uint32_t>(prgb.z) << 16 | static_cast<uint32_t>(prgb.y) << 8 | static_cast<uint32_t>(prgb.x));

	for(int i=0;i<rgb.rows;i++) {
		const float *sptr = depth.ptr<float>(i);
		for(int j=0;j<rgb.cols;j++) {
			float d = sptr[j]; // * 1000.0f;

			pcl::PointXYZRGB point;
			point.x = (((double)j + CX) / FX) * d;
			point.y = (((double)i + CY) / FY) * d;
			point.z = d;

			// Collapse invalid samples to the origin.  The self-comparison
			// (z != z) also catches NaN depths, which `== INFINITY` misses.
			if (point.x == INFINITY || point.y == INFINITY || point.z != point.z || point.z > 20.0f || point.z < 0.04f) {
				point.x = 0.0f; point.y = 0.0f; point.z = 0.0f;
			}

			point.rgb = *reinterpret_cast<const float*>(&packed_rgb); // PCL's packed-RGB idiom

			point_cloud_ptr -> points.push_back(point);
		}
	}

	return point_cloud_ptr;
}
#ifdef HAVE_PCL
#include <pcl/filters/uniform_sampling.h>
......@@ -181,8 +223,14 @@ void saveRegistration(std::map<string, Eigen::Matrix4f> registration) {
file << save;
}
// One fusion input: the RGB-D source plus the GPU-side depth-camera
// buffers and intrinsic parameters used by the SDF integration pipeline.
struct Cameras {
RGBDSource *source; // network/local RGB-D input (not owned by this struct)
DepthCameraData gpu; // device buffers the depth/colour frames are uploaded to
DepthCameraParams params; // intrinsics and depth range handed to the GPU kernels
};
template <template<class> class Container>
std::map<string, Eigen::Matrix4f> runRegistration(ftl::net::Universe &net, Container<RGBDSource*> &inputs) {
std::map<string, Eigen::Matrix4f> runRegistration(ftl::net::Universe &net, Container<Cameras> &inputs) {
std::map<string, Eigen::Matrix4f> registration;
......@@ -205,7 +253,7 @@ std::map<string, Eigen::Matrix4f> runRegistration(ftl::net::Universe &net, Conta
size_t ref_i;
bool found = false;
for (size_t i = 0; i < inputs.size(); i++) {
if (inputs[i]->getConfig()["uri"] == ref_uri) {
if (inputs[i].source->getConfig()["uri"] == ref_uri) {
ref_i = i;
found = true;
break;
......@@ -215,7 +263,7 @@ std::map<string, Eigen::Matrix4f> runRegistration(ftl::net::Universe &net, Conta
if (!found) { LOG(ERROR) << "Reference input not found!"; return registration; }
for (auto &input : inputs) {
cv::namedWindow("Registration: " + (string)input->getConfig()["uri"], cv::WINDOW_KEEPRATIO|cv::WINDOW_NORMAL);
cv::namedWindow("Registration: " + (string)input.source->getConfig()["uri"], cv::WINDOW_KEEPRATIO|cv::WINDOW_NORMAL);
}
// vector for every input: vector of point clouds of patterns
......@@ -227,7 +275,7 @@ std::map<string, Eigen::Matrix4f> runRegistration(ftl::net::Universe &net, Conta
vector<PointCloud<PointXYZ>::Ptr> patterns_iter(inputs.size());
for (size_t i = 0; i < inputs.size(); i++) {
RGBDSource *input = inputs[i];
RGBDSource *input = inputs[i].source;
Mat rgb, depth;
input->getRGBD(rgb, depth);
......@@ -250,7 +298,7 @@ std::map<string, Eigen::Matrix4f> runRegistration(ftl::net::Universe &net, Conta
else { LOG(WARNING) << "Pattern not detected on all inputs";}
}
for (auto &input : inputs) { cv::destroyWindow("Registration: " + (string)input->getConfig()["uri"]); }
for (auto &input : inputs) { cv::destroyWindow("Registration: " + (string)input.source->getConfig()["uri"]); }
for (size_t i = 0; i < inputs.size(); i++) {
Eigen::Matrix4f T;
......@@ -260,13 +308,48 @@ std::map<string, Eigen::Matrix4f> runRegistration(ftl::net::Universe &net, Conta
else {
T = ftl::registration::findTransformation(patterns[i], patterns[ref_i]);
}
registration[(string)inputs[i]->getConfig()["uri"]] = T;
registration[(string)inputs[i].source->getConfig()["uri"]] = T;
}
saveRegistration(registration);
return registration;
}
#endif
// Build a gluLookAt-style view matrix: camera positioned at `eye`, looking
// toward `center`, with `up` fixing the roll. The result maps world
// coordinates into the right-handed camera frame (row-major rotation with
// the translation folded into the last column).
template<class T>
Eigen::Matrix<T,4,4> lookAt
(
Eigen::Matrix<T,3,1> const & eye,
Eigen::Matrix<T,3,1> const & center,
Eigen::Matrix<T,3,1> const & up
)
{
using Mat4 = Eigen::Matrix<T,4,4>;
using Vec3 = Eigen::Matrix<T,3,1>;

// Orthonormal camera basis: forward, side (right), and re-orthogonalised up.
const Vec3 fwd  = (center - eye).normalized();
const Vec3 side = fwd.cross(up.normalized()).normalized();
const Vec3 upv  = side.cross(fwd);

Mat4 view;
view(0,0) =  side.x(); view(0,1) =  side.y(); view(0,2) =  side.z(); view(0,3) = -side.dot(eye);
view(1,0) =  upv.x();  view(1,1) =  upv.y();  view(1,2) =  upv.z();  view(1,3) = -upv.dot(eye);
view(2,0) = -fwd.x();  view(2,1) = -fwd.y();  view(2,2) = -fwd.z();  view(2,3) =  fwd.dot(eye);
view(3,0) = 0;         view(3,1) = 0;         view(3,2) = 0;         view(3,3) = 1;
return view;
}
// Signature of a mouse handler: (event, x, y, flags), matching OpenCV's
// cv::MouseCallback parameters minus the userdata pointer.
using MouseAction = std::function<void(int, int, int, int)>;

// Attach a std::function-based mouse handler to an OpenCV window.
// WARNING: OpenCV stores the raw `&action` pointer as callback userdata, so
// the caller must keep the MouseAction object alive (at the same address)
// for as long as the window can deliver events — passing a temporary here
// would leave a dangling pointer.
static void setMouseAction(const std::string& winName, const MouseAction &action)
{
cv::setMouseCallback(winName,
[] (int event, int x, int y, int flags, void* userdata) {
// Recover the stored handler and forward the event to it.
(*(MouseAction*)userdata)(event, x, y, flags);
}, (void*)&action);
}
static void run() {
Universe net(config["net"]);
......@@ -278,8 +361,8 @@ static void run() {
return;
}
std::deque<RGBDSource*> inputs;
std::vector<Display> displays;
std::deque<Cameras> inputs;
//std::vector<Display> displays;
// TODO Allow for non-net source types
for (auto &src : config["sources"]) {
......@@ -287,9 +370,20 @@ static void run() {
if (!in) {
LOG(ERROR) << "Unrecognised source: " << src;
} else {
inputs.push_back(in);
displays.emplace_back(config["display"], src["uri"]);
LOG(INFO) << (string)src["uri"] << " loaded";
auto &cam = inputs.emplace_back();
cam.source = in;
cam.params.fx = in->getParameters().fx;
cam.params.fy = in->getParameters().fy;
cam.params.mx = -in->getParameters().cx;
cam.params.my = -in->getParameters().cy;
cam.params.m_imageWidth = in->getParameters().width;
cam.params.m_imageHeight = in->getParameters().height;
cam.params.m_sensorDepthWorldMax = in->getParameters().maxDepth;
cam.params.m_sensorDepthWorldMin = in->getParameters().minDepth;
cam.gpu.alloc(cam.params);
//displays.emplace_back(config["display"], src["uri"]);
LOG(INFO) << (string)src["uri"] << " loaded " << cam.params.m_imageWidth;
}
}
......@@ -297,63 +391,126 @@ static void run() {
// load point cloud transformations
#ifdef HAVE_PCL
std::map<string, Eigen::Matrix4f> registration;
if (config["registration"]["calibration"]["run"]) {
registration = runRegistration(net, inputs);
}
else {
LOG(INFO) << "LOAD REG";
registration = loadRegistration();
}
LOG(INFO) << "Assigning poses";
vector<Eigen::Matrix4f> T;
for (auto &input : inputs) {
Eigen::Matrix4f RT = (registration.count((string)input->getConfig()["uri"]) > 0) ? registration[(string)input->getConfig()["uri"]] : registration["default"];
LOG(INFO) << (unsigned long long)input.source;
Eigen::Matrix4f RT = (registration.count(input.source->getConfig()["uri"].get<string>()) > 0) ? registration[(string)input.source->getConfig()["uri"]] : registration["default"];
T.push_back(RT);
input.source->setPose(RT);
}
//
vector<PointCloud<PointXYZRGB>::Ptr> clouds(inputs.size());
//vector<PointCloud<PointXYZRGB>::Ptr> clouds(inputs.size());
Display display_merged(config["display"], "Merged"); // todo
CUDARayCastSDF rays(config["voxelhash"]);
ftl::voxhash::SceneRep scene(config["voxelhash"]);
cv::Mat colour_array(cv::Size(rays.getRayCastParams().m_width,rays.getRayCastParams().m_height), CV_8UC3);
// Force window creation
display_merged.render(colour_array);
display_merged.wait(1);
unsigned char frameCount = 0;
bool paused = false;
float cam_x = 0.0f;
float cam_z = 0.0f;
Eigen::Vector3f eye(0.0f, 0.0f, 0.0f);
Eigen::Vector3f centre(0.0f, 0.0f, -4.0f);
Eigen::Vector3f up(0,1.0f,0);
Eigen::Vector3f lookPoint(0.0f,1.0f,-4.0f);
Eigen::Matrix4f viewPose;
float lerpSpeed = 0.4f;
// Keyboard camera controls
display_merged.onKey([&paused,&eye,&centre](int key) {
LOG(INFO) << "Key = " << key;
if (key == 32) paused = !paused;
else if (key == 81) eye[0] += 0.02f;
else if (key == 83) eye[0] -= 0.02f;
else if (key == 84) eye[2] += 0.02f;
else if (key == 82) eye[2] -= 0.02f;
});
// TODO(Nick) Calculate "camera" properties of viewport.
MouseAction mouseact = [&inputs,&lookPoint,&viewPose]( int event, int ux, int uy, int) {
LOG(INFO) << "Mouse " << ux << "," << uy;
if (event == 1) { // click
const float x = ((float)ux-inputs[0].params.mx) / inputs[0].params.fx;
const float y = ((float)uy-inputs[0].params.my) / inputs[0].params.fy;
const float depth = -4.0f;
Eigen::Vector4f camPos(x*depth,y*depth,depth,1.0);
Eigen::Vector4f worldPos = viewPose * camPos;
lookPoint = Eigen::Vector3f(worldPos[0],worldPos[1],worldPos[2]);
}
};
::setMouseAction("Image", mouseact);
int active = displays.size();
int active = inputs.size();
while (active > 0 && display_merged.active()) {
active = 0;
if (!paused) {
net.broadcast("grab"); // To sync cameras
PointCloud<PointXYZRGB>::Ptr cloud(new PointCloud<PointXYZRGB>);
scene.nextFrame();
for (size_t i = 0; i < inputs.size(); i++) {
//if (i == 1) continue;
//Display &display = displays[i];
RGBDSource *input = inputs[i];
RGBDSource *input = inputs[i].source;
Mat rgb, depth;
//LOG(INFO) << "GetRGB";
input->getRGBD(rgb,depth);
//if (!display.active()) continue;
active += 1;
clouds[i] = ftl::rgbd::createPointCloud(input);
//clouds[i] = ftl::rgbd::createPointCloud(input);
//display.render(rgb, depth,input->getParameters());
//display.render(clouds[i]);
//display.wait(5);
}
for (size_t i = 0; i < clouds.size(); i++) {
pcl::transformPointCloud(*clouds[i], *clouds[i], T[i]);
*cloud += *clouds[i];
//LOG(INFO) << "Data size: " << depth.cols << "," << depth.rows;
if (depth.cols == 0) continue;
Mat rgba;
cv::cvtColor(rgb,rgba, cv::COLOR_BGR2BGRA);
inputs[i].params.flags = frameCount;
// Send to GPU and merge view into scene
inputs[i].gpu.updateParams(inputs[i].params);
inputs[i].gpu.updateData(depth, rgba);
scene.integrate(inputs[i].source->getPose(), inputs[i].gpu, inputs[i].params, nullptr);
}
} else {
active = 1;
}
pcl::UniformSampling<PointXYZRGB> uniform_sampling;
uniform_sampling.setInputCloud(cloud);
uniform_sampling.setRadiusSearch(0.1f);
uniform_sampling.filter(*cloud);
frameCount++;
// Set virtual camera transformation matrix
//Eigen::Affine3f transform(Eigen::Translation3f(cam_x,0.0f,cam_z));
centre += (lookPoint - centre) * (lerpSpeed * 0.1f);
viewPose = lookAt<float>(eye,centre,up); // transform.matrix();
display_merged.render(cloud);
display_merged.wait(50);
// Call GPU renderer and download result into OpenCV mat
rays.render(scene.getHashData(), scene.getHashParams(), inputs[0].gpu, viewPose);
rays.getRayCastData().download(nullptr, (uchar3*)colour_array.data, rays.getRayCastParams());
display_merged.render(colour_array);
display_merged.wait(1);
}
#endif
// TODO non-PCL (?)
}
int main(int argc, char **argv) {
......
This diff is collapsed.
This diff is collapsed.
......@@ -101,7 +101,7 @@ PointCloud<PointXYZ>::Ptr cornersToPointCloud(const vector<cv::Point2f> &corners
for (int i = 0; i < corners_len; i++) {
double x = corners[i].x;
double y = corners[i].y;
double d = disp.at<float>((int) y, (int) x) * 1000.0f; // todo: better estimation
double d = disp.at<float>((int) y, (int) x); // * 1000.0f; // todo: better estimation
//cv::Vec4d homg_pt = Q_ * cv::Vec4d(x, y, d, 1.0);
//cv::Vec3d p = cv::Vec3d(homg_pt.val) / homg_pt[3];
......