From e6a15e01090692ba7271bf3e289f90016aa5e152 Mon Sep 17 00:00:00 2001 From: Nicolas Pope <nicolas.pope@utu.fi> Date: Sun, 17 Nov 2019 13:59:23 +0200 Subject: [PATCH] Implements #232 all source MLS --- applications/reconstruct/src/ilw/ilw.cpp | 4 +- applications/reconstruct/src/main.cpp | 4 +- components/operators/CMakeLists.txt | 2 + .../operators/include/ftl/operators/mvmls.hpp | 28 + components/operators/src/correspondence.cu | 548 ++++++++++++++++++ components/operators/src/mvmls.cpp | 340 +++++++++++ components/operators/src/mvmls_cuda.hpp | 101 ++++ components/renderers/cpp/src/reprojection.cu | 23 +- .../renderers/cpp/src/splatter_cuda.hpp | 2 +- components/renderers/cpp/src/tri_render.cpp | 19 +- .../rgbd-sources/include/ftl/rgbd/frame.hpp | 2 +- 11 files changed, 1048 insertions(+), 25 deletions(-) create mode 100644 components/operators/include/ftl/operators/mvmls.hpp create mode 100644 components/operators/src/correspondence.cu create mode 100644 components/operators/src/mvmls.cpp create mode 100644 components/operators/src/mvmls_cuda.hpp diff --git a/applications/reconstruct/src/ilw/ilw.cpp b/applications/reconstruct/src/ilw/ilw.cpp index 55a9539a9..a686f35c3 100644 --- a/applications/reconstruct/src/ilw/ilw.cpp +++ b/applications/reconstruct/src/ilw/ilw.cpp @@ -25,7 +25,7 @@ static Eigen::Affine3d create_rotation_matrix(float ax, float ay, float az) { } ILW::ILW(nlohmann::json &config) : ftl::Configurable(config) { - enabled_ = value("ilw_align", true); + enabled_ = value("ilw_align", false); iterations_ = value("iterations", 4); motion_rate_ = value("motion_rate", 0.8f); motion_window_ = value("motion_window", 3); @@ -42,7 +42,7 @@ ILW::ILW(nlohmann::json &config) : ftl::Configurable(config) { }); on("ilw_align", [this](const ftl::config::Event &e) { - enabled_ = value("ilw_align", true); + enabled_ = value("ilw_align", false); }); on("iterations", [this](const ftl::config::Event &e) { diff --git a/applications/reconstruct/src/main.cpp b/applications/reconstruct/src/main.cpp index 382f49293..0603dad40 100644 --- a/applications/reconstruct/src/main.cpp +++ b/applications/reconstruct/src/main.cpp @@ -37,6 +37,7 @@ #include <ftl/operators/segmentation.hpp> #include <ftl/operators/mask.hpp> #include <ftl/operators/antialiasing.hpp> +#include <ftl/operators/mvmls.hpp> #include <ftl/operators/clipping.hpp> #include <ftl/cuda/normals.hpp> @@ -330,8 +331,9 @@ static void run(ftl::Configurable *root) { pipeline1->append<ftl::operators::CrossSupport>("cross"); pipeline1->append<ftl::operators::DiscontinuityMask>("discontinuity"); pipeline1->append<ftl::operators::CullDiscontinuity>("remove_discontinuity"); - pipeline1->append<ftl::operators::AggreMLS>("mls"); // Perform MLS (using smoothing channel) + //pipeline1->append<ftl::operators::AggreMLS>("mls"); // Perform MLS (using smoothing channel) pipeline1->append<ftl::operators::VisCrossSupport>("viscross")->set("enabled", false); + pipeline1->append<ftl::operators::MultiViewMLS>("mvmls"); // Alignment diff --git a/components/operators/CMakeLists.txt b/components/operators/CMakeLists.txt index 20dbced5e..b025a39b3 100644 --- a/components/operators/CMakeLists.txt +++ b/components/operators/CMakeLists.txt @@ -17,6 +17,8 @@ set(OPERSRC src/mask.cpp src/antialiasing.cpp src/antialiasing.cu + src/mvmls.cpp + src/correspondence.cu src/clipping.cpp ) diff --git a/components/operators/include/ftl/operators/mvmls.hpp b/components/operators/include/ftl/operators/mvmls.hpp new file mode 100644 index 000000000..6b8eff1be --- /dev/null +++ 
b/components/operators/include/ftl/operators/mvmls.hpp @@ -0,0 +1,28 @@ +#ifndef _FTL_OPERATORS_MVMLS_HPP_ +#define _FTL_OPERATORS_MVMLS_HPP_ + +#include <ftl/operators/operator.hpp> + +namespace ftl { +namespace operators { + +class MultiViewMLS : public ftl::operators::Operator { + public: + explicit MultiViewMLS(ftl::Configurable*); + ~MultiViewMLS(); + + inline Operator::Type type() const override { return Operator::Type::ManyToMany; } + + bool apply(ftl::rgbd::FrameSet &in, ftl::rgbd::FrameSet &out, cudaStream_t stream) override; + + private: + std::vector<ftl::cuda::TextureObject<float4>> centroid_horiz_; + std::vector<ftl::cuda::TextureObject<float4>> centroid_vert_; + std::vector<ftl::cuda::TextureObject<float4>> normals_horiz_; + std::vector<ftl::cuda::TextureObject<float>> contributions_; +}; + +} +} + +#endif diff --git a/components/operators/src/correspondence.cu b/components/operators/src/correspondence.cu new file mode 100644 index 000000000..36e91d9f4 --- /dev/null +++ b/components/operators/src/correspondence.cu @@ -0,0 +1,548 @@ +#include "mvmls_cuda.hpp" +#include <ftl/cuda/weighting.hpp> +#include <ftl/cuda/mask.hpp> + +using ftl::cuda::TextureObject; +using ftl::rgbd::Camera; +using ftl::cuda::Mask; +using ftl::cuda::MvMLSParams; + +#define T_PER_BLOCK 8 + +template<int FUNCTION> +__device__ float weightFunction(const ftl::cuda::MvMLSParams ¶ms, float dweight, float cweight); + +template <> +__device__ inline float weightFunction<0>(const ftl::cuda::MvMLSParams ¶ms, float dweight, float cweight) { + return (params.cost_ratio * (cweight) + (1.0f - params.cost_ratio) * dweight); +} + +template <> +__device__ inline float weightFunction<1>(const ftl::cuda::MvMLSParams ¶m, float dweight, float cweight) { + return (cweight * cweight * dweight); +} + +template <> +__device__ inline float weightFunction<2>(const ftl::cuda::MvMLSParams ¶m, float dweight, float cweight) { + return (dweight * dweight * cweight); +} + +template <> +__device__ inline float weightFunction<3>(const ftl::cuda::MvMLSParams ¶ms, float dweight, float cweight) { + return (dweight == 0.0f) ? 0.0f : (params.cost_ratio * (cweight) + (1.0f - params.cost_ratio) * dweight); +} + +template <> +__device__ inline float weightFunction<4>(const ftl::cuda::MvMLSParams ¶ms, float dweight, float cweight) { + return cweight; +} + +template <> +__device__ inline float weightFunction<5>(const ftl::cuda::MvMLSParams ¶ms, float dweight, float cweight) { + return (cweight > 0.0f) ? 
dweight : 0.0f; +} + +template<int COR_STEPS, int FUNCTION> +__global__ void corresponding_point_kernel( + TextureObject<float> d1, + TextureObject<float> d2, + TextureObject<uchar4> c1, + TextureObject<uchar4> c2, + TextureObject<short2> screenOut, + TextureObject<float> conf, + TextureObject<int> mask, + float4x4 pose1, + float4x4 pose1_inv, + float4x4 pose2, // Inverse + Camera cam1, + Camera cam2, ftl::cuda::MvMLSParams params) { + + // Each warp picks point in p1 + //const int tid = (threadIdx.x + threadIdx.y * blockDim.x); + const int x = (blockIdx.x*blockDim.x + threadIdx.x); // / WARP_SIZE; + const int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x >= 0 && y >=0 && x < screenOut.width() && y < screenOut.height()) { + screenOut(x,y) = make_short2(-1,-1); + + //const float3 world1 = make_float3(p1.tex2D(x, y)); + const float depth1 = d1.tex2D(x,y); //(pose1_inv * world1).z; // Initial starting depth + if (depth1 < cam1.minDepth || depth1 > cam1.maxDepth) return; + + // TODO: Temporary hack to ensure depth1 is present + //const float4 temp = vout.tex2D(x,y); + //vout(x,y) = make_float4(depth1, 0.0f, temp.z, temp.w); + + const float3 world1 = pose1 * cam1.screenToCam(x,y,depth1); + + const auto colour1 = c1.tex2D((float)x+0.5f, (float)y+0.5f); + + //float bestdepth = 0.0f; + short2 bestScreen = make_short2(-1,-1); + float bestdepth = 0.0f; + float bestdepth2 = 0.0f; + float bestweight = 0.0f; + float bestcolour = 0.0f; + float bestdweight = 0.0f; + float totalcolour = 0.0f; + int count = 0; + float contrib = 0.0f; + + const float step_interval = params.range / (COR_STEPS / 2); + + const float3 rayStep_world = pose1.getFloat3x3() * cam1.screenToCam(x,y,step_interval); + const float3 rayStart_2 = pose2 * world1; + const float3 rayStep_2 = pose2.getFloat3x3() * rayStep_world; + + // Project to p2 using cam2 + // Each thread takes a possible correspondence and calculates a weighting + //const int lane = tid % WARP_SIZE; + for (int i=0; i<COR_STEPS; ++i) { + const int j = i - (COR_STEPS/2); + const float depth_adjust = (float)j * step_interval; + + // Calculate adjusted depth 3D point in camera 2 space + const float3 worldPos = world1 + j * rayStep_world; //(pose1 * cam1.screenToCam(x, y, depth_adjust)); + const float3 camPos = rayStart_2 + j * rayStep_2; //pose2 * worldPos; + const float2 screen = cam2.camToScreen<float2>(camPos); + + float weight = (screen.x >= cam2.width || screen.y >= cam2.height) ? 0.0f : 1.0f; + + // Generate a colour correspondence value + const auto colour2 = c2.tex2D(screen.x, screen.y); + const float cweight = ftl::cuda::colourWeighting(colour1, colour2, params.colour_smooth); + + // Generate a depth correspondence value + const float depth2 = d2.tex2D(int(screen.x+0.5f), int(screen.y+0.5f)); + + if (FUNCTION == 1) { + weight *= ftl::cuda::weighting(fabs(depth2 - camPos.z), cweight*params.spatial_smooth); + } else { + const float dweight = ftl::cuda::weighting(fabs(depth2 - camPos.z), params.spatial_smooth); + weight *= weightFunction<FUNCTION>(params, dweight, cweight); + } + //const float dweight = ftl::cuda::weighting(fabs(depth_adjust), 10.0f*params.range); + + //weight *= weightFunction<FUNCTION>(params, dweight, cweight); + + ++count; + contrib += weight; + bestcolour = max(cweight, bestcolour); + //bestdweight = max(dweight, bestdweight); + totalcolour += cweight; + bestdepth = (weight > bestweight) ? depth_adjust : bestdepth; + //bestdepth2 = (weight > bestweight) ? camPos.z : bestdepth2; + //bestScreen = (weight > bestweight) ? 
make_short2(screen.x+0.5f, screen.y+0.5f) : bestScreen; + bestweight = max(bestweight, weight); + //bestweight = weight; + //bestdepth = depth_adjust; + //bestScreen = make_short2(screen.x+0.5f, screen.y+0.5f); + //} + } + + const float avgcolour = totalcolour/(float)count; + const float confidence = bestcolour / totalcolour; //bestcolour - avgcolour; + + //Mask m(mask.tex2D(x,y)); + + //if (bestweight > 0.0f) { + float old = conf.tex2D(x,y); + + if (bestweight * confidence > old) { + d1(x,y) = 0.4f*bestdepth + depth1; + //d2(bestScreen.x, bestScreen.y) = bestdepth2; + //screenOut(x,y) = bestScreen; + conf(x,y) = bestweight * confidence; + } + //} + + // If a good enough match is found, mark dodgy depth as solid + //if ((m.isFilled() || m.isDiscontinuity()) && (bestweight > params.match_threshold)) mask(x,y) = 0; + } +} + +void ftl::cuda::correspondence( + TextureObject<float> &d1, + TextureObject<float> &d2, + TextureObject<uchar4> &c1, + TextureObject<uchar4> &c2, + TextureObject<short2> &screen, + TextureObject<float> &conf, + TextureObject<int> &mask, + float4x4 &pose1, + float4x4 &pose1_inv, + float4x4 &pose2, + const Camera &cam1, + const Camera &cam2, const MvMLSParams ¶ms, int func, + cudaStream_t stream) { + + const dim3 gridSize((d1.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (d1.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + //printf("COR SIZE %d,%d\n", p1.width(), p1.height()); + + switch (func) { + case 0: corresponding_point_kernel<16,0><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose1, pose1_inv, pose2, cam1, cam2, params); break; + case 1: corresponding_point_kernel<16,1><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose1, pose1_inv, pose2, cam1, cam2, params); break; + case 2: corresponding_point_kernel<16,2><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose1, pose1_inv, pose2, cam1, cam2, params); break; + case 3: corresponding_point_kernel<16,3><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose1, pose1_inv, pose2, cam1, cam2, params); break; + case 4: corresponding_point_kernel<16,4><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose1, pose1_inv, pose2, cam1, cam2, params); break; + case 5: corresponding_point_kernel<16,5><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose1, pose1_inv, pose2, cam1, cam2, params); break; + } + + cudaSafeCall( cudaGetLastError() ); +} + +// ==== Remove zero-confidence ================================================= + +__global__ void zero_confidence_kernel( + TextureObject<float> conf, + TextureObject<float> depth) { + + const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; + const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x < depth.width() && y < depth.height()) { + const float c = conf.tex2D((int)x,(int)y); + + if (c == 0.0f) { + depth(x,y) = 1000.0f; + } + } +} + +void ftl::cuda::zero_confidence(TextureObject<float> &conf, TextureObject<float> &depth, cudaStream_t stream) { + const dim3 gridSize((depth.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + zero_confidence_kernel<<<gridSize, blockSize, 0, stream>>>(conf, depth); + cudaSafeCall( cudaGetLastError() ); +} + + +// ==== MultiViewMLS Aggregate ================================================= + +__device__ inline short3 getScreenPos(int x, int y, float d, 
const Camera &cam1, const Camera &cam2, const float4x4 &transform) { + const float3 campos = transform * cam1.screenToCam(x,y,d); + const int2 screen = cam2.camToScreen<int2>(campos); + return make_short3(screen.x, screen.y, campos.z); +} + +__device__ inline short2 packScreen(int x, int y, int id) { + return make_short2((id << 12) + x, y); +} + +__device__ inline short2 packScreen(const short3 &p, int id) { + return make_short2((id << 12) + p.x, p.y); +} + +__device__ inline int supportSize(uchar4 support) { + return (support.x+support.y) * (support.z+support.w); +} + +__device__ inline short2 choosePoint(uchar4 sup1, uchar4 sup2, float dot1, float dot2, short2 screen1, short2 screen2) { + //return (float(supportSize(sup2))*dot1 > float(supportSize(sup1))*dot2) ? screen2 : screen1; + return (dot1 > dot2) ? screen2 : screen1; +} + +__device__ inline int unpackCameraID(short2 p) { + return p.x >> 12; +} + +/** + * Identify which source has the best support region for a given pixel. + */ +__global__ void best_sources_kernel( + TextureObject<float4> normals1, + TextureObject<float4> normals2, + TextureObject<uchar4> support1, + TextureObject<uchar4> support2, + TextureObject<float> depth1, + TextureObject<float> depth2, + TextureObject<short2> screen, + float4x4 transform, + //float3x3 transformR, + ftl::rgbd::Camera cam1, + ftl::rgbd::Camera cam2, + int id1, + int id2) { + + const int x = (blockIdx.x*blockDim.x + threadIdx.x); + const int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x >= 0 && y >= 0 && x < screen.width() && y < screen.height()) { + const float d1 = depth1.tex2D(x,y); + + const short3 scr2 = getScreenPos(x, y, d1, cam1, cam2, transform); + short2 bestPoint = packScreen(x,y,0); + + if (scr2.x >= 0 && scr2.y >= 0 && scr2.x < cam2.width && scr2.y < cam2.height) { + uchar4 sup1 = support1.tex2D(x,y); + uchar4 sup2 = support2.tex2D(scr2.x,scr2.y); + const float d2 = depth2.tex2D(scr2.x,scr2.y); + float3 n1 = transform.getFloat3x3() * make_float3(normals1.tex2D(x,y)); + float3 n2 = make_float3(normals2.tex2D(scr2.x,scr2.y)); + + float3 camray = cam2.screenToCam(scr2.x,scr2.y,1.0f); + camray /= length(camray); + const float dot1 = dot(camray, n1); + const float dot2 = dot(camray, n2); + + bestPoint = (fabs(scr2.z - d2) < 0.04f) ? choosePoint(sup1, sup2, dot1, dot2, packScreen(x,y,id1), packScreen(scr2,id2)) : packScreen(x,y,6); + //bestPoint = choosePoint(sup1, sup2, dot1, dot2, packScreen(x,y,id1), packScreen(scr2,id2)); + //bestPoint = (d1 < d2) ? packScreen(x,y,id1) : packScreen(x,y,id2); + + bestPoint = (fabs(scr2.z - d2) < 0.04f) ? 
packScreen(scr2,id2) : packScreen(scr2,id1); + } + + screen(x,y) = bestPoint; + + /*if (s.x >= 0 && s.y >= 0) { + auto norm1 = make_float3(n1.tex2D(x,y)); + const auto norm2 = make_float3(n2.tex2D(s.x,s.y)); + //n2(s.x,s.y) = norm1; + + float3 cent1 = make_float3(c1.tex2D(x,y)); + const auto cent2 = make_float3(c2.tex2D(s.x,s.y)); + + if (cent2.x+cent2.y+cent2.z > 0.0f && norm2.x+norm2.y+norm2.z > 0.0f) { + norm1 += poseInv1.getFloat3x3() * (pose2.getFloat3x3() * norm2); + n1(x,y) = make_float4(norm1, 0.0f); + cent1 += poseInv1 * (pose2 * cent2); // FIXME: Transform between camera spaces + cent1 /= 2.0f; + c1(x,y) = make_float4(cent1, 0.0f); + //c2(s.x,s.y) = cent1; + + //contribs1(x,y) = contribs1.tex2D(x,y) + 1.0f; + } + // contribs2(s.x,s.y) = contribs2.tex2D(s.x,s.y) + 1.0f; + }*/ + } +} + +void ftl::cuda::best_sources( + ftl::cuda::TextureObject<float4> &normals1, + ftl::cuda::TextureObject<float4> &normals2, + ftl::cuda::TextureObject<uchar4> &support1, + ftl::cuda::TextureObject<uchar4> &support2, + ftl::cuda::TextureObject<float> &depth1, + ftl::cuda::TextureObject<float> &depth2, + ftl::cuda::TextureObject<short2> &screen, + const float4x4 &transform, + const ftl::rgbd::Camera &cam1, + const ftl::rgbd::Camera &cam2, + int id1, + int id2, + cudaStream_t stream) { + + const dim3 gridSize((screen.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (screen.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + best_sources_kernel<<<gridSize, blockSize, 0, stream>>>(normals1, normals2, support1, support2, depth1, depth2, screen, transform, cam1, cam2, id1, id2); + cudaSafeCall( cudaGetLastError() ); +} + +/** + * Identify which source has the best support region for a given pixel. + */ + __global__ void aggregate_sources_kernel( + TextureObject<float4> n1, + TextureObject<float4> n2, + TextureObject<float4> c1, + TextureObject<float4> c2, + TextureObject<float> depth1, + //TextureObject<float> depth2, + //TextureObject<short2> screen, + float4x4 transform, + //float3x3 transformR, + ftl::rgbd::Camera cam1, + ftl::rgbd::Camera cam2) { + + const int x = (blockIdx.x*blockDim.x + threadIdx.x); + const int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x >= 0 && y >= 0 && x < n1.width() && y < n1.height()) { + const float d1 = depth1.tex2D(x,y); + + if (d1 > cam1.minDepth && d1 < cam1.maxDepth) { + //const short3 s = getScreenPos(x, y, d1, cam1, cam2, transform); + const float3 camPos = transform * cam1.screenToCam(x, y, d1); + const int2 s = cam2.camToScreen<int2>(camPos); + + if (s.x >= 0 && s.y >= 0 && s.x < n2.width() && s.y < n2.height()) { + auto norm1 = make_float3(n1.tex2D(x,y)); + const auto norm2 = make_float3(n2.tex2D(s.x,s.y)); + //n2(s.x,s.y) = norm1; + + float3 cent1 = make_float3(c1.tex2D(x,y)); + const auto cent2 = transform.getInverse() * make_float3(c2.tex2D(s.x,s.y)); + + //printf("MERGING %f\n", length(cent2-cent1)); + + if (cent2.x+cent2.y+cent2.z > 0.0f && norm2.x+norm2.y+norm2.z > 0.0f && length(cent2-cent1) < 0.04f) { + norm1 += norm2; + norm1 /= 2.0f; + n1(x,y) = make_float4(norm1, 0.0f); + cent1 += cent2; + cent1 /= 2.0f; + c1(x,y) = make_float4(cent1, 0.0f); + //c2(s.x,s.y) = cent1; + + //contribs1(x,y) = contribs1.tex2D(x,y) + 1.0f; + } + // contribs2(s.x,s.y) = contribs2.tex2D(s.x,s.y) + 1.0f; + } + } + } +} + +void ftl::cuda::aggregate_sources( + ftl::cuda::TextureObject<float4> &n1, + ftl::cuda::TextureObject<float4> &n2, + ftl::cuda::TextureObject<float4> &c1, + ftl::cuda::TextureObject<float4> &c2, + 
ftl::cuda::TextureObject<float> &depth1, + //ftl::cuda::TextureObject<float> &depth2, + //ftl::cuda::TextureObject<short2> &screen, + const float4x4 &transform, + const ftl::rgbd::Camera &cam1, + const ftl::rgbd::Camera &cam2, + cudaStream_t stream) { + + const dim3 gridSize((n1.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (n1.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + aggregate_sources_kernel<<<gridSize, blockSize, 0, stream>>>(n1, n2, c1, c2, depth1, transform, cam1, cam2); + cudaSafeCall( cudaGetLastError() ); +} + +__device__ static uchar4 HSVtoRGB(int H, float S, float V) { + const float C = S * V; + const float X = C * (1 - fabs(fmodf(H / 60.0f, 2) - 1)); + const float m = V - C; + float Rs, Gs, Bs; + + if(H >= 0 && H < 60) { + Rs = C; + Gs = X; + Bs = 0; + } + else if(H >= 60 && H < 120) { + Rs = X; + Gs = C; + Bs = 0; + } + else if(H >= 120 && H < 180) { + Rs = 0; + Gs = C; + Bs = X; + } + else if(H >= 180 && H < 240) { + Rs = 0; + Gs = X; + Bs = C; + } + else if(H >= 240 && H < 300) { + Rs = X; + Gs = 0; + Bs = C; + } + else { + Rs = C; + Gs = 0; + Bs = X; + } + + return make_uchar4((Bs + m) * 255, (Gs + m) * 255, (Rs + m) * 255, 0); +} + +/** + * Render each pixel is a colour corresponding to the source camera with the + * best support window. + */ + __global__ void vis_best_sources_kernel( + TextureObject<short2> screen, + TextureObject<uchar4> colour, + int myid, + int count) { + + const int x = (blockIdx.x*blockDim.x + threadIdx.x); + const int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x >= 0 && y >= 0 && x < colour.width() && y < colour.height()) { + short2 s = screen.tex2D(x,y); + int id = unpackCameraID(s); + + uchar4 c = HSVtoRGB((360 / count) * id, 0.6f, 0.85f); + if (myid != id) colour(x,y) = c; + //colour(x,y) = c; + } +} + +void ftl::cuda::vis_best_sources( + ftl::cuda::TextureObject<short2> &screen, + ftl::cuda::TextureObject<uchar4> &colour, + int myid, + int count, + cudaStream_t stream) { + + const dim3 gridSize((colour.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (colour.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + vis_best_sources_kernel<<<gridSize, blockSize, 0, stream>>>(screen, colour, myid, count); + cudaSafeCall( cudaGetLastError() ); +} + +/*void ftl::cuda::aggregate_sources( + ftl::cuda::TextureObject<float4> &n1, + ftl::cuda::TextureObject<float4> &n2, + ftl::cuda::TextureObject<float4> &c1, + ftl::cuda::TextureObject<float4> &c2, + ftl::cuda::TextureObject<float> &contribs1, + ftl::cuda::TextureObject<float> &contribs2, + ftl::cuda::TextureObject<short2> &screen, + const float4x4 &poseInv1, + const float4x4 &pose2, + cudaStream_t stream) { + + const dim3 gridSize((screen.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (screen.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + aggregate_sources_kernel<<<gridSize, blockSize, 0, stream>>>(n1, n2, c1, c2, contribs1, contribs2, screen, poseInv1, pose2); + cudaSafeCall( cudaGetLastError() ); +}*/ + +// ==== Normalise aggregations ================================================= + +__global__ void normalise_aggregations_kernel( + TextureObject<float4> norms, + TextureObject<float4> cents, + TextureObject<float> contribs) { + + const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; + const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x < norms.width() && y < norms.height()) { + const float contrib = contribs.tex2D((int)x,(int)y); + + const auto a = 
norms.tex2D((int)x,(int)y); + const auto b = cents.tex2D(x,y); + //const float4 normal = normals.tex2D((int)x,(int)y); + + //out(x,y) = (contrib == 0.0f) ? make<B>(a) : make<B>(a / contrib); + + if (contrib > 0.0f) { + norms(x,y) = a / (contrib+1.0f); + cents(x,y) = b / (contrib+1.0f); + } + } +} + +void ftl::cuda::normalise_aggregations(TextureObject<float4> &norms, TextureObject<float4> ¢s, TextureObject<float> &contribs, cudaStream_t stream) { + const dim3 gridSize((norms.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (norms.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + normalise_aggregations_kernel<<<gridSize, blockSize, 0, stream>>>(norms, cents, contribs); + cudaSafeCall( cudaGetLastError() ); +} + diff --git a/components/operators/src/mvmls.cpp b/components/operators/src/mvmls.cpp new file mode 100644 index 000000000..4c02a7607 --- /dev/null +++ b/components/operators/src/mvmls.cpp @@ -0,0 +1,340 @@ +#include <ftl/operators/mvmls.hpp> +#include "smoothing_cuda.hpp" +#include <ftl/utility/matrix_conversion.hpp> +#include "mvmls_cuda.hpp" + +using ftl::operators::MultiViewMLS; +using ftl::codecs::Channel; +using cv::cuda::GpuMat; +using ftl::rgbd::Format; + +MultiViewMLS::MultiViewMLS(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) { + +} + +MultiViewMLS::~MultiViewMLS() { + +} + +bool MultiViewMLS::apply(ftl::rgbd::FrameSet &in, ftl::rgbd::FrameSet &out, cudaStream_t stream) { + cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream); + + float thresh = config()->value("mls_threshold", 0.4f); + float col_smooth = config()->value("mls_colour_smoothing", 30.0f); + int iters = config()->value("mls_iterations", 3); + int radius = config()->value("mls_radius",5); + //bool aggre = config()->value("aggregation", true); + int win = config()->value("cost_function",1); + bool do_corr = config()->value("merge_corresponding", true); + bool do_aggr = config()->value("merge_mls", false); + bool cull_zero = config()->value("cull_no_confidence", false); + //bool show_best_source = config()->value("show_pixel_source", false); + + ftl::cuda::MvMLSParams params; + params.range = config()->value("search_range", 0.05f); + params.fill_match = config()->value("fill_match", 50.0f); + params.fill_threshold = config()->value("fill_threshold", 0.0f); + params.match_threshold = config()->value("match_threshold", 0.3f); + params.colour_smooth = config()->value("colour_smooth", 150.0f); + params.spatial_smooth = config()->value("spatial_smooth", 0.04f); + params.cost_ratio = config()->value("cost_ratio", 0.2f); + params.cost_threshold = config()->value("cost_threshold", 1.0f); + + // Make sure we have enough buffers + if (normals_horiz_.size() < in.frames.size()) { + normals_horiz_.resize(in.frames.size()); + centroid_horiz_.resize(in.frames.size()); + centroid_vert_.resize(in.frames.size()); + contributions_.resize(in.frames.size()); + } + + // Make sure all buffers are at correct resolution and are allocated + for (size_t i=0; i<in.frames.size(); ++i) { + auto &f = in.frames[i]; + auto size = f.get<GpuMat>(Channel::Depth).size(); + centroid_horiz_[i].create(size.height, size.width); + normals_horiz_[i].create(size.height, size.width); + centroid_vert_[i].create(size.width, size.height); + contributions_[i].create(size.width, size.height); + + if (!f.hasChannel(Channel::Normals)) { + LOG(ERROR) << "Required normals channel missing for MLS"; + return false; + } + if (!f.hasChannel(Channel::Support1)) { + LOG(ERROR) << "Required cross support 
channel missing for MLS"; + return false; + } + + // Create required channels + f.create<GpuMat>(Channel::Confidence, Format<float>(size)); + f.createTexture<float>(Channel::Confidence); + f.create<GpuMat>(Channel::Screen, Format<short2>(size)); + f.createTexture<short2>(Channel::Screen); + } + + for (int iter=0; iter<iters; ++iter) { + // Step 1: + // Calculate correspondences and adjust depth values + // Step 2: + // Find corresponding points and perform aggregation of any correspondences + // For each camera combination + if (do_corr) { + for (size_t i=0; i<in.frames.size(); ++i) { + auto &f1 = in.frames[i]; + //f1.get<GpuMat>(Channel::Depth2).setTo(cv::Scalar(0.0f), cvstream); + f1.get<GpuMat>(Channel::Confidence).setTo(cv::Scalar(0.0f), cvstream); + + Eigen::Vector4d d1(0.0, 0.0, 1.0, 0.0); + d1 = in.sources[i]->getPose() * d1; + + for (size_t j=0; j<in.frames.size(); ++j) { + if (i == j) continue; + + //LOG(INFO) << "Running phase1"; + + auto &f2 = in.frames[j]; + auto s1 = in.sources[i]; + auto s2 = in.sources[j]; + + // Are cameras facing similar enough direction? + Eigen::Vector4d d2(0.0, 0.0, 1.0, 0.0); + d2 = in.sources[j]->getPose() * d2; + // No, so skip this combination + if (d1.dot(d2) <= 0.0) continue; + + auto pose1 = MatrixConversion::toCUDA(s1->getPose().cast<float>()); + auto pose1_inv = MatrixConversion::toCUDA(s1->getPose().cast<float>().inverse()); + auto pose2 = MatrixConversion::toCUDA(s2->getPose().cast<float>().inverse()); + auto pose2_inv = MatrixConversion::toCUDA(s2->getPose().cast<float>()); + + auto transform = pose2 * pose1; + + //Calculate screen positions of estimated corresponding points + ftl::cuda::correspondence( + f1.getTexture<float>(Channel::Depth), + f2.getTexture<float>(Channel::Depth), + f1.getTexture<uchar4>(Channel::Colour), + f2.getTexture<uchar4>(Channel::Colour), + // TODO: Add normals and other things... + f1.getTexture<short2>(Channel::Screen), + f1.getTexture<float>(Channel::Confidence), + f1.getTexture<int>(Channel::Mask), + pose1, + pose1_inv, + pose2, + s1->parameters(), + s2->parameters(), + params, + win, + stream + ); + + // Also calculate best source for each point + /*ftl::cuda::best_sources( + f1.getTexture<uchar4>(Channel::Support1), + f2.getTexture<uchar4>(Channel::Support1), + f1.getTexture<float>(Channel::Depth), + f2.getTexture<float>(Channel::Depth), + f1.getTexture<short2>(Channel::Screen), + transform, + s1->parameters(), + s2->parameters(), + i, j, stream + );*/ + } + } + } + + // Find best source for every pixel + /*for (size_t i=0; i<in.frames.size(); ++i) { + auto &f1 = in.frames[i]; + //f1.get<GpuMat>(Channel::Depth2).setTo(cv::Scalar(0.0f), cvstream); + //f1.get<GpuMat>(Channel::Confidence).setTo(cv::Scalar(0.0f), cvstream); + + Eigen::Vector4d d1(0.0, 0.0, 1.0, 0.0); + d1 = in.sources[i]->getPose() * d1; + + for (size_t j=0; j<in.frames.size(); ++j) { + if (i == j) continue; + + //LOG(INFO) << "Running phase1"; + + auto &f2 = in.frames[j]; + auto s1 = in.sources[i]; + auto s2 = in.sources[j]; + + // Are cameras facing similar enough direction? 
+ Eigen::Vector4d d2(0.0, 0.0, 1.0, 0.0); + d2 = in.sources[j]->getPose() * d2; + // No, so skip this combination + if (d1.dot(d2) <= 0.0) continue; + + auto pose1 = MatrixConversion::toCUDA(s1->getPose().cast<float>()); + auto pose1_inv = MatrixConversion::toCUDA(s1->getPose().cast<float>().inverse()); + auto pose2 = MatrixConversion::toCUDA(s2->getPose().cast<float>().inverse()); + auto pose2_inv = MatrixConversion::toCUDA(s2->getPose().cast<float>()); + + auto transform = pose2 * pose1; + + // Also calculate best source for each point + ftl::cuda::best_sources( + f1.getTexture<float4>(Channel::Normals), + f2.getTexture<float4>(Channel::Normals), + f1.getTexture<uchar4>(Channel::Support1), + f2.getTexture<uchar4>(Channel::Support1), + f1.getTexture<float>(Channel::Depth), + f2.getTexture<float>(Channel::Depth), + f1.getTexture<short2>(Channel::Screen), + transform, + s1->parameters(), + s2->parameters(), + i, j, stream + ); + } + }*/ + + // Step 2: + // Do the horizontal and vertical MLS aggregations for each source + // But don't do the final move step. + for (size_t i=0; i<in.frames.size(); ++i) { + auto &f = in.frames[i]; + auto *s = in.sources[i]; + + // Clear data + cv::cuda::GpuMat data(contributions_[i].height(), contributions_[i].width(), CV_32F, contributions_[i].pixelPitch()); + data.setTo(cv::Scalar(0.0f), cvstream); + + if (cull_zero && iter == iters-1) { + ftl::cuda::zero_confidence( + f.getTexture<float>(Channel::Confidence), + f.getTexture<float>(Channel::Depth), + stream + ); + } + + ftl::cuda::mls_aggr_horiz( + f.createTexture<uchar4>(Channel::Support1), + f.createTexture<float4>(Channel::Normals), + normals_horiz_[i], + f.createTexture<float>(Channel::Depth), + centroid_horiz_[i], + f.createTexture<uchar4>(Channel::Colour), + thresh, + col_smooth, + radius, + s->parameters(), + stream + ); + + ftl::cuda::mls_aggr_vert( + f.getTexture<uchar4>(Channel::Support1), + normals_horiz_[i], + f.getTexture<float4>(Channel::Normals), + centroid_horiz_[i], + centroid_vert_[i], + f.getTexture<uchar4>(Channel::Colour), + f.getTexture<float>(Channel::Depth), + thresh, + col_smooth, + radius, + s->parameters(), + stream + ); + } + + // Step 3: + // Find corresponding points and perform aggregation of any correspondences + // For each camera combination + if (do_aggr) { + for (size_t i=0; i<in.frames.size(); ++i) { + auto &f1 = in.frames[i]; + //f1.get<GpuMat>(Channel::Depth2).setTo(cv::Scalar(0.0f), cvstream); + f1.get<GpuMat>(Channel::Confidence).setTo(cv::Scalar(0.0f), cvstream); + + Eigen::Vector4d d1(0.0, 0.0, 1.0, 0.0); + d1 = in.sources[i]->getPose() * d1; + + for (size_t j=0; j<in.frames.size(); ++j) { + if (i == j) continue; + + //LOG(INFO) << "Running phase1"; + + auto &f2 = in.frames[j]; + auto s1 = in.sources[i]; + auto s2 = in.sources[j]; + + // Are cameras facing similar enough direction? 
+ Eigen::Vector4d d2(0.0, 0.0, 1.0, 0.0); + d2 = in.sources[j]->getPose() * d2; + // No, so skip this combination + if (d1.dot(d2) <= 0.0) continue; + + auto pose1 = MatrixConversion::toCUDA(s1->getPose().cast<float>()); + auto pose1_inv = MatrixConversion::toCUDA(s1->getPose().cast<float>().inverse()); + auto pose2 = MatrixConversion::toCUDA(s2->getPose().cast<float>().inverse()); + auto pose2_inv = MatrixConversion::toCUDA(s2->getPose().cast<float>()); + + auto transform = pose2 * pose1; + + // For the corresponding points, combine normals and centroids + ftl::cuda::aggregate_sources( + f1.getTexture<float4>(Channel::Normals), + f2.getTexture<float4>(Channel::Normals), + centroid_vert_[i], + centroid_vert_[j], + f1.getTexture<float>(Channel::Depth), + //contributions_[i], + //contributions_[j], + //f1.getTexture<short2>(Channel::Screen), + transform, + s1->parameters(), + s2->parameters(), + stream + ); + + //LOG(INFO) << "Correspondences done... " << i; + } + } + } + + // Step 3: + // Normalise aggregations and move the points + for (size_t i=0; i<in.frames.size(); ++i) { + auto &f = in.frames[i]; + auto *s = in.sources[i]; + auto size = f.get<GpuMat>(Channel::Depth).size(); + + /*if (do_corr) { + ftl::cuda::normalise_aggregations( + f.getTexture<float4>(Channel::Normals), + centroid_vert_[i], + contributions_[i], + stream + ); + }*/ + + ftl::cuda::mls_adjust_depth( + f.getTexture<float4>(Channel::Normals), + centroid_vert_[i], + f.getTexture<float>(Channel::Depth), + f.createTexture<float>(Channel::Depth2, ftl::rgbd::Format<float>(size)), + s->parameters(), + stream + ); + + f.swapChannels(Channel::Depth, Channel::Depth2); + + /*if (show_best_source) { + ftl::cuda::vis_best_sources( + f.getTexture<short2>(Channel::Screen), + f.getTexture<uchar4>(Channel::Colour), + i, + 7, stream + ); + }*/ + } + } + + return true; +} diff --git a/components/operators/src/mvmls_cuda.hpp b/components/operators/src/mvmls_cuda.hpp new file mode 100644 index 000000000..93b1e8d88 --- /dev/null +++ b/components/operators/src/mvmls_cuda.hpp @@ -0,0 +1,101 @@ +#ifndef _FTL_CUDA_MVMLS_HPP_ +#define _FTL_CUDA_MVMLS_HPP_ + +#include <ftl/rgbd/camera.hpp> +#include <ftl/cuda_common.hpp> +#include <ftl/cuda_matrix_util.hpp> + +namespace ftl { +namespace cuda { + +struct MvMLSParams { + float spatial_smooth; + float colour_smooth; + float fill_match; + float fill_threshold; + float match_threshold; + float cost_ratio; + float cost_threshold; + float range; + uint flags; +}; + +void correspondence( + ftl::cuda::TextureObject<float> &d1, + ftl::cuda::TextureObject<float> &d2, + ftl::cuda::TextureObject<uchar4> &c1, + ftl::cuda::TextureObject<uchar4> &c2, + ftl::cuda::TextureObject<short2> &screen, + ftl::cuda::TextureObject<float> &conf, + ftl::cuda::TextureObject<int> &mask, + float4x4 &pose1, + float4x4 &pose1_inv, + float4x4 &pose2, + const ftl::rgbd::Camera &cam1, + const ftl::rgbd::Camera &cam2, const ftl::cuda::MvMLSParams ¶ms, int func, + cudaStream_t stream); + +void zero_confidence( + ftl::cuda::TextureObject<float> &conf, + ftl::cuda::TextureObject<float> &depth, + cudaStream_t stream); + +/*void aggregate_sources( + ftl::cuda::TextureObject<float4> &n1, + ftl::cuda::TextureObject<float4> &n2, + ftl::cuda::TextureObject<float4> &c1, + ftl::cuda::TextureObject<float4> &c2, + ftl::cuda::TextureObject<float> &contribs1, + ftl::cuda::TextureObject<float> &contribs2, + ftl::cuda::TextureObject<short2> &screen, + //const float4x4 &pose1, + //const float4x4 &poseInv2, + const float4x4 &poseInv1, + const float4x4 
&pose2, + cudaStream_t stream);*/ + +void aggregate_sources( + ftl::cuda::TextureObject<float4> &n1, + ftl::cuda::TextureObject<float4> &n2, + ftl::cuda::TextureObject<float4> &c1, + ftl::cuda::TextureObject<float4> &c2, + ftl::cuda::TextureObject<float> &depth1, + //ftl::cuda::TextureObject<float> &depth2, + //ftl::cuda::TextureObject<short2> &screen, + const float4x4 &transform, + const ftl::rgbd::Camera &cam1, + const ftl::rgbd::Camera &cam2, + cudaStream_t stream); + +void best_sources( + ftl::cuda::TextureObject<float4> &normals1, + ftl::cuda::TextureObject<float4> &normals2, + ftl::cuda::TextureObject<uchar4> &support1, + ftl::cuda::TextureObject<uchar4> &suppor2, + ftl::cuda::TextureObject<float> &depth1, + ftl::cuda::TextureObject<float> &depth2, + ftl::cuda::TextureObject<short2> &screen, + const float4x4 &transform, + const ftl::rgbd::Camera &cam1, + const ftl::rgbd::Camera &cam2, + int id1, + int id2, + cudaStream_t stream); + +void vis_best_sources( + ftl::cuda::TextureObject<short2> &screen, + ftl::cuda::TextureObject<uchar4> &colour, + int myid, + int count, + cudaStream_t stream); + +void normalise_aggregations( + ftl::cuda::TextureObject<float4> &norms, + ftl::cuda::TextureObject<float4> ¢s, + ftl::cuda::TextureObject<float> &contribs, + cudaStream_t stream); + +} +} + +#endif diff --git a/components/renderers/cpp/src/reprojection.cu b/components/renderers/cpp/src/reprojection.cu index dc636f359..e6490c47e 100644 --- a/components/renderers/cpp/src/reprojection.cu +++ b/components/renderers/cpp/src/reprojection.cu @@ -66,7 +66,7 @@ __global__ void reprojection_kernel( TextureObject<B> out, // Accumulated output TextureObject<float> contrib, SplatParams params, - Camera camera, float4x4 poseInv) { + Camera camera, float4x4 transform, float3x3 transformR) { const int x = (blockIdx.x*blockDim.x + threadIdx.x); const int y = blockIdx.y*blockDim.y + threadIdx.y; @@ -74,10 +74,10 @@ __global__ void reprojection_kernel( const float d = depth_in.tex2D((int)x, (int)y); if (d < params.camera.minDepth || d > params.camera.maxDepth) return; - const float3 worldPos = params.m_viewMatrixInverse * params.camera.screenToCam(x, y, d); + //const float3 worldPos = params.m_viewMatrixInverse * params.camera.screenToCam(x, y, d); //if (worldPos.x == MINF || (!(params.m_flags & ftl::render::kShowDisconMask) && worldPos.w < 0.0f)) return; - const float3 camPos = poseInv * worldPos; + const float3 camPos = transform * params.camera.screenToCam(x, y, d); if (camPos.z < camera.minDepth) return; if (camPos.z > camera.maxDepth) return; const float2 screenPos = camera.camToScreen<float2>(camPos); @@ -86,7 +86,7 @@ __global__ void reprojection_kernel( if (screenPos.x >= depth_src.width() || screenPos.y >= depth_src.height()) return; // Calculate the dot product of surface normal and camera ray - const float3 n = poseInv.getFloat3x3() * make_float3(normals.tex2D((int)x, (int)y)); + const float3 n = transformR * make_float3(normals.tex2D((int)x, (int)y)); float3 ray = camera.screenToCam(screenPos.x, screenPos.y, 1.0f); ray = ray / length(ray); const float dotproduct = max(dot(ray,n),0.0f); @@ -123,7 +123,7 @@ void ftl::cuda::reproject( TextureObject<B> &out, // Accumulated output TextureObject<float> &contrib, const SplatParams ¶ms, - const Camera &camera, const float4x4 &poseInv, cudaStream_t stream) { + const Camera &camera, const float4x4 &transform, const float3x3 &transformR, cudaStream_t stream) { const dim3 gridSize((out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (out.height() + T_PER_BLOCK - 
1)/T_PER_BLOCK); const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); @@ -136,7 +136,8 @@ void ftl::cuda::reproject( contrib, params, camera, - poseInv + transform, + transformR ); cudaSafeCall( cudaGetLastError() ); } @@ -150,7 +151,7 @@ template void ftl::cuda::reproject( ftl::cuda::TextureObject<float> &contrib, const ftl::render::SplatParams ¶ms, const ftl::rgbd::Camera &camera, - const float4x4 &poseInv, cudaStream_t stream); + const float4x4 &transform, const float3x3 &transformR, cudaStream_t stream); template void ftl::cuda::reproject( ftl::cuda::TextureObject<float> &in, // Original colour image @@ -161,7 +162,7 @@ template void ftl::cuda::reproject( ftl::cuda::TextureObject<float> &contrib, const ftl::render::SplatParams ¶ms, const ftl::rgbd::Camera &camera, - const float4x4 &poseInv, cudaStream_t stream); + const float4x4 &transform, const float3x3 &transformR, cudaStream_t stream); template void ftl::cuda::reproject( ftl::cuda::TextureObject<float4> &in, // Original colour image @@ -172,7 +173,7 @@ template void ftl::cuda::reproject( ftl::cuda::TextureObject<float> &contrib, const ftl::render::SplatParams ¶ms, const ftl::rgbd::Camera &camera, - const float4x4 &poseInv, cudaStream_t stream); + const float4x4 &transform, const float3x3 &transformR, cudaStream_t stream); //============================================================================== // Without normals @@ -197,10 +198,10 @@ __global__ void reprojection_kernel( const float d = depth_in.tex2D((int)x, (int)y); if (d < params.camera.minDepth || d > params.camera.maxDepth) return; - const float3 worldPos = params.m_viewMatrixInverse * params.camera.screenToCam(x, y, d); + //const float3 worldPos = params.m_viewMatrixInverse * params.camera.screenToCam(x, y, d); //if (worldPos.x == MINF || (!(params.m_flags & ftl::render::kShowDisconMask) && worldPos.w < 0.0f)) return; - const float3 camPos = poseInv * worldPos; + const float3 camPos = poseInv * params.camera.screenToCam(x, y, d); if (camPos.z < camera.minDepth) return; if (camPos.z > camera.maxDepth) return; const float2 screenPos = camera.camToScreen<float2>(camPos); diff --git a/components/renderers/cpp/src/splatter_cuda.hpp b/components/renderers/cpp/src/splatter_cuda.hpp index 3918a0eab..c81977cf3 100644 --- a/components/renderers/cpp/src/splatter_cuda.hpp +++ b/components/renderers/cpp/src/splatter_cuda.hpp @@ -80,7 +80,7 @@ namespace cuda { ftl::cuda::TextureObject<float> &contrib, const ftl::render::SplatParams ¶ms, const ftl::rgbd::Camera &camera, - const float4x4 &poseInv, cudaStream_t stream); + const float4x4 &transform, const float3x3 &transformR, cudaStream_t stream); template <typename A, typename B> void reproject( diff --git a/components/renderers/cpp/src/tri_render.cpp b/components/renderers/cpp/src/tri_render.cpp index d42d5a0e2..5912b6480 100644 --- a/components/renderers/cpp/src/tri_render.cpp +++ b/components/renderers/cpp/src/tri_render.cpp @@ -228,7 +228,8 @@ void Triangular::__reprojectChannel(ftl::rgbd::Frame &output, ftl::codecs::Chann cv::cuda::cvtColor(tmp,col, cv::COLOR_BGR2BGRA); } - auto poseInv = MatrixConversion::toCUDA(s->getPose().cast<float>().inverse()); + auto transform = MatrixConversion::toCUDA(s->getPose().cast<float>().inverse()) * params_.m_viewMatrixInverse; + auto transformR = MatrixConversion::toCUDA(s->getPose().cast<float>().inverse()).getFloat3x3(); if (mesh_) { ftl::cuda::reproject( @@ -240,7 +241,7 @@ void Triangular::__reprojectChannel(ftl::rgbd::Frame &output, ftl::codecs::Chann 
temp_.getTexture<float>(Channel::Contribution), params_, s->parameters(), - poseInv, stream + transform, transformR, stream ); } else { // Can't use normals with point cloud version @@ -252,7 +253,7 @@ void Triangular::__reprojectChannel(ftl::rgbd::Frame &output, ftl::codecs::Chann temp_.getTexture<float>(Channel::Contribution), params_, s->parameters(), - poseInv, stream + transform, stream ); } } @@ -580,19 +581,19 @@ bool Triangular::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out) { if (aligned_source >= 0 && aligned_source < scene_->frames.size()) { // FIXME: Output may not be same resolution as source! cudaSafeCall(cudaStreamSynchronize(stream_)); - scene_->frames[aligned_source].copyTo(Channel::Depth + Channel::Colour + Channel::Smoothing, out); + scene_->frames[aligned_source].copyTo(Channel::Depth + Channel::Colour + Channel::Smoothing + Channel::Confidence, out); - if (chan == Channel::Normals) { + if (chan == Channel::ColourNormals) { // Convert normal to single float value - temp_.create<GpuMat>(Channel::Colour, Format<uchar4>(out.get<GpuMat>(Channel::Colour).size())).setTo(cv::Scalar(0,0,0,0), cvstream); - ftl::cuda::normal_visualise(scene_->frames[aligned_source].getTexture<float4>(Channel::Normals), temp_.createTexture<uchar4>(Channel::Colour), + out.create<GpuMat>(Channel::ColourNormals, Format<uchar4>(out.get<GpuMat>(Channel::Colour).size())).setTo(cv::Scalar(0,0,0,0), cvstream); + ftl::cuda::normal_visualise(scene_->frames[aligned_source].getTexture<float4>(Channel::Normals), out.createTexture<uchar4>(Channel::ColourNormals), light_pos_, light_diffuse_, light_ambient_, stream_); // Put in output as single float - cv::cuda::swap(temp_.get<GpuMat>(Channel::Colour), out.create<GpuMat>(Channel::Normals)); - out.resetTexture(Channel::Normals); + //cv::cuda::swap(temp_.get<GpuMat>(Channel::Colour), out.create<GpuMat>(Channel::Normals)); + //out.resetTexture(Channel::Normals); } return true; diff --git a/components/rgbd-sources/include/ftl/rgbd/frame.hpp b/components/rgbd-sources/include/ftl/rgbd/frame.hpp index d7b8292ff..e7a949600 100644 --- a/components/rgbd-sources/include/ftl/rgbd/frame.hpp +++ b/components/rgbd-sources/include/ftl/rgbd/frame.hpp @@ -233,7 +233,7 @@ ftl::cuda::TextureObject<T> &Frame::createTexture(ftl::codecs::Channel c, const template <typename T> ftl::cuda::TextureObject<T> &Frame::createTexture(ftl::codecs::Channel c, bool interpolated) { - if (!channels_.has(c)) throw ftl::exception("createTexture needs a format if the channel does not exist"); + if (!channels_.has(c)) throw ftl::exception(ftl::Formatter() << "createTexture needs a format if the channel does not exist: " << (int)c); auto &m = _get(c); -- GitLab
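
For reviewers who want the new operator in context: the sketch below mirrors the pipeline1->append calls from this patch's main.cpp hunk and lists the configuration keys that MultiViewMLS::apply() reads, with their defaults as set in mvmls.cpp. It is illustrative only and not part of the patch; the ftl::operators::Graph type name, the buildPreRenderPipeline helper, and the exact header providing Graph are assumptions taken from the surrounding application code rather than from this diff.

#include <ftl/operators/operator.hpp>      // assumed location of ftl::operators::Graph
#include <ftl/operators/segmentation.hpp>  // CrossSupport / VisCrossSupport
#include <ftl/operators/mask.hpp>          // DiscontinuityMask / CullDiscontinuity
#include <ftl/operators/mvmls.hpp>         // MultiViewMLS, added by this patch

// Hypothetical helper mirroring the appends inside run(); construction of
// pipeline1 itself is outside this diff.
static void buildPreRenderPipeline(ftl::operators::Graph *pipeline1) {
	pipeline1->append<ftl::operators::CrossSupport>("cross");                 // supplies Channel::Support1, required by MultiViewMLS
	pipeline1->append<ftl::operators::DiscontinuityMask>("discontinuity");
	pipeline1->append<ftl::operators::CullDiscontinuity>("remove_discontinuity");
	pipeline1->append<ftl::operators::VisCrossSupport>("viscross")->set("enabled", false);
	pipeline1->append<ftl::operators::MultiViewMLS>("mvmls");                 // all-source MLS; replaces the commented-out AggreMLS step
}

// Configuration keys read in MultiViewMLS::apply() and their defaults (from mvmls.cpp):
//   mls_iterations        3      number of correspondence + horizontal/vertical MLS passes
//   mls_threshold         0.4    MLS smoothing threshold
//   mls_colour_smoothing  30.0   colour term used by the MLS aggregation
//   mls_radius            5      aggregation window radius
//   cost_function         1      selects the correspondence weight function (templates 0-5)
//   merge_corresponding   true   run the cross-source correspondence / depth-adjust step
//   merge_mls             false  aggregate normals and centroids across sources
//   cull_no_confidence    false  zero-confidence pixels are pushed to depth 1000
//   search_range          0.05   correspondence search range along the ray
//   colour_smooth 150, spatial_smooth 0.04, cost_ratio 0.2, cost_threshold 1.0,
//   fill_match 50, fill_threshold 0, match_threshold 0.3 populate MvMLSParams

Note that MultiViewMLS::apply() also requires Channel::Normals on every input frame (it logs an error and returns false otherwise), so a normal-estimation operator must run earlier in the pipeline.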