diff --git a/applications/gui/src/camera.cpp b/applications/gui/src/camera.cpp index ca11bf526ef0e6e141861a79a15e2bf656106a5c..961728d88fb7101ee2b0e99ae8cbde89dd11f9ba 100644 --- a/applications/gui/src/camera.cpp +++ b/applications/gui/src/camera.cpp @@ -540,7 +540,7 @@ void ftl::gui::Camera::keyMovement(int key, int modifiers) { float scalar = (key == 266) ? -mag : mag; neye_ += rotmat_*Eigen::Vector4d(0.0,scalar,0.0,1.0); return; - } else if (key >= '0' && key <= '5') { + } else if (key >= '0' && key <= '5' && modifiers == 2) { // Ctrl+NUMBER int ix = key - (int)('0'); transform_ix_ = ix-1; return; diff --git a/applications/gui/src/src_window.cpp b/applications/gui/src/src_window.cpp index f86a36d58c0b4acd1a10180561760a832c382b7b..556d1f794419547b41ee18464ff0213a967bdb3a 100644 --- a/applications/gui/src/src_window.cpp +++ b/applications/gui/src/src_window.cpp @@ -30,6 +30,7 @@ #include <ftl/operators/disparity.hpp> #include <ftl/operators/detectandtrack.hpp> #include <ftl/operators/weighting.hpp> +#include <ftl/operators/mvmls.hpp> #include <nlohmann/json.hpp> @@ -224,9 +225,10 @@ void SourceWindow::_checkFrameSets(int id) { p->append<ftl::operators::CrossSupport>("cross"); p->append<ftl::operators::PixelWeights>("weights"); p->append<ftl::operators::CullWeight>("remove_weights")->value("enabled", false); - p->append<ftl::operators::CullDiscontinuity>("remove_discontinuity"); p->append<ftl::operators::DegradeWeight>("degrade"); p->append<ftl::operators::VisCrossSupport>("viscross")->set("enabled", false); + p->append<ftl::operators::MultiViewMLS>("mvmls")->value("enabled", false); + p->append<ftl::operators::CullDiscontinuity>("remove_discontinuity"); pre_pipelines_.push_back(p); framesets_.push_back(new ftl::rgbd::FrameSet); diff --git a/components/common/cpp/include/ftl/cuda_texture.hpp b/components/common/cpp/include/ftl/cuda_texture.hpp index e103b049750b0a39087fcd4dbd21100ddb7815b5..fa6c41866d20257ccd25faf62e3818cdeb3ce1c1 100644 --- a/components/common/cpp/include/ftl/cuda_texture.hpp +++ b/components/common/cpp/include/ftl/cuda_texture.hpp @@ -61,6 +61,8 @@ class TextureObjectBase { __host__ void free(); inline int cvType() const { return cvType_; } + + __host__ __device__ inline bool isValid() const { return ptr_ != nullptr; } protected: cudaTextureObject_t texobj_; diff --git a/components/operators/CMakeLists.txt b/components/operators/CMakeLists.txt index a5e9ea7790c40538f55d3911e4fd1a67193736b6..c2e7b791df51bdf57bedf01e9fdeb312461cdaee 100644 --- a/components/operators/CMakeLists.txt +++ b/components/operators/CMakeLists.txt @@ -19,8 +19,11 @@ set(OPERSRC src/mask.cpp src/antialiasing.cpp src/antialiasing.cu - src/mvmls.cpp - src/correspondence.cu + src/fusion/mvmls.cpp + src/fusion/correspondence.cu + src/fusion/correspondence_depth.cu + src/fusion/correspondence_util.cu + src/fusion/mls_aggr.cu src/clipping.cpp src/depth.cpp src/detectandtrack.cpp diff --git a/components/operators/include/ftl/operators/mask_cuda.hpp b/components/operators/include/ftl/operators/mask_cuda.hpp index 53747d61fd82c96ec1eafdb4f51d3ad322db1222..f780d5178d80a0be6691648e618d48e4ce2031e5 100644 --- a/components/operators/include/ftl/operators/mask_cuda.hpp +++ b/components/operators/include/ftl/operators/mask_cuda.hpp @@ -73,6 +73,7 @@ void cull_mask( ftl::cuda::TextureObject<ftl::cuda::Mask::type> &mask, ftl::cuda::TextureObject<float> &depth, ftl::cuda::Mask::type id, + bool invert, unsigned int radius, cudaStream_t stream); diff --git a/components/operators/src/correspondence.cu b/components/operators/src/correspondence.cu deleted file mode 100644 index 0b5ea46075627051b86ac25e1617258be22335a9..0000000000000000000000000000000000000000 --- a/components/operators/src/correspondence.cu +++ /dev/null @@ -1,641 +0,0 @@ -#include "mvmls_cuda.hpp" -#include <ftl/cuda/weighting.hpp> -#include <ftl/operators/mask_cuda.hpp> -#include <ftl/cuda/warp.hpp> - -using ftl::cuda::TextureObject; -using ftl::rgbd::Camera; -using ftl::cuda::Mask; -using ftl::cuda::MvMLSParams; - -#define T_PER_BLOCK 8 -#define WARP_SIZE 32 -#define INTERVAL 1 - -template<int FUNCTION> -__device__ float weightFunction(const ftl::cuda::MvMLSParams ¶ms, float dweight, float cweight); - -template <> -__device__ inline float weightFunction<0>(const ftl::cuda::MvMLSParams ¶ms, float dweight, float cweight) { - return (params.cost_ratio * (cweight) + (1.0f - params.cost_ratio) * dweight); -} - -template <> -__device__ inline float weightFunction<1>(const ftl::cuda::MvMLSParams ¶m, float dweight, float cweight) { - return (cweight * cweight * dweight); -} - -template <> -__device__ inline float weightFunction<2>(const ftl::cuda::MvMLSParams ¶m, float dweight, float cweight) { - return (dweight * dweight * cweight); -} - -template <> -__device__ inline float weightFunction<3>(const ftl::cuda::MvMLSParams ¶ms, float dweight, float cweight) { - return (dweight == 0.0f) ? 0.0f : (params.cost_ratio * (cweight) + (1.0f - params.cost_ratio) * dweight); -} - -template <> -__device__ inline float weightFunction<4>(const ftl::cuda::MvMLSParams ¶ms, float dweight, float cweight) { - return cweight; -} - -template <> -__device__ inline float weightFunction<5>(const ftl::cuda::MvMLSParams ¶ms, float dweight, float cweight) { - return (cweight > 0.0f) ? dweight : 0.0f; -} - -#ifndef PINF -#define PINF __int_as_float(0x7f800000) -#endif - -__device__ inline float distance(float4 p1, float4 p2) { - return min(1.0f, max(max(fabsf(p1.x - p2.x),fabsf(p1.y - p2.y)), fabsf(p1.z - p2.z))/ 10.0f); - //return min(1.0f, ftl::cuda::colourDistance(p1, p2) / 10.0f); -} - -__device__ inline int halfWarpCensus(float e) { - float e0 = __shfl_sync(FULL_MASK, e, (threadIdx.x >= 16) ? 16 : 0, WARP_SIZE); - int c = (e > e0) ? 1 << (threadIdx.x % 16) : 0; - for (int i = WARP_SIZE/4; i > 0; i /= 2) { - const int other = __shfl_xor_sync(FULL_MASK, c, i, WARP_SIZE); - c |= other; - } - return c; -} - -__device__ inline float halfWarpBest(float e, float c) { - for (int i = WARP_SIZE/4; i > 0; i /= 2) { - const float o1 = __shfl_xor_sync(FULL_MASK, e, i, WARP_SIZE); - const float o2 = __shfl_xor_sync(FULL_MASK, c, i, WARP_SIZE); - e = (o2 > c) ? o1 : e; - } - return e; -} - -__device__ inline float4 relativeDelta(const float4 &e) { - const float e0x = __shfl_sync(FULL_MASK, e.x, 0, WARP_SIZE/2); - const float e0y = __shfl_sync(FULL_MASK, e.y, 0, WARP_SIZE/2); - const float e0z = __shfl_sync(FULL_MASK, e.z, 0, WARP_SIZE/2); - return make_float4(e.x-e0x, e.y-e0y, e.z-e0z, 0.0f); -} - -/** - * See: Birchfield S. et al. (1998). A pixel dissimilarity measure that is - * insensitive to image sampling. IEEE Transactions on Pattern Analysis and - * Machine Intelligence. - */ -__device__ float dissimilarity(const float4 &l, const float4 &rp, const float4 &rc, const float4 &rn) { - const float rpd = distance((rc - rp) * 0.5f + rp, l); - const float rnd = distance((rc - rn) * 0.5f + rn, l); - const float rcd = distance(rc, l); - return min(min(rpd, rnd), rcd); -} - - -template<int COR_STEPS, int FUNCTION> -__global__ void corresponding_point_kernel( - TextureObject<float> d1, - TextureObject<float> d2, - TextureObject<uchar4> c1, - TextureObject<uchar4> c2, - TextureObject<short2> screenOut, - TextureObject<float> conf, - TextureObject<uint8_t> mask, - float4x4 pose, - Camera cam1, - Camera cam2, ftl::cuda::MvMLSParams params) { - - //const int tid = (threadIdx.x + threadIdx.y * blockDim.x); - const int x = (blockIdx.x*8 + (threadIdx.x%4) + 4*(threadIdx.x / 16)); // / WARP_SIZE; - const int y = blockIdx.y*8 + threadIdx.x/4 + 4*threadIdx.y; - - - if (x >= 0 && y >=0 && x < screenOut.width() && y < screenOut.height()) { - screenOut(x,y) = make_short2(-1,-1); - - //const float3 world1 = make_float3(p1.tex2D(x, y)); - const float depth1 = d1.tex2D(x,y); //(pose1_inv * world1).z; // Initial starting depth - if (depth1 < cam1.minDepth || depth1 > cam1.maxDepth) return; - - // TODO: Temporary hack to ensure depth1 is present - //const float4 temp = vout.tex2D(x,y); - //vout(x,y) = make_float4(depth1, 0.0f, temp.z, temp.w); - - //const float3 world1 = pose1 * cam1.screenToCam(x,y,depth1); - - const auto colour1 = c1.tex2D((float)x+0.5f, (float)y+0.5f); - - //float bestdepth = 0.0f; - short2 bestScreen = make_short2(-1,-1); - //float bestdepth = 0.0f; - //float bestdepth2 = 0.0f; - float bestweight = 0.0f; - float bestcolour = 0.0f; - //float bestdweight = 0.0f; - float totalcolour = 0.0f; - //int count = 0; - //float contrib = 0.0f; - - const float3 camPosOrigin = pose * cam1.screenToCam(x,y,depth1); - const float2 lineOrigin = cam2.camToScreen<float2>(camPosOrigin); - const float3 camPosDistant = pose * cam1.screenToCam(x,y,depth1 + 10.0f); - const float2 lineDistant = cam2.camToScreen<float2>(camPosDistant); - const float lineM = (lineDistant.y - lineOrigin.y) / (lineDistant.x - lineOrigin.x); - const float depthM = 10.0f / (lineDistant.x - lineOrigin.x); - const float depthM2 = (camPosDistant.z - camPosOrigin.z) / (lineDistant.x - lineOrigin.x); - float2 linePos; - linePos.x = lineOrigin.x - ((COR_STEPS/2)); - linePos.y = lineOrigin.y - (((COR_STEPS/2)) * lineM); - //float depthPos = depth1 - (float((COR_STEPS/2)) * depthM); - float depthPos2 = camPosOrigin.z - (float((COR_STEPS/2)) * depthM2); - //const float depthCoef = cam1.baseline*cam1.fx; - - const float depthCoef = (1.0f / cam1.fx) * 10.0f; - - uint badMask = 0; - int bestStep = COR_STEPS/2; - - - // Project to p2 using cam2 - // Each thread takes a possible correspondence and calculates a weighting - //const int lane = tid % WARP_SIZE; - for (int i=0; i<COR_STEPS; ++i) { - //float weight = 1.0f; //(linePos.x >= cam2.width || linePos.y >= cam2.height) ? 0.0f : 1.0f; - - // Generate a colour correspondence value - const auto colour2 = c2.tex2D(linePos.x, linePos.y); - - // TODO: Check if other colour dissimilarities are better... - const float cweight = ftl::cuda::colourWeighting(colour1, colour2, params.colour_smooth); - - // Generate a depth correspondence value - const float depth2 = d2.tex2D(int(linePos.x+0.5f), int(linePos.y+0.5f)); - - // Record which correspondences are invalid - badMask |= ( - depth2 <= cam2.minDepth || - depth2 >= cam2.maxDepth || - linePos.x < 0.5f || - linePos.y < 0.5f || - linePos.x >= d2.width()-0.5f || - linePos.y >= d2.height()-0.5f - ) ? 1 << i : 0; - - //if (FUNCTION == 1) { - // TODO: Spatial smooth must be disparity discon threshold of largest depth in original camera space. - // ie. if depth1 > depth2 then depth1 has the largest error potential and the resolution at that - // depth is used to determine the spatial smoothing amount. - - const float maxdepth = max(depth1, depth2); - const float smooth = depthCoef * maxdepth; - float weight = ftl::cuda::weighting(fabs(depth2 - depthPos2), cweight*smooth); - //weight = ftl::cuda::halfWarpSum(weight); - //} else { - // const float dweight = ftl::cuda::weighting(fabs(depth2 - depthPos2), params.spatial_smooth); - // weight *= weightFunction<FUNCTION>(params, dweight, cweight); - //} - //const float dweight = ftl::cuda::weighting(fabs(depth_adjust), 10.0f*params.range); - - //weight *= weightFunction<FUNCTION>(params, dweight, cweight); - - //++count; - - bestcolour = max(cweight, bestcolour); - totalcolour += cweight; - - //bestdepth = (weight > bestweight) ? depthPos : bestdepth; - bestStep = (weight > bestweight) ? i : bestStep; - - bestweight = max(bestweight, weight); - - - //depthPos += depthM; - depthPos2 += depthM2; - linePos.x += 1.0f; - linePos.y += lineM; - } - - //const float avgcolour = totalcolour/(float)count; - const float confidence = ((bestcolour / totalcolour) - (1.0f / 16.0f)) * (1.0f + (1.0f/16.0f)); - float bestadjust = float(bestStep-(COR_STEPS/2))*depthM; - - // Detect matches to boundaries, and discard those - uint stepMask = 1 << bestStep; - if ((stepMask & badMask) || (stepMask & (badMask << 1)) || (stepMask & (badMask >> 1))) bestweight = 0.0f; - - //bestadjust = halfWarpBest(bestadjust, (bestweight > 0.0f) ? confidence : 0.0f); - - //Mask m(mask.tex2D(x,y)); - - //if (bestweight > 0.0f) { - float old = conf.tex2D(x,y); - - if (bestweight > 0.0f) { - d1(x,y) = (0.4f*bestadjust) + depth1; - //d2(bestScreen.x, bestScreen.y) = bestdepth2; - //screenOut(x,y) = bestScreen; - conf(x,y) = max(old,confidence); //bestweight * confidence; - //conf(x,y) = max(old,fabs(bestadjust)); - } - //} - - // If a good enough match is found, mark dodgy depth as solid - //if ((m.isFilled() || m.isDiscontinuity()) && (bestweight > params.match_threshold)) mask(x,y) = 0; - } -} - -void ftl::cuda::correspondence( - TextureObject<float> &d1, - TextureObject<float> &d2, - TextureObject<uchar4> &c1, - TextureObject<uchar4> &c2, - TextureObject<short2> &screen, - TextureObject<float> &conf, - TextureObject<uint8_t> &mask, - float4x4 &pose2, - const Camera &cam1, - const Camera &cam2, const MvMLSParams ¶ms, int func, - cudaStream_t stream) { - - //const dim3 gridSize((d1.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (d1.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); - //const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); - - const dim3 gridSize((d1.width() + 1), (d1.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); - const dim3 blockSize(WARP_SIZE, 2); - - //printf("COR SIZE %d,%d\n", p1.width(), p1.height()); - - /*switch (func) { - case 0: corresponding_point_kernel<16,0><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose2, cam1, cam2, params); break; - case 1: corresponding_point_kernel<16,1><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose2, cam1, cam2, params); break; - case 2: corresponding_point_kernel<16,2><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose2, cam1, cam2, params); break; - case 3: corresponding_point_kernel<16,3><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose2, cam1, cam2, params); break; - case 4: corresponding_point_kernel<16,4><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose2, cam1, cam2, params); break; - case 5: corresponding_point_kernel<16,5><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose2, cam1, cam2, params); break; - }*/ - - switch (func) { - case 32: corresponding_point_kernel<32,1><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose2, cam1, cam2, params); break; - case 16: corresponding_point_kernel<16,1><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose2, cam1, cam2, params); break; - case 8: corresponding_point_kernel<8,1><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose2, cam1, cam2, params); break; - case 4: corresponding_point_kernel<4,1><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose2, cam1, cam2, params); break; - case 2: corresponding_point_kernel<2,1><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose2, cam1, cam2, params); break; - } - - cudaSafeCall( cudaGetLastError() ); -} - -// ==== Remove zero-confidence ================================================= - -__global__ void zero_confidence_kernel( - TextureObject<float> conf, - TextureObject<float> depth) { - - const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; - const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; - - if (x < depth.width() && y < depth.height()) { - const float c = conf.tex2D((int)x,(int)y); - - if (c == 0.0f) { - depth(x,y) = 1000.0f; - } - } -} - -void ftl::cuda::zero_confidence(TextureObject<float> &conf, TextureObject<float> &depth, cudaStream_t stream) { - const dim3 gridSize((depth.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); - const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); - - zero_confidence_kernel<<<gridSize, blockSize, 0, stream>>>(conf, depth); - cudaSafeCall( cudaGetLastError() ); -} - - -// ==== MultiViewMLS Aggregate ================================================= - -__device__ inline short3 getScreenPos(int x, int y, float d, const Camera &cam1, const Camera &cam2, const float4x4 &transform) { - const float3 campos = transform * cam1.screenToCam(x,y,d); - const int2 screen = cam2.camToScreen<int2>(campos); - return make_short3(screen.x, screen.y, campos.z); -} - -__device__ inline short2 packScreen(int x, int y, int id) { - return make_short2((id << 12) + x, y); -} - -__device__ inline short2 packScreen(const short3 &p, int id) { - return make_short2((id << 12) + p.x, p.y); -} - -__device__ inline int supportSize(uchar4 support) { - return (support.x+support.y) * (support.z+support.w); -} - -__device__ inline short2 choosePoint(uchar4 sup1, uchar4 sup2, float dot1, float dot2, short2 screen1, short2 screen2) { - //return (float(supportSize(sup2))*dot1 > float(supportSize(sup1))*dot2) ? screen2 : screen1; - return (dot1 > dot2) ? screen2 : screen1; -} - -__device__ inline int unpackCameraID(short2 p) { - return p.x >> 12; -} - -/** - * Identify which source has the best support region for a given pixel. - */ -__global__ void best_sources_kernel( - TextureObject<half4> normals1, - TextureObject<half4> normals2, - TextureObject<uchar4> support1, - TextureObject<uchar4> support2, - TextureObject<float> depth1, - TextureObject<float> depth2, - TextureObject<short2> screen, - float4x4 transform, - //float3x3 transformR, - ftl::rgbd::Camera cam1, - ftl::rgbd::Camera cam2, - int id1, - int id2) { - - const int x = (blockIdx.x*blockDim.x + threadIdx.x); - const int y = blockIdx.y*blockDim.y + threadIdx.y; - - if (x >= 0 && y >= 0 && x < screen.width() && y < screen.height()) { - const float d1 = depth1.tex2D(x,y); - - const short3 scr2 = getScreenPos(x, y, d1, cam1, cam2, transform); - short2 bestPoint = packScreen(x,y,0); - - if (scr2.x >= 0 && scr2.y >= 0 && scr2.x < cam2.width && scr2.y < cam2.height) { - uchar4 sup1 = support1.tex2D(x,y); - uchar4 sup2 = support2.tex2D(scr2.x,scr2.y); - const float d2 = depth2.tex2D(scr2.x,scr2.y); - float3 n1 = transform.getFloat3x3() * make_float3(normals1.tex2D(x,y)); - float3 n2 = make_float3(normals2.tex2D(scr2.x,scr2.y)); - - float3 camray = cam2.screenToCam(scr2.x,scr2.y,1.0f); - camray /= length(camray); - const float dot1 = dot(camray, n1); - const float dot2 = dot(camray, n2); - - bestPoint = (fabs(scr2.z - d2) < 0.04f) ? choosePoint(sup1, sup2, dot1, dot2, packScreen(x,y,id1), packScreen(scr2,id2)) : packScreen(x,y,6); - //bestPoint = choosePoint(sup1, sup2, dot1, dot2, packScreen(x,y,id1), packScreen(scr2,id2)); - //bestPoint = (d1 < d2) ? packScreen(x,y,id1) : packScreen(x,y,id2); - - bestPoint = (fabs(scr2.z - d2) < 0.04f) ? packScreen(scr2,id2) : packScreen(scr2,id1); - } - - screen(x,y) = bestPoint; - - /*if (s.x >= 0 && s.y >= 0) { - auto norm1 = make_float3(n1.tex2D(x,y)); - const auto norm2 = make_float3(n2.tex2D(s.x,s.y)); - //n2(s.x,s.y) = norm1; - - float3 cent1 = make_float3(c1.tex2D(x,y)); - const auto cent2 = make_float3(c2.tex2D(s.x,s.y)); - - if (cent2.x+cent2.y+cent2.z > 0.0f && norm2.x+norm2.y+norm2.z > 0.0f) { - norm1 += poseInv1.getFloat3x3() * (pose2.getFloat3x3() * norm2); - n1(x,y) = make_float4(norm1, 0.0f); - cent1 += poseInv1 * (pose2 * cent2); // FIXME: Transform between camera spaces - cent1 /= 2.0f; - c1(x,y) = make_float4(cent1, 0.0f); - //c2(s.x,s.y) = cent1; - - //contribs1(x,y) = contribs1.tex2D(x,y) + 1.0f; - } - // contribs2(s.x,s.y) = contribs2.tex2D(s.x,s.y) + 1.0f; - }*/ - } -} - -void ftl::cuda::best_sources( - ftl::cuda::TextureObject<half4> &normals1, - ftl::cuda::TextureObject<half4> &normals2, - ftl::cuda::TextureObject<uchar4> &support1, - ftl::cuda::TextureObject<uchar4> &support2, - ftl::cuda::TextureObject<float> &depth1, - ftl::cuda::TextureObject<float> &depth2, - ftl::cuda::TextureObject<short2> &screen, - const float4x4 &transform, - const ftl::rgbd::Camera &cam1, - const ftl::rgbd::Camera &cam2, - int id1, - int id2, - cudaStream_t stream) { - - const dim3 gridSize((screen.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (screen.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); - const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); - - best_sources_kernel<<<gridSize, blockSize, 0, stream>>>(normals1, normals2, support1, support2, depth1, depth2, screen, transform, cam1, cam2, id1, id2); - cudaSafeCall( cudaGetLastError() ); -} - -/** - * Identify which source has the best support region for a given pixel. - */ - __global__ void aggregate_sources_kernel( - TextureObject<half4> n1, - TextureObject<half4> n2, - TextureObject<float4> c1, - TextureObject<float4> c2, - TextureObject<float> depth1, - //TextureObject<float> depth2, - //TextureObject<short2> screen, - float4x4 transform, - //float3x3 transformR, - ftl::rgbd::Camera cam1, - ftl::rgbd::Camera cam2) { - - const int x = (blockIdx.x*blockDim.x + threadIdx.x); - const int y = blockIdx.y*blockDim.y + threadIdx.y; - - if (x >= 0 && y >= 0 && x < n1.width() && y < n1.height()) { - const float d1 = depth1.tex2D(x,y); - - if (d1 > cam1.minDepth && d1 < cam1.maxDepth) { - //const short3 s = getScreenPos(x, y, d1, cam1, cam2, transform); - const float3 camPos = transform * cam1.screenToCam(x, y, d1); - const int2 s = cam2.camToScreen<int2>(camPos); - - if (s.x >= 0 && s.y >= 0 && s.x < n2.width() && s.y < n2.height()) { - auto norm1 = make_float3(n1.tex2D(x,y)); - const auto norm2 = make_float3(n2.tex2D(s.x,s.y)); - //n2(s.x,s.y) = norm1; - - float3 cent1 = make_float3(c1.tex2D(x,y)); - const auto cent2 = transform.getInverse() * make_float3(c2.tex2D(s.x,s.y)); - - //printf("MERGING %f\n", length(cent2-cent1)); - - if (cent2.x+cent2.y+cent2.z > 0.0f && norm2.x+norm2.y+norm2.z > 0.0f && length(cent2-cent1) < 0.04f) { - norm1 += norm2; - norm1 /= 2.0f; - n1(x,y) = make_half4(norm1, 0.0f); - cent1 += cent2; - cent1 /= 2.0f; - c1(x,y) = make_float4(cent1, 0.0f); - //c2(s.x,s.y) = cent1; - - //contribs1(x,y) = contribs1.tex2D(x,y) + 1.0f; - } - // contribs2(s.x,s.y) = contribs2.tex2D(s.x,s.y) + 1.0f; - } - } - } -} - -void ftl::cuda::aggregate_sources( - ftl::cuda::TextureObject<half4> &n1, - ftl::cuda::TextureObject<half4> &n2, - ftl::cuda::TextureObject<float4> &c1, - ftl::cuda::TextureObject<float4> &c2, - ftl::cuda::TextureObject<float> &depth1, - //ftl::cuda::TextureObject<float> &depth2, - //ftl::cuda::TextureObject<short2> &screen, - const float4x4 &transform, - const ftl::rgbd::Camera &cam1, - const ftl::rgbd::Camera &cam2, - cudaStream_t stream) { - - const dim3 gridSize((n1.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (n1.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); - const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); - - aggregate_sources_kernel<<<gridSize, blockSize, 0, stream>>>(n1, n2, c1, c2, depth1, transform, cam1, cam2); - cudaSafeCall( cudaGetLastError() ); -} - -__device__ static uchar4 HSVtoRGB(int H, float S, float V) { - const float C = S * V; - const float X = C * (1 - fabs(fmodf(H / 60.0f, 2) - 1)); - const float m = V - C; - float Rs, Gs, Bs; - - if(H >= 0 && H < 60) { - Rs = C; - Gs = X; - Bs = 0; - } - else if(H >= 60 && H < 120) { - Rs = X; - Gs = C; - Bs = 0; - } - else if(H >= 120 && H < 180) { - Rs = 0; - Gs = C; - Bs = X; - } - else if(H >= 180 && H < 240) { - Rs = 0; - Gs = X; - Bs = C; - } - else if(H >= 240 && H < 300) { - Rs = X; - Gs = 0; - Bs = C; - } - else { - Rs = C; - Gs = 0; - Bs = X; - } - - return make_uchar4((Bs + m) * 255, (Gs + m) * 255, (Rs + m) * 255, 0); -} - -/** - * Render each pixel is a colour corresponding to the source camera with the - * best support window. - */ - __global__ void vis_best_sources_kernel( - TextureObject<short2> screen, - TextureObject<uchar4> colour, - int myid, - int count) { - - const int x = (blockIdx.x*blockDim.x + threadIdx.x); - const int y = blockIdx.y*blockDim.y + threadIdx.y; - - if (x >= 0 && y >= 0 && x < colour.width() && y < colour.height()) { - short2 s = screen.tex2D(x,y); - int id = unpackCameraID(s); - - uchar4 c = HSVtoRGB((360 / count) * id, 0.6f, 0.85f); - if (myid != id) colour(x,y) = c; - //colour(x,y) = c; - } -} - -void ftl::cuda::vis_best_sources( - ftl::cuda::TextureObject<short2> &screen, - ftl::cuda::TextureObject<uchar4> &colour, - int myid, - int count, - cudaStream_t stream) { - - const dim3 gridSize((colour.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (colour.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); - const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); - - vis_best_sources_kernel<<<gridSize, blockSize, 0, stream>>>(screen, colour, myid, count); - cudaSafeCall( cudaGetLastError() ); -} - -/*void ftl::cuda::aggregate_sources( - ftl::cuda::TextureObject<float4> &n1, - ftl::cuda::TextureObject<float4> &n2, - ftl::cuda::TextureObject<float4> &c1, - ftl::cuda::TextureObject<float4> &c2, - ftl::cuda::TextureObject<float> &contribs1, - ftl::cuda::TextureObject<float> &contribs2, - ftl::cuda::TextureObject<short2> &screen, - const float4x4 &poseInv1, - const float4x4 &pose2, - cudaStream_t stream) { - - const dim3 gridSize((screen.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (screen.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); - const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); - - aggregate_sources_kernel<<<gridSize, blockSize, 0, stream>>>(n1, n2, c1, c2, contribs1, contribs2, screen, poseInv1, pose2); - cudaSafeCall( cudaGetLastError() ); -}*/ - -// ==== Normalise aggregations ================================================= - -__global__ void normalise_aggregations_kernel( - TextureObject<half4> norms, - TextureObject<float4> cents, - TextureObject<float> contribs) { - - const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; - const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; - - if (x < norms.width() && y < norms.height()) { - const float contrib = contribs.tex2D((int)x,(int)y); - - const auto a = make_float3(norms.tex2D((int)x,(int)y)); - const auto b = cents.tex2D(x,y); - //const float4 normal = normals.tex2D((int)x,(int)y); - - //out(x,y) = (contrib == 0.0f) ? make<B>(a) : make<B>(a / contrib); - - if (contrib > 0.0f) { - norms(x,y) = make_half4(a / (contrib+1.0f), 1.0f); - cents(x,y) = b / (contrib+1.0f); - } - } -} - -void ftl::cuda::normalise_aggregations(TextureObject<half4> &norms, TextureObject<float4> ¢s, TextureObject<float> &contribs, cudaStream_t stream) { - const dim3 gridSize((norms.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (norms.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); - const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); - - normalise_aggregations_kernel<<<gridSize, blockSize, 0, stream>>>(norms, cents, contribs); - cudaSafeCall( cudaGetLastError() ); -} - diff --git a/components/operators/src/disparity/disp2depth.cu b/components/operators/src/disparity/disp2depth.cu index a2c60def9e436f7cdb007fb15e4275fdcdb9e620..349920dd2d21f8a330c8f2de7ce6aea5e9672185 100644 --- a/components/operators/src/disparity/disp2depth.cu +++ b/components/operators/src/disparity/disp2depth.cu @@ -8,7 +8,7 @@ __global__ void d2d_kernel(cv::cuda::PtrStepSz<float> disp, cv::cuda::PtrStepSz< for (STRIDE_Y(v,disp.rows)) { for (STRIDE_X(u,disp.cols)) { float d = disp(v,u); - depth(v,u) = (d == 0) ? 50.0f : ((cam.baseline*cam.fx) / (d - cam.doffs)); + depth(v,u) = (d == 0) ? 0.0f : ((cam.baseline*cam.fx) / (d - cam.doffs)); } } } diff --git a/components/operators/src/fusion/correspondence.cu b/components/operators/src/fusion/correspondence.cu new file mode 100644 index 0000000000000000000000000000000000000000..2df21a24daa6f04366152837367593c34183ead4 --- /dev/null +++ b/components/operators/src/fusion/correspondence.cu @@ -0,0 +1,276 @@ +#include "mvmls_cuda.hpp" +#include <ftl/cuda/weighting.hpp> +#include <ftl/operators/mask_cuda.hpp> +#include <ftl/cuda/warp.hpp> + +using ftl::cuda::TextureObject; +using ftl::rgbd::Camera; +using ftl::cuda::Mask; +using ftl::cuda::MvMLSParams; + +#define T_PER_BLOCK 8 +#define WARP_SIZE 32 +#define INTERVAL 1 + +#include "correspondence_common.hpp" + +template<int FUNCTION> +__device__ float weightFunction(const ftl::cuda::MvMLSParams ¶ms, float dweight, float cweight); + +/*template <> +__device__ inline float weightFunction<0>(const ftl::cuda::MvMLSParams ¶ms, float dweight, float cweight) { + return (params.cost_ratio * (cweight) + (1.0f - params.cost_ratio) * dweight); +} + +template <> +__device__ inline float weightFunction<1>(const ftl::cuda::MvMLSParams ¶m, float dweight, float cweight) { + return (cweight * cweight * dweight); +} + +template <> +__device__ inline float weightFunction<2>(const ftl::cuda::MvMLSParams ¶m, float dweight, float cweight) { + return (dweight * dweight * cweight); +} + +template <> +__device__ inline float weightFunction<3>(const ftl::cuda::MvMLSParams ¶ms, float dweight, float cweight) { + return (dweight == 0.0f) ? 0.0f : (params.cost_ratio * (cweight) + (1.0f - params.cost_ratio) * dweight); +} + +template <> +__device__ inline float weightFunction<4>(const ftl::cuda::MvMLSParams ¶ms, float dweight, float cweight) { + return cweight; +} + +template <> +__device__ inline float weightFunction<5>(const ftl::cuda::MvMLSParams ¶ms, float dweight, float cweight) { + return (cweight > 0.0f) ? dweight : 0.0f; +}*/ + +__device__ inline float distance(float4 p1, float4 p2) { + return min(1.0f, max(max(fabsf(p1.x - p2.x),fabsf(p1.y - p2.y)), fabsf(p1.z - p2.z))/ 10.0f); + //return min(1.0f, ftl::cuda::colourDistance(p1, p2) / 10.0f); +} + +__device__ inline int halfWarpCensus(float e) { + float e0 = __shfl_sync(FULL_MASK, e, (threadIdx.x >= 16) ? 16 : 0, WARP_SIZE); + int c = (e > e0) ? 1 << (threadIdx.x % 16) : 0; + for (int i = WARP_SIZE/4; i > 0; i /= 2) { + const int other = __shfl_xor_sync(FULL_MASK, c, i, WARP_SIZE); + c |= other; + } + return c; +} + +__device__ inline float halfWarpBest(float e, float c) { + for (int i = WARP_SIZE/4; i > 0; i /= 2) { + const float o1 = __shfl_xor_sync(FULL_MASK, e, i, WARP_SIZE); + const float o2 = __shfl_xor_sync(FULL_MASK, c, i, WARP_SIZE); + e = (o2 > c) ? o1 : e; + } + return e; +} + +__device__ inline float4 relativeDelta(const float4 &e) { + const float e0x = __shfl_sync(FULL_MASK, e.x, 0, WARP_SIZE/2); + const float e0y = __shfl_sync(FULL_MASK, e.y, 0, WARP_SIZE/2); + const float e0z = __shfl_sync(FULL_MASK, e.z, 0, WARP_SIZE/2); + return make_float4(e.x-e0x, e.y-e0y, e.z-e0z, 0.0f); +} + +/** + * See: Birchfield S. et al. (1998). A pixel dissimilarity measure that is + * insensitive to image sampling. IEEE Transactions on Pattern Analysis and + * Machine Intelligence. + */ +__device__ float dissimilarity(const float4 &l, const float4 &rp, const float4 &rc, const float4 &rn) { + const float rpd = distance((rc - rp) * 0.5f + rp, l); + const float rnd = distance((rc - rn) * 0.5f + rn, l); + const float rcd = distance(rc, l); + return min(min(rpd, rnd), rcd); +} + + + +template<int COR_STEPS, int FUNCTION> +__global__ void corresponding_point_kernel( + TextureObject<float> d1, + TextureObject<float> d2, + TextureObject<uchar4> c1, + TextureObject<uchar4> c2, + TextureObject<short2> screenOut, + TextureObject<float> conf, + TextureObject<uint8_t> mask, + float4x4 pose, + Camera cam1, + Camera cam2, ftl::cuda::MvMLSParams params) { + + //const int tid = (threadIdx.x + threadIdx.y * blockDim.x); + const int x = (blockIdx.x*8 + (threadIdx.x%4) + 4*(threadIdx.x / 16)); // / WARP_SIZE; + const int y = blockIdx.y*8 + threadIdx.x/4 + 4*threadIdx.y; + + + if (x >= 0 && y >=0 && x < screenOut.width() && y < screenOut.height()) { + screenOut(x,y) = make_short2(-1,-1); + + //const float3 world1 = make_float3(p1.tex2D(x, y)); + const float depth1 = d1.tex2D(x,y); //(pose1_inv * world1).z; // Initial starting depth + if (depth1 < cam1.minDepth || depth1 > cam1.maxDepth) return; + + // TODO: Temporary hack to ensure depth1 is present + //const float4 temp = vout.tex2D(x,y); + //vout(x,y) = make_float4(depth1, 0.0f, temp.z, temp.w); + + //const float3 world1 = pose1 * cam1.screenToCam(x,y,depth1); + + const auto colour1 = c1.tex2D((float)x+0.5f, (float)y+0.5f); + + //float bestdepth = 0.0f; + short2 bestScreen = make_short2(-1,-1); + //float bestdepth = 0.0f; + //float bestdepth2 = 0.0f; + float bestweight = 0.0f; + float bestcolour = 0.0f; + //float bestdweight = 0.0f; + float totalcolour = 0.0f; + //int count = 0; + //float contrib = 0.0f; + + const float3 camPosOrigin = pose * cam1.screenToCam(x,y,depth1); + const float2 lineOrigin = cam2.camToScreen<float2>(camPosOrigin); + const float3 camPosDistant = pose * cam1.screenToCam(x,y,depth1 + 10.0f); + const float2 lineDistant = cam2.camToScreen<float2>(camPosDistant); + const float lineM = (lineDistant.y - lineOrigin.y) / (lineDistant.x - lineOrigin.x); + const float depthM = 10.0f / (lineDistant.x - lineOrigin.x); + const float depthM2 = (camPosDistant.z - camPosOrigin.z) / (lineDistant.x - lineOrigin.x); + float2 linePos; + linePos.x = lineOrigin.x - ((COR_STEPS/2)); + linePos.y = lineOrigin.y - (((COR_STEPS/2)) * lineM); + //float depthPos = depth1 - (float((COR_STEPS/2)) * depthM); + float depthPos2 = camPosOrigin.z - (float((COR_STEPS/2)) * depthM2); + //const float depthCoef = cam1.baseline*cam1.fx; + + const float depthCoef = (1.0f / cam1.fx) * 10.0f; + + uint badMask = 0; + int bestStep = COR_STEPS/2; + + + // Project to p2 using cam2 + // Each thread takes a possible correspondence and calculates a weighting + //const int lane = tid % WARP_SIZE; + for (int i=0; i<COR_STEPS; ++i) { + //float weight = 1.0f; //(linePos.x >= cam2.width || linePos.y >= cam2.height) ? 0.0f : 1.0f; + + // Generate a colour correspondence value + const auto colour2 = c2.tex2D(linePos.x, linePos.y); + + // TODO: Check if other colour dissimilarities are better... + const float cweight = ftl::cuda::colourWeighting(colour1, colour2, params.colour_smooth); + + // Generate a depth correspondence value + const float depth2 = d2.tex2D(int(linePos.x+0.5f), int(linePos.y+0.5f)); + + // Record which correspondences are invalid + badMask |= ( + depth2 <= cam2.minDepth || + depth2 >= cam2.maxDepth || + linePos.x < 0.5f || + linePos.y < 0.5f || + linePos.x >= d2.width()-0.5f || + linePos.y >= d2.height()-0.5f + ) ? 1 << i : 0; + + //if (FUNCTION == 1) { + // TODO: Spatial smooth must be disparity discon threshold of largest depth in original camera space. + // ie. if depth1 > depth2 then depth1 has the largest error potential and the resolution at that + // depth is used to determine the spatial smoothing amount. + + const float maxdepth = max(depth1, depth2); + const float smooth = depthCoef * maxdepth; + float weight = ftl::cuda::weighting(fabs(depth2 - depthPos2), cweight*smooth); + //weight = ftl::cuda::halfWarpSum(weight); + //} else { + // const float dweight = ftl::cuda::weighting(fabs(depth2 - depthPos2), params.spatial_smooth); + // weight *= weightFunction<FUNCTION>(params, dweight, cweight); + //} + //const float dweight = ftl::cuda::weighting(fabs(depth_adjust), 10.0f*params.range); + + //weight *= weightFunction<FUNCTION>(params, dweight, cweight); + + //++count; + + bestcolour = max(cweight, bestcolour); + totalcolour += cweight; + + //bestdepth = (weight > bestweight) ? depthPos : bestdepth; + bestStep = (weight > bestweight) ? i : bestStep; + + bestweight = max(bestweight, weight); + + + //depthPos += depthM; + depthPos2 += depthM2; + linePos.x += 1.0f; + linePos.y += lineM; + } + + //const float avgcolour = totalcolour/(float)count; + const float confidence = ((bestcolour / totalcolour) - (1.0f / 16.0f)) * (1.0f + (1.0f/16.0f)); + float bestadjust = float(bestStep-(COR_STEPS/2))*depthM; + + // Detect matches to boundaries, and discard those + uint stepMask = 1 << bestStep; + if ((stepMask & badMask) || (stepMask & (badMask << 1)) || (stepMask & (badMask >> 1))) bestweight = 0.0f; + + //bestadjust = halfWarpBest(bestadjust, (bestweight > 0.0f) ? confidence : 0.0f); + + //Mask m(mask.tex2D(x,y)); + + //if (bestweight > 0.0f) { + float old = conf.tex2D(x,y); + + if (bestweight > 0.0f) { + d1(x,y) = (0.4f*bestadjust) + depth1; + //d2(bestScreen.x, bestScreen.y) = bestdepth2; + //screenOut(x,y) = bestScreen; + conf(x,y) = max(old,confidence); //bestweight * confidence; + //conf(x,y) = max(old,fabs(bestadjust)); + } + //} + + // If a good enough match is found, mark dodgy depth as solid + //if ((m.isFilled() || m.isDiscontinuity()) && (bestweight > params.match_threshold)) mask(x,y) = 0; + } +} + +void ftl::cuda::correspondence( + TextureObject<float> &d1, + TextureObject<float> &d2, + TextureObject<uchar4> &c1, + TextureObject<uchar4> &c2, + TextureObject<short2> &screen, + TextureObject<float> &conf, + TextureObject<uint8_t> &mask, + float4x4 &pose2, + const Camera &cam1, + const Camera &cam2, const MvMLSParams ¶ms, int func, + cudaStream_t stream) { + + //const dim3 gridSize((d1.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (d1.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + //const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + const dim3 gridSize((d1.width() + 1), (d1.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(WARP_SIZE, 2); + + + switch (func) { + case 32: corresponding_point_kernel<32,1><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose2, cam1, cam2, params); break; + case 16: corresponding_point_kernel<16,1><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose2, cam1, cam2, params); break; + case 8: corresponding_point_kernel<8,1><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose2, cam1, cam2, params); break; + case 4: corresponding_point_kernel<4,1><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose2, cam1, cam2, params); break; + case 2: corresponding_point_kernel<2,1><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose2, cam1, cam2, params); break; + } + + cudaSafeCall( cudaGetLastError() ); +} diff --git a/components/operators/src/fusion/correspondence_common.hpp b/components/operators/src/fusion/correspondence_common.hpp new file mode 100644 index 0000000000000000000000000000000000000000..6b9405f474bf7ed00fc38d7e3632b826922c4069 --- /dev/null +++ b/components/operators/src/fusion/correspondence_common.hpp @@ -0,0 +1,23 @@ +#ifndef _FTL_OPERATORS_MVMLS_CORCOMMON_HPP_ +#define _FTL_OPERATORS_MVMLS_CORCOMMON_HPP_ + +#ifndef PINF +#define PINF __int_as_float(0x7f800000) +#endif + +/** + * If the corresponding point has no depth or is off screen then it is bad. + */ +__device__ inline bool isBadCor(float depth, const float2 &pos, const ftl::rgbd::Camera &cam) { + return + depth <= cam.minDepth || + depth >= cam.maxDepth || + pos.x < 0.5f || + pos.y < 0.5f || + pos.x >= cam.width-0.5f || + pos.y >= cam.height-0.5f; +} + +__device__ inline float square(float v) { return v*v; } + +#endif diff --git a/components/operators/src/fusion/correspondence_depth.cu b/components/operators/src/fusion/correspondence_depth.cu new file mode 100644 index 0000000000000000000000000000000000000000..5331526175861b3280d121da2daf2532faa73aee --- /dev/null +++ b/components/operators/src/fusion/correspondence_depth.cu @@ -0,0 +1,146 @@ +#include "mvmls_cuda.hpp" +#include <ftl/cuda/weighting.hpp> +#include <ftl/operators/mask_cuda.hpp> +#include <ftl/cuda/warp.hpp> + +using ftl::cuda::TextureObject; +using ftl::rgbd::Camera; +using ftl::cuda::Mask; +using ftl::cuda::MvMLSParams; + +#define T_PER_BLOCK 8 +#define WARP_SIZE 32 + +#include "correspondence_common.hpp" + +template<int COR_STEPS> +__global__ void corresponding_depth_kernel( + TextureObject<float> d1, + TextureObject<float> d2, + TextureObject<short2> screenOut, + TextureObject<float> conf, + TextureObject<uint8_t> mask, + float4x4 pose, + Camera cam1, + Camera cam2, ftl::cuda::MvMLSParams params) { + + //const int tid = (threadIdx.x + threadIdx.y * blockDim.x); + const int x = (blockIdx.x*8 + (threadIdx.x%4) + 4*(threadIdx.x / 16)); // / WARP_SIZE; + const int y = blockIdx.y*8 + threadIdx.x/4 + 4*threadIdx.y; + + // FIXME: Don't return like this due to warp operations + if (x >= 0 && y >=0 && x < screenOut.width() && y < screenOut.height()) { + screenOut(x,y) = make_short2(-1,-1); + conf(x,y) = 0.0f; + + const float depth1 = d1.tex2D(x,y); + // TODO: If all depths within a half warp here are bad then can return + // early. + //if (__ballot_sync(FULL_MASK, depth1 > cam1.minDepth && depth1 < cam1.maxDepth) & ((threadIdx.x/16 == 1) ? 0xFFFF0000 : 0xFFFF) == 0) return; + + short2 bestScreen = make_short2(-1,-1); + float bestcost = PINF; + + const float3 camPosOrigin = pose * cam1.screenToCam(x,y,depth1); + const float2 lineOrigin = cam2.camToScreen<float2>(camPosOrigin); + const float3 camPosDistant = pose * cam1.screenToCam(x,y,depth1 + 0.4f); + const float2 lineDistant = cam2.camToScreen<float2>(camPosDistant); + const float lineM = (lineDistant.y - lineOrigin.y) / (lineDistant.x - lineOrigin.x); + const float depthM = 0.4f / (lineDistant.x - lineOrigin.x); + const float depthM2 = (camPosDistant.z - camPosOrigin.z) / (lineDistant.x - lineOrigin.x); + float2 linePos; + linePos.x = lineOrigin.x - float((COR_STEPS/2)); + linePos.y = lineOrigin.y - (float((COR_STEPS/2)) * lineM); + float depthPos2 = camPosOrigin.z - (float((COR_STEPS/2)) * depthM2); + + // Pre-calculate a depth resolution coeficient + const float depthCoef = (1.0f / (cam1.baseline *cam1.fx)) * params.spatial_smooth; + + uint badMask = 0; + int bestStep = COR_STEPS/2; + //float bestSource = 0.0f; + + // Scan from -COR_STEPS/2 to +COR_STEPS/2, where COR_STEPS is the number + // of pixels to check in the second camera for the current first camera + // pixel. + // TODO: Could consider sub pixel accuracy using linear interpolation + // between the depths of subsequent pixels to find a better minimum. + // This depends on GPU resource limitations for the calculation. + + #pragma unroll + for (int i=0; i<COR_STEPS; ++i) { + // Read the actual depth at current correspondence check point + const float depth2 = d2.tex2D(int(linePos.x+0.5f), int(linePos.y+0.5f)); + + // Record which correspondences are invalid + badMask |= (isBadCor(depth2, linePos, cam2)) ? 1 << i : 0; + + // Sum of square of normalised depth error from both cameras + // Normalised using depth resolution in respective camera, this + // should bias in favour of moving the lower res camera point + // Effectively this is a squared distance from ideal location. + float cost = square(min(1.0f, fabs(depth2-depthPos2) / (depthCoef * depth2 * depth2))); + cost += square(min(1.0f, (fabs(float(i-COR_STEPS/2))*depthM) / (depthCoef * depth1 * depth1))); + + // TODO: Perhaps allow per pixel variations but with additional cost + // the greater they are from the warp concensus best cost + + // Sum all squared distance errors in a 4x4 pixel neighborhood + cost = ftl::cuda::halfWarpSum(cost); + + // Record the best result + bestStep = (cost < bestcost) ? i : bestStep; + bestScreen = (cost < bestcost) ? make_short2(linePos.x+0.5f,linePos.y+0.5f) : bestScreen; + bestcost = min(bestcost, cost); + + + // Move to next correspondence check point + depthPos2 += depthM2; + linePos.x += 1.0f; + linePos.y += lineM; + } + + float bestadjust = float(bestStep-(COR_STEPS/2))*depthM; + + // Detect matches to boundaries, and discard those + uint stepMask = 1 << bestStep; + if ((stepMask & badMask) || (stepMask & (badMask << 1)) || (stepMask & (badMask >> 1))) bestcost = PINF; + + // Check that no boundaries were matched in 4x4 block + bestcost = ftl::cuda::halfWarpMax(bestcost); + + // If a match was found then record it + if (depth1 > cam1.minDepth && depth1 < cam1.maxDepth && bestcost < PINF) { + // Delay making the depth change until later. + conf(x,y) = bestadjust; + mask(x,y) = mask(x,y) | Mask::kMask_Correspondence; + screenOut(x,y) = bestScreen; + } + } +} + +void ftl::cuda::correspondence( + TextureObject<float> &d1, + TextureObject<float> &d2, + TextureObject<short2> &screen, + TextureObject<float> &conf, + TextureObject<uint8_t> &mask, + float4x4 &pose2, + const Camera &cam1, + const Camera &cam2, const MvMLSParams ¶ms, int func, + cudaStream_t stream) { + + // FIXME: Check the grid size makes sense + const dim3 gridSize((d1.width() + 1), (d1.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(WARP_SIZE, 2); + + switch (func) { + case 32: corresponding_depth_kernel<32><<<gridSize, blockSize, 0, stream>>>(d1, d2, screen, conf, mask, pose2, cam1, cam2, params); break; + case 16: corresponding_depth_kernel<16><<<gridSize, blockSize, 0, stream>>>(d1, d2, screen, conf, mask, pose2, cam1, cam2, params); break; + case 8: corresponding_depth_kernel<8><<<gridSize, blockSize, 0, stream>>>(d1, d2, screen, conf, mask, pose2, cam1, cam2, params); break; + case 4: corresponding_depth_kernel<4><<<gridSize, blockSize, 0, stream>>>(d1, d2, screen, conf, mask, pose2, cam1, cam2, params); break; + case 2: corresponding_depth_kernel<2><<<gridSize, blockSize, 0, stream>>>(d1, d2, screen, conf, mask, pose2, cam1, cam2, params); break; + } + + cudaSafeCall( cudaGetLastError() ); +} diff --git a/components/operators/src/fusion/correspondence_util.cu b/components/operators/src/fusion/correspondence_util.cu new file mode 100644 index 0000000000000000000000000000000000000000..ca1b8a82a81be469de9b8e87d20cc8260297578e --- /dev/null +++ b/components/operators/src/fusion/correspondence_util.cu @@ -0,0 +1,100 @@ +#include "mvmls_cuda.hpp" +#include <ftl/cuda/weighting.hpp> +#include <ftl/operators/mask_cuda.hpp> +#include <ftl/cuda/warp.hpp> + +using ftl::cuda::TextureObject; +using ftl::rgbd::Camera; +using ftl::cuda::Mask; +using ftl::cuda::MvMLSParams; + +#define T_PER_BLOCK 8 +#define WARP_SIZE 32 + +#include "correspondence_common.hpp" + +// ==== Remove zero-confidence ================================================= + +__global__ void zero_confidence_kernel( + TextureObject<float> conf, + TextureObject<float> depth) { + + const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; + const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x < depth.width() && y < depth.height()) { + const float c = conf.tex2D((int)x,(int)y); + + if (c == 0.0f) { + depth(x,y) = 1000.0f; + } + } +} + +void ftl::cuda::zero_confidence(TextureObject<float> &conf, TextureObject<float> &depth, cudaStream_t stream) { + const dim3 gridSize((depth.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + zero_confidence_kernel<<<gridSize, blockSize, 0, stream>>>(conf, depth); + cudaSafeCall( cudaGetLastError() ); +} + +// ==== Show correspondence errors ============================================= + +__global__ void show_cor_error_kernel( + TextureObject<uchar4> colour, + TextureObject<short2> screen1, + TextureObject<short2> screen2) { + + const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; + const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x < colour.width() && y < colour.height()) { + short2 s1 = screen1.tex2D(x,y); + + if (s1.x >= 0 && s1.x < screen2.width() && s1.y < screen2.height()) { + short2 s2 = screen2.tex2D(s1.x, s1.y); + + float l = sqrt(square(s2.x-x) + square(s2.y-y)); + float nl = min(1.0f, l/5.0f); + //colour(x,y) = (l < 1.0f) ? make_uchar4(0,255,0,0) : (s2.x < 0) ? make_uchar4(255.0f, 0.0f, 0.0f, 0.0f) : make_uchar4(0.0f, (1.0f-nl)*255.0f, nl*255.0f, 0.0f); + if (nl < 1.0f && s2.x >= 0) colour(x,y) = make_uchar4(0.0f, (1.0f-nl)*255.0f, nl*255.0f, 0.0f); + } + } +} + +void ftl::cuda::show_cor_error(TextureObject<uchar4> &colour, TextureObject<short2> &screen1, TextureObject<short2> &screen2, cudaStream_t stream) { + const dim3 gridSize((colour.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (colour.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + show_cor_error_kernel<<<gridSize, blockSize, 0, stream>>>(colour, screen1, screen2); + cudaSafeCall( cudaGetLastError() ); +} + + +// ==== Show depth adjustments ================================================= + +__global__ void show_depth_adjust_kernel( + TextureObject<uchar4> colour, + TextureObject<float> adjust) { + + const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; + const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x < colour.width() && y < colour.height()) { + float a = adjust.tex2D(x,y); + + if (a != 0.0f) { + float nc = min(1.0f, fabsf(a)/0.04f); + colour(x,y) = make_uchar4(0.0f, (1.0f-nc)*255.0f, nc*255.0f, 0.0f); + } + } +} + +void ftl::cuda::show_depth_adjustment(TextureObject<uchar4> &colour, TextureObject<float> &adjust, cudaStream_t stream) { + const dim3 gridSize((colour.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (colour.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + show_depth_adjust_kernel<<<gridSize, blockSize, 0, stream>>>(colour, adjust); + cudaSafeCall( cudaGetLastError() ); +} diff --git a/components/operators/src/fusion/mls_aggr.cu b/components/operators/src/fusion/mls_aggr.cu new file mode 100644 index 0000000000000000000000000000000000000000..9332117bc11f382f418151c5305f2d694cf3e7e0 --- /dev/null +++ b/components/operators/src/fusion/mls_aggr.cu @@ -0,0 +1,341 @@ +#include "mvmls_cuda.hpp" +#include <ftl/cuda/weighting.hpp> +#include <ftl/operators/mask_cuda.hpp> +#include <ftl/cuda/warp.hpp> + +using ftl::cuda::TextureObject; +using ftl::rgbd::Camera; +using ftl::cuda::Mask; +using ftl::cuda::MvMLSParams; + +#define T_PER_BLOCK 8 +#define WARP_SIZE 32 + +#include "correspondence_common.hpp" + + +// ==== MultiViewMLS Aggregate ================================================= + +__device__ inline short3 getScreenPos(int x, int y, float d, const Camera &cam1, const Camera &cam2, const float4x4 &transform) { + const float3 campos = transform * cam1.screenToCam(x,y,d); + const int2 screen = cam2.camToScreen<int2>(campos); + return make_short3(screen.x, screen.y, campos.z); +} + +__device__ inline short2 packScreen(int x, int y, int id) { + return make_short2((id << 12) + x, y); +} + +__device__ inline short2 packScreen(const short3 &p, int id) { + return make_short2((id << 12) + p.x, p.y); +} + +__device__ inline int supportSize(uchar4 support) { + return (support.x+support.y) * (support.z+support.w); +} + +__device__ inline short2 choosePoint(uchar4 sup1, uchar4 sup2, float dot1, float dot2, short2 screen1, short2 screen2) { + //return (float(supportSize(sup2))*dot1 > float(supportSize(sup1))*dot2) ? screen2 : screen1; + return (dot1 > dot2) ? screen2 : screen1; +} + +__device__ inline int unpackCameraID(short2 p) { + return p.x >> 12; +} + +/** + * Identify which source has the best support region for a given pixel. + */ +__global__ void best_sources_kernel( + TextureObject<half4> normals1, + TextureObject<half4> normals2, + TextureObject<uchar4> support1, + TextureObject<uchar4> support2, + TextureObject<float> depth1, + TextureObject<float> depth2, + TextureObject<short2> screen, + float4x4 transform, + //float3x3 transformR, + ftl::rgbd::Camera cam1, + ftl::rgbd::Camera cam2, + int id1, + int id2) { + + const int x = (blockIdx.x*blockDim.x + threadIdx.x); + const int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x >= 0 && y >= 0 && x < screen.width() && y < screen.height()) { + const float d1 = depth1.tex2D(x,y); + + const short3 scr2 = getScreenPos(x, y, d1, cam1, cam2, transform); + short2 bestPoint = packScreen(x,y,0); + + if (scr2.x >= 0 && scr2.y >= 0 && scr2.x < cam2.width && scr2.y < cam2.height) { + uchar4 sup1 = support1.tex2D(x,y); + uchar4 sup2 = support2.tex2D(scr2.x,scr2.y); + const float d2 = depth2.tex2D(scr2.x,scr2.y); + float3 n1 = transform.getFloat3x3() * make_float3(normals1.tex2D(x,y)); + float3 n2 = make_float3(normals2.tex2D(scr2.x,scr2.y)); + + float3 camray = cam2.screenToCam(scr2.x,scr2.y,1.0f); + camray /= length(camray); + const float dot1 = dot(camray, n1); + const float dot2 = dot(camray, n2); + + bestPoint = (fabs(scr2.z - d2) < 0.04f) ? choosePoint(sup1, sup2, dot1, dot2, packScreen(x,y,id1), packScreen(scr2,id2)) : packScreen(x,y,6); + //bestPoint = choosePoint(sup1, sup2, dot1, dot2, packScreen(x,y,id1), packScreen(scr2,id2)); + //bestPoint = (d1 < d2) ? packScreen(x,y,id1) : packScreen(x,y,id2); + + bestPoint = (fabs(scr2.z - d2) < 0.04f) ? packScreen(scr2,id2) : packScreen(scr2,id1); + } + + screen(x,y) = bestPoint; + + /*if (s.x >= 0 && s.y >= 0) { + auto norm1 = make_float3(n1.tex2D(x,y)); + const auto norm2 = make_float3(n2.tex2D(s.x,s.y)); + //n2(s.x,s.y) = norm1; + + float3 cent1 = make_float3(c1.tex2D(x,y)); + const auto cent2 = make_float3(c2.tex2D(s.x,s.y)); + + if (cent2.x+cent2.y+cent2.z > 0.0f && norm2.x+norm2.y+norm2.z > 0.0f) { + norm1 += poseInv1.getFloat3x3() * (pose2.getFloat3x3() * norm2); + n1(x,y) = make_float4(norm1, 0.0f); + cent1 += poseInv1 * (pose2 * cent2); // FIXME: Transform between camera spaces + cent1 /= 2.0f; + c1(x,y) = make_float4(cent1, 0.0f); + //c2(s.x,s.y) = cent1; + + //contribs1(x,y) = contribs1.tex2D(x,y) + 1.0f; + } + // contribs2(s.x,s.y) = contribs2.tex2D(s.x,s.y) + 1.0f; + }*/ + } +} + +void ftl::cuda::best_sources( + ftl::cuda::TextureObject<half4> &normals1, + ftl::cuda::TextureObject<half4> &normals2, + ftl::cuda::TextureObject<uchar4> &support1, + ftl::cuda::TextureObject<uchar4> &support2, + ftl::cuda::TextureObject<float> &depth1, + ftl::cuda::TextureObject<float> &depth2, + ftl::cuda::TextureObject<short2> &screen, + const float4x4 &transform, + const ftl::rgbd::Camera &cam1, + const ftl::rgbd::Camera &cam2, + int id1, + int id2, + cudaStream_t stream) { + + const dim3 gridSize((screen.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (screen.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + best_sources_kernel<<<gridSize, blockSize, 0, stream>>>(normals1, normals2, support1, support2, depth1, depth2, screen, transform, cam1, cam2, id1, id2); + cudaSafeCall( cudaGetLastError() ); +} + +/** + * Identify which source has the best support region for a given pixel. + */ + __global__ void aggregate_sources_kernel( + TextureObject<half4> n1, + TextureObject<half4> n2, + TextureObject<float4> c1, + TextureObject<float4> c2, + TextureObject<float> depth1, + //TextureObject<float> depth2, + //TextureObject<short2> screen, + float4x4 transform, + //float3x3 transformR, + ftl::rgbd::Camera cam1, + ftl::rgbd::Camera cam2) { + + const int x = (blockIdx.x*blockDim.x + threadIdx.x); + const int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x >= 0 && y >= 0 && x < n1.width() && y < n1.height()) { + const float d1 = depth1.tex2D(x,y); + + if (d1 > cam1.minDepth && d1 < cam1.maxDepth) { + //const short3 s = getScreenPos(x, y, d1, cam1, cam2, transform); + const float3 camPos = transform * cam1.screenToCam(x, y, d1); + const int2 s = cam2.camToScreen<int2>(camPos); + + if (s.x >= 0 && s.y >= 0 && s.x < n2.width() && s.y < n2.height()) { + auto norm1 = make_float3(n1.tex2D(x,y)); + const auto norm2 = make_float3(n2.tex2D(s.x,s.y)); + //n2(s.x,s.y) = norm1; + + float3 cent1 = make_float3(c1.tex2D(x,y)); + const auto cent2 = transform.getInverse() * make_float3(c2.tex2D(s.x,s.y)); + + //printf("MERGING %f\n", length(cent2-cent1)); + + if (cent2.x+cent2.y+cent2.z > 0.0f && norm2.x+norm2.y+norm2.z > 0.0f && length(cent2-cent1) < 0.04f) { + norm1 += norm2; + norm1 /= 2.0f; + n1(x,y) = make_half4(norm1, 0.0f); + cent1 += cent2; + cent1 /= 2.0f; + c1(x,y) = make_float4(cent1, 0.0f); + //c2(s.x,s.y) = cent1; + + //contribs1(x,y) = contribs1.tex2D(x,y) + 1.0f; + } + // contribs2(s.x,s.y) = contribs2.tex2D(s.x,s.y) + 1.0f; + } + } + } +} + +void ftl::cuda::aggregate_sources( + ftl::cuda::TextureObject<half4> &n1, + ftl::cuda::TextureObject<half4> &n2, + ftl::cuda::TextureObject<float4> &c1, + ftl::cuda::TextureObject<float4> &c2, + ftl::cuda::TextureObject<float> &depth1, + //ftl::cuda::TextureObject<float> &depth2, + //ftl::cuda::TextureObject<short2> &screen, + const float4x4 &transform, + const ftl::rgbd::Camera &cam1, + const ftl::rgbd::Camera &cam2, + cudaStream_t stream) { + + const dim3 gridSize((n1.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (n1.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + aggregate_sources_kernel<<<gridSize, blockSize, 0, stream>>>(n1, n2, c1, c2, depth1, transform, cam1, cam2); + cudaSafeCall( cudaGetLastError() ); +} + +__device__ static uchar4 HSVtoRGB(int H, float S, float V) { + const float C = S * V; + const float X = C * (1 - fabs(fmodf(H / 60.0f, 2) - 1)); + const float m = V - C; + float Rs, Gs, Bs; + + if(H >= 0 && H < 60) { + Rs = C; + Gs = X; + Bs = 0; + } + else if(H >= 60 && H < 120) { + Rs = X; + Gs = C; + Bs = 0; + } + else if(H >= 120 && H < 180) { + Rs = 0; + Gs = C; + Bs = X; + } + else if(H >= 180 && H < 240) { + Rs = 0; + Gs = X; + Bs = C; + } + else if(H >= 240 && H < 300) { + Rs = X; + Gs = 0; + Bs = C; + } + else { + Rs = C; + Gs = 0; + Bs = X; + } + + return make_uchar4((Bs + m) * 255, (Gs + m) * 255, (Rs + m) * 255, 0); +} + +/** + * Render each pixel is a colour corresponding to the source camera with the + * best support window. + */ + __global__ void vis_best_sources_kernel( + TextureObject<short2> screen, + TextureObject<uchar4> colour, + int myid, + int count) { + + const int x = (blockIdx.x*blockDim.x + threadIdx.x); + const int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x >= 0 && y >= 0 && x < colour.width() && y < colour.height()) { + short2 s = screen.tex2D(x,y); + int id = unpackCameraID(s); + + uchar4 c = HSVtoRGB((360 / count) * id, 0.6f, 0.85f); + if (myid != id) colour(x,y) = c; + //colour(x,y) = c; + } +} + +void ftl::cuda::vis_best_sources( + ftl::cuda::TextureObject<short2> &screen, + ftl::cuda::TextureObject<uchar4> &colour, + int myid, + int count, + cudaStream_t stream) { + + const dim3 gridSize((colour.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (colour.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + vis_best_sources_kernel<<<gridSize, blockSize, 0, stream>>>(screen, colour, myid, count); + cudaSafeCall( cudaGetLastError() ); +} + +/*void ftl::cuda::aggregate_sources( + ftl::cuda::TextureObject<float4> &n1, + ftl::cuda::TextureObject<float4> &n2, + ftl::cuda::TextureObject<float4> &c1, + ftl::cuda::TextureObject<float4> &c2, + ftl::cuda::TextureObject<float> &contribs1, + ftl::cuda::TextureObject<float> &contribs2, + ftl::cuda::TextureObject<short2> &screen, + const float4x4 &poseInv1, + const float4x4 &pose2, + cudaStream_t stream) { + + const dim3 gridSize((screen.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (screen.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + aggregate_sources_kernel<<<gridSize, blockSize, 0, stream>>>(n1, n2, c1, c2, contribs1, contribs2, screen, poseInv1, pose2); + cudaSafeCall( cudaGetLastError() ); +}*/ + +// ==== Normalise aggregations ================================================= + +__global__ void normalise_aggregations_kernel( + TextureObject<half4> norms, + TextureObject<float4> cents, + TextureObject<float> contribs) { + + const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; + const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x < norms.width() && y < norms.height()) { + const float contrib = contribs.tex2D((int)x,(int)y); + + const auto a = make_float3(norms.tex2D((int)x,(int)y)); + const auto b = cents.tex2D(x,y); + //const float4 normal = normals.tex2D((int)x,(int)y); + + //out(x,y) = (contrib == 0.0f) ? make<B>(a) : make<B>(a / contrib); + + if (contrib > 0.0f) { + norms(x,y) = make_half4(a / (contrib+1.0f), 1.0f); + cents(x,y) = b / (contrib+1.0f); + } + } +} + +void ftl::cuda::normalise_aggregations(TextureObject<half4> &norms, TextureObject<float4> ¢s, TextureObject<float> &contribs, cudaStream_t stream) { + const dim3 gridSize((norms.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (norms.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + normalise_aggregations_kernel<<<gridSize, blockSize, 0, stream>>>(norms, cents, contribs); + cudaSafeCall( cudaGetLastError() ); +} diff --git a/components/operators/src/fusion/mvmls.cpp b/components/operators/src/fusion/mvmls.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7d7ef03d9701ecedd906cd999d01ef3ce02e4231 --- /dev/null +++ b/components/operators/src/fusion/mvmls.cpp @@ -0,0 +1,409 @@ +#include <ftl/operators/mvmls.hpp> +#include "smoothing_cuda.hpp" +#include <ftl/utility/matrix_conversion.hpp> +#include "mvmls_cuda.hpp" + +#include <opencv2/cudaarithm.hpp> + +using ftl::operators::MultiViewMLS; +using ftl::codecs::Channel; +using cv::cuda::GpuMat; +using ftl::rgbd::Format; + +MultiViewMLS::MultiViewMLS(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) { + +} + +MultiViewMLS::~MultiViewMLS() { + +} + +bool MultiViewMLS::apply(ftl::rgbd::FrameSet &in, ftl::rgbd::FrameSet &out, cudaStream_t stream) { + cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream); + + // float thresh = config()->value("mls_threshold", 0.4f); + float disconPixels = config()->value("discon_pixels", 100.0f); // Max before definitely not same surface + + float col_smooth = config()->value("mls_colour_smoothing", 30.0f); + int iters = config()->value("mls_iterations", 3); + int radius = config()->value("mls_radius",5); + //bool aggre = config()->value("aggregation", true); + //int win = config()->value("cost_function",1); + int win = config()->value("window_size",16); + bool do_corr = config()->value("merge_corresponding", true); + bool do_aggr = config()->value("merge_mls", false); + bool cull_zero = config()->value("cull_no_confidence", false); + //bool show_best_source = config()->value("show_pixel_source", false); + + ftl::cuda::MvMLSParams params; + params.colour_smooth = config()->value("colour_smooth", 150.0f); + params.spatial_smooth = config()->value("spatial_smooth", 1.0f); + + bool show_consistency = config()->value("show_consistency", false); + bool show_adjustment = config()->value("show_adjustment", false); + + if (in.frames.size() < 1) return false; + auto size = in.frames[0].get<GpuMat>(Channel::Depth).size(); + + // Make sure we have enough buffers + while (normals_horiz_.size() < in.frames.size()) { + normals_horiz_.push_back(new ftl::cuda::TextureObject<half4>(size.height, size.width)); + centroid_horiz_.push_back(new ftl::cuda::TextureObject<float4>(size.height, size.width)); + centroid_vert_.push_back(new ftl::cuda::TextureObject<float4>(size.width, size.height)); + contributions_.push_back(new ftl::cuda::TextureObject<float>(size.width, size.height)); + } + + // Make sure all buffers are at correct resolution and are allocated + for (size_t i=0; i<in.frames.size(); ++i) { + auto &f = in.frames[i]; + auto size = f.get<GpuMat>(Channel::Depth).size(); + centroid_horiz_[i]->create(size.height, size.width); + normals_horiz_[i]->create(size.height, size.width); + centroid_vert_[i]->create(size.width, size.height); + contributions_[i]->create(size.width, size.height); + + if (!f.hasChannel(Channel::Normals)) { + throw FTL_Error("Required normals channel missing for MLS"); + } + if (!f.hasChannel(Channel::Support2) && !f.hasChannel(Channel::Support1)) { + throw FTL_Error("Required cross support channel missing for MLS"); + } + + // Create required channels + f.create<GpuMat>(Channel::Confidence, Format<float>(size)); + f.createTexture<float>(Channel::Confidence); + f.create<GpuMat>(Channel::Screen, Format<short2>(size)); + f.createTexture<short2>(Channel::Screen); + + f.get<GpuMat>(Channel::Confidence).setTo(cv::Scalar(0.0f), cvstream); + } + + //for (int iter=0; iter<iters; ++iter) { + // Step 1: + // Calculate correspondences and adjust depth values + // Step 2: + // Find corresponding points and perform aggregation of any correspondences + // For each camera combination + if (do_corr) { + for (size_t i=0; i<in.frames.size(); ++i) { + auto &f1 = in.frames[i]; + //f1.get<GpuMat>(Channel::Depth2).setTo(cv::Scalar(0.0f), cvstream); + //f1.get<GpuMat>(Channel::Confidence).setTo(cv::Scalar(0.0f), cvstream); + + Eigen::Vector4d d1(0.0, 0.0, 1.0, 0.0); + d1 = f1.getPose() * d1; + + for (size_t j=i+1; j<in.frames.size(); ++j) { + if (i == j) continue; + + //LOG(INFO) << "Running phase1"; + + auto &f2 = in.frames[j]; + //auto s1 = in.sources[i]; + //auto s2 = in.sources[j]; + + // Are cameras facing similar enough direction? + Eigen::Vector4d d2(0.0, 0.0, 1.0, 0.0); + d2 = f2.getPose() * d2; + // No, so skip this combination + if (d1.dot(d2) <= 0.0) continue; + + auto pose = MatrixConversion::toCUDA(f1.getPose().cast<float>().inverse() * f2.getPose().cast<float>()); + auto pose2 = MatrixConversion::toCUDA(f2.getPose().cast<float>().inverse() * f1.getPose().cast<float>()); + + //auto transform = pose2 * pose1; + + //Calculate screen positions of estimated corresponding points + //if (iter == 0) { + ftl::cuda::correspondence( + f1.getTexture<float>(Channel::Depth), + f2.getTexture<float>(Channel::Depth), + f1.getTexture<short2>(Channel::Screen), + f1.getTexture<float>(Channel::Confidence), + f1.getTexture<uint8_t>(Channel::Mask), + pose2, + f1.getLeftCamera(), + f2.getLeftCamera(), + params, + win, + stream + ); + ftl::cuda::correspondence( + f2.getTexture<float>(Channel::Depth), + f1.getTexture<float>(Channel::Depth), + f2.getTexture<short2>(Channel::Screen), + f2.getTexture<float>(Channel::Confidence), + f2.getTexture<uint8_t>(Channel::Mask), + pose, + f2.getLeftCamera(), + f1.getLeftCamera(), + params, + win, + stream + ); + + if (show_consistency) { + ftl::cuda::show_cor_error(f1.getTexture<uchar4>(Channel::Colour), f1.getTexture<short2>(Channel::Screen), f2.getTexture<short2>(Channel::Screen), stream); + ftl::cuda::show_cor_error(f2.getTexture<uchar4>(Channel::Colour), f2.getTexture<short2>(Channel::Screen), f1.getTexture<short2>(Channel::Screen), stream); + } + + if (show_adjustment) { + ftl::cuda::show_depth_adjustment(f1.getTexture<uchar4>(Channel::Colour), f1.getTexture<float>(Channel::Confidence), stream); + ftl::cuda::show_depth_adjustment(f2.getTexture<uchar4>(Channel::Colour), f2.getTexture<float>(Channel::Confidence), stream); + } + + // Actually perform the adjustment + cv::cuda::add(f1.get<GpuMat>(Channel::Depth), f1.get<GpuMat>(Channel::Confidence), f1.get<GpuMat>(Channel::Depth), cv::noArray(), -1, cvstream); + cv::cuda::add(f2.get<GpuMat>(Channel::Depth), f2.get<GpuMat>(Channel::Confidence), f2.get<GpuMat>(Channel::Depth), cv::noArray(), -1, cvstream); + //} //else { + /*ftl::cuda::correspondence( + f1.getTexture<float>(Channel::Depth), + f2.getTexture<float>(Channel::Depth), + f1.getTexture<uchar4>(Channel::Colour), + f2.getTexture<uchar4>(Channel::Colour), + // TODO: Add normals and other things... + f1.getTexture<short2>(Channel::Screen), + f1.getTexture<float>(Channel::Confidence), + f1.getTexture<uint8_t>(Channel::Mask), + pose2, + f1.getLeftCamera(), + f2.getLeftCamera(), + params, + win, + stream + ); + ftl::cuda::correspondence( + f2.getTexture<float>(Channel::Depth), + f1.getTexture<float>(Channel::Depth), + f2.getTexture<uchar4>(Channel::Colour), + f1.getTexture<uchar4>(Channel::Colour), + // TODO: Add normals and other things... + f2.getTexture<short2>(Channel::Screen), + f2.getTexture<float>(Channel::Confidence), + f2.getTexture<uint8_t>(Channel::Mask), + pose, + f2.getLeftCamera(), + f1.getLeftCamera(), + params, + win, + stream + );*/ + //} + + // Also calculate best source for each point + /*ftl::cuda::best_sources( + f1.getTexture<uchar4>(Channel::Support1), + f2.getTexture<uchar4>(Channel::Support1), + f1.getTexture<float>(Channel::Depth), + f2.getTexture<float>(Channel::Depth), + f1.getTexture<short2>(Channel::Screen), + transform, + s1->parameters(), + s2->parameters(), + i, j, stream + );*/ + } + } + + // Reduce window size for next iteration + //win = max(win>>1, 4); + } + + for (int iter=0; iter<iters; ++iter) { + + // Find best source for every pixel + /*for (size_t i=0; i<in.frames.size(); ++i) { + auto &f1 = in.frames[i]; + //f1.get<GpuMat>(Channel::Depth2).setTo(cv::Scalar(0.0f), cvstream); + //f1.get<GpuMat>(Channel::Confidence).setTo(cv::Scalar(0.0f), cvstream); + + Eigen::Vector4d d1(0.0, 0.0, 1.0, 0.0); + d1 = in.sources[i]->getPose() * d1; + + for (size_t j=0; j<in.frames.size(); ++j) { + if (i == j) continue; + + //LOG(INFO) << "Running phase1"; + + auto &f2 = in.frames[j]; + auto s1 = in.sources[i]; + auto s2 = in.sources[j]; + + // Are cameras facing similar enough direction? + Eigen::Vector4d d2(0.0, 0.0, 1.0, 0.0); + d2 = in.sources[j]->getPose() * d2; + // No, so skip this combination + if (d1.dot(d2) <= 0.0) continue; + + auto pose1 = MatrixConversion::toCUDA(s1->getPose().cast<float>()); + auto pose1_inv = MatrixConversion::toCUDA(s1->getPose().cast<float>().inverse()); + auto pose2 = MatrixConversion::toCUDA(s2->getPose().cast<float>().inverse()); + auto pose2_inv = MatrixConversion::toCUDA(s2->getPose().cast<float>()); + + auto transform = pose2 * pose1; + + // Also calculate best source for each point + ftl::cuda::best_sources( + f1.getTexture<float4>(Channel::Normals), + f2.getTexture<float4>(Channel::Normals), + f1.getTexture<uchar4>(Channel::Support1), + f2.getTexture<uchar4>(Channel::Support1), + f1.getTexture<float>(Channel::Depth), + f2.getTexture<float>(Channel::Depth), + f1.getTexture<short2>(Channel::Screen), + transform, + s1->parameters(), + s2->parameters(), + i, j, stream + ); + } + }*/ + + if (radius > 0) { + // Step 2: + // Do the horizontal and vertical MLS aggregations for each source + // But don't do the final move step. + for (size_t i=0; i<in.frames.size(); ++i) { + auto &f = in.frames[i]; + //auto *s = in.sources[i]; + + // Clear data + //cv::cuda::GpuMat data(contributions_[i]->height(), contributions_[i]->width(), CV_32F, contributions_[i]->devicePtr(), contributions_[i]->pixelPitch()); + //data.setTo(cv::Scalar(0.0f), cvstream); + + if (cull_zero && iter == iters-1) { + ftl::cuda::zero_confidence( + f.getTexture<float>(Channel::Confidence), + f.getTexture<float>(Channel::Depth), + stream + ); + } + + float thresh = (1.0f / f.getLeft().fx) * disconPixels; + + ftl::cuda::mls_aggr_horiz( + f.createTexture<uchar4>((f.hasChannel(Channel::Support2)) ? Channel::Support2 : Channel::Support1), + f.createTexture<half4>(Channel::Normals), + *normals_horiz_[i], + f.createTexture<float>(Channel::Depth), + *centroid_horiz_[i], + f.createTexture<uchar4>(Channel::Colour), + thresh, + col_smooth, + radius, + f.getLeftCamera(), + stream + ); + + ftl::cuda::mls_aggr_vert( + f.getTexture<uchar4>((f.hasChannel(Channel::Support2)) ? Channel::Support2 : Channel::Support1), + *normals_horiz_[i], + f.getTexture<half4>(Channel::Normals), + *centroid_horiz_[i], + *centroid_vert_[i], + thresh, + col_smooth, + radius, + f.getLeftCamera(), + stream + ); + } + + //return true; + + + // Step 3: + // Find corresponding points and perform aggregation of any correspondences + // For each camera combination + if (do_aggr) { + for (size_t i=0; i<in.frames.size(); ++i) { + auto &f1 = in.frames[i]; + //f1.get<GpuMat>(Channel::Depth2).setTo(cv::Scalar(0.0f), cvstream); + //f1.get<GpuMat>(Channel::Confidence).setTo(cv::Scalar(0.0f), cvstream); + + Eigen::Vector4d d1(0.0, 0.0, 1.0, 0.0); + d1 = f1.getPose() * d1; + + for (size_t j=0; j<in.frames.size(); ++j) { + if (i == j) continue; + + //LOG(INFO) << "Running phase1"; + + auto &f2 = in.frames[j]; + //auto s1 = in.sources[i]; + //auto s2 = in.sources[j]; + + // Are cameras facing similar enough direction? + Eigen::Vector4d d2(0.0, 0.0, 1.0, 0.0); + d2 = f2.getPose() * d2; + // No, so skip this combination + if (d1.dot(d2) <= 0.0) continue; + + auto pose1 = MatrixConversion::toCUDA(f1.getPose().cast<float>()); + //auto pose1_inv = MatrixConversion::toCUDA(f1.getPose().cast<float>().inverse()); + auto pose2 = MatrixConversion::toCUDA(f2.getPose().cast<float>().inverse()); + //auto pose2_inv = MatrixConversion::toCUDA(f2.getPose().cast<float>()); + + auto transform = pose2 * pose1; + + // For the corresponding points, combine normals and centroids + ftl::cuda::aggregate_sources( + f1.getTexture<half4>(Channel::Normals), + f2.getTexture<half4>(Channel::Normals), + *centroid_vert_[i], + *centroid_vert_[j], + f1.getTexture<float>(Channel::Depth), + //contributions_[i], + //contributions_[j], + //f1.getTexture<short2>(Channel::Screen), + transform, + f1.getLeftCamera(), + f2.getLeftCamera(), + stream + ); + + //LOG(INFO) << "Correspondences done... " << i; + } + } + } + + // Step 3: + // Normalise aggregations and move the points + for (size_t i=0; i<in.frames.size(); ++i) { + auto &f = in.frames[i]; + //auto *s = in.sources[i]; + auto size = f.get<GpuMat>(Channel::Depth).size(); + + /*if (do_corr) { + ftl::cuda::normalise_aggregations( + f.getTexture<float4>(Channel::Normals), + centroid_vert_[i], + contributions_[i], + stream + ); + }*/ + + ftl::cuda::mls_adjust_depth( + f.getTexture<half4>(Channel::Normals), + *centroid_vert_[i], + f.createTexture<float>(Channel::Depth2, ftl::rgbd::Format<float>(size)), + f.getLeftCamera(), + stream + ); + + f.swapChannels(Channel::Depth, Channel::Depth2); + + /*if (show_best_source) { + ftl::cuda::vis_best_sources( + f.getTexture<short2>(Channel::Screen), + f.getTexture<uchar4>(Channel::Colour), + i, + 7, stream + ); + }*/ + } + } + } + + return true; +} diff --git a/components/operators/src/mvmls_cuda.hpp b/components/operators/src/fusion/mvmls_cuda.hpp similarity index 78% rename from components/operators/src/mvmls_cuda.hpp rename to components/operators/src/fusion/mvmls_cuda.hpp index 49dd0c1db15e45606f1fe791879c05cf75dfe1f1..4ee589e0d1e5af86c50aab2bd83bee374f3b5048 100644 --- a/components/operators/src/mvmls_cuda.hpp +++ b/components/operators/src/fusion/mvmls_cuda.hpp @@ -11,13 +11,6 @@ namespace cuda { struct MvMLSParams { float spatial_smooth; float colour_smooth; - float fill_match; - float fill_threshold; - float match_threshold; - float cost_ratio; - float cost_threshold; - float range; - uint flags; }; void correspondence( @@ -33,6 +26,17 @@ void correspondence( const ftl::rgbd::Camera &cam2, const ftl::cuda::MvMLSParams ¶ms, int func, cudaStream_t stream); +void correspondence( + ftl::cuda::TextureObject<float> &d1, + ftl::cuda::TextureObject<float> &d2, + ftl::cuda::TextureObject<short2> &screen, + ftl::cuda::TextureObject<float> &conf, + ftl::cuda::TextureObject<uint8_t> &mask, + float4x4 &pose, + const ftl::rgbd::Camera &cam1, + const ftl::rgbd::Camera &cam2, const ftl::cuda::MvMLSParams ¶ms, int func, + cudaStream_t stream); + void zero_confidence( ftl::cuda::TextureObject<float> &conf, ftl::cuda::TextureObject<float> &depth, @@ -93,6 +97,17 @@ void normalise_aggregations( ftl::cuda::TextureObject<float> &contribs, cudaStream_t stream); +void show_cor_error( + ftl::cuda::TextureObject<uchar4> &colour, + ftl::cuda::TextureObject<short2> &screen1, + ftl::cuda::TextureObject<short2> &screen2, + cudaStream_t stream); + +void show_depth_adjustment( + ftl::cuda::TextureObject<uchar4> &colour, + ftl::cuda::TextureObject<float> &adjust, + cudaStream_t stream); + } } diff --git a/components/operators/src/mask.cpp b/components/operators/src/mask.cpp index 302b1b39555401cc4bf83091a44def76d91fa6ab..beec2bd285f496a2d91f2fbbe6fbbbd39ffce090 100644 --- a/components/operators/src/mask.cpp +++ b/components/operators/src/mask.cpp @@ -62,12 +62,14 @@ bool CullDiscontinuity::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaS uint8_t maskID = config()->value("mask_id", (unsigned int)ftl::cuda::Mask::kMask_Discontinuity); unsigned int radius = config()->value("radius", 2); + bool inverted = config()->value("invert", false); out.clearPackets(Channel::Depth); // Force reset ftl::cuda::cull_mask( in.createTexture<uint8_t>(Channel::Mask), out.createTexture<float>(Channel::Depth), maskID, + inverted, radius, stream ); diff --git a/components/operators/src/mask.cu b/components/operators/src/mask.cu index 4975bcd069f204d3ec1015d8af871e1ad861569f..7c7ce32ad2775433dcd239bf0024ee36185b4d3a 100644 --- a/components/operators/src/mask.cu +++ b/components/operators/src/mask.cu @@ -68,7 +68,7 @@ void ftl::cuda::discontinuity( ftl::cuda::TextureObject<uint8_t> &mask_out, ftl: // ============================================================================= -template <int RADIUS> +template <int RADIUS, bool INVERT> __global__ void cull_mask_kernel(ftl::cuda::TextureObject<uint8_t> mask, ftl::cuda::TextureObject<float> depth, uint8_t id) { const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; @@ -85,24 +85,36 @@ __global__ void cull_mask_kernel(ftl::cuda::TextureObject<uint8_t> mask, ftl::cu } } - if (isdiscon) { + if ((!INVERT && isdiscon) || (INVERT && !isdiscon)) { depth(x,y) = 0.0f; } } } -void ftl::cuda::cull_mask(ftl::cuda::TextureObject<uint8_t> &mask, ftl::cuda::TextureObject<float> &depth, uint8_t id, unsigned int radius, cudaStream_t stream) { +void ftl::cuda::cull_mask(ftl::cuda::TextureObject<uint8_t> &mask, ftl::cuda::TextureObject<float> &depth, uint8_t id, bool invert, unsigned int radius, cudaStream_t stream) { const dim3 gridSize((depth.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); - switch (radius) { - case 0 : cull_mask_kernel<0><<<gridSize, blockSize, 0, stream>>>(mask, depth, id); break; - case 1 : cull_mask_kernel<1><<<gridSize, blockSize, 0, stream>>>(mask, depth, id); break; - case 2 : cull_mask_kernel<2><<<gridSize, blockSize, 0, stream>>>(mask, depth, id); break; - case 3 : cull_mask_kernel<3><<<gridSize, blockSize, 0, stream>>>(mask, depth, id); break; - case 4 : cull_mask_kernel<4><<<gridSize, blockSize, 0, stream>>>(mask, depth, id); break; - case 5 : cull_mask_kernel<5><<<gridSize, blockSize, 0, stream>>>(mask, depth, id); break; - default: break; + if (invert) { + switch (radius) { + case 0 : cull_mask_kernel<0,true><<<gridSize, blockSize, 0, stream>>>(mask, depth, id); break; + case 1 : cull_mask_kernel<1,true><<<gridSize, blockSize, 0, stream>>>(mask, depth, id); break; + case 2 : cull_mask_kernel<2,true><<<gridSize, blockSize, 0, stream>>>(mask, depth, id); break; + case 3 : cull_mask_kernel<3,true><<<gridSize, blockSize, 0, stream>>>(mask, depth, id); break; + case 4 : cull_mask_kernel<4,true><<<gridSize, blockSize, 0, stream>>>(mask, depth, id); break; + case 5 : cull_mask_kernel<5,true><<<gridSize, blockSize, 0, stream>>>(mask, depth, id); break; + default: break; + } + } else { + switch (radius) { + case 0 : cull_mask_kernel<0,false><<<gridSize, blockSize, 0, stream>>>(mask, depth, id); break; + case 1 : cull_mask_kernel<1,false><<<gridSize, blockSize, 0, stream>>>(mask, depth, id); break; + case 2 : cull_mask_kernel<2,false><<<gridSize, blockSize, 0, stream>>>(mask, depth, id); break; + case 3 : cull_mask_kernel<3,false><<<gridSize, blockSize, 0, stream>>>(mask, depth, id); break; + case 4 : cull_mask_kernel<4,false><<<gridSize, blockSize, 0, stream>>>(mask, depth, id); break; + case 5 : cull_mask_kernel<5,false><<<gridSize, blockSize, 0, stream>>>(mask, depth, id); break; + default: break; + } } cudaSafeCall( cudaGetLastError() ); diff --git a/components/operators/src/mvmls.cpp b/components/operators/src/mvmls.cpp deleted file mode 100644 index 8a74b7a95f92103e55d5bce371ae38886353c1f2..0000000000000000000000000000000000000000 --- a/components/operators/src/mvmls.cpp +++ /dev/null @@ -1,346 +0,0 @@ -#include <ftl/operators/mvmls.hpp> -#include "smoothing_cuda.hpp" -#include <ftl/utility/matrix_conversion.hpp> -#include "mvmls_cuda.hpp" - -using ftl::operators::MultiViewMLS; -using ftl::codecs::Channel; -using cv::cuda::GpuMat; -using ftl::rgbd::Format; - -MultiViewMLS::MultiViewMLS(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) { - -} - -MultiViewMLS::~MultiViewMLS() { - -} - -bool MultiViewMLS::apply(ftl::rgbd::FrameSet &in, ftl::rgbd::FrameSet &out, cudaStream_t stream) { - cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream); - - // float thresh = config()->value("mls_threshold", 0.4f); - float disconPixels = config()->value("discon_pixels", 100.0f); // Max before definitely not same surface - - float col_smooth = config()->value("mls_colour_smoothing", 30.0f); - int iters = config()->value("mls_iterations", 3); - int radius = config()->value("mls_radius",5); - //bool aggre = config()->value("aggregation", true); - //int win = config()->value("cost_function",1); - int win = config()->value("window_size",16); - bool do_corr = config()->value("merge_corresponding", true); - bool do_aggr = config()->value("merge_mls", false); - bool cull_zero = config()->value("cull_no_confidence", false); - //bool show_best_source = config()->value("show_pixel_source", false); - - ftl::cuda::MvMLSParams params; - params.range = config()->value("search_range", 0.05f); - params.fill_match = config()->value("fill_match", 50.0f); - params.fill_threshold = config()->value("fill_threshold", 0.0f); - params.match_threshold = config()->value("match_threshold", 0.3f); - params.colour_smooth = config()->value("colour_smooth", 150.0f); - //params.spatial_smooth = config()->value("spatial_smooth", 0.04f); - params.cost_ratio = config()->value("cost_ratio", 0.2f); - params.cost_threshold = config()->value("cost_threshold", 1.0f); - - if (in.frames.size() < 1) return false; - auto size = in.frames[0].get<GpuMat>(Channel::Depth).size(); - - // Make sure we have enough buffers - while (normals_horiz_.size() < in.frames.size()) { - normals_horiz_.push_back(new ftl::cuda::TextureObject<half4>(size.height, size.width)); - centroid_horiz_.push_back(new ftl::cuda::TextureObject<float4>(size.height, size.width)); - centroid_vert_.push_back(new ftl::cuda::TextureObject<float4>(size.width, size.height)); - contributions_.push_back(new ftl::cuda::TextureObject<float>(size.width, size.height)); - } - - // Make sure all buffers are at correct resolution and are allocated - for (size_t i=0; i<in.frames.size(); ++i) { - auto &f = in.frames[i]; - auto size = f.get<GpuMat>(Channel::Depth).size(); - centroid_horiz_[i]->create(size.height, size.width); - normals_horiz_[i]->create(size.height, size.width); - centroid_vert_[i]->create(size.width, size.height); - contributions_[i]->create(size.width, size.height); - - if (!f.hasChannel(Channel::Normals)) { - throw FTL_Error("Required normals channel missing for MLS"); - } - if (!f.hasChannel(Channel::Support2)) { - throw FTL_Error("Required cross support channel missing for MLS"); - } - - // Create required channels - f.create<GpuMat>(Channel::Confidence, Format<float>(size)); - f.createTexture<float>(Channel::Confidence); - f.create<GpuMat>(Channel::Screen, Format<short2>(size)); - f.createTexture<short2>(Channel::Screen); - - f.get<GpuMat>(Channel::Confidence).setTo(cv::Scalar(0.0f), cvstream); - } - - for (int iter=0; iter<iters; ++iter) { - // Step 1: - // Calculate correspondences and adjust depth values - // Step 2: - // Find corresponding points and perform aggregation of any correspondences - // For each camera combination - if (do_corr) { - for (size_t i=0; i<in.frames.size(); ++i) { - auto &f1 = in.frames[i]; - //f1.get<GpuMat>(Channel::Depth2).setTo(cv::Scalar(0.0f), cvstream); - //f1.get<GpuMat>(Channel::Confidence).setTo(cv::Scalar(0.0f), cvstream); - - Eigen::Vector4d d1(0.0, 0.0, 1.0, 0.0); - d1 = f1.getPose() * d1; - - for (size_t j=0; j<in.frames.size(); ++j) { - if (i == j) continue; - - //LOG(INFO) << "Running phase1"; - - auto &f2 = in.frames[j]; - //auto s1 = in.sources[i]; - //auto s2 = in.sources[j]; - - // Are cameras facing similar enough direction? - Eigen::Vector4d d2(0.0, 0.0, 1.0, 0.0); - d2 = f2.getPose() * d2; - // No, so skip this combination - if (d1.dot(d2) <= 0.0) continue; - - auto pose2 = MatrixConversion::toCUDA(f2.getPose().cast<float>().inverse() * f1.getPose().cast<float>()); - - //auto transform = pose2 * pose1; - - //Calculate screen positions of estimated corresponding points - ftl::cuda::correspondence( - f1.getTexture<float>(Channel::Depth), - f2.getTexture<float>(Channel::Depth), - f1.getTexture<uchar4>(Channel::Colour), - f2.getTexture<uchar4>(Channel::Colour), - // TODO: Add normals and other things... - f1.getTexture<short2>(Channel::Screen), - f1.getTexture<float>(Channel::Confidence), - f1.getTexture<uint8_t>(Channel::Mask), - pose2, - f1.getLeftCamera(), - f2.getLeftCamera(), - params, - win, - stream - ); - - // Also calculate best source for each point - /*ftl::cuda::best_sources( - f1.getTexture<uchar4>(Channel::Support1), - f2.getTexture<uchar4>(Channel::Support1), - f1.getTexture<float>(Channel::Depth), - f2.getTexture<float>(Channel::Depth), - f1.getTexture<short2>(Channel::Screen), - transform, - s1->parameters(), - s2->parameters(), - i, j, stream - );*/ - } - } - - // Reduce window size for next iteration - win = max(win>>1, 4); - } - - // Find best source for every pixel - /*for (size_t i=0; i<in.frames.size(); ++i) { - auto &f1 = in.frames[i]; - //f1.get<GpuMat>(Channel::Depth2).setTo(cv::Scalar(0.0f), cvstream); - //f1.get<GpuMat>(Channel::Confidence).setTo(cv::Scalar(0.0f), cvstream); - - Eigen::Vector4d d1(0.0, 0.0, 1.0, 0.0); - d1 = in.sources[i]->getPose() * d1; - - for (size_t j=0; j<in.frames.size(); ++j) { - if (i == j) continue; - - //LOG(INFO) << "Running phase1"; - - auto &f2 = in.frames[j]; - auto s1 = in.sources[i]; - auto s2 = in.sources[j]; - - // Are cameras facing similar enough direction? - Eigen::Vector4d d2(0.0, 0.0, 1.0, 0.0); - d2 = in.sources[j]->getPose() * d2; - // No, so skip this combination - if (d1.dot(d2) <= 0.0) continue; - - auto pose1 = MatrixConversion::toCUDA(s1->getPose().cast<float>()); - auto pose1_inv = MatrixConversion::toCUDA(s1->getPose().cast<float>().inverse()); - auto pose2 = MatrixConversion::toCUDA(s2->getPose().cast<float>().inverse()); - auto pose2_inv = MatrixConversion::toCUDA(s2->getPose().cast<float>()); - - auto transform = pose2 * pose1; - - // Also calculate best source for each point - ftl::cuda::best_sources( - f1.getTexture<float4>(Channel::Normals), - f2.getTexture<float4>(Channel::Normals), - f1.getTexture<uchar4>(Channel::Support1), - f2.getTexture<uchar4>(Channel::Support1), - f1.getTexture<float>(Channel::Depth), - f2.getTexture<float>(Channel::Depth), - f1.getTexture<short2>(Channel::Screen), - transform, - s1->parameters(), - s2->parameters(), - i, j, stream - ); - } - }*/ - - // Step 2: - // Do the horizontal and vertical MLS aggregations for each source - // But don't do the final move step. - for (size_t i=0; i<in.frames.size(); ++i) { - auto &f = in.frames[i]; - //auto *s = in.sources[i]; - - // Clear data - //cv::cuda::GpuMat data(contributions_[i]->height(), contributions_[i]->width(), CV_32F, contributions_[i]->devicePtr(), contributions_[i]->pixelPitch()); - //data.setTo(cv::Scalar(0.0f), cvstream); - - if (cull_zero && iter == iters-1) { - ftl::cuda::zero_confidence( - f.getTexture<float>(Channel::Confidence), - f.getTexture<float>(Channel::Depth), - stream - ); - } - - float thresh = (1.0f / f.getLeft().fx) * disconPixels; - - ftl::cuda::mls_aggr_horiz( - f.createTexture<uchar4>(Channel::Support2), - f.createTexture<half4>(Channel::Normals), - *normals_horiz_[i], - f.createTexture<float>(Channel::Depth), - *centroid_horiz_[i], - f.createTexture<uchar4>(Channel::Colour), - thresh, - col_smooth, - radius, - f.getLeftCamera(), - stream - ); - - ftl::cuda::mls_aggr_vert( - f.getTexture<uchar4>(Channel::Support2), - *normals_horiz_[i], - f.getTexture<half4>(Channel::Normals), - *centroid_horiz_[i], - *centroid_vert_[i], - thresh, - col_smooth, - radius, - f.getLeftCamera(), - stream - ); - } - - //return true; - - - // Step 3: - // Find corresponding points and perform aggregation of any correspondences - // For each camera combination - if (do_aggr) { - for (size_t i=0; i<in.frames.size(); ++i) { - auto &f1 = in.frames[i]; - //f1.get<GpuMat>(Channel::Depth2).setTo(cv::Scalar(0.0f), cvstream); - //f1.get<GpuMat>(Channel::Confidence).setTo(cv::Scalar(0.0f), cvstream); - - Eigen::Vector4d d1(0.0, 0.0, 1.0, 0.0); - d1 = f1.getPose() * d1; - - for (size_t j=0; j<in.frames.size(); ++j) { - if (i == j) continue; - - //LOG(INFO) << "Running phase1"; - - auto &f2 = in.frames[j]; - //auto s1 = in.sources[i]; - //auto s2 = in.sources[j]; - - // Are cameras facing similar enough direction? - Eigen::Vector4d d2(0.0, 0.0, 1.0, 0.0); - d2 = f2.getPose() * d2; - // No, so skip this combination - if (d1.dot(d2) <= 0.0) continue; - - auto pose1 = MatrixConversion::toCUDA(f1.getPose().cast<float>()); - //auto pose1_inv = MatrixConversion::toCUDA(f1.getPose().cast<float>().inverse()); - auto pose2 = MatrixConversion::toCUDA(f2.getPose().cast<float>().inverse()); - //auto pose2_inv = MatrixConversion::toCUDA(f2.getPose().cast<float>()); - - auto transform = pose2 * pose1; - - // For the corresponding points, combine normals and centroids - ftl::cuda::aggregate_sources( - f1.getTexture<half4>(Channel::Normals), - f2.getTexture<half4>(Channel::Normals), - *centroid_vert_[i], - *centroid_vert_[j], - f1.getTexture<float>(Channel::Depth), - //contributions_[i], - //contributions_[j], - //f1.getTexture<short2>(Channel::Screen), - transform, - f1.getLeftCamera(), - f2.getLeftCamera(), - stream - ); - - //LOG(INFO) << "Correspondences done... " << i; - } - } - } - - // Step 3: - // Normalise aggregations and move the points - for (size_t i=0; i<in.frames.size(); ++i) { - auto &f = in.frames[i]; - //auto *s = in.sources[i]; - auto size = f.get<GpuMat>(Channel::Depth).size(); - - /*if (do_corr) { - ftl::cuda::normalise_aggregations( - f.getTexture<float4>(Channel::Normals), - centroid_vert_[i], - contributions_[i], - stream - ); - }*/ - - ftl::cuda::mls_adjust_depth( - f.getTexture<half4>(Channel::Normals), - *centroid_vert_[i], - f.createTexture<float>(Channel::Depth2, ftl::rgbd::Format<float>(size)), - f.getLeftCamera(), - stream - ); - - f.swapChannels(Channel::Depth, Channel::Depth2); - - /*if (show_best_source) { - ftl::cuda::vis_best_sources( - f.getTexture<short2>(Channel::Screen), - f.getTexture<uchar4>(Channel::Colour), - i, - 7, stream - ); - }*/ - } - } - - return true; -} diff --git a/components/operators/src/weighting.cpp b/components/operators/src/weighting.cpp index 520f136caa9d21e6821f8596f1b7e6d57ce3c9f0..928043d6f5d1f58506447f02ccd7cb2be10c54a2 100644 --- a/components/operators/src/weighting.cpp +++ b/components/operators/src/weighting.cpp @@ -31,18 +31,32 @@ bool PixelWeights::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream params.colour = config()->value("use_colour", false); params.noise = config()->value("use_noise", true); params.normals = config()->value("use_normals", true); + bool output_normals = config()->value("output_normals", params.normals); if (!in.hasChannel(Channel::Depth) || !in.hasChannel(Channel::Support1)) return false; - ftl::cuda::pixel_weighting( - out.createTexture<short>(Channel::Weights, ftl::rgbd::Format<short>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), - out.createTexture<uint8_t>(Channel::Mask, ftl::rgbd::Format<uint8_t>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), - in.createTexture<uchar4>(Channel::Support1), - in.createTexture<float>(Channel::Depth), - in.getLeftCamera(), - in.get<cv::cuda::GpuMat>(Channel::Depth).size(), - params, stream - ); + if (output_normals) { + ftl::cuda::pixel_weighting( + out.createTexture<short>(Channel::Weights, ftl::rgbd::Format<short>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), + out.createTexture<uint8_t>(Channel::Mask, ftl::rgbd::Format<uint8_t>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), + out.createTexture<half4>(Channel::Normals, ftl::rgbd::Format<half4>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), + in.createTexture<uchar4>(Channel::Support1), + in.createTexture<float>(Channel::Depth), + in.getLeftCamera(), + in.get<cv::cuda::GpuMat>(Channel::Depth).size(), + params, stream + ); + } else { + ftl::cuda::pixel_weighting( + out.createTexture<short>(Channel::Weights, ftl::rgbd::Format<short>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), + out.createTexture<uint8_t>(Channel::Mask, ftl::rgbd::Format<uint8_t>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), + in.createTexture<uchar4>(Channel::Support1), + in.createTexture<float>(Channel::Depth), + in.getLeftCamera(), + in.get<cv::cuda::GpuMat>(Channel::Depth).size(), + params, stream + ); + } return true; } diff --git a/components/operators/src/weighting.cu b/components/operators/src/weighting.cu index 286207f391a6f90259b813150e4a26286f245ec8..ccb5d7f4e31389edd17a22a139b9391d40b16068 100644 --- a/components/operators/src/weighting.cu +++ b/components/operators/src/weighting.cu @@ -14,6 +14,7 @@ __device__ inline float square(float v) { return v*v; } __global__ void pixel_weight_kernel( ftl::cuda::TextureObject<short> weight_out, ftl::cuda::TextureObject<uint8_t> mask_out, + ftl::cuda::TextureObject<half4> normals_out, ftl::cuda::TextureObject<uchar4> support, ftl::cuda::TextureObject<float> depth, ftl::rgbd::Camera camera, @@ -63,6 +64,7 @@ __global__ void pixel_weight_kernel( if(l > 0.0f) { n = n / -l; + if (normals_out.isValid()) normals_out(x,y) = make_half4(n, 0.0f); float3 ray = camera.screenToCam(x, y, d); ray = ray / length(ray); weight *= min(1.0f, 1.4*dot(ray,n)); @@ -115,7 +117,28 @@ void ftl::cuda::pixel_weighting( const dim3 gridSize((size.width + T_PER_BLOCK - 1)/T_PER_BLOCK, (size.height + T_PER_BLOCK - 1)/T_PER_BLOCK); const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); - pixel_weight_kernel<<<gridSize, blockSize, 0, stream>>>(weight_out, mask_out, support, depth, camera, size, params); + pixel_weight_kernel<<<gridSize, blockSize, 0, stream>>>(weight_out, mask_out, ftl::cuda::TextureObject<half4>(), support, depth, camera, size, params); + cudaSafeCall( cudaGetLastError() ); + + #ifdef _DEBUG + cudaSafeCall(cudaDeviceSynchronize()); + #endif +} + +void ftl::cuda::pixel_weighting( + ftl::cuda::TextureObject<short> &weight_out, + ftl::cuda::TextureObject<ftl::cuda::Mask::type> &mask_out, + ftl::cuda::TextureObject<half4> &normals_out, + ftl::cuda::TextureObject<uchar4> &support, + ftl::cuda::TextureObject<float> &depth, + const ftl::rgbd::Camera &camera, + const cv::Size size, const ftl::cuda::PixelWeightingParameters ¶ms, + cudaStream_t stream) { + + const dim3 gridSize((size.width + T_PER_BLOCK - 1)/T_PER_BLOCK, (size.height + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + pixel_weight_kernel<<<gridSize, blockSize, 0, stream>>>(weight_out, mask_out, normals_out, support, depth, camera, size, params); cudaSafeCall( cudaGetLastError() ); #ifdef _DEBUG diff --git a/components/operators/src/weighting_cuda.hpp b/components/operators/src/weighting_cuda.hpp index 7cd544012db66c59c578c808e3abc3351d9c54b1..782aae3df36b214f2962093890c0b8a72b3f1ef9 100644 --- a/components/operators/src/weighting_cuda.hpp +++ b/components/operators/src/weighting_cuda.hpp @@ -26,6 +26,16 @@ void pixel_weighting( const cv::Size size, const ftl::cuda::PixelWeightingParameters ¶ms, cudaStream_t stream); +void pixel_weighting( + ftl::cuda::TextureObject<short> &weight_out, + ftl::cuda::TextureObject<ftl::cuda::Mask::type> &mask_out, + ftl::cuda::TextureObject<half4> &normals_out, + ftl::cuda::TextureObject<uchar4> &support, + ftl::cuda::TextureObject<float> &depth, + const ftl::rgbd::Camera &camera, + const cv::Size size, const ftl::cuda::PixelWeightingParameters ¶ms, + cudaStream_t stream); + void cull_weight( ftl::cuda::TextureObject<short> &weights, ftl::cuda::TextureObject<float> &depth, diff --git a/components/renderers/cpp/src/CUDARender.cpp b/components/renderers/cpp/src/CUDARender.cpp index c346b1809182aa0815e7140a54efbda6397c385b..799b990f26d28acc533f38b0739b30b2260deaca 100644 --- a/components/renderers/cpp/src/CUDARender.cpp +++ b/components/renderers/cpp/src/CUDARender.cpp @@ -519,9 +519,7 @@ void CUDARender::_updateParameters(ftl::rgbd::Frame &out) { } void CUDARender::_preprocessColours() { - bool show_discon = value("show_discontinuity_mask", false); - bool show_noise = value("show_noise_mask", false); - bool show_fill = value("show_filled", false); + uint8_t show_mask = value("show_mask", 0); bool colour_sources = value("colour_sources", false); cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream_); @@ -536,14 +534,8 @@ void CUDARender::_preprocessColours() { } if (f.hasChannel(Channel::Mask)) { - if (show_noise) { - ftl::cuda::show_mask(f.getTexture<uchar4>(Channel::Colour), f.getTexture<uint8_t>(Channel::Mask), Mask::kMask_Noise, make_uchar4(0,255,0,255), stream_); - } - if (show_discon) { - ftl::cuda::show_mask(f.getTexture<uchar4>(Channel::Colour), f.getTexture<uint8_t>(Channel::Mask), Mask::kMask_Discontinuity, make_uchar4(0,0,255,255), stream_); - } - if (show_fill) { - ftl::cuda::show_mask(f.getTexture<uchar4>(Channel::Colour), f.getTexture<uint8_t>(Channel::Mask), Mask::kMask_Filled, make_uchar4(0,255,0,255), stream_); + if (show_mask > 0) { + ftl::cuda::show_mask(f.getTexture<uchar4>(Channel::Colour), f.getTexture<uint8_t>(Channel::Mask), show_mask, make_uchar4(255,0,255,255), stream_); } }