diff --git a/components/operators/src/correspondence.cu b/components/operators/src/correspondence.cu
index 1611a8483f8faf2bcb6b56640c3f7a07edc3fb22..05d60106059df6855b65003af076bb751176e56f 100644
--- a/components/operators/src/correspondence.cu
+++ b/components/operators/src/correspondence.cu
@@ -64,6 +64,15 @@ __device__ inline int halfWarpCensus(float e) {
 	return c;
 }
 
+__device__ inline float halfWarpBest(float e, float c) {
+	for (int i = WARP_SIZE/4; i > 0; i /= 2) {
+		const float o1 = __shfl_xor_sync(FULL_MASK, e, i, WARP_SIZE);
+		const float o2 = __shfl_xor_sync(FULL_MASK, c, i, WARP_SIZE);
+		e = (o2 > c) ? o1 : e;
+	}
+	return e;
+}
+
 __device__ inline float4 relativeDelta(const float4 &e) {
 	const float e0x = __shfl_sync(FULL_MASK, e.x, 0, WARP_SIZE/2);
 	const float e0y = __shfl_sync(FULL_MASK, e.y, 0, WARP_SIZE/2);
@@ -142,14 +151,14 @@ __global__ void corresponding_point_kernel(
 	float depthPos2 = camPosOrigin.z - (float((COR_STEPS/2)) * depthM2);
 
 	uint badMask = 0;
-	int bestStep = 0;
+	int bestStep = COR_STEPS/2;
 
 
 	// Project to p2 using cam2
 	// Each thread takes a possible correspondence and calculates a weighting
 	//const int lane = tid % WARP_SIZE;
 	for (int i=0; i<COR_STEPS; ++i) {
-		float weight = (linePos.x >= cam2.width || linePos.y >= cam2.height) ? 0.0f : 1.0f;
+		//float weight = 1.0f; //(linePos.x >= cam2.width || linePos.y >= cam2.height) ? 0.0f : 1.0f;
 
 		// Generate a colour correspondence value
 		const auto colour2 = c2.tex2D(linePos.x, linePos.y);
@@ -161,10 +170,18 @@ __global__ void corresponding_point_kernel(
 		const float depth2 = d2.tex2D(int(linePos.x+0.5f), int(linePos.y+0.5f));
 
 		// Record which correspondences are invalid
-		badMask |= (depth2 <= cam2.minDepth || depth2 >= cam2.maxDepth) ? 1 << i : 0;
+		badMask |= (
+			depth2 <= cam2.minDepth ||
+			depth2 >= cam2.maxDepth ||
+			linePos.x < 0.5f ||
+			linePos.y < 0.5f ||
+			linePos.x >= d2.width()-0.5f ||
+			linePos.y >= d2.height()-0.5f
+		) ? 1 << i : 0;
 
 		//if (FUNCTION == 1) {
-		weight *= ftl::cuda::weighting(fabs(depth2 - depthPos2), cweight*params.spatial_smooth);
+		float weight = ftl::cuda::weighting(fabs(depth2 - depthPos2), cweight*params.spatial_smooth);
+		//weight = ftl::cuda::halfWarpSum(weight);
 		//} else {
 		//	const float dweight = ftl::cuda::weighting(fabs(depth2 - depthPos2), params.spatial_smooth);
 		//	weight *= weightFunction<FUNCTION>(params, dweight, cweight);
@@ -191,23 +208,26 @@ __global__ void corresponding_point_kernel(
 	}
 
 	//const float avgcolour = totalcolour/(float)count;
-	const float confidence = bestcolour / totalcolour; //bestcolour - avgcolour;
-	const float bestadjust = float(bestStep-(COR_STEPS/2))*depthM;
+	const float confidence = ((bestcolour / totalcolour) - (1.0f / 16.0f)) * (1.0f + (1.0f/16.0f));
+	float bestadjust = float(bestStep-(COR_STEPS/2))*depthM;
 
 	// Detect matches to boundaries, and discard those
 	uint stepMask = 1 << bestStep;
-	if ((stepMask & (badMask << 1)) || (stepMask & (badMask >> 1))) bestweight = 0.0f;
+	if ((stepMask & badMask) || (stepMask & (badMask << 1)) || (stepMask & (badMask >> 1))) bestweight = 0.0f;
+
+	//bestadjust = halfWarpBest(bestadjust, (bestweight > 0.0f) ? confidence : 0.0f);
 
 	//Mask m(mask.tex2D(x,y));
 
 	//if (bestweight > 0.0f) {
 		float old = conf.tex2D(x,y);
 
-		if (bestweight * confidence > old) {
+		if (bestweight > 0.0f) {
 			d1(x,y) = (0.4f*bestadjust) + depth1;
 			//d2(bestScreen.x, bestScreen.y) = bestdepth2;
 			//screenOut(x,y) = bestScreen;
-			conf(x,y) = bestweight * confidence;
+			conf(x,y) = max(old,confidence); //bestweight * confidence;
+			//conf(x,y) = max(old,fabs(bestadjust));
 		}
 	//}
 
diff --git a/components/operators/src/mvmls.cpp b/components/operators/src/mvmls.cpp
index 4a61e63223bc14e07b9dd90695a6600b4be99b7e..8ff3c89dd8ce9f10882e4c5386b4de631f7836b9 100644
--- a/components/operators/src/mvmls.cpp
+++ b/components/operators/src/mvmls.cpp
@@ -72,6 +72,8 @@ bool MultiViewMLS::apply(ftl::rgbd::FrameSet &in, ftl::rgbd::FrameSet &out, cuda
 			f.createTexture<float>(Channel::Confidence);
 			f.create<GpuMat>(Channel::Screen, Format<short2>(size));
 			f.createTexture<short2>(Channel::Screen);
+
+			f.get<GpuMat>(Channel::Confidence).setTo(cv::Scalar(0.0f), cvstream);
 		}
 
 		for (int iter=0; iter<iters; ++iter) {
@@ -84,7 +86,7 @@ bool MultiViewMLS::apply(ftl::rgbd::FrameSet &in, ftl::rgbd::FrameSet &out, cuda
 			for (size_t i=0; i<in.frames.size(); ++i) {
 				auto &f1 = in.frames[i];
 				//f1.get<GpuMat>(Channel::Depth2).setTo(cv::Scalar(0.0f), cvstream);
-				f1.get<GpuMat>(Channel::Confidence).setTo(cv::Scalar(0.0f), cvstream);
+				//f1.get<GpuMat>(Channel::Confidence).setTo(cv::Scalar(0.0f), cvstream);
 
 				Eigen::Vector4d d1(0.0, 0.0, 1.0, 0.0);
 				d1 = in.sources[i]->getPose() * d1;
diff --git a/components/renderers/cpp/include/ftl/cuda/warp.hpp b/components/renderers/cpp/include/ftl/cuda/warp.hpp
index 8c0fdef9802c1ffb86e226c803d806ea56ea003e..10fc460f3f33a30fadce45912caf72d4a7a86ac3 100644
--- a/components/renderers/cpp/include/ftl/cuda/warp.hpp
+++ b/components/renderers/cpp/include/ftl/cuda/warp.hpp
@@ -76,6 +76,14 @@ __device__ inline int halfWarpSum(int e) {
 	return e;
 }
 
+__device__ inline float halfWarpMul(float e) {
+	for (int i = WARP_SIZE/4; i > 0; i /= 2) {
+		const float other = __shfl_xor_sync(FULL_MASK, e, i, WARP_SIZE);
+		e *= other;
+	}
+	return e;
+}
+
 }
 }
 
diff --git a/components/renderers/cpp/src/reprojection.cu b/components/renderers/cpp/src/reprojection.cu
index b23ad81e9c462f9d4aeb2b6a83d53331487b1db5..9c414f8927572f93795ea4bfb2e17c491f2deb44 100644
--- a/components/renderers/cpp/src/reprojection.cu
+++ b/components/renderers/cpp/src/reprojection.cu
@@ -97,7 +97,8 @@ __global__ void reprojection_kernel(
 	const auto input = in.tex2D(screenPos.x, screenPos.y); //generateInput(in.tex2D((int)screenPos.x, (int)screenPos.y), params, worldPos);
 
 	// TODO: Z checks need to interpolate between neighbors if large triangles are used
-	float weight = ftl::cuda::weighting(fabs(camPos.z - d2), 0.02f);
+	//float weight = ftl::cuda::weighting(fabs(camPos.z - d2), params.depthThreshold);
+	float weight = (fabs(camPos.z - d2) <= params.depthThreshold) ? 1.0f : 0.0f;
 
 	/* Buehler C. et al. 2001. Unstructured Lumigraph Rendering. */
 	/* Orts-Escolano S. et al. 2016. Holoportation: Virtual 3D teleportation in real-time. */
diff --git a/components/renderers/cpp/src/triangle_render.cu b/components/renderers/cpp/src/triangle_render.cu
index 2e1966c4252fcf751639cc70a94dbfbdccd43b3b..61f44f320e0b889649d1f82c5f2c090d326e1221 100644
--- a/components/renderers/cpp/src/triangle_render.cu
+++ b/components/renderers/cpp/src/triangle_render.cu
@@ -77,7 +77,7 @@ __device__ static
 float3 calculateBarycentricCoordinate(const short2 (&tri)[3], const short2 &point) {
 	float beta = calculateBarycentricCoordinateValue(tri[0], point, tri[2], tri);
 	float gamma = calculateBarycentricCoordinateValue(tri[0], tri[1], point, tri);
-	float alpha = 1.0 - beta - gamma;
+	float alpha = 1.0f - beta - gamma;
 	return make_float3(alpha, beta, gamma);
 }
 
@@ -86,9 +86,9 @@ __device__ static
  */
 __host__ __device__ static
 bool isBarycentricCoordInBounds(const float3 &barycentricCoord) {
-	return barycentricCoord.x >= 0.0 && barycentricCoord.x <= 1.0 &&
-			barycentricCoord.y >= 0.0 && barycentricCoord.y <= 1.0 &&
-			barycentricCoord.z >= 0.0 && barycentricCoord.z <= 1.0;
+	return barycentricCoord.x >= -0.0001f && //barycentricCoord.x <= 1.0f &&
+			barycentricCoord.y >= -0.0001f && //barycentricCoord.y <= 1.0f &&
+			barycentricCoord.z >= -0.0001f; // &&barycentricCoord.z <= 1.0f;
 }
 
 /**
@@ -137,6 +137,11 @@ float getZAtCoordinate(const float3 &barycentricCoord, const float (&tri)[3]) {
 	const int maxX = max(v[0].x, max(v[1].x, v[2].x));
 	const int maxY = max(v[0].y, max(v[1].y, v[2].y));
 
+	// Ensure the points themselves are drawn
+	//atomicMin(&depth_out(v[0].x,v[0].y), int(d[0]*100000.0f));
+	//atomicMin(&depth_out(v[1].x,v[1].y), int(d[1]*100000.0f));
+	//atomicMin(&depth_out(v[2].x,v[2].y), int(d[2]*100000.0f));
+
 	// Remove really large triangles
 	if ((maxX - minX) * (maxY - minY) > params.triangle_limit) return;
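For reference, the half-warp helpers introduced in this patch (halfWarpBest in correspondence.cu, halfWarpMul in warp.hpp) all use the same butterfly reduction over __shfl_xor_sync with lane masks 8, 4, 2, 1, which reduces independently within each 16-lane half of a warp. The snippet below is a minimal, self-contained sketch of that pattern, not part of the patch: the WARP_SIZE and FULL_MASK definitions, the halfWarpArgMax name, and the demo harness are illustrative assumptions. Unlike the (currently unused) halfWarpBest above, this variant also carries the running score c through each step, so every comparison is against the best score seen so far.

// half_warp_reduce_sketch.cu -- illustrative only, compile with nvcc
#include <cstdio>
#include <cuda_runtime.h>

#define WARP_SIZE 32
#define FULL_MASK 0xFFFFFFFFu

// Butterfly reduction within each 16-lane half warp: returns, in every lane of
// the half warp, the payload `e` of the lane that had the highest score `c`.
// Both the payload and its score are exchanged and updated at every step.
__device__ inline float halfWarpArgMax(float e, float c) {
	for (int i = WARP_SIZE/4; i > 0; i /= 2) {
		const float otherE = __shfl_xor_sync(FULL_MASK, e, i, WARP_SIZE);
		const float otherC = __shfl_xor_sync(FULL_MASK, c, i, WARP_SIZE);
		if (otherC > c) { e = otherE; c = otherC; }
	}
	return e;
}

__global__ void demoKernel(float *out) {
	const float score = float((threadIdx.x * 7) % 16);  // arbitrary per-lane score
	const float value = float(threadIdx.x);             // payload carried with the score
	out[threadIdx.x] = halfWarpArgMax(value, score);    // same result in all 16 lanes of each half
}

int main() {
	float *d_out = nullptr;
	cudaMalloc(&d_out, WARP_SIZE * sizeof(float));
	demoKernel<<<1, WARP_SIZE>>>(d_out);

	float h_out[WARP_SIZE];
	cudaMemcpy(h_out, d_out, sizeof(h_out), cudaMemcpyDeviceToHost);
	for (int i = 0; i < WARP_SIZE; ++i) printf("lane %2d -> value %.0f\n", i, h_out[i]);
	cudaFree(d_out);
	return 0;
}

The same loop structure gives the sum or product variants (halfWarpSum, halfWarpMul) by replacing the conditional update with `e += other` or `e *= other`.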