diff --git a/applications/reconstruct/src/ilw/ilw.cpp b/applications/reconstruct/src/ilw/ilw.cpp index 3f563e25b9016650ef889d531a16ade506b03844..8d8f938678e58d1a01f40b64c1fbd96e86b08014 100644 --- a/applications/reconstruct/src/ilw/ilw.cpp +++ b/applications/reconstruct/src/ilw/ilw.cpp @@ -217,10 +217,13 @@ bool ILW::_phase2(ftl::rgbd::FrameSet &fs, float rate, cudaStream_t stream) { for (size_t i=0; i<fs.frames.size(); ++i) { auto &f = fs.frames[i]; + auto pose = MatrixConversion::toCUDA(fs.sources[i]->getPose().cast<float>()); //.inverse()); + ftl::cuda::move_points( f.getTexture<float4>(Channel::Points), f.getTexture<float4>(Channel::EnergyVector), fs.sources[i]->parameters(), + pose, rate, motion_window_, stream diff --git a/applications/reconstruct/src/ilw/ilw.cu b/applications/reconstruct/src/ilw/ilw.cu index c2d7600aa1d77b6c4672a4bef52f870fd1274411..45025990555be66fd0c9bba41d9ab6f2003ca19b 100644 --- a/applications/reconstruct/src/ilw/ilw.cu +++ b/applications/reconstruct/src/ilw/ilw.cu @@ -27,6 +27,8 @@ __device__ inline float warpSum(float e) { //#define COR_WIN_RADIUS 17 //#define COR_WIN_SIZE (COR_WIN_RADIUS * COR_WIN_RADIUS) +#define WINDOW_RADIUS 2 + template<int COR_STEPS> __global__ void correspondence_energy_vector_kernel( TextureObject<float4> p1, @@ -57,7 +59,7 @@ __global__ void correspondence_energy_vector_kernel( float bestdepth; int count = 0; - const float step_interval = 0.05f / COR_STEPS; + const float step_interval = 0.05f / (COR_STEPS / 2); // Project to p2 using cam2 // Each thread takes a possible correspondence and calculates a weighting @@ -72,28 +74,37 @@ __global__ void correspondence_energy_vector_kernel( if (screen.x >= cam2.width || screen.y >= cam2.height) continue; - // Now do correspondence evaluation at "screen" location in camera 2 - const float3 world2 = make_float3(p2.tex2D((int)screen.x, (int)screen.y)); - if ((params.flags & ftl::cuda::kILWFlag_IgnoreBad) && world2.x == MINF) continue; - const uchar4 colour2 = c2.tex2D((int)screen.x, (int)screen.y); - - // Determine degree of correspondence - float cost = 1.0f - ftl::cuda::spatialWeighting(world1, world2, params.spatial_smooth); - // Point is too far away to even count - if (world2.x != MINF && cost == 1.0f) continue; - - // Mix ratio of colour and distance costs - const float ccost = 1.0f - ftl::cuda::colourWeighting(colour1, colour2, params.colour_smooth); - if ((params.flags & ftl::cuda::kILWFlag_SkipBadColour) && ccost == 1.0f) continue; - cost = params.cost_ratio * (ccost) + (1.0f - params.cost_ratio) * cost; - //cost /= 2.0f; - - ++count; - avgcost += cost; - if (world2.x != MINF && cost < bestcost) { - bestdepth = depth_adjust; - bestcost = cost; - } + // Small window around suggested point + //for (int u=-WINDOW_RADIUS; u<=WINDOW_RADIUS; ++u) { + //for (int v=-WINDOW_RADIUS; v<=WINDOW_RADIUS; ++v) { + const int u = 0; + const int v = 0; + + // Now do correspondence evaluation at "screen" location in camera 2 + const float3 world2 = make_float3(p2.tex2D((int)screen.x+u, (int)screen.y+v)); + if ((params.flags & ftl::cuda::kILWFlag_IgnoreBad) && world2.x == MINF) continue; + const uchar4 colour2 = c2.tex2D((int)screen.x+u, (int)screen.y+v); + + // Determine degree of correspondence + float cost = 1.0f - ftl::cuda::spatialWeighting(worldPos, world2, params.spatial_smooth); + // Point is too far away to even count + if (world2.x != MINF && cost == 1.0f) continue; + + // Mix ratio of colour and distance costs + const float ccost = 1.0f - ftl::cuda::colourWeighting(colour1, colour2, params.colour_smooth); + if ((params.flags & ftl::cuda::kILWFlag_SkipBadColour) && ccost == 1.0f) continue; + cost = params.cost_ratio * (ccost) + (1.0f - params.cost_ratio) * cost; + //cost /= 2.0f; + + ++count; + avgcost += cost; + if (world2.x != MINF && cost < bestcost) { + bestdepth = depth_adjust; + bestcost = cost; + } + + //} + //} } count = warpSum(count); @@ -104,18 +115,22 @@ __global__ void correspondence_energy_vector_kernel( // FIXME: Multiple threads in warp could match this. if (best && mincost < 1.0f) { - float3 tvecA = pose1 * cam1.screenToCam(x, y, bestdepth); + //float3 tvecA = pose1 * cam1.screenToCam(x, y, bestdepth); //float3 tvecB = pose1 * world1; //if (params.flags & ftl::cuda::kILWFlag_RestrictZ) { // tvecA.x = tvecB.x; // tvecA.y = tvecB.y; //} - tvecA = tvecA - world1; - vout(x,y) = make_float4( - tvecA.x, // * (1.0f - mincost) * confidence, - tvecA.y, // * (1.0f - mincost) * confidence, - tvecA.z, // * (1.0f - mincost) * confidence, - (1.0f - mincost) * confidence); + //tvecA = tvecA - world1; + float4 old = vout.tex2D(x,y); + + if ((1.0f - mincost) * confidence > old.w) { + vout(x,y) = make_float4( + depth1, // * (1.0f - mincost) * confidence, + 0.0f, // * (1.0f - mincost) * confidence, + bestdepth-depth1, // * (1.0f - mincost) * confidence, + (1.0f - mincost) * confidence); + } //eout(x,y) = max(eout(x,y), (length(bestpoint-world1) / 0.04f) * 7.0f); //eout(x,y) = max(eout(x,y), (1.0f - mincost) * 7.0f); @@ -166,16 +181,19 @@ __global__ void move_points_kernel( ftl::cuda::TextureObject<float4> p, ftl::cuda::TextureObject<float4> ev, ftl::rgbd::Camera camera, + float4x4 pose, float rate) { const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; + + const float4 vec0 = ev.tex2D((int)x,(int)y); if (x < p.width() && y < p.height()) { const float4 world = p(x,y); if (world.x == MINF) return; - float4 vec = make_float4(0.0f, 0.0f, 0.0f, 0.0f); //ev.tex2D((int)x,(int)y); + float delta = 0.0f; //make_float4(0.0f, 0.0f, 0.0f, 0.0f); //ev.tex2D((int)x,(int)y); float contrib = 0.0f; // Calculate screen space distortion with neighbours @@ -185,14 +203,15 @@ __global__ void move_points_kernel( const float3 pn = make_float3(p.tex2D((int)x+u,(int)y+v)); if (pn.x == MINF) continue; - const float s = ftl::cuda::spatialWeighting(pn, make_float3(world), 0.01f); + const float s = ftl::cuda::spatialWeighting(pn, make_float3(world), 0.04f); contrib += vecn.w * s; - vec += vecn.w * s * vecn; + delta += vecn.w * s * vecn.z; } } - if (vec.w > 0.0f) { - p(x,y) = world + rate * (vec / contrib); + if (contrib > 0.0f) { + const float3 newworld = pose * camera.screenToCam(x, y, vec0.x + rate * (delta / contrib)); + p(x,y) = make_float4(newworld, world.w); //world + rate * (vec / contrib); } } } @@ -202,6 +221,7 @@ void ftl::cuda::move_points( ftl::cuda::TextureObject<float4> &p, ftl::cuda::TextureObject<float4> &v, const ftl::rgbd::Camera &camera, + const float4x4 &pose, float rate, int radius, cudaStream_t stream) { @@ -210,11 +230,11 @@ void ftl::cuda::move_points( const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); switch (radius) { - case 9 : move_points_kernel<9><<<gridSize, blockSize, 0, stream>>>(p,v,camera,rate); break; - case 5 : move_points_kernel<5><<<gridSize, blockSize, 0, stream>>>(p,v,camera,rate); break; - case 3 : move_points_kernel<3><<<gridSize, blockSize, 0, stream>>>(p,v,camera,rate); break; - case 1 : move_points_kernel<1><<<gridSize, blockSize, 0, stream>>>(p,v,camera,rate); break; - case 0 : move_points_kernel<0><<<gridSize, blockSize, 0, stream>>>(p,v,camera,rate); break; + case 9 : move_points_kernel<9><<<gridSize, blockSize, 0, stream>>>(p,v,camera, pose,rate); break; + case 5 : move_points_kernel<5><<<gridSize, blockSize, 0, stream>>>(p,v,camera, pose,rate); break; + case 3 : move_points_kernel<3><<<gridSize, blockSize, 0, stream>>>(p,v,camera, pose,rate); break; + case 1 : move_points_kernel<1><<<gridSize, blockSize, 0, stream>>>(p,v,camera, pose,rate); break; + case 0 : move_points_kernel<0><<<gridSize, blockSize, 0, stream>>>(p,v,camera, pose,rate); break; } cudaSafeCall( cudaGetLastError() ); diff --git a/applications/reconstruct/src/ilw/ilw_cuda.hpp b/applications/reconstruct/src/ilw/ilw_cuda.hpp index 913817bd61f9b77f139fe1e10e54634fbd7e5c81..03bde33d4db58b19597da4f30417c49a97461e60 100644 --- a/applications/reconstruct/src/ilw/ilw_cuda.hpp +++ b/applications/reconstruct/src/ilw/ilw_cuda.hpp @@ -40,6 +40,7 @@ void move_points( ftl::cuda::TextureObject<float4> &p, ftl::cuda::TextureObject<float4> &v, const ftl::rgbd::Camera &camera, + const float4x4 &pose, float rate, int radius, cudaStream_t stream