diff --git a/applications/reconstruct/src/ilw.cu b/applications/reconstruct/src/ilw.cu index b97c49964294b7511be45b69d2a18bd6ca9f454d..999b5ec9031eed08fc4bc527471961c3236d7445 100644 --- a/applications/reconstruct/src/ilw.cu +++ b/applications/reconstruct/src/ilw.cu @@ -31,7 +31,11 @@ __global__ void correspondence_energy_vector_kernel( const int y = blockIdx.y*blockDim.y + threadIdx.y; const float3 world1 = make_float3(p1.tex2D(x, y)); - if (world1.x == MINF) return; + if (world1.x == MINF) { + vout(x,y) = make_float4(0.0f); + eout(x,y) = 0.0f; + return; + } const float3 camPos2 = pose2 * world1; const uint2 screen2 = cam2.camToScreen<uint2>(camPos2); diff --git a/applications/reconstruct/src/ilw/ilw.cpp b/applications/reconstruct/src/ilw/ilw.cpp index 99ce64fe81a2b163cdce98ecd23873aa5fbba887..2d4f7b678ef8d3eb4f9e521309027dd0f522383b 100644 --- a/applications/reconstruct/src/ilw/ilw.cpp +++ b/applications/reconstruct/src/ilw/ilw.cpp @@ -27,7 +27,7 @@ bool ILW::process(ftl::rgbd::FrameSet &fs, cudaStream_t stream) { //for (int i=0; i<2; ++i) { _phase1(fs, stream); //for (int j=0; j<3; ++j) { - // _phase2(fs); + _phase2(fs, 0.1f, stream); //} // TODO: Break if no time left @@ -118,12 +118,21 @@ bool ILW::_phase1(ftl::rgbd::FrameSet &fs, cudaStream_t stream) { return true; } -bool ILW::_phase2(ftl::rgbd::FrameSet &fs) { +bool ILW::_phase2(ftl::rgbd::FrameSet &fs, float rate, cudaStream_t stream) { // Run energies and motion kernel // Smooth vectors across a window and iteratively // strongly disagreeing vectors should cancel out // A weak vector is overriden by a stronger one. + for (auto &f : fs.frames) { + ftl::cuda::move_points( + f.getTexture<float4>(Channel::Points), + f.getTexture<float4>(Channel::EnergyVector), + rate, + stream + ); + } + return true; } diff --git a/applications/reconstruct/src/ilw/ilw.cu b/applications/reconstruct/src/ilw/ilw.cu index 3c4946ca722efd124def056bdaad9acc74915533..716668b85b1a0cb81af14db2a411a17fffcba9c3 100644 --- a/applications/reconstruct/src/ilw/ilw.cu +++ b/applications/reconstruct/src/ilw/ilw.cu @@ -15,6 +15,9 @@ __device__ inline float warpMax(float e) { return e; } +#define COR_WIN_RADIUS 16 +#define COR_WIN_SIZE (COR_WIN_RADIUS * COR_WIN_RADIUS) + __global__ void correspondence_energy_vector_kernel( TextureObject<float4> p1, TextureObject<float4> p2, @@ -33,34 +36,39 @@ __global__ void correspondence_energy_vector_kernel( const float3 world1 = make_float3(p1.tex2D(x, y)); if (world1.x == MINF) return; const float3 camPos2 = pose2 * world1; - const uint2 screen2 = cam2.camToScreen<uint2>(camPos2); - - const int upsample = 8; + const uint2 screen2 = cam2.camToScreen<uint2>(camPos2); + + float bestconf = 0.0f; + float3 bestpoint; // Project to p2 using cam2 // Each thread takes a possible correspondence and calculates a weighting const int lane = tid % WARP_SIZE; - for (int i=lane; i<upsample*upsample; i+=WARP_SIZE) { - const float u = (i % upsample) - (upsample / 2); - const float v = (i / upsample) - (upsample / 2); + for (int i=lane; i<COR_WIN_SIZE; i+=WARP_SIZE) { + const float u = (i % COR_WIN_RADIUS) - (COR_WIN_RADIUS / 2); + const float v = (i / COR_WIN_RADIUS) - (COR_WIN_RADIUS / 2); const float3 world2 = make_float3(p2.tex2D(screen2.x+u, screen2.y+v)); if (world2.x == MINF) continue; // Determine degree of correspondence const float confidence = 1.0f / length(world1 - world2); - const float maxconf = warpMax(confidence); - - // This thread has best confidence value - if (maxconf == confidence) { - vout(x,y) = vout.tex2D(x, y) + make_float4( - (world1.x - world2.x) * maxconf, - (world1.y - world2.y) * maxconf, - (world1.z - world2.z) * maxconf, - maxconf); - eout(x,y) = eout.tex2D(x,y) + length(world1 - world2)*maxconf; + + if (confidence > bestconf) { + bestpoint = world2; + bestconf = confidence; } } + + const float maxconf = warpMax(bestconf); + if (maxconf == bestconf && maxconf > 0.0f) { + vout(x,y) = vout.tex2D(x, y) + make_float4( + (world1.x - bestpoint.x) * maxconf, + (world1.y - bestpoint.y) * maxconf, + (world1.z - bestpoint.z) * maxconf, + maxconf); + eout(x,y) = eout.tex2D(x,y) + length(world1 - bestpoint)*maxconf; + } } void ftl::cuda::correspondence_energy_vector( @@ -84,3 +92,34 @@ void ftl::cuda::correspondence_energy_vector( ); cudaSafeCall( cudaGetLastError() ); } + +//============================================================================== + + +__global__ void move_points_kernel( + ftl::cuda::TextureObject<float4> p, + ftl::cuda::TextureObject<float4> v, + float rate) { + + const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; + const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x < p.width() && y < p.height()) { + p(x,y) = p(x,y) + rate * v.tex2D((int)x,(int)y); + } +} + + +void ftl::cuda::move_points( + ftl::cuda::TextureObject<float4> &p, + ftl::cuda::TextureObject<float4> &v, + float rate, + cudaStream_t stream) { + + const dim3 gridSize((p.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (p.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + move_points_kernel<<<gridSize, blockSize, 0, stream>>>(p,v,rate); + + cudaSafeCall( cudaGetLastError() ); +} diff --git a/applications/reconstruct/src/ilw/ilw.hpp b/applications/reconstruct/src/ilw/ilw.hpp index 0be45d015e976b540263a2c16cc5605376092a43..5dbcdb7338fcd530e9d09b3ea40b93f79c895b8e 100644 --- a/applications/reconstruct/src/ilw/ilw.hpp +++ b/applications/reconstruct/src/ilw/ilw.hpp @@ -56,7 +56,7 @@ class ILW : public ftl::Configurable { /* * Calculate energies and move the points. */ - bool _phase2(ftl::rgbd::FrameSet &fs); + bool _phase2(ftl::rgbd::FrameSet &fs, float rate, cudaStream_t stream); std::vector<detail::ILWData> data_; }; diff --git a/applications/reconstruct/src/ilw/ilw_cuda.hpp b/applications/reconstruct/src/ilw/ilw_cuda.hpp index a01af75149409fe033ba39ffb0170489ee926be9..32603abeb0870148676ba1be30eab2c02a420641 100644 --- a/applications/reconstruct/src/ilw/ilw_cuda.hpp +++ b/applications/reconstruct/src/ilw/ilw_cuda.hpp @@ -20,6 +20,13 @@ void correspondence_energy_vector( cudaStream_t stream ); +void move_points( + ftl::cuda::TextureObject<float4> &p, + ftl::cuda::TextureObject<float4> &v, + float rate, + cudaStream_t stream +); + } } diff --git a/components/renderers/cpp/src/splat_render.cpp b/components/renderers/cpp/src/splat_render.cpp index 0064aba5fc28b2a06421376085c5e7f74a56cfb1..a791c247be6efa70ffcd93da839d31b68a8df948 100644 --- a/components/renderers/cpp/src/splat_render.cpp +++ b/components/renderers/cpp/src/splat_render.cpp @@ -214,8 +214,10 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cuda } else { if (ftl::rgbd::isFloatChannel(chan)) { out.create<GpuMat>(chan, Format<float>(camera.width, camera.height)); + out.get<GpuMat>(chan).setTo(cv::Scalar(0.0f), cvstream); } else { out.create<GpuMat>(chan, Format<uchar4>(camera.width, camera.height)); + out.get<GpuMat>(chan).setTo(cv::Scalar(76,76,76,255), cvstream); } renderChannel(params, out, chan, stream); }