diff --git a/applications/reconstruct/src/ilw/ilw.cpp b/applications/reconstruct/src/ilw/ilw.cpp index 833f47ad32bd3d7e24dc0c12cc7c6d224b9effcc..464e1f57e688d328ccd345ee604156d96dbe4791 100644 --- a/applications/reconstruct/src/ilw/ilw.cpp +++ b/applications/reconstruct/src/ilw/ilw.cpp @@ -15,11 +15,16 @@ using cv::cuda::GpuMat; ILW::ILW(nlohmann::json &config) : ftl::Configurable(config) { enabled_ = value("ilw_align", true); + iterations_ = value("iterations", 1); on("ilw_align", [this](const ftl::config::Event &e) { enabled_ = value("ilw_align", true); }); + on("iterations", [this](const ftl::config::Event &e) { + iterations_ = value("iterations", 1); + }); + flags_ = 0; if (value("ignore_bad", false)) flags_ |= ftl::cuda::kILWFlag_IgnoreBad; if (value("restrict_z", true)) flags_ |= ftl::cuda::kILWFlag_RestrictZ; @@ -44,14 +49,20 @@ bool ILW::process(ftl::rgbd::FrameSet &fs, cudaStream_t stream) { _phase0(fs, stream); - //for (int i=0; i<2; ++i) { - _phase1(fs, stream); + for (int i=0; i<iterations_; ++i) { + int win; + switch (i) { + case 0: win = 17; break; + case 1: win = 9; break; + default: win = 5; break; + } + _phase1(fs, win, stream); //for (int j=0; j<3; ++j) { _phase2(fs, 0.5f, stream); //} // TODO: Break if no time left - //} + } return true; } @@ -86,21 +97,21 @@ bool ILW::_phase0(ftl::rgbd::FrameSet &fs, cudaStream_t stream) { f.createTexture<float4>(Channel::EnergyVector, Format<float4>(f.get<GpuMat>(Channel::Colour).size())); f.createTexture<float>(Channel::Energy, Format<float>(f.get<GpuMat>(Channel::Colour).size())); f.createTexture<uchar4>(Channel::Colour); - - cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream); - - f.get<GpuMat>(Channel::EnergyVector).setTo(cv::Scalar(0.0f,0.0f,0.0f,0.0f), cvstream); - f.get<GpuMat>(Channel::Energy).setTo(cv::Scalar(0.0f), cvstream); } return true; } -bool ILW::_phase1(ftl::rgbd::FrameSet &fs, cudaStream_t stream) { +bool ILW::_phase1(ftl::rgbd::FrameSet &fs, int win, cudaStream_t stream) { // Run correspondence kernel to create an energy vector + cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream); // For each camera combination for (size_t i=0; i<fs.frames.size(); ++i) { + auto &f1 = fs.frames[i]; + f1.get<GpuMat>(Channel::EnergyVector).setTo(cv::Scalar(0.0f,0.0f,0.0f,0.0f), cvstream); + f1.get<GpuMat>(Channel::Energy).setTo(cv::Scalar(0.0f), cvstream); + Eigen::Vector4d d1(0.0, 0.0, 1.0, 0.0); d1 = fs.sources[i]->getPose() * d1; @@ -109,7 +120,6 @@ bool ILW::_phase1(ftl::rgbd::FrameSet &fs, cudaStream_t stream) { //LOG(INFO) << "Running phase1"; - auto &f1 = fs.frames[i]; auto &f2 = fs.frames[j]; auto s1 = fs.sources[i]; auto s2 = fs.sources[j]; @@ -137,6 +147,7 @@ bool ILW::_phase1(ftl::rgbd::FrameSet &fs, cudaStream_t stream) { pose2, s2->parameters(), flags_, + win, stream ); } catch (ftl::exception &e) { diff --git a/applications/reconstruct/src/ilw/ilw.cu b/applications/reconstruct/src/ilw/ilw.cu index 1c495f3e09c056006527c94d40a5906e2de74ffb..488ed7cb1ff894f8037514430e5d7291194865c8 100644 --- a/applications/reconstruct/src/ilw/ilw.cu +++ b/applications/reconstruct/src/ilw/ilw.cu @@ -24,9 +24,10 @@ __device__ inline float warpSum(float e) { return e; } -#define COR_WIN_RADIUS 17 -#define COR_WIN_SIZE (COR_WIN_RADIUS * COR_WIN_RADIUS) +//#define COR_WIN_RADIUS 17 +//#define COR_WIN_SIZE (COR_WIN_RADIUS * COR_WIN_RADIUS) +template<int COR_WIN_RADIUS> __global__ void correspondence_energy_vector_kernel( TextureObject<float4> p1, TextureObject<float4> p2, @@ -57,7 +58,7 @@ __global__ void correspondence_energy_vector_kernel( // Project to p2 using cam2 // Each thread takes a possible correspondence and calculates a weighting const int lane = tid % WARP_SIZE; - for (int i=lane; i<COR_WIN_SIZE; i+=WARP_SIZE) { + for (int i=lane; i<COR_WIN_RADIUS*COR_WIN_RADIUS; i+=WARP_SIZE) { const float u = (i % COR_WIN_RADIUS) - (COR_WIN_RADIUS / 2); const float v = (i / COR_WIN_RADIUS) - (COR_WIN_RADIUS / 2); @@ -122,7 +123,7 @@ void ftl::cuda::correspondence_energy_vector( TextureObject<float> &eout, float4x4 &pose1, float4x4 &pose2, - const Camera &cam2, uint flags, + const Camera &cam2, uint flags, int win, cudaStream_t stream) { const dim3 gridSize((p1.width() + 2 - 1)/2, (p1.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); @@ -130,9 +131,11 @@ void ftl::cuda::correspondence_energy_vector( //printf("COR SIZE %d,%d\n", p1.width(), p1.height()); - correspondence_energy_vector_kernel<<<gridSize, blockSize, 0, stream>>>( - p1, p2, c1, c2, vout, eout, pose1, pose2, cam2, flags - ); + switch (win) { + case 17 : correspondence_energy_vector_kernel<17><<<gridSize, blockSize, 0, stream>>>(p1, p2, c1, c2, vout, eout, pose1, pose2, cam2, flags); break; + case 9 : correspondence_energy_vector_kernel<9><<<gridSize, blockSize, 0, stream>>>(p1, p2, c1, c2, vout, eout, pose1, pose2, cam2, flags); break; + case 5 : correspondence_energy_vector_kernel<5><<<gridSize, blockSize, 0, stream>>>(p1, p2, c1, c2, vout, eout, pose1, pose2, cam2, flags); break; + } cudaSafeCall( cudaGetLastError() ); } diff --git a/applications/reconstruct/src/ilw/ilw.hpp b/applications/reconstruct/src/ilw/ilw.hpp index 5e10cc7cf6f2ff76143369848d7d6ee7618b194d..36c2cbbc0e7c62d234dcc7e9db746a7ed7a1b66f 100644 --- a/applications/reconstruct/src/ilw/ilw.hpp +++ b/applications/reconstruct/src/ilw/ilw.hpp @@ -51,7 +51,7 @@ class ILW : public ftl::Configurable { /* * Find possible correspondences and a confidence value. */ - bool _phase1(ftl::rgbd::FrameSet &fs, cudaStream_t stream); + bool _phase1(ftl::rgbd::FrameSet &fs, int win, cudaStream_t stream); /* * Calculate energies and move the points. @@ -61,6 +61,7 @@ class ILW : public ftl::Configurable { std::vector<detail::ILWData> data_; bool enabled_; unsigned int flags_; + int iterations_; }; } diff --git a/applications/reconstruct/src/ilw/ilw_cuda.hpp b/applications/reconstruct/src/ilw/ilw_cuda.hpp index 5e399606209ce66e6d48ac9e0be483d135e34ca5..b9b9f89889825b61ff153d9f0c27f32c69f14e1c 100644 --- a/applications/reconstruct/src/ilw/ilw_cuda.hpp +++ b/applications/reconstruct/src/ilw/ilw_cuda.hpp @@ -21,7 +21,7 @@ void correspondence_energy_vector( float4x4 &pose1, float4x4 &pose2, const ftl::rgbd::Camera &cam2, - uint flags, + uint flags, int win, cudaStream_t stream );