diff --git a/applications/reconstruct/CMakeLists.txt b/applications/reconstruct/CMakeLists.txt index de3aca69c4da32f0145f2d57ee3ac439209d1071..dcee7afa0147378ffaabd91263e200f8fe284c98 100644 --- a/applications/reconstruct/CMakeLists.txt +++ b/applications/reconstruct/CMakeLists.txt @@ -16,6 +16,7 @@ set(REPSRC #src/depth_camera.cu #src/depth_camera.cpp src/ilw.cpp + src/ilw.cu ) add_executable(ftl-reconstruct ${REPSRC}) diff --git a/applications/reconstruct/src/ilw.cpp b/applications/reconstruct/src/ilw.cpp index 90b3a1bee8f08f8a0973cd068c3ff959343da6b3..57ad8634c990e7c5fb05b98fc5e2b8a1773ce008 100644 --- a/applications/reconstruct/src/ilw.cpp +++ b/applications/reconstruct/src/ilw.cpp @@ -2,6 +2,9 @@ #include <ftl/utility/matrix_conversion.hpp> #include <ftl/rgbd/source.hpp> #include <ftl/cuda/points.hpp> +#include <loguru.hpp> + +#include "ilw_cuda.hpp" using ftl::ILW; using ftl::detail::ILWData; @@ -21,14 +24,14 @@ ILW::~ILW() { bool ILW::process(ftl::rgbd::FrameSet &fs, cudaStream_t stream) { _phase0(fs, stream); - for (int i=0; i<2; ++i) { - _phase1(fs); - for (int j=0; j<3; ++j) { - _phase2(fs); - } + //for (int i=0; i<2; ++i) { + _phase1(fs, stream); + //for (int j=0; j<3; ++j) { + // _phase2(fs); + //} // TODO: Break if no time left - } + //} return true; } @@ -47,15 +50,48 @@ bool ILW::_phase0(ftl::rgbd::FrameSet &fs, cudaStream_t stream) { auto &t = f.createTexture<float4>(Channel::Points, Format<float4>(f.get<GpuMat>(Channel::Colour).size())); auto pose = MatrixConversion::toCUDA(s->getPose().cast<float>()); //.inverse()); ftl::cuda::point_cloud(t, f.createTexture<float>(Channel::Depth), s->parameters(), pose, stream); + + // TODO: Create energy vector texture and clear it + // Create energy and clear it } return true; } -bool ILW::_phase1(ftl::rgbd::FrameSet &fs) { - // Run correspondence kernel to find points +bool ILW::_phase1(ftl::rgbd::FrameSet &fs, cudaStream_t stream) { + // Run correspondence kernel to create an energy vector // For each camera combination + for (size_t i=0; i<fs.frames.size(); ++i) { + for (size_t j=0; j<fs.frames.size(); ++j) { + if (i == j) continue; + + LOG(INFO) << "Running phase1"; + + auto &f1 = fs.frames[i]; + auto &f2 = fs.frames[j]; + //auto s1 = fs.frames[i]; + auto s2 = fs.sources[j]; + + auto pose = MatrixConversion::toCUDA(s2->getPose().cast<float>().inverse()); + + //Calculate energy vector to best correspondence + ftl::cuda::correspondence_energy_vector( + f1.getTexture<float4>(Channel::Points), + f2.getTexture<float4>(Channel::Points), + f1.getTexture<uchar4>(Channel::Colour), + f2.getTexture<uchar4>(Channel::Colour), + // TODO: Add normals and other things... + f1.getTexture<float4>(Channel::EnergyVector), + f1.getTexture<float>(Channel::Energy), + pose, + s2->parameters(), + stream + ); + + LOG(INFO) << "Correspondences done... " << i; + } + } return true; } diff --git a/applications/reconstruct/src/ilw.cu b/applications/reconstruct/src/ilw.cu index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..90133a3a57800ee87a91fd50902deea5f701258a 100644 --- a/applications/reconstruct/src/ilw.cu +++ b/applications/reconstruct/src/ilw.cu @@ -0,0 +1,86 @@ +#include "ilw_cuda.hpp" + +using ftl::cuda::TextureObject; +using ftl::rgbd::Camera; + +#define WARP_SIZE 32 +#define T_PER_BLOCK 8 +#define FULL_MASK 0xffffffff + +__device__ inline float warpMax(float e) { + for (int i = WARP_SIZE/2; i > 0; i /= 2) { + const float other = __shfl_xor_sync(FULL_MASK, e, i, WARP_SIZE); + e = max(e, other); + } + return e; +} + +__global__ void correspondence_energy_vector_kernel( + TextureObject<float4> p1, + TextureObject<float4> p2, + TextureObject<uchar4> c1, + TextureObject<uchar4> c2, + TextureObject<float4> vout, + TextureObject<float> eout, + float4x4 pose2, // Inverse + Camera cam2) { + + // Each warp picks point in p1 + const int tid = (threadIdx.x + threadIdx.y * blockDim.x); + const int x = (blockIdx.x*blockDim.x + threadIdx.x) / WARP_SIZE; + const int y = blockIdx.y*blockDim.y + threadIdx.y; + + const float3 world1 = make_float3(p1.tex2D(x, y)); + const float3 camPos2 = pose2 * world1; + const uint2 screen2 = cam2.camToScreen<uint2>(camPos2); + + const int upsample = 8; + + // Project to p2 using cam2 + // Each thread takes a possible correspondence and calculates a weighting + const int lane = tid % WARP_SIZE; + for (int i=lane; i<upsample*upsample; i+=WARP_SIZE) { + const float u = (i % upsample) - (upsample / 2); + const float v = (i / upsample) - (upsample / 2); + + const float3 world2 = make_float3(p2.tex2D(screen2.x+u, screen2.y+v)); + + // Determine degree of correspondence + const float confidence = 1.0f / length(world1 - world2); + + printf("conf %f\n", confidence); + const float maxconf = warpMax(confidence); + + // This thread has best confidence value + if (maxconf == confidence) { + vout(x,y) = vout.tex2D(x, y) + make_float4( + (world1.x - world2.x) * maxconf, + (world1.y - world2.y) * maxconf, + (world1.z - world2.z) * maxconf, + maxconf); + eout(x,y) = eout.tex2D(x,y) + length(world1 - world2)*maxconf; + } + } +} + +void ftl::cuda::correspondence_energy_vector( + TextureObject<float4> &p1, + TextureObject<float4> &p2, + TextureObject<uchar4> &c1, + TextureObject<uchar4> &c2, + TextureObject<float4> &vout, + TextureObject<float> &eout, + float4x4 &pose2, + const Camera &cam2, + cudaStream_t stream) { + + const dim3 gridSize((p1.width() + 2 - 1)/2, (p1.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(2*WARP_SIZE, T_PER_BLOCK); + + printf("COR SIZE %d,%d\n", p1.width(), p1.height()); + + correspondence_energy_vector_kernel<<<gridSize, blockSize, 0, stream>>>( + p1, p2, c1, c2, vout, eout, pose2, cam2 + ); + cudaSafeCall( cudaGetLastError() ); +} diff --git a/applications/reconstruct/src/ilw.hpp b/applications/reconstruct/src/ilw.hpp index 5bab3387316c47c3f1489acde7990d583364f523..0be45d015e976b540263a2c16cc5605376092a43 100644 --- a/applications/reconstruct/src/ilw.hpp +++ b/applications/reconstruct/src/ilw.hpp @@ -51,7 +51,7 @@ class ILW : public ftl::Configurable { /* * Find possible correspondences and a confidence value. */ - bool _phase1(ftl::rgbd::FrameSet &fs); + bool _phase1(ftl::rgbd::FrameSet &fs, cudaStream_t stream); /* * Calculate energies and move the points. diff --git a/applications/reconstruct/src/ilw_cuda.hpp b/applications/reconstruct/src/ilw_cuda.hpp new file mode 100644 index 0000000000000000000000000000000000000000..a01af75149409fe033ba39ffb0170489ee926be9 --- /dev/null +++ b/applications/reconstruct/src/ilw_cuda.hpp @@ -0,0 +1,26 @@ +#ifndef _FTL_ILW_CUDA_HPP_ +#define _FTL_ILW_CUDA_HPP_ + +#include <ftl/cuda_common.hpp> +#include <ftl/rgbd/camera.hpp> +#include <ftl/cuda_matrix_util.hpp> + +namespace ftl { +namespace cuda { + +void correspondence_energy_vector( + ftl::cuda::TextureObject<float4> &p1, + ftl::cuda::TextureObject<float4> &p2, + ftl::cuda::TextureObject<uchar4> &c1, + ftl::cuda::TextureObject<uchar4> &c2, + ftl::cuda::TextureObject<float4> &vout, + ftl::cuda::TextureObject<float> &eout, + float4x4 &pose2, + const ftl::rgbd::Camera &cam2, + cudaStream_t stream +); + +} +} + +#endif // _FTL_ILW_CUDA_HPP_ diff --git a/components/renderers/cpp/src/splat_render.cpp b/components/renderers/cpp/src/splat_render.cpp index c878a4913dfd37d70a4aeddf16397f7aaab509fd..daf9f5f64c019d24ae9afb9f2e4540c59b722922 100644 --- a/components/renderers/cpp/src/splat_render.cpp +++ b/components/renderers/cpp/src/splat_render.cpp @@ -31,6 +31,7 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cuda out.create<GpuMat>(Channel::Depth, Format<float>(camera.width, camera.height)); out.create<GpuMat>(Channel::Colour, Format<uchar4>(camera.width, camera.height)); + // FIXME: Use source resolutions, not virtual resolution temp_.create<GpuMat>(Channel::Colour, Format<float4>(camera.width, camera.height)); temp_.create<GpuMat>(Channel::Colour2, Format<uchar4>(camera.width, camera.height)); temp_.create<GpuMat>(Channel::Contribution, Format<float>(camera.width, camera.height)); @@ -115,6 +116,8 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cuda //LOG(INFO) << "DIBR DONE"; } + // TODO: Add the depth splatting step.. + temp_.createTexture<float4>(Channel::Colour); temp_.createTexture<float>(Channel::Contribution); @@ -157,74 +160,9 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cuda Eigen::Matrix4f matrix = src->getPose().cast<float>() * transform.matrix(); params.m_viewMatrix = MatrixConversion::toCUDA(matrix.inverse()); params.m_viewMatrixInverse = MatrixConversion::toCUDA(matrix); - } - - /* - //ftl::cuda::dibr(depth1_, colour1_, normal1_, depth2_, colour_tmp_, depth3_, scene_->cameraCount(), params, stream); - - // Step 1: Put all points into virtual view to gather them - //ftl::cuda::dibr_raw(depth1_, scene_->cameraCount(), params, stream); - - // Step 2: For each point, use a warp to do MLS and up sample - //ftl::cuda::mls_render_depth(depth1_, depth3_, params, scene_->cameraCount(), stream); - - if (src->getChannel() == Channel::Depth) { - //LOG(INFO) << "Rendering depth"; - //ftl::cuda::int_to_float(depth1_, depth2_, 1.0f / 1000.0f, stream); - if (value("splatting", false)) { - //ftl::cuda::splat_points(depth1_, colour1_, normal1_, depth2_, colour2_, params, stream); - //ftl::cuda::int_to_float(depth1_, depth2_, 1.0f / 1000.0f, stream); - - temp_.get<GpuMat>(Channel::Depth).convertTo(out.get<GpuMat>(Channel::Depth), CV_32F, 1.0f / 1000.0f, cvstream); - } else { - temp_.get<GpuMat>(Channel::Depth).convertTo(out.get<GpuMat>(Channel::Depth), CV_32F, 1.0f / 1000.0f, cvstream); - //ftl::cuda::int_to_float(depth1_, depth2_, 1.0f / 1000.0f, stream); - //src->writeFrames(ts, colour1_, depth2_, stream); - //src->write(scene_.timestamp, output_, stream); - } - } else if (src->getChannel() == Channel::Energy) { - //ftl::cuda::int_to_float(depth1_, depth2_, 1.0f / 1000.0f, stream); - //if (src->value("splatting", false)) { - //ftl::cuda::splat_points(depth1_, colour1_, normal1_, depth2_, colour2_, params, stream); - //ftl::cuda::int_to_float(depth1_, depth2_, 1.0f / 1000.0f, stream); - //src->writeFrames(ts, colour1_, depth2_, stream); - //src->write(scene_.timestamp, output_, stream); - //} else { - //ftl::cuda::int_to_float(depth1_, depth2_, 1.0f / 1000.0f, stream); - // src->writeFrames(colour1_, depth2_, stream); - //} - } else if (src->getChannel() == Channel::Right) { - //LOG(INFO) << "Rendering right"; - // Adjust pose to right eye position - Eigen::Affine3f transform(Eigen::Translation3f(camera.baseline,0.0f,0.0f)); - Eigen::Matrix4f matrix = src->getPose().cast<float>() * transform.matrix(); - params.m_viewMatrix = MatrixConversion::toCUDA(matrix.inverse()); - params.m_viewMatrixInverse = MatrixConversion::toCUDA(matrix); - - //ftl::cuda::clear_depth(depth1_, stream); - //ftl::cuda::dibr(depth1_, colour1_, normal1_, depth2_, colour_tmp_, depth3_, scene_->cameraCount(), params, stream); - //src->writeFrames(ts, colour1_, colour2_, stream); - //src->write(scene_.timestamp, output_, stream); - } else { - //LOG(INFO) << "No second rendering"; - //if (value("splatting", false)) { - //ftl::cuda::splat_points(depth1_, colour1_, normal1_, depth2_, colour2_, params, stream); - //src->writeFrames(ts, colour1_, depth2_, stream); - //src->write(scene_.timestamp, out, stream); - //} else { - //ftl::cuda::int_to_float(depth1_, depth2_, 1.0f / 1000.0f, stream); - temp_.get<GpuMat>(Channel::Depth).convertTo(out.get<GpuMat>(Channel::Depth), CV_32F, 1.0f / 1000.0f, cvstream); - //src->writeFrames(ts, colour1_, depth2_, stream); - //src->write(scene_.timestamp, output_, stream); - //} - } - //} - */ - - //ftl::cuda::median_filter(depth1_, depth2_, stream); - //ftl::cuda::splat_points(depth1_, depth2_, params, stream); - // TODO: Second pass + // TODO: Repeat rendering process... + } return true; } diff --git a/components/rgbd-sources/include/ftl/rgbd/channels.hpp b/components/rgbd-sources/include/ftl/rgbd/channels.hpp index a87d03e803523da7ad9370866b27061ea64e6044..9bf731a5319fa47c501a91e09f1e2acc48c5a4a8 100644 --- a/components/rgbd-sources/include/ftl/rgbd/channels.hpp +++ b/components/rgbd-sources/include/ftl/rgbd/channels.hpp @@ -21,6 +21,7 @@ enum struct Channel : int { Points = 6, // 32FC4 Confidence = 7, // 32F Contribution = 7, // 32F + EnergyVector, // 32FC4 Flow, // 32F Energy, // 32F LeftGray,