diff --git a/components/renderers/cpp/include/ftl/render/CUDARender.hpp b/components/renderers/cpp/include/ftl/render/CUDARender.hpp index 7369d5c9cc9762159b6aaf241f1768ac908fda3c..59eb1377685af76a5eed1d78314dd5c9c76b698d 100644 --- a/components/renderers/cpp/include/ftl/render/CUDARender.hpp +++ b/components/renderers/cpp/include/ftl/render/CUDARender.hpp @@ -59,6 +59,9 @@ class CUDARender : public ftl::render::FSRenderer { ftl::cuda::TextureObject<int> contrib_; //ftl::cuda::TextureObject<half4> normals_; cv::cuda::GpuMat colour_scale_; + cv::cuda::GpuMat mls_contrib_; + cv::cuda::GpuMat mls_centroid_; + cv::cuda::GpuMat mls_normals_; std::list<cv::cuda::GpuMat*> screen_buffers_; std::list<cv::cuda::GpuMat*> depth_buffers_; diff --git a/components/renderers/cpp/src/CUDARender.cpp b/components/renderers/cpp/src/CUDARender.cpp index 9ee7b0457d13acd5cffa8ea33789224a1c6d235a..9dab392f3b651a6250e5d455986d8d8efff0348c 100644 --- a/components/renderers/cpp/src/CUDARender.cpp +++ b/components/renderers/cpp/src/CUDARender.cpp @@ -308,11 +308,11 @@ void CUDARender::_mesh(ftl::rgbd::Frame &out, const Eigen::Matrix4d &t, cudaStre ); } - ftl::cuda::apply_colour_scaling(colour_scale_, f.create<cv::cuda::GpuMat>(Channel::Colour), 3, stream_); + //ftl::cuda::apply_colour_scaling(colour_scale_, f.create<cv::cuda::GpuMat>(Channel::Colour), 3, stream_); } } - if (!colour_scale_.empty()) ftl::utility::show_image(colour_scale_, "CScale", 1.0f, ftl::utility::ImageVisualisation::HEAT_MAPPED); + //if (!colour_scale_.empty()) ftl::utility::show_image(colour_scale_, "CScale", 1.0f, ftl::utility::ImageVisualisation::HEAT_MAPPED); // For each source depth map for (size_t i=0; i < scene_->frames.size(); ++i) { @@ -424,17 +424,21 @@ void CUDARender::_mesh(ftl::rgbd::Frame &out, const Eigen::Matrix4d &t, cudaStre // Now merge new render to any existing frameset render, detecting collisions ftl::cuda::touch_merge(depth_out_, out.createTexture<float>(_getDepthChannel()), collisions_, 1024, 
touch_dist_, stream_); - // For each source depth map, verify results - /*if (value("post_carve_result", false)) { + // Generate actual depth map using MLS with mesh as estimate + float mls_smoothing = value("mls_smooth", 0.005f); + if (value("mls_full", true)) { + // Clear buffers + mls_centroid_.create(params_.camera.height, params_.camera.width, CV_32FC4); + mls_contrib_.create(params_.camera.height, params_.camera.width, CV_32F); + mls_normals_.create(params_.camera.height, params_.camera.width, CV_16FC4); + mls_centroid_.setTo(cv::Scalar(0,0,0,0), cvstream); + mls_contrib_.setTo(cv::Scalar(0), cvstream); + mls_normals_.setTo(cv::Scalar(0,0,0,0), cvstream); + for (size_t i=0; i < scene_->frames.size(); ++i) { - //if (!scene_->hasFrame(i)) continue; auto &f = scene_->frames[i].cast<ftl::rgbd::Frame>(); - //auto *s = scene_->sources[i]; - if (!f.has(Channel::Colour)) { - //LOG(ERROR) << "Missing required channel"; - continue; - } + if (!f.has(Channel::Colour)) continue; // We have the needed depth data? 
if (use_depth && !f.hasOwn(Channel::Depth) && !f.hasOwn(Channel::GroundTruth)) { @@ -442,54 +446,63 @@ void CUDARender::_mesh(ftl::rgbd::Frame &out, const Eigen::Matrix4d &t, cudaStre } //auto pose = MatrixConversion::toCUDA(t.cast<float>() * f.getPose().cast<float>()); + // VCAM to Original auto transformR = MatrixConversion::toCUDA(f.getPose().cast<float>().inverse() * t.cast<float>().inverse()) * poseInverse_; - //auto transform = pose_ * MatrixConversion::toCUDA(t.cast<float>() * f.getPose().cast<float>()); + // Original to VCAM + auto transform = pose_ * MatrixConversion::toCUDA(t.cast<float>() * f.getPose().cast<float>()); - ftl::cuda::depth_carve( - out.create<cv::cuda::GpuMat>(_getDepthChannel()), + ftl::cuda::mls_gather( + f.get<cv::cuda::GpuMat>(Channel::Normals), + mls_normals_, + out.get<cv::cuda::GpuMat>(_getDepthChannel()), f.get<cv::cuda::GpuMat>(Channel::Depth), + mls_centroid_, + mls_contrib_, + mls_smoothing, transformR, + transform, params_.camera, f.getLeft(), stream_ ); } - }*/ - - //filters_->filter(out, src, stream); - - // Generate normals for final virtual image - /*ftl::cuda::normals( - out.createTexture<half4>(_getNormalsChannel()), - temp_.createTexture<half4>(Channel::Normals), - out.getTexture<float>(_getDepthChannel()), - value("normal_radius", 1), value("normal_smoothing", 0.02f), - params_.camera, pose_.getFloat3x3(), poseInverse_.getFloat3x3(), stream_);*/ - - float mls_smoothing = value("mls_smooth", 0.01f); - int mls_radius = value("mls_radius", 0); - if (mls_radius == 0) { - ftl::cuda::normals( - out.createTexture<half4>(_getNormalsChannel()), - out.getTexture<float>(_getDepthChannel()), - params_.camera, stream_); - } else { - ftl::cuda::normals( - temp_.createTexture<half4>(Channel::Normals), - out.getTexture<float>(_getDepthChannel()), - params_.camera, stream_); - - ftl::cuda::mls_smooth( - temp_.createTexture<half4>(Channel::Normals), - out.createTexture<half4>(_getNormalsChannel()), - 
out.getTexture<float>(_getDepthChannel()), - //out.getTexture<float>(_getDepthChannel()), - value("mls_smooth", 0.01f), - value("mls_radius", 2), + // Now reduce MLS results to new depth+normals + ftl::cuda::mls_reduce( + mls_centroid_, + mls_normals_, + mls_contrib_, + out.create<cv::cuda::GpuMat>(_getNormalsChannel()), + out.create<cv::cuda::GpuMat>(_getDepthChannel()), params_.camera, stream_ ); + + } else { + int mls_radius = value("mls_radius", 0); + + if (mls_radius == 0) { + ftl::cuda::normals( + out.createTexture<half4>(_getNormalsChannel()), + out.getTexture<float>(_getDepthChannel()), + params_.camera, stream_); + } else { + ftl::cuda::normals( + temp_.createTexture<half4>(Channel::Normals), + out.getTexture<float>(_getDepthChannel()), + params_.camera, stream_); + + ftl::cuda::mls_smooth( + temp_.createTexture<half4>(Channel::Normals), + out.createTexture<half4>(_getNormalsChannel()), + out.getTexture<float>(_getDepthChannel()), + //out.getTexture<float>(_getDepthChannel()), + value("mls_smooth", 0.01f), + value("mls_radius", 2), + params_.camera, + stream_ + ); + } } ftl::cuda::transform_normals( diff --git a/components/renderers/cpp/src/carver.cu b/components/renderers/cpp/src/carver.cu index 79c78fc36823f16f3138edfb14eef738161940fa..edc4a5315cdf6df8382895d524735a4039fa6520 100644 --- a/components/renderers/cpp/src/carver.cu +++ b/components/renderers/cpp/src/carver.cu @@ -1,5 +1,6 @@ #include "carver.hpp" #include <cudatl/fixed.hpp> +#include <ftl/cuda/weighting.hpp> __device__ inline float depthErrorCoef(const ftl::rgbd::Camera &cam, float disps=1.0f) { return disps / (cam.baseline*cam.fx); @@ -103,9 +104,9 @@ __global__ void reverse_check_kernel( } // We found a match, so do a colour check - float idiff = 0.0f; + //float idiff = 127.0f; //if (match) { - // Generate colour scaling + /* // Generate colour scaling const float ximgscale = float(cwidth) / float(ointrin.width); ox = float(ox) * ximgscale; const float yimgscale = float(cheight) / 
float(ointrin.height); @@ -126,10 +127,10 @@ __global__ void reverse_check_kernel( //const float scaleZ = (vcol.z == 0) ? 1.0f : float(ocol.z) / float(vcol.z); //scale = (0.2126f*scaleZ + 0.7152f*scaleY + 0.0722f*scaleX); //} - colour_scale[x+pitch*y] = int8_t(max(-127.0f,min(127.0f,idiff))); + colour_scale[x+pitch*y] = int8_t(max(-127.0f,min(127.0f,idiff)));*/ // Too much carving means just outright remove the point. - depth_in[y*pitch4+x] = (count < 0 || fabsf(idiff) > 50.0f) ? 0.0f : d; + depth_in[y*pitch4+x] = (count < 0) ? 0.0f : d; } void ftl::cuda::depth_carve( @@ -170,6 +171,193 @@ void ftl::cuda::depth_carve( cudaSafeCall( cudaGetLastError() ); } +// ==== Multi image MLS ======================================================== + +/* + * Gather points for Moving Least Squares, from each source image + */ + template <int SEARCH_RADIUS> + __global__ void mls_gather_kernel( + const half4* __restrict__ normals_in, + half4* __restrict__ normals_out, + const float* __restrict__ depth_origin, + const float* __restrict__ depth_in, + float4* __restrict__ centroid_out, + float* __restrict__ contrib_out, + float smoothing, + float4x4 o_2_in, + float4x4 in_2_o, + ftl::rgbd::Camera camera_origin, + ftl::rgbd::Camera camera_in, + int npitch_out, + int cpitch_out, + int wpitch_out, + int dpitch_o, + int dpitch_i, + int npitch_in +) { + const int x = blockIdx.x*blockDim.x + threadIdx.x; + const int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x < 0 || y < 0 || x >= camera_origin.width || y >= camera_origin.height) return; + + float3 nX = make_float3(normals_out[y*npitch_out+x]); + float3 aX = make_float3(centroid_out[y*cpitch_out+x]); + float contrib = contrib_out[y*wpitch_out+x]; + + float d0 = depth_origin[x+y*dpitch_o]; + if (d0 <= camera_origin.minDepth || d0 >= camera_origin.maxDepth) return; + + float3 X = camera_origin.screenToCam((int)(x),(int)(y),d0); + + int2 s = camera_in.camToScreen<int2>(o_2_in * X); + + // Neighbourhood + for (int v=-SEARCH_RADIUS; 
v<=SEARCH_RADIUS; ++v) { + for (int u=-SEARCH_RADIUS; u<=SEARCH_RADIUS; ++u) { + const float d = (s.x+u >= 0 && s.x+u < camera_in.width && s.y+v >= 0 && s.y+v < camera_in.height) ? depth_in[s.x+u+(s.y+v)*dpitch_i] : 0.0f; + if (d <= camera_in.minDepth || d >= camera_in.maxDepth) continue; + + // Point and normal of neighbour + const float3 Xi = in_2_o * camera_in.screenToCam(s.x+u, s.y+v, d); + const float3 Ni = in_2_o.getFloat3x3() * make_float3(normals_in[s.x+u+(s.y+v)*npitch_in]); + + // Gauss approx weighting function using point distance + const float w = (Ni.x+Ni.y+Ni.z > 0.0f) ? ftl::cuda::spatialWeighting(X,Xi,smoothing) : 0.0f; + + aX += Xi*w; + nX += Ni*w; + contrib += w; + } + } + + normals_out[y*npitch_out+x] = make_half4(nX, 0.0f); + centroid_out[y*cpitch_out+x] = make_float4(aX, 0.0f); + contrib_out[y*wpitch_out+x] = contrib; +} + +/** + * Convert accumulated values into estimate of depth and normals at pixel. + */ +__global__ void mls_reduce_kernel( + const float4* __restrict__ centroid, + const half4* __restrict__ normals, + const float* __restrict__ contrib_out, + half4* __restrict__ normals_out, + float* __restrict__ depth, + ftl::rgbd::Camera camera, + int npitch_in, + int cpitch_in, + int wpitch, + int npitch, + int dpitch +) { + const int x = blockIdx.x*blockDim.x + threadIdx.x; + const int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x >= 0 && y >= 0 && x < camera.width && y < camera.height) { + float3 nX = make_float3(normals[y*npitch_in+x]); + float3 aX = make_float3(centroid[y*cpitch_in+x]); + float contrib = contrib_out[y*wpitch+x]; + + //depth[x+y*dpitch] = X.z; + normals_out[x+y*npitch] = make_half4(0.0f, 0.0f, 0.0f, 0.0f); + + float d0 = depth[x+y*dpitch]; + if (d0 < camera.minDepth || d0 > camera.maxDepth) return; + float3 X = camera.screenToCam((int)(x),(int)(y),d0); + + nX /= contrib; // Weighted average normal + aX /= contrib; // Weighted average point (centroid) + + // Signed-Distance Field function + float fX = nX.x * (X.x - 
aX.x) + nX.y * (X.y - aX.y) + nX.z * (X.z - aX.z); + + // Calculate new point using SDF function to adjust depth (and position) + X = X - nX * fX; + + depth[x+y*dpitch] = X.z; + normals_out[x+y*npitch] = make_half4(nX / length(nX), 0.0f); + } +} + +#define T_PER_BLOCK 8 + +void ftl::cuda::mls_gather( + const cv::cuda::GpuMat &normals_in, // Source frame + cv::cuda::GpuMat &normals_out, + const cv::cuda::GpuMat &depth_origin, // Rendered image + const cv::cuda::GpuMat &depth_in, + cv::cuda::GpuMat &centroid_out, + cv::cuda::GpuMat &contrib_out, + float smoothing, + const float4x4 &o_2_in, + const float4x4 &in_2_o, + const ftl::rgbd::Camera &camera_origin, // Virtual camera + const ftl::rgbd::Camera &camera_in, + cudaStream_t stream +) { + + const dim3 gridSize((depth_origin.cols + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_origin.rows + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + normals_out.create(depth_origin.size(), CV_16FC4); + centroid_out.create(depth_origin.size(), CV_32FC4); + contrib_out.create(depth_origin.size(), CV_32F); + + mls_gather_kernel<2><<<gridSize, blockSize, 0, stream>>>( + normals_in.ptr<half4>(), + normals_out.ptr<half4>(), + depth_origin.ptr<float>(), + depth_in.ptr<float>(), + centroid_out.ptr<float4>(), + contrib_out.ptr<float>(), + smoothing, + o_2_in, + in_2_o, + camera_origin, + camera_in, + normals_out.step1()/4, + centroid_out.step1()/4, + contrib_out.step1(), + depth_origin.step1(), + depth_in.step1(), + normals_in.step1()/4 + ); + cudaSafeCall( cudaGetLastError() ); +} + +void ftl::cuda::mls_reduce( + const cv::cuda::GpuMat &centroid, + const cv::cuda::GpuMat &normals, + const cv::cuda::GpuMat &contrib, + cv::cuda::GpuMat &normals_out, + cv::cuda::GpuMat &depth, + const ftl::rgbd::Camera &camera, + cudaStream_t stream +) { + + const dim3 gridSize((depth.cols + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.rows + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + 
normals_out.create(depth.size(), CV_16FC4); + + mls_reduce_kernel<<<gridSize, blockSize, 0, stream>>>( + centroid.ptr<float4>(), + normals.ptr<half4>(), + contrib.ptr<float>(), + normals_out.ptr<half4>(), + depth.ptr<float>(), + camera, + normals.step1()/4, + centroid.step1()/4, + contrib.step1(), + normals_out.step1()/4, + depth.step1() + ); + cudaSafeCall( cudaGetLastError() ); +} + // ==== Apply colour scale ===================================================== template <int RADIUS> @@ -191,23 +379,27 @@ __global__ void apply_colour_scaling_kernel( int sy = (float(sheight) / float(cheight)) * float(y); float s = 0.0f; + int count = 0; //float mindiff = 100.0f; for (int v=-RADIUS; v<=RADIUS; ++v) { #pragma unroll for (int u=-RADIUS; u<=RADIUS; ++u) { float ns = (sx >= RADIUS && sy >= RADIUS && sx < swidth-RADIUS && sy < sheight-RADIUS) ? scale[sx+u+(sy+v)*spitch] : 0.0f; - s += ns; + if (fabsf(ns) < 30) { + s += ns; + ++count; + } } } - s /= float((2*RADIUS+1)*(2*RADIUS+1)); + if (count > 0) s /= float(count); uchar4 c = colour[x+y*cpitch]; colour[x+y*cpitch] = make_uchar4( - max(0.0f, min(255.0f, float(c.x) + 0.0722f*s)), - max(0.0f, min(255.0f, float(c.y) + 0.7152f*s)), - max(0.0f, min(255.0f, float(c.z) + 0.2126f*s)), + max(0.0f, min(255.0f, float(c.x) + s)), + max(0.0f, min(255.0f, float(c.y) + s)), + max(0.0f, min(255.0f, float(c.z) + s)), 255.0f ); } diff --git a/components/renderers/cpp/src/carver.hpp b/components/renderers/cpp/src/carver.hpp index a50a8d396d725db63572df3cfca7a070d62d4f35..41e39350135cd68937bb9aed5afec56e423a8470 100644 --- a/components/renderers/cpp/src/carver.hpp +++ b/components/renderers/cpp/src/carver.hpp @@ -28,6 +28,31 @@ void apply_colour_scaling( int radius, cudaStream_t stream); +void mls_reduce( + const cv::cuda::GpuMat &centroid, + const cv::cuda::GpuMat &normals, + const cv::cuda::GpuMat &contrib, + cv::cuda::GpuMat &normals_out, + cv::cuda::GpuMat &depth, + const ftl::rgbd::Camera &camera, + cudaStream_t stream +); + +void 
mls_gather( + const cv::cuda::GpuMat &normals_in, // Source frame + cv::cuda::GpuMat &normals_out, + const cv::cuda::GpuMat &depth_origin, // Rendered image + const cv::cuda::GpuMat &depth_in, + cv::cuda::GpuMat &centroid_out, + cv::cuda::GpuMat &contrib_out, + float smoothing, + const float4x4 &o_2_in, + const float4x4 &in_2_o, + const ftl::rgbd::Camera &camera_origin, // Virtual camera + const ftl::rgbd::Camera &camera_in, + cudaStream_t stream +); + } }