diff --git a/applications/reconstruct/src/main.cpp b/applications/reconstruct/src/main.cpp index 06111688a1ed1f46a5b86a02ea5e58f0d57c4d91..3db5d6603fad4dc7b93106d3974dcec8d8ff9704 100644 --- a/applications/reconstruct/src/main.cpp +++ b/applications/reconstruct/src/main.cpp @@ -319,7 +319,7 @@ static void run(ftl::Configurable *root) { pipeline1->append<ftl::operators::CrossSupport>("cross"); pipeline1->append<ftl::operators::DiscontinuityMask>("discontinuity"); pipeline1->append<ftl::operators::CullDiscontinuity>("remove_discontinuity"); - pipeline1->append<ftl::operators::ColourMLS>("mls"); // Perform MLS (using smoothing channel) + pipeline1->append<ftl::operators::AggreMLS>("mls"); // Perform MLS (using smoothing channel) pipeline1->append<ftl::operators::VisCrossSupport>("viscross")->set("enabled", false); // Alignment diff --git a/components/common/cpp/include/ftl/cuda_common.hpp b/components/common/cpp/include/ftl/cuda_common.hpp index 3ce2452dcafd77d76ac8aab5e20ef96b585704fd..1b4f3a47afb000a9d1e05d264a5dd79a9d1c3e4a 100644 --- a/components/common/cpp/include/ftl/cuda_common.hpp +++ b/components/common/cpp/include/ftl/cuda_common.hpp @@ -183,7 +183,7 @@ TextureObject<T>::TextureObject(const cv::cuda::GpuMat &d, bool interpolated) { height_ = d.rows; needsfree_ = false; cvType_ = ftl::traits::OpenCVType<T>::value; - //needsdestroy_ = true; + needsdestroy_ = true; } #endif // __CUDACC__ @@ -218,7 +218,7 @@ TextureObject<T>::TextureObject(const cv::cuda::PtrStepSz<T> &d) { height_ = d.rows; needsfree_ = false; cvType_ = ftl::traits::OpenCVType<T>::value; - //needsdestroy_ = true; + needsdestroy_ = true; } /** @@ -252,7 +252,7 @@ TextureObject<T>::TextureObject(T *ptr, int pitch, int width, int height) { height_ = height; needsfree_ = false; cvType_ = ftl::traits::OpenCVType<T>::value; - //needsdestroy_ = true; + needsdestroy_ = true; } template <typename T> @@ -285,7 +285,7 @@ TextureObject<T>::TextureObject(size_t width, size_t height) { needsfree_ = true; pitch2_ = pitch_ / sizeof(T); cvType_ = ftl::traits::OpenCVType<T>::value; - //needsdestroy_ = true; + needsdestroy_ = true; } #ifndef __CUDACC__ @@ -312,6 +312,7 @@ TextureObject<T>::TextureObject(const TextureObject<T> &p) { pitch2_ = pitch_ / sizeof(T); cvType_ = ftl::traits::OpenCVType<T>::value; needsfree_ = false; + needsdestroy_ = false; } template <typename T> @@ -323,14 +324,17 @@ TextureObject<T>::TextureObject(TextureObject<T> &&p) { pitch_ = p.pitch_; pitch2_ = pitch_ / sizeof(T); needsfree_ = p.needsfree_; + needsdestroy_ = p.needsdestroy_; p.texobj_ = 0; p.needsfree_ = false; + p.needsdestroy_ = false; p.ptr_ = nullptr; cvType_ = ftl::traits::OpenCVType<T>::value; } template <typename T> TextureObject<T> &TextureObject<T>::operator=(const TextureObject<T> &p) { + free(); texobj_ = p.texobj_; ptr_ = p.ptr_; width_ = p.width_; @@ -339,11 +343,13 @@ TextureObject<T> &TextureObject<T>::operator=(const TextureObject<T> &p) { pitch2_ = pitch_ / sizeof(T); cvType_ = ftl::traits::OpenCVType<T>::value; needsfree_ = false; + needsdestroy_ = false; return *this; } template <typename T> TextureObject<T> &TextureObject<T>::operator=(TextureObject<T> &&p) { + free(); texobj_ = p.texobj_; ptr_ = p.ptr_; width_ = p.width_; @@ -351,8 +357,10 @@ TextureObject<T> &TextureObject<T>::operator=(TextureObject<T> &&p) { pitch_ = p.pitch_; pitch2_ = pitch_ / sizeof(T); needsfree_ = p.needsfree_; + needsdestroy_ = p.needsdestroy_; p.texobj_ = 0; p.needsfree_ = false; + p.needsdestroy_ = false; p.ptr_ = nullptr; cvType_ = ftl::traits::OpenCVType<T>::value; return *this; diff --git a/components/operators/include/ftl/operators/smoothing.hpp b/components/operators/include/ftl/operators/smoothing.hpp index f6683fcc3e665f67b708a8bf11140bfabf179060..6e0c6ff75e382f8a54d99587f545195c70d59e6b 100644 --- a/components/operators/include/ftl/operators/smoothing.hpp +++ b/components/operators/include/ftl/operators/smoothing.hpp @@ -79,6 +79,51 @@ class ColourMLS : public ftl::operators::Operator { private: }; +/** + * Use cross aggregation of neighbors for faster MLS smoothing. Utilises the + * cross support regions and colour weighting. This algorithm is based upon the + * cross aggregation method in the papers below. MLS was adapted into an + * approximate form which is not identical to real MLS as the weights are not + * perfectly correct for each point. The errors are visually minimal and it + * has linear performance wrt radius, rather than quadratic. + * + * Zhang K, Lu J, Lafruit G. Cross-based local stereo matching using orthogonal + * integral images. (2009). + * + * Better explained in: + * X. Mei, X. Sun, M. Zhou et al. On building an accurate stereo matching system + * on graphics hardware. (2011). + * + * The use of colour weighting is done as in: + * C. Kuster et al. Spatio-temporal geometry fusion for multiple hybrid cameras + * using moving least squares surfaces. (2014) + * + * The above paper also indicates the importance of stopping MLS at depth + * boundaries. They use k-means clustering (K=2) of depths, but we are using + * an approach (discontinuity mask) indicated in the following paper combined + * with the cross support regions to define the extent of the MLS neighbourhood. + * + * S. Orts-Escolano et al. Holoportation: Virtual 3D teleportation in real-time. + * (2016). + * + * The use of all these approaches in this combination is novel I believe. + * + */ +class AggreMLS : public ftl::operators::Operator { + public: + explicit AggreMLS(ftl::Configurable*); + ~AggreMLS(); + + inline Operator::Type type() const override { return Operator::Type::OneToOne; } + + bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override; + + private: + ftl::cuda::TextureObject<float4> centroid_horiz_; + ftl::cuda::TextureObject<float4> centroid_vert_; + ftl::cuda::TextureObject<float4> normals_horiz_; +}; + /** * A version of Moving Least Squares where both the smoothing factor and * neighbourhood size are adapted over an extra large range using the colour diff --git a/components/operators/src/mls.cu b/components/operators/src/mls.cu index 64fb921df7b25954ac133d51a94c1204145f868b..e07178cdf9606444c307f59a2adbb8818b049ed4 100644 --- a/components/operators/src/mls.cu +++ b/components/operators/src/mls.cu @@ -5,6 +5,7 @@ using ftl::cuda::TextureObject; #define T_PER_BLOCK 8 +#define WARP_SIZE 32 // ===== MLS Smooth ============================================================ @@ -229,7 +230,7 @@ __device__ inline int segmentID(int u, int v) { * level of single pixels can be subject to noise. Colour noise should first * be removed from the image. */ - template <bool FILLING> + template <bool FILLING, int RADIUS> __global__ void colour_mls_smooth_csr_kernel( TextureObject<uchar4> region, TextureObject<float4> normals_in, @@ -268,34 +269,38 @@ __device__ inline int segmentID(int u, int v) { // TODO: Does using a fixed loop size with range test work better? // Or with warp per pixel version, this would be less of a problem... // TODO: Do a separate vote fill step? - for (int v=-base.z; v<=base.w; ++v) { + for (int v=-RADIUS; v<=RADIUS; ++v) { uchar4 baseY = region.tex2D(x,y+v); - for (int u=-baseY.x; u<=baseY.y; ++u) { + #pragma unroll + for (int u=-RADIUS; u<=RADIUS; ++u) { const float d = depth_in.tex2D(x+u, y+v); - if (d > camera.minDepth && d < camera.maxDepth) { + //if (d > camera.minDepth && d < camera.maxDepth) { + + float w = (d <= camera.minDepth || d >= camera.maxDepth || u < -baseY.x || u > baseY.y || v < -base.z || v > base.z) ? 0.0f : 1.0f; // Point and normal of neighbour const float3 Xi = camera.screenToCam((int)(x)+u,(int)(y)+v,d); const float3 Ni = make_float3(normals_in.tex2D((int)(x)+u, (int)(y)+v)); - if (Ni.x+Ni.y+Ni.z == 0.0f) continue; + // FIXME: Ensure bad normals are removed by setting depth invalid + //if (Ni.x+Ni.y+Ni.z == 0.0f) continue; const float4 c = colour_in.tex2D(float(x+u) + 0.5f, float(y+v) + 0.5f); - const float cw = ftl::cuda::colourWeighting(c0,c,colour_smoothing); + w *= ftl::cuda::colourWeighting(c0,c,colour_smoothing); // Allow missing point to borrow z value // TODO: This is a bad choice of Z! Perhaps try histogram vote approach - if (FILLING && d0 == 0.0f) X = camera.screenToCam((int)(x),(int)(y),Xi.z); + //if (FILLING && d0 == 0.0f) X = camera.screenToCam((int)(x),(int)(y),Xi.z); // Gauss approx weighting function using point distance - const float w = ftl::cuda::spatialWeighting(X,Xi,smoothing*cw); + w = ftl::cuda::spatialWeighting(X,Xi,smoothing*w); aX += Xi*w; nX += Ni*w; contrib += w; - if (FILLING && w > 0.0f && v > -base.z+1 && v < base.w-1 && u > -baseY.x+1 && u < baseY.y-1) segment_check |= segmentID(u,v); - } + //if (FILLING && w > 0.0f && v > -base.z+1 && v < base.w-1 && u > -baseY.x+1 && u < baseY.y-1) segment_check |= segmentID(u,v); + //} } } @@ -341,11 +346,294 @@ void ftl::cuda::colour_mls_smooth_csr( const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); if (filling) { - colour_mls_smooth_csr_kernel<true><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, depth_in, depth_out, colour_in, smoothing, colour_smoothing, camera); + colour_mls_smooth_csr_kernel<true,5><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, depth_in, depth_out, colour_in, smoothing, colour_smoothing, camera); } else { - colour_mls_smooth_csr_kernel<false><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, depth_in, depth_out, colour_in, smoothing, colour_smoothing, camera); + colour_mls_smooth_csr_kernel<false,5><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, depth_in, depth_out, colour_in, smoothing, colour_smoothing, camera); + } + + cudaSafeCall( cudaGetLastError() ); + + + #ifdef _DEBUG + cudaSafeCall(cudaDeviceSynchronize()); + #endif +} + + +// ===== Cross Aggregate MLS =================================================== + +/* + * Smooth depth map using Moving Least Squares. This version uses colour + * similarity weights to adjust the spatial smoothing factor. It also uses + * cross support windows to prevent unwanted edge crossing. This function does + * the weighted aggregation of centroids and normals in the horizontal + * direction. + */ + template <int RADIUS> + __global__ void mls_aggr_horiz_kernel( + TextureObject<uchar4> region, + TextureObject<float4> normals_in, + TextureObject<float4> normals_out, + TextureObject<float> depth_in, // Virtual depth map + TextureObject<float4> centroid_out, // Accumulated output + TextureObject<uchar4> colour_in, + float smoothing, + float colour_smoothing, + ftl::rgbd::Camera camera) { + + const int x = blockIdx.x*blockDim.x + threadIdx.x; + const int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x < 0 || y < 0 || x >= depth_in.width() || y >= depth_in.height()) return; + + float3 aX = make_float3(0.0f,0.0f,0.0f); + float3 nX = make_float3(0.0f,0.0f,0.0f); + float contrib = 0.0f; + + float d0 = depth_in.tex2D(x, y); + + // Note: x and y flipped as output is rotated. + centroid_out(y,x) = make_float4(0.0f); + normals_out(y,x) = normals_in(x,y); + + if (d0 <= camera.minDepth || d0 >= camera.maxDepth) return; + + float3 X = camera.screenToCam((int)(x),(int)(y),d0); + float4 c0 = colour_in.tex2D((float)x+0.5f, (float)y+0.5f); + + // Cross-Support Neighbourhood + uchar4 base = region.tex2D(x,y); + + #pragma unroll + for (int u=-RADIUS; u<=RADIUS; ++u) { + const float d = depth_in.tex2D(x+u, y); + + // If outside of cross support range, set weight to 0 to ignore + float w = (d <= camera.minDepth || d >= camera.maxDepth || u < -base.x || u > base.y) ? 0.0f : 1.0f; + + // Point and normal of neighbour + const float3 Xi = camera.screenToCam((int)(x)+u,(int)(y),d); + const float3 Ni = make_float3(normals_in.tex2D((int)(x)+u, (int)(y))); + + // Bad or missing normals should be ignored + if (Ni.x+Ni.y+Ni.z == 0.0f) w = 0.0f; + + // Gauss approx colour weighting. + const float4 c = colour_in.tex2D(float(x+u) + 0.5f, float(y) + 0.5f); + w *= ftl::cuda::colourWeighting(c0,c,colour_smoothing); + + // Gauss approx weighting function using point distance + w = ftl::cuda::spatialWeighting(X,Xi,smoothing*w); + + aX += Xi*w; + nX += Ni*w; + contrib += w; + } + + if (contrib > 0.0f) { + nX /= contrib; // Weighted average normal + aX /= contrib; // Weighted average point (centroid) + + // Note: x and y flipped since output is rotated 90 degrees. + centroid_out(y,x) = make_float4(aX, 0.0f); + normals_out(y,x) = make_float4(nX / length(nX), 0.0f); + } +} + +/** + * This function does a vertical weighted aggregation of the centroids and + * normals generated by the horizontal aggregation. + */ +template <int RADIUS> + __global__ void mls_aggr_vert_kernel( + TextureObject<uchar4> region, + TextureObject<float4> normals_in, + TextureObject<float4> normals_out, + TextureObject<float4> centroid_in, // Virtual depth map + TextureObject<float4> centroid_out, // Accumulated output + TextureObject<uchar4> colour_in, + TextureObject<float> depth_in, + float smoothing, + float colour_smoothing, + ftl::rgbd::Camera camera) { + + const int x = blockIdx.x*blockDim.x + threadIdx.x; + const int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x < 0 || y < 0 || x >= depth_in.width() || y >= depth_in.height()) return; + + float3 aX = make_float3(0.0f,0.0f,0.0f); + float3 nX = make_float3(0.0f,0.0f,0.0f); + float contrib = 0.0f; + + float d0 = depth_in.tex2D(x, y); + if (d0 <= camera.minDepth || d0 >= camera.maxDepth) return; + + float3 X = camera.screenToCam((int)(x),(int)(y),d0); + + centroid_out(x,y) = make_float4(0.0f); + normals_out(x,y) = make_float4(0.0f); + + float4 c0 = colour_in.tex2D((float)x+0.5f, (float)y+0.5f); + + // Cross-Support Neighbourhood + uchar4 base = region.tex2D(x,y); + + #pragma unroll + for (int v=-RADIUS; v<=RADIUS; ++v) { + const float d = depth_in.tex2D(x, y+v); + const float3 Xi = camera.screenToCam(x,y+v,d); + + // Note: x and y flipped, input image is rotated. + float3 Ai = make_float3(centroid_in.tex2D(y+v, x)); + + // If outside the cross support range, set weight to 0 to ignore + float w = (Ai.z <= camera.minDepth || Ai.z >= camera.maxDepth || v < -base.z || v > base.w) ? 0.0f : 1.0f; + + // Note: x and y flipped, input image is rotated. + const float3 Ni = make_float3(normals_in.tex2D(y+v, x)); + + // Bad normals should be ignored. + if (Ni.x+Ni.y+Ni.z == 0.0f) w = 0.0f; + + // Gauss approx colour weighting. + const float4 c = colour_in.tex2D(float(x) + 0.5f, float(y+v) + 0.5f); + w *= ftl::cuda::colourWeighting(c0,c,colour_smoothing); + + // Gauss approx weighting function using point distance + w = ftl::cuda::spatialWeighting(X,Xi,smoothing*w); + + aX += Ai*w; + nX += Ni*w; + contrib += w; } + + // Normalise the summed points and normals + if (contrib > 0.0f) { + nX /= contrib; // Weighted average normal + aX /= contrib; // Weighted average point (centroid) + centroid_out(x,y) = make_float4(aX, 0.0f); + normals_out(x,y) = make_float4(nX / length(nX), 0.0f); + } +} + +/** + * Using the aggregated centroids and normals, calculate the signed-distance- + * field and move the depth value accordingly using the calculated normal. + */ +__global__ void mls_adjust_depth_kernel( + TextureObject<float4> normals_in, + TextureObject<float4> centroid_in, // Virtual depth map + TextureObject<float> depth_in, + TextureObject<float> depth_out, + ftl::rgbd::Camera camera) { + + const int x = blockIdx.x*blockDim.x + threadIdx.x; + const int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x < 0 || y < 0 || x >= depth_out.width() || y >= depth_out.height()) return; + + float3 aX = make_float3(centroid_in(x,y)); + float3 nX = make_float3(normals_in(x,y)); + + float d0 = depth_in.tex2D(x, y); + depth_out(x,y) = d0; + + if (d0 > camera.minDepth && d0 < camera.maxDepth && aX.z > camera.minDepth && aX.z < camera.maxDepth) { + float3 X = camera.screenToCam((int)(x),(int)(y),d0); + + // Signed-Distance Field function + float fX = nX.x * (X.x - aX.x) + nX.y * (X.y - aX.y) + nX.z * (X.z - aX.z); + + // Calculate new point using SDF function to adjust depth (and position) + X = X - nX * fX; + depth_out(x,y) = X.z; + } +} + + +void ftl::cuda::mls_aggr_horiz( + ftl::cuda::TextureObject<uchar4> ®ion, + ftl::cuda::TextureObject<float4> &normals_in, + ftl::cuda::TextureObject<float4> &normals_out, + ftl::cuda::TextureObject<float> &depth_in, + ftl::cuda::TextureObject<float4> ¢roid_out, + ftl::cuda::TextureObject<uchar4> &colour_in, + float smoothing, + float colour_smoothing, + int radius, + const ftl::rgbd::Camera &camera, + cudaStream_t stream) { + + const dim3 gridSize((normals_in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (normals_in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + switch(radius) { + case 1: mls_aggr_horiz_kernel<1><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, depth_in, centroid_out, colour_in, smoothing, colour_smoothing, camera); break; + case 2: mls_aggr_horiz_kernel<2><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, depth_in, centroid_out, colour_in, smoothing, colour_smoothing, camera); break; + case 3: mls_aggr_horiz_kernel<3><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, depth_in, centroid_out, colour_in, smoothing, colour_smoothing, camera); break; + case 5: mls_aggr_horiz_kernel<5><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, depth_in, centroid_out, colour_in, smoothing, colour_smoothing, camera); break; + case 10: mls_aggr_horiz_kernel<10><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, depth_in, centroid_out, colour_in, smoothing, colour_smoothing, camera); break; + case 15: mls_aggr_horiz_kernel<15><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, depth_in, centroid_out, colour_in, smoothing, colour_smoothing, camera); break; + case 20: mls_aggr_horiz_kernel<20><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, depth_in, centroid_out, colour_in, smoothing, colour_smoothing, camera); break; + default: return; + } + cudaSafeCall( cudaGetLastError() ); + + + #ifdef _DEBUG + cudaSafeCall(cudaDeviceSynchronize()); + #endif +} + +void ftl::cuda::mls_aggr_vert( + ftl::cuda::TextureObject<uchar4> ®ion, + ftl::cuda::TextureObject<float4> &normals_in, + ftl::cuda::TextureObject<float4> &normals_out, + ftl::cuda::TextureObject<float4> ¢roid_in, + ftl::cuda::TextureObject<float4> ¢roid_out, + ftl::cuda::TextureObject<uchar4> &colour_in, + ftl::cuda::TextureObject<float> &depth_in, + float smoothing, + float colour_smoothing, + int radius, + const ftl::rgbd::Camera &camera, + cudaStream_t stream) { + + const dim3 gridSize((normals_out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (normals_out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + switch(radius) { + case 1: mls_aggr_vert_kernel<1><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, centroid_in, centroid_out, colour_in, depth_in, smoothing, colour_smoothing, camera); break; + case 2: mls_aggr_vert_kernel<2><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, centroid_in, centroid_out, colour_in, depth_in, smoothing, colour_smoothing, camera); break; + case 3: mls_aggr_vert_kernel<3><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, centroid_in, centroid_out, colour_in, depth_in, smoothing, colour_smoothing, camera); break; + case 5: mls_aggr_vert_kernel<5><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, centroid_in, centroid_out, colour_in, depth_in, smoothing, colour_smoothing, camera); break; + case 10: mls_aggr_vert_kernel<10><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, centroid_in, centroid_out, colour_in, depth_in, smoothing, colour_smoothing, camera); break; + case 15: mls_aggr_vert_kernel<15><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, centroid_in, centroid_out, colour_in, depth_in, smoothing, colour_smoothing, camera); break; + case 20: mls_aggr_vert_kernel<20><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, centroid_in, centroid_out, colour_in, depth_in, smoothing, colour_smoothing, camera); break; + default: return; + } + cudaSafeCall( cudaGetLastError() ); + + + #ifdef _DEBUG + cudaSafeCall(cudaDeviceSynchronize()); + #endif +} + +void ftl::cuda::mls_adjust_depth( + ftl::cuda::TextureObject<float4> &normals_in, + ftl::cuda::TextureObject<float4> ¢roid_in, + ftl::cuda::TextureObject<float> &depth_in, + ftl::cuda::TextureObject<float> &depth_out, + const ftl::rgbd::Camera &camera, + cudaStream_t stream) { + + const dim3 gridSize((depth_out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + mls_adjust_depth_kernel<<<gridSize, blockSize, 0, stream>>>(normals_in, centroid_in, depth_in, depth_out, camera); cudaSafeCall( cudaGetLastError() ); diff --git a/components/operators/src/smoothing.cpp b/components/operators/src/smoothing.cpp index 8b49b9676b4cb44d1da769d0e2538ba801aae2df..a4b23ab7c46fae80a13c5c4a4dd761dd81b6d822 100644 --- a/components/operators/src/smoothing.cpp +++ b/components/operators/src/smoothing.cpp @@ -6,6 +6,7 @@ using ftl::operators::HFSmoother; using ftl::operators::SimpleMLS; using ftl::operators::ColourMLS; +using ftl::operators::AggreMLS; using ftl::operators::AdaptiveMLS; using ftl::operators::SmoothChannel; using ftl::codecs::Channel; @@ -230,6 +231,102 @@ bool ColourMLS::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::So } +// ====== Aggregating MLS ====================================================== + +AggreMLS::AggreMLS(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) { + +} + +AggreMLS::~AggreMLS() { + +} + +bool AggreMLS::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *s, cudaStream_t stream) { + float thresh = config()->value("mls_threshold", 0.4f); + float col_smooth = config()->value("mls_colour_smoothing", 30.0f); + int iters = config()->value("mls_iterations", 3); + int radius = config()->value("mls_radius",5); + bool aggre = config()->value("aggregation", true); + + if (!in.hasChannel(Channel::Normals)) { + LOG(ERROR) << "Required normals channel missing for MLS"; + return false; + } + + auto size = in.get<GpuMat>(Channel::Depth).size(); + centroid_horiz_.create(size.height, size.width); + normals_horiz_.create(size.height, size.width); + centroid_vert_.create(size.width, size.height); + + // FIXME: Assume in and out are the same frame. + for (int i=0; i<iters; ++i) { + + if (aggre) { + ftl::cuda::mls_aggr_horiz( + in.createTexture<uchar4>(Channel::Support1), + in.createTexture<float4>(Channel::Normals), + normals_horiz_, + in.createTexture<float>(Channel::Depth), + centroid_horiz_, + in.createTexture<uchar4>(Channel::Colour), + thresh, + col_smooth, + radius, + s->parameters(), + stream + ); + + ftl::cuda::mls_aggr_vert( + in.createTexture<uchar4>(Channel::Support1), + normals_horiz_, + in.createTexture<float4>(Channel::Normals), + centroid_horiz_, + centroid_vert_, + in.createTexture<uchar4>(Channel::Colour), + in.createTexture<float>(Channel::Depth), + thresh, + col_smooth, + radius, + s->parameters(), + stream + ); + + ftl::cuda::mls_adjust_depth( + in.createTexture<float4>(Channel::Normals), + centroid_vert_, + in.createTexture<float>(Channel::Depth), + in.createTexture<float>(Channel::Depth2, ftl::rgbd::Format<float>(size)), + s->parameters(), + stream + ); + + in.swapChannels(Channel::Depth, Channel::Depth2); + //in.swapChannels(Channel::Normals, Channel::Points); + + } else { + ftl::cuda::colour_mls_smooth_csr( + in.createTexture<uchar4>(Channel::Support1), + in.createTexture<float4>(Channel::Normals), + in.createTexture<float4>(Channel::Points, ftl::rgbd::Format<float4>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), + in.createTexture<float>(Channel::Depth), + in.createTexture<float>(Channel::Depth2, ftl::rgbd::Format<float>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), + in.createTexture<uchar4>(Channel::Colour), + thresh, + col_smooth, + false, + s->parameters(), + stream + ); + + in.swapChannels(Channel::Depth, Channel::Depth2); + in.swapChannels(Channel::Normals, Channel::Points); + } + } + + return true; +} + + // ====== Adaptive MLS ========================================================= AdaptiveMLS::AdaptiveMLS(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) { diff --git a/components/operators/src/smoothing_cuda.hpp b/components/operators/src/smoothing_cuda.hpp index a92a49260a48dcabe115768b347dfca5cc2283ab..6681c1a2a0383fdf69c7a62ab0b78346fede5f3a 100644 --- a/components/operators/src/smoothing_cuda.hpp +++ b/components/operators/src/smoothing_cuda.hpp @@ -42,6 +42,41 @@ void colour_mls_smooth_csr( const ftl::rgbd::Camera &camera, cudaStream_t stream); +void mls_adjust_depth( + ftl::cuda::TextureObject<float4> &normals_in, + ftl::cuda::TextureObject<float4> ¢roid_in, + ftl::cuda::TextureObject<float> &depth_in, + ftl::cuda::TextureObject<float> &depth_out, + const ftl::rgbd::Camera &camera, + cudaStream_t stream); + +void mls_aggr_horiz( + ftl::cuda::TextureObject<uchar4> ®ion, + ftl::cuda::TextureObject<float4> &normals_in, + ftl::cuda::TextureObject<float4> &normals_out, + ftl::cuda::TextureObject<float> &depth_in, + ftl::cuda::TextureObject<float4> ¢roid_out, + ftl::cuda::TextureObject<uchar4> &colour_in, + float smoothing, + float colour_smoothing, + int radius, + const ftl::rgbd::Camera &camera, + cudaStream_t stream); + +void mls_aggr_vert( + ftl::cuda::TextureObject<uchar4> ®ion, + ftl::cuda::TextureObject<float4> &normals_in, + ftl::cuda::TextureObject<float4> &normals_out, + ftl::cuda::TextureObject<float4> ¢roid_in, + ftl::cuda::TextureObject<float4> ¢roid_out, + ftl::cuda::TextureObject<uchar4> &colour_in, + ftl::cuda::TextureObject<float> &depth_in, + float smoothing, + float colour_smoothing, + int radius, + const ftl::rgbd::Camera &camera, + cudaStream_t stream); + void adaptive_mls_smooth( ftl::cuda::TextureObject<float4> &normals_in, ftl::cuda::TextureObject<float4> &normals_out,