Skip to content
Snippets Groups Projects
Commit 3ec52ef6 authored by Nicolas Pope's avatar Nicolas Pope
Browse files

Merge branch 'feature/237/optmls2' into 'master'

Resolves #237 optimising MLS smoothing

Closes #237

See merge request nicolas.pope/ftl!167
parents 63ef42cf dd857ff3
No related branches found
No related tags found
1 merge request!167Resolves #237 optimising MLS smoothing
Pipeline #16420 passed
......@@ -319,7 +319,7 @@ static void run(ftl::Configurable *root) {
pipeline1->append<ftl::operators::CrossSupport>("cross");
pipeline1->append<ftl::operators::DiscontinuityMask>("discontinuity");
pipeline1->append<ftl::operators::CullDiscontinuity>("remove_discontinuity");
pipeline1->append<ftl::operators::ColourMLS>("mls"); // Perform MLS (using smoothing channel)
pipeline1->append<ftl::operators::AggreMLS>("mls"); // Perform MLS (using smoothing channel)
pipeline1->append<ftl::operators::VisCrossSupport>("viscross")->set("enabled", false);
// Alignment
......
......@@ -183,7 +183,7 @@ TextureObject<T>::TextureObject(const cv::cuda::GpuMat &d, bool interpolated) {
height_ = d.rows;
needsfree_ = false;
cvType_ = ftl::traits::OpenCVType<T>::value;
//needsdestroy_ = true;
needsdestroy_ = true;
}
#endif // __CUDACC__
......@@ -218,7 +218,7 @@ TextureObject<T>::TextureObject(const cv::cuda::PtrStepSz<T> &d) {
height_ = d.rows;
needsfree_ = false;
cvType_ = ftl::traits::OpenCVType<T>::value;
//needsdestroy_ = true;
needsdestroy_ = true;
}
/**
......@@ -252,7 +252,7 @@ TextureObject<T>::TextureObject(T *ptr, int pitch, int width, int height) {
height_ = height;
needsfree_ = false;
cvType_ = ftl::traits::OpenCVType<T>::value;
//needsdestroy_ = true;
needsdestroy_ = true;
}
template <typename T>
......@@ -285,7 +285,7 @@ TextureObject<T>::TextureObject(size_t width, size_t height) {
needsfree_ = true;
pitch2_ = pitch_ / sizeof(T);
cvType_ = ftl::traits::OpenCVType<T>::value;
//needsdestroy_ = true;
needsdestroy_ = true;
}
#ifndef __CUDACC__
......@@ -312,6 +312,7 @@ TextureObject<T>::TextureObject(const TextureObject<T> &p) {
pitch2_ = pitch_ / sizeof(T);
cvType_ = ftl::traits::OpenCVType<T>::value;
needsfree_ = false;
needsdestroy_ = false;
}
template <typename T>
......@@ -323,14 +324,17 @@ TextureObject<T>::TextureObject(TextureObject<T> &&p) {
pitch_ = p.pitch_;
pitch2_ = pitch_ / sizeof(T);
needsfree_ = p.needsfree_;
needsdestroy_ = p.needsdestroy_;
p.texobj_ = 0;
p.needsfree_ = false;
p.needsdestroy_ = false;
p.ptr_ = nullptr;
cvType_ = ftl::traits::OpenCVType<T>::value;
}
template <typename T>
TextureObject<T> &TextureObject<T>::operator=(const TextureObject<T> &p) {
free();
texobj_ = p.texobj_;
ptr_ = p.ptr_;
width_ = p.width_;
......@@ -339,11 +343,13 @@ TextureObject<T> &TextureObject<T>::operator=(const TextureObject<T> &p) {
pitch2_ = pitch_ / sizeof(T);
cvType_ = ftl::traits::OpenCVType<T>::value;
needsfree_ = false;
needsdestroy_ = false;
return *this;
}
template <typename T>
TextureObject<T> &TextureObject<T>::operator=(TextureObject<T> &&p) {
free();
texobj_ = p.texobj_;
ptr_ = p.ptr_;
width_ = p.width_;
......@@ -351,8 +357,10 @@ TextureObject<T> &TextureObject<T>::operator=(TextureObject<T> &&p) {
pitch_ = p.pitch_;
pitch2_ = pitch_ / sizeof(T);
needsfree_ = p.needsfree_;
needsdestroy_ = p.needsdestroy_;
p.texobj_ = 0;
p.needsfree_ = false;
p.needsdestroy_ = false;
p.ptr_ = nullptr;
cvType_ = ftl::traits::OpenCVType<T>::value;
return *this;
......
......@@ -79,6 +79,51 @@ class ColourMLS : public ftl::operators::Operator {
private:
};
/**
* Use cross aggregation of neighbors for faster MLS smoothing. Utilises the
* cross support regions and colour weighting. This algorithm is based upon the
* cross aggregation method in the papers below. MLS was adapted into an
* approximate form which is not identical to real MLS as the weights are not
* perfectly correct for each point. The errors are visually minimal and it
* has linear performance wrt radius, rather than quadratic.
*
* Zhang K, Lu J, Lafruit G. Cross-based local stereo matching using orthogonal
* integral images. (2009).
*
* Better explained in:
* X. Mei, X. Sun, M. Zhou et al. On building an accurate stereo matching system
* on graphics hardware. (2011).
*
* The use of colour weighting is done as in:
* C. Kuster et al. Spatio-temporal geometry fusion for multiple hybrid cameras
* using moving least squares surfaces. (2014)
*
* The above paper also indicates the importance of stopping MLS at depth
* boundaries. They use k-means clustering (K=2) of depths, but we are using
* an approach (discontinuity mask) indicated in the following paper combined
* with the cross support regions to define the extent of the MLS neighbourhood.
*
* S. Orts-Escolano et al. Holoportation: Virtual 3D teleportation in real-time.
* (2016).
*
* The use of all these approaches in this combination is novel I believe.
*
*/
class AggreMLS : public ftl::operators::Operator {
public:
explicit AggreMLS(ftl::Configurable*);
~AggreMLS();
inline Operator::Type type() const override { return Operator::Type::OneToOne; }
bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override;
private:
ftl::cuda::TextureObject<float4> centroid_horiz_;
ftl::cuda::TextureObject<float4> centroid_vert_;
ftl::cuda::TextureObject<float4> normals_horiz_;
};
/**
* A version of Moving Least Squares where both the smoothing factor and
* neighbourhood size are adapted over an extra large range using the colour
......
......@@ -5,6 +5,7 @@
using ftl::cuda::TextureObject;
#define T_PER_BLOCK 8
#define WARP_SIZE 32
// ===== MLS Smooth ============================================================
......@@ -229,7 +230,7 @@ __device__ inline int segmentID(int u, int v) {
* level of single pixels can be subject to noise. Colour noise should first
* be removed from the image.
*/
template <bool FILLING>
template <bool FILLING, int RADIUS>
__global__ void colour_mls_smooth_csr_kernel(
TextureObject<uchar4> region,
TextureObject<float4> normals_in,
......@@ -268,34 +269,38 @@ __device__ inline int segmentID(int u, int v) {
// TODO: Does using a fixed loop size with range test work better?
// Or with warp per pixel version, this would be less of a problem...
// TODO: Do a separate vote fill step?
for (int v=-base.z; v<=base.w; ++v) {
for (int v=-RADIUS; v<=RADIUS; ++v) {
uchar4 baseY = region.tex2D(x,y+v);
for (int u=-baseY.x; u<=baseY.y; ++u) {
#pragma unroll
for (int u=-RADIUS; u<=RADIUS; ++u) {
const float d = depth_in.tex2D(x+u, y+v);
if (d > camera.minDepth && d < camera.maxDepth) {
//if (d > camera.minDepth && d < camera.maxDepth) {
float w = (d <= camera.minDepth || d >= camera.maxDepth || u < -baseY.x || u > baseY.y || v < -base.z || v > base.z) ? 0.0f : 1.0f;
// Point and normal of neighbour
const float3 Xi = camera.screenToCam((int)(x)+u,(int)(y)+v,d);
const float3 Ni = make_float3(normals_in.tex2D((int)(x)+u, (int)(y)+v));
if (Ni.x+Ni.y+Ni.z == 0.0f) continue;
// FIXME: Ensure bad normals are removed by setting depth invalid
//if (Ni.x+Ni.y+Ni.z == 0.0f) continue;
const float4 c = colour_in.tex2D(float(x+u) + 0.5f, float(y+v) + 0.5f);
const float cw = ftl::cuda::colourWeighting(c0,c,colour_smoothing);
w *= ftl::cuda::colourWeighting(c0,c,colour_smoothing);
// Allow missing point to borrow z value
// TODO: This is a bad choice of Z! Perhaps try histogram vote approach
if (FILLING && d0 == 0.0f) X = camera.screenToCam((int)(x),(int)(y),Xi.z);
//if (FILLING && d0 == 0.0f) X = camera.screenToCam((int)(x),(int)(y),Xi.z);
// Gauss approx weighting function using point distance
const float w = ftl::cuda::spatialWeighting(X,Xi,smoothing*cw);
w = ftl::cuda::spatialWeighting(X,Xi,smoothing*w);
aX += Xi*w;
nX += Ni*w;
contrib += w;
if (FILLING && w > 0.0f && v > -base.z+1 && v < base.w-1 && u > -baseY.x+1 && u < baseY.y-1) segment_check |= segmentID(u,v);
}
//if (FILLING && w > 0.0f && v > -base.z+1 && v < base.w-1 && u > -baseY.x+1 && u < baseY.y-1) segment_check |= segmentID(u,v);
//}
}
}
......@@ -341,11 +346,294 @@ void ftl::cuda::colour_mls_smooth_csr(
const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
if (filling) {
colour_mls_smooth_csr_kernel<true><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, depth_in, depth_out, colour_in, smoothing, colour_smoothing, camera);
colour_mls_smooth_csr_kernel<true,5><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, depth_in, depth_out, colour_in, smoothing, colour_smoothing, camera);
} else {
colour_mls_smooth_csr_kernel<false><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, depth_in, depth_out, colour_in, smoothing, colour_smoothing, camera);
colour_mls_smooth_csr_kernel<false,5><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, depth_in, depth_out, colour_in, smoothing, colour_smoothing, camera);
}
cudaSafeCall( cudaGetLastError() );
#ifdef _DEBUG
cudaSafeCall(cudaDeviceSynchronize());
#endif
}
// ===== Cross Aggregate MLS ===================================================
/*
* Smooth depth map using Moving Least Squares. This version uses colour
* similarity weights to adjust the spatial smoothing factor. It also uses
* cross support windows to prevent unwanted edge crossing. This function does
* the weighted aggregation of centroids and normals in the horizontal
* direction.
*/
template <int RADIUS>
__global__ void mls_aggr_horiz_kernel(
TextureObject<uchar4> region,
TextureObject<float4> normals_in,
TextureObject<float4> normals_out,
TextureObject<float> depth_in, // Virtual depth map
TextureObject<float4> centroid_out, // Accumulated output
TextureObject<uchar4> colour_in,
float smoothing,
float colour_smoothing,
ftl::rgbd::Camera camera) {
const int x = blockIdx.x*blockDim.x + threadIdx.x;
const int y = blockIdx.y*blockDim.y + threadIdx.y;
if (x < 0 || y < 0 || x >= depth_in.width() || y >= depth_in.height()) return;
float3 aX = make_float3(0.0f,0.0f,0.0f);
float3 nX = make_float3(0.0f,0.0f,0.0f);
float contrib = 0.0f;
float d0 = depth_in.tex2D(x, y);
// Note: x and y flipped as output is rotated.
centroid_out(y,x) = make_float4(0.0f);
normals_out(y,x) = normals_in(x,y);
if (d0 <= camera.minDepth || d0 >= camera.maxDepth) return;
float3 X = camera.screenToCam((int)(x),(int)(y),d0);
float4 c0 = colour_in.tex2D((float)x+0.5f, (float)y+0.5f);
// Cross-Support Neighbourhood
uchar4 base = region.tex2D(x,y);
#pragma unroll
for (int u=-RADIUS; u<=RADIUS; ++u) {
const float d = depth_in.tex2D(x+u, y);
// If outside of cross support range, set weight to 0 to ignore
float w = (d <= camera.minDepth || d >= camera.maxDepth || u < -base.x || u > base.y) ? 0.0f : 1.0f;
// Point and normal of neighbour
const float3 Xi = camera.screenToCam((int)(x)+u,(int)(y),d);
const float3 Ni = make_float3(normals_in.tex2D((int)(x)+u, (int)(y)));
// Bad or missing normals should be ignored
if (Ni.x+Ni.y+Ni.z == 0.0f) w = 0.0f;
// Gauss approx colour weighting.
const float4 c = colour_in.tex2D(float(x+u) + 0.5f, float(y) + 0.5f);
w *= ftl::cuda::colourWeighting(c0,c,colour_smoothing);
// Gauss approx weighting function using point distance
w = ftl::cuda::spatialWeighting(X,Xi,smoothing*w);
aX += Xi*w;
nX += Ni*w;
contrib += w;
}
if (contrib > 0.0f) {
nX /= contrib; // Weighted average normal
aX /= contrib; // Weighted average point (centroid)
// Note: x and y flipped since output is rotated 90 degrees.
centroid_out(y,x) = make_float4(aX, 0.0f);
normals_out(y,x) = make_float4(nX / length(nX), 0.0f);
}
}
/**
* This function does a vertical weighted aggregation of the centroids and
* normals generated by the horizontal aggregation.
*/
template <int RADIUS>
__global__ void mls_aggr_vert_kernel(
TextureObject<uchar4> region,
TextureObject<float4> normals_in,
TextureObject<float4> normals_out,
TextureObject<float4> centroid_in, // Virtual depth map
TextureObject<float4> centroid_out, // Accumulated output
TextureObject<uchar4> colour_in,
TextureObject<float> depth_in,
float smoothing,
float colour_smoothing,
ftl::rgbd::Camera camera) {
const int x = blockIdx.x*blockDim.x + threadIdx.x;
const int y = blockIdx.y*blockDim.y + threadIdx.y;
if (x < 0 || y < 0 || x >= depth_in.width() || y >= depth_in.height()) return;
float3 aX = make_float3(0.0f,0.0f,0.0f);
float3 nX = make_float3(0.0f,0.0f,0.0f);
float contrib = 0.0f;
float d0 = depth_in.tex2D(x, y);
if (d0 <= camera.minDepth || d0 >= camera.maxDepth) return;
float3 X = camera.screenToCam((int)(x),(int)(y),d0);
centroid_out(x,y) = make_float4(0.0f);
normals_out(x,y) = make_float4(0.0f);
float4 c0 = colour_in.tex2D((float)x+0.5f, (float)y+0.5f);
// Cross-Support Neighbourhood
uchar4 base = region.tex2D(x,y);
#pragma unroll
for (int v=-RADIUS; v<=RADIUS; ++v) {
const float d = depth_in.tex2D(x, y+v);
const float3 Xi = camera.screenToCam(x,y+v,d);
// Note: x and y flipped, input image is rotated.
float3 Ai = make_float3(centroid_in.tex2D(y+v, x));
// If outside the cross support range, set weight to 0 to ignore
float w = (Ai.z <= camera.minDepth || Ai.z >= camera.maxDepth || v < -base.z || v > base.w) ? 0.0f : 1.0f;
// Note: x and y flipped, input image is rotated.
const float3 Ni = make_float3(normals_in.tex2D(y+v, x));
// Bad normals should be ignored.
if (Ni.x+Ni.y+Ni.z == 0.0f) w = 0.0f;
// Gauss approx colour weighting.
const float4 c = colour_in.tex2D(float(x) + 0.5f, float(y+v) + 0.5f);
w *= ftl::cuda::colourWeighting(c0,c,colour_smoothing);
// Gauss approx weighting function using point distance
w = ftl::cuda::spatialWeighting(X,Xi,smoothing*w);
aX += Ai*w;
nX += Ni*w;
contrib += w;
}
// Normalise the summed points and normals
if (contrib > 0.0f) {
nX /= contrib; // Weighted average normal
aX /= contrib; // Weighted average point (centroid)
centroid_out(x,y) = make_float4(aX, 0.0f);
normals_out(x,y) = make_float4(nX / length(nX), 0.0f);
}
}
/**
* Using the aggregated centroids and normals, calculate the signed-distance-
* field and move the depth value accordingly using the calculated normal.
*/
__global__ void mls_adjust_depth_kernel(
TextureObject<float4> normals_in,
TextureObject<float4> centroid_in, // Virtual depth map
TextureObject<float> depth_in,
TextureObject<float> depth_out,
ftl::rgbd::Camera camera) {
const int x = blockIdx.x*blockDim.x + threadIdx.x;
const int y = blockIdx.y*blockDim.y + threadIdx.y;
if (x < 0 || y < 0 || x >= depth_out.width() || y >= depth_out.height()) return;
float3 aX = make_float3(centroid_in(x,y));
float3 nX = make_float3(normals_in(x,y));
float d0 = depth_in.tex2D(x, y);
depth_out(x,y) = d0;
if (d0 > camera.minDepth && d0 < camera.maxDepth && aX.z > camera.minDepth && aX.z < camera.maxDepth) {
float3 X = camera.screenToCam((int)(x),(int)(y),d0);
// Signed-Distance Field function
float fX = nX.x * (X.x - aX.x) + nX.y * (X.y - aX.y) + nX.z * (X.z - aX.z);
// Calculate new point using SDF function to adjust depth (and position)
X = X - nX * fX;
depth_out(x,y) = X.z;
}
}
void ftl::cuda::mls_aggr_horiz(
ftl::cuda::TextureObject<uchar4> &region,
ftl::cuda::TextureObject<float4> &normals_in,
ftl::cuda::TextureObject<float4> &normals_out,
ftl::cuda::TextureObject<float> &depth_in,
ftl::cuda::TextureObject<float4> &centroid_out,
ftl::cuda::TextureObject<uchar4> &colour_in,
float smoothing,
float colour_smoothing,
int radius,
const ftl::rgbd::Camera &camera,
cudaStream_t stream) {
const dim3 gridSize((normals_in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (normals_in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
switch(radius) {
case 1: mls_aggr_horiz_kernel<1><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, depth_in, centroid_out, colour_in, smoothing, colour_smoothing, camera); break;
case 2: mls_aggr_horiz_kernel<2><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, depth_in, centroid_out, colour_in, smoothing, colour_smoothing, camera); break;
case 3: mls_aggr_horiz_kernel<3><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, depth_in, centroid_out, colour_in, smoothing, colour_smoothing, camera); break;
case 5: mls_aggr_horiz_kernel<5><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, depth_in, centroid_out, colour_in, smoothing, colour_smoothing, camera); break;
case 10: mls_aggr_horiz_kernel<10><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, depth_in, centroid_out, colour_in, smoothing, colour_smoothing, camera); break;
case 15: mls_aggr_horiz_kernel<15><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, depth_in, centroid_out, colour_in, smoothing, colour_smoothing, camera); break;
case 20: mls_aggr_horiz_kernel<20><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, depth_in, centroid_out, colour_in, smoothing, colour_smoothing, camera); break;
default: return;
}
cudaSafeCall( cudaGetLastError() );
#ifdef _DEBUG
cudaSafeCall(cudaDeviceSynchronize());
#endif
}
void ftl::cuda::mls_aggr_vert(
ftl::cuda::TextureObject<uchar4> &region,
ftl::cuda::TextureObject<float4> &normals_in,
ftl::cuda::TextureObject<float4> &normals_out,
ftl::cuda::TextureObject<float4> &centroid_in,
ftl::cuda::TextureObject<float4> &centroid_out,
ftl::cuda::TextureObject<uchar4> &colour_in,
ftl::cuda::TextureObject<float> &depth_in,
float smoothing,
float colour_smoothing,
int radius,
const ftl::rgbd::Camera &camera,
cudaStream_t stream) {
const dim3 gridSize((normals_out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (normals_out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
switch(radius) {
case 1: mls_aggr_vert_kernel<1><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, centroid_in, centroid_out, colour_in, depth_in, smoothing, colour_smoothing, camera); break;
case 2: mls_aggr_vert_kernel<2><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, centroid_in, centroid_out, colour_in, depth_in, smoothing, colour_smoothing, camera); break;
case 3: mls_aggr_vert_kernel<3><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, centroid_in, centroid_out, colour_in, depth_in, smoothing, colour_smoothing, camera); break;
case 5: mls_aggr_vert_kernel<5><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, centroid_in, centroid_out, colour_in, depth_in, smoothing, colour_smoothing, camera); break;
case 10: mls_aggr_vert_kernel<10><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, centroid_in, centroid_out, colour_in, depth_in, smoothing, colour_smoothing, camera); break;
case 15: mls_aggr_vert_kernel<15><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, centroid_in, centroid_out, colour_in, depth_in, smoothing, colour_smoothing, camera); break;
case 20: mls_aggr_vert_kernel<20><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, centroid_in, centroid_out, colour_in, depth_in, smoothing, colour_smoothing, camera); break;
default: return;
}
cudaSafeCall( cudaGetLastError() );
#ifdef _DEBUG
cudaSafeCall(cudaDeviceSynchronize());
#endif
}
void ftl::cuda::mls_adjust_depth(
ftl::cuda::TextureObject<float4> &normals_in,
ftl::cuda::TextureObject<float4> &centroid_in,
ftl::cuda::TextureObject<float> &depth_in,
ftl::cuda::TextureObject<float> &depth_out,
const ftl::rgbd::Camera &camera,
cudaStream_t stream) {
const dim3 gridSize((depth_out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
mls_adjust_depth_kernel<<<gridSize, blockSize, 0, stream>>>(normals_in, centroid_in, depth_in, depth_out, camera);
cudaSafeCall( cudaGetLastError() );
......
......@@ -6,6 +6,7 @@
using ftl::operators::HFSmoother;
using ftl::operators::SimpleMLS;
using ftl::operators::ColourMLS;
using ftl::operators::AggreMLS;
using ftl::operators::AdaptiveMLS;
using ftl::operators::SmoothChannel;
using ftl::codecs::Channel;
......@@ -230,6 +231,102 @@ bool ColourMLS::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::So
}
// ====== Aggregating MLS ======================================================
AggreMLS::AggreMLS(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) {
}
AggreMLS::~AggreMLS() {
}
bool AggreMLS::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *s, cudaStream_t stream) {
float thresh = config()->value("mls_threshold", 0.4f);
float col_smooth = config()->value("mls_colour_smoothing", 30.0f);
int iters = config()->value("mls_iterations", 3);
int radius = config()->value("mls_radius",5);
bool aggre = config()->value("aggregation", true);
if (!in.hasChannel(Channel::Normals)) {
LOG(ERROR) << "Required normals channel missing for MLS";
return false;
}
auto size = in.get<GpuMat>(Channel::Depth).size();
centroid_horiz_.create(size.height, size.width);
normals_horiz_.create(size.height, size.width);
centroid_vert_.create(size.width, size.height);
// FIXME: Assume in and out are the same frame.
for (int i=0; i<iters; ++i) {
if (aggre) {
ftl::cuda::mls_aggr_horiz(
in.createTexture<uchar4>(Channel::Support1),
in.createTexture<float4>(Channel::Normals),
normals_horiz_,
in.createTexture<float>(Channel::Depth),
centroid_horiz_,
in.createTexture<uchar4>(Channel::Colour),
thresh,
col_smooth,
radius,
s->parameters(),
stream
);
ftl::cuda::mls_aggr_vert(
in.createTexture<uchar4>(Channel::Support1),
normals_horiz_,
in.createTexture<float4>(Channel::Normals),
centroid_horiz_,
centroid_vert_,
in.createTexture<uchar4>(Channel::Colour),
in.createTexture<float>(Channel::Depth),
thresh,
col_smooth,
radius,
s->parameters(),
stream
);
ftl::cuda::mls_adjust_depth(
in.createTexture<float4>(Channel::Normals),
centroid_vert_,
in.createTexture<float>(Channel::Depth),
in.createTexture<float>(Channel::Depth2, ftl::rgbd::Format<float>(size)),
s->parameters(),
stream
);
in.swapChannels(Channel::Depth, Channel::Depth2);
//in.swapChannels(Channel::Normals, Channel::Points);
} else {
ftl::cuda::colour_mls_smooth_csr(
in.createTexture<uchar4>(Channel::Support1),
in.createTexture<float4>(Channel::Normals),
in.createTexture<float4>(Channel::Points, ftl::rgbd::Format<float4>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())),
in.createTexture<float>(Channel::Depth),
in.createTexture<float>(Channel::Depth2, ftl::rgbd::Format<float>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())),
in.createTexture<uchar4>(Channel::Colour),
thresh,
col_smooth,
false,
s->parameters(),
stream
);
in.swapChannels(Channel::Depth, Channel::Depth2);
in.swapChannels(Channel::Normals, Channel::Points);
}
}
return true;
}
// ====== Adaptive MLS =========================================================
AdaptiveMLS::AdaptiveMLS(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) {
......
......@@ -42,6 +42,41 @@ void colour_mls_smooth_csr(
const ftl::rgbd::Camera &camera,
cudaStream_t stream);
void mls_adjust_depth(
ftl::cuda::TextureObject<float4> &normals_in,
ftl::cuda::TextureObject<float4> &centroid_in,
ftl::cuda::TextureObject<float> &depth_in,
ftl::cuda::TextureObject<float> &depth_out,
const ftl::rgbd::Camera &camera,
cudaStream_t stream);
void mls_aggr_horiz(
ftl::cuda::TextureObject<uchar4> &region,
ftl::cuda::TextureObject<float4> &normals_in,
ftl::cuda::TextureObject<float4> &normals_out,
ftl::cuda::TextureObject<float> &depth_in,
ftl::cuda::TextureObject<float4> &centroid_out,
ftl::cuda::TextureObject<uchar4> &colour_in,
float smoothing,
float colour_smoothing,
int radius,
const ftl::rgbd::Camera &camera,
cudaStream_t stream);
void mls_aggr_vert(
ftl::cuda::TextureObject<uchar4> &region,
ftl::cuda::TextureObject<float4> &normals_in,
ftl::cuda::TextureObject<float4> &normals_out,
ftl::cuda::TextureObject<float4> &centroid_in,
ftl::cuda::TextureObject<float4> &centroid_out,
ftl::cuda::TextureObject<uchar4> &colour_in,
ftl::cuda::TextureObject<float> &depth_in,
float smoothing,
float colour_smoothing,
int radius,
const ftl::rgbd::Camera &camera,
cudaStream_t stream);
void adaptive_mls_smooth(
ftl::cuda::TextureObject<float4> &normals_in,
ftl::cuda::TextureObject<float4> &normals_out,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment