Skip to content
Snippets Groups Projects
Commit ad97c03a authored by Nicolas Pope
Browse files

Merge branch 'feature/223/presmooth' into 'master'

Implements #223 colour based smooth, but is too simplistic

Closes #223

See merge request nicolas.pope/ftl!153
parents f19008a6 0cf5c92c
No related branches found
No related tags found
1 merge request: !153 Implements #223 colour based smooth, but is too simplistic
Pipeline #16059 canceled
......@@ -20,6 +20,7 @@ set(REPSRC
src/ilw/fill.cu
src/ilw/discontinuity.cu
src/ilw/correspondence.cu
src/filters/smoothing.cu
)
add_executable(ftl-reconstruct ${REPSRC})
......
#include "smoothing.hpp"
#include <ftl/cuda/weighting.hpp>
#define T_PER_BLOCK 8
/**
 * Colour-guided depth smoothing over a (2*RADIUS+1)^2 pixel window.
 *
 * One thread processes one pixel (x,y) of a 2D launch. The output pixel is
 * first cleared to 0 and is only replaced by a weighted average when the
 * centre depth lies inside [camera.minDepth, camera.maxDepth].
 *
 * For every neighbour, a colour weight is computed against the centre colour
 * (ftl::cuda::colourWeighting with `thresh`) and is used to scale `factor`,
 * which is then passed as the last argument of ftl::cuda::spatialWeighting
 * together with the two camera-space positions. The exact shape of both
 * weighting functions is defined in <ftl/cuda/weighting.hpp>, not here.
 *
 * Neighbours whose depth is outside [camera.minDepth, camera.maxDepth] are
 * skipped so that holes and invalid samples cannot pull the average, matching
 * what smooth_normals_kernel does for its neighbours.
 *
 * NOTE(review): neighbours beyond the image border are still sampled through
 * tex2D; what is returned there depends on how TextureObject handles
 * out-of-range coordinates - confirm.
 *
 * @tparam RADIUS    Half-size of the square window in pixels.
 * @param depth_in   Source depth, read only.
 * @param colour_in  Colour image used to guide the smoothing.
 * @param depth_out  Destination depth, written at (x,y) only.
 * @param camera     Provides minDepth/maxDepth and screenToCam().
 * @param factor     Scale applied to the colour weight before spatial weighting.
 * @param thresh     Threshold forwarded to colourWeighting().
 */
template <int RADIUS>
__global__ void depth_smooth_kernel(
        ftl::cuda::TextureObject<float> depth_in,
        ftl::cuda::TextureObject<uchar4> colour_in,
        ftl::cuda::TextureObject<float> depth_out,
        ftl::rgbd::Camera camera,
        float factor, float thresh) {

    const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
    const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;

    // The grid is rounded up to whole blocks, so guard the image tail.
    if (x < depth_in.width() && y < depth_in.height()) {
        float d = depth_in.tex2D((int)x,(int)y);

        // Default result: invalid/empty depth.
        depth_out(x,y) = 0.0f;

        // Nothing to smooth if the centre sample itself is invalid.
        if (d < camera.minDepth || d > camera.maxDepth) return;

        uchar4 c = colour_in.tex2D((int)x, (int)y);
        float3 pos = camera.screenToCam(x,y,d);

        float contrib = 0.0f;    // Sum of weights
        float new_depth = 0.0f;  // Sum of weight * depth

        for (int v=-RADIUS; v<=RADIUS; ++v) {
            for (int u=-RADIUS; u<=RADIUS; ++u) {
                const float dN = depth_in.tex2D((int)x + u, (int)y + v);

                // Ignore holes and out-of-range neighbours; they carry no
                // usable depth and must not contribute to the average.
                if (dN < camera.minDepth || dN > camera.maxDepth) continue;

                // Get colour difference to center
                const uchar4 cN = colour_in.tex2D((int)x+u, (int)y+v);
                const float colourWeight = ftl::cuda::colourWeighting(c, cN, thresh);

                const float3 posN = camera.screenToCam(x+u, y+v, dN);
                const float weight = ftl::cuda::spatialWeighting(posN, pos, factor * colourWeight);

                contrib += weight;
                new_depth += dN * weight;
            }
        }

        // Only normalise when at least one neighbour contributed.
        if (contrib > 0.0f) {
            depth_out(x,y) = new_depth / contrib;
        }
    }
}
/*
 * Host-side launcher for depth_smooth_kernel.
 *
 * Every one of the `iters` iterations issues two kernel launches on `stream`:
 * first depth_in -> depth_out, then depth_out -> depth_in. Both textures are
 * therefore overwritten and the final result of the last launch is written
 * to depth_in. Only radius values 1..5 have a kernel instantiation; any other
 * value launches nothing. The launch grid is derived from depth_out.
 */
void ftl::cuda::depth_smooth(
        ftl::cuda::TextureObject<float> &depth_in,
        ftl::cuda::TextureObject<uchar4> &colour_in,
        ftl::cuda::TextureObject<float> &depth_out,
        const ftl::rgbd::Camera &camera,
        int radius, float factor, float thresh, int iters, cudaStream_t stream) {

    const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
    const dim3 gridSize((depth_out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);

    for (int n = 0; n < iters; ++n) {
        // Two half-steps per iteration: forward (in -> out) then back (out -> in).
        for (int half = 0; half < 2; ++half) {
            const bool forward = (half == 0);
            ftl::cuda::TextureObject<float> &src = forward ? depth_in : depth_out;
            ftl::cuda::TextureObject<float> &dst = forward ? depth_out : depth_in;

            // The window radius is a template parameter, so dispatch at runtime.
            if (radius == 5) {
                depth_smooth_kernel<5><<<gridSize, blockSize, 0, stream>>>(src, colour_in, dst, camera, factor, thresh);
            } else if (radius == 4) {
                depth_smooth_kernel<4><<<gridSize, blockSize, 0, stream>>>(src, colour_in, dst, camera, factor, thresh);
            } else if (radius == 3) {
                depth_smooth_kernel<3><<<gridSize, blockSize, 0, stream>>>(src, colour_in, dst, camera, factor, thresh);
            } else if (radius == 2) {
                depth_smooth_kernel<2><<<gridSize, blockSize, 0, stream>>>(src, colour_in, dst, camera, factor, thresh);
            } else if (radius == 1) {
                depth_smooth_kernel<1><<<gridSize, blockSize, 0, stream>>>(src, colour_in, dst, camera, factor, thresh);
            }

            // Surface launch-configuration errors immediately.
            cudaSafeCall( cudaGetLastError() );
        }
    }

#ifdef _DEBUG
    // Debug builds block here so asynchronous kernel faults are reported at this point.
    cudaSafeCall(cudaDeviceSynchronize());
#endif
}
#ifndef _FTL_CUDA_SMOOTHING_HPP_
#define _FTL_CUDA_SMOOTHING_HPP_
#include <ftl/rgbd/camera.hpp>
#include <ftl/cuda_common.hpp>
namespace ftl {
namespace cuda {
/**
 * Colour-guided smoothing of a depth texture.
 *
 * The implementation runs `iters` iterations, each consisting of a pass from
 * depth_in to depth_out followed by a pass from depth_out back to depth_in,
 * so BOTH depth textures are overwritten and the output of the final pass
 * is written to depth_in.
 *
 * @param depth_in   Depth texture; read and written.
 * @param colour_in  Colour texture used to weight neighbouring samples.
 * @param depth_out  Second depth texture used as the ping-pong target; read and written.
 * @param camera     Camera parameters (depth range, screen-to-camera projection).
 * @param radius     Window radius in pixels; only values 1..5 launch a kernel.
 * @param factor     Scale applied to the colour weight before spatial weighting.
 * @param thresh     Threshold forwarded to the colour weighting function.
 * @param iters      Number of double-pass iterations.
 * @param stream     CUDA stream on which the kernels are launched.
 */
void depth_smooth(
ftl::cuda::TextureObject<float> &depth_in,
ftl::cuda::TextureObject<uchar4> &colour_in,
ftl::cuda::TextureObject<float> &depth_out,
const ftl::rgbd::Camera &camera,
int radius, float factor, float thresh, int iters,
cudaStream_t stream);
}
}
#endif // _FTL_CUDA_SMOOTHING_HPP_
#include "ilw.hpp"
#include <ftl/utility/matrix_conversion.hpp>
#include <ftl/rgbd/source.hpp>
#include <ftl/cuda/points.hpp>
#include <loguru.hpp>
#include "ilw_cuda.hpp"
using ftl::ILW;
using ftl::detail::ILWData;
using ftl::codecs::Channel;
using ftl::codecs::Channels;
using ftl::rgbd::Format;
using cv::cuda::GpuMat;
// Construct from a JSON configuration node, which is forwarded unchanged to
// the ftl::Configurable base class. No other state is initialised here.
ILW::ILW(nlohmann::json &config) : ftl::Configurable(config) {
}
// Destructor; performs no explicit cleanup in its body.
ILW::~ILW() {
}
/*
 * Run the currently enabled ILW phases on a frameset using the given stream.
 * The results of the individual phases are not inspected; this always
 * returns true.
 */
bool ILW::process(ftl::rgbd::FrameSet &fs, cudaStream_t stream) {
    // Phase 0: prepare per-frame channels (points, colour, energy buffers).
    _phase0(fs, stream);

    // Phase 1: correspondence search between every pair of frames.
    // Repeating phase 1 and running phase 2 (_phase2(fs)) several times per
    // call is currently disabled.
    // TODO: Break if no time left
    _phase1(fs, stream);

    return true;
}
/*
 * Phase 0: prepare every frame of the set for correspondence search.
 *
 * For each frame that has both Depth and Colour this
 *  - creates the Points channel (sized like Colour) and fills it from depth
 *    via ftl::cuda::point_cloud,
 *  - converts 3-channel colour to 4-channel BGRA in place,
 *  - creates the EnergyVector and Energy channels and clears them to zero,
 *  - creates a uchar4 texture for Colour.
 * Frames missing a required channel are logged and skipped.
 *
 * All GPU work, including the colour conversion, is enqueued on `stream` so
 * it stays ordered with the surrounding operations.
 *
 * Always returns true, even when frames were skipped.
 */
bool ILW::_phase0(ftl::rgbd::FrameSet &fs, cudaStream_t stream) {
    // One OpenCV wrapper around the caller's stream is enough for all frames.
    cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream);

    // Make points channel...
    for (size_t i=0; i<fs.frames.size(); ++i) {
        auto &f = fs.frames[i];
        auto *s = fs.sources[i];

        if (f.empty(Channel::Depth + Channel::Colour)) {
            LOG(ERROR) << "Missing required channel";
            continue;
        }

        auto &t = f.createTexture<float4>(Channel::Points, Format<float4>(f.get<GpuMat>(Channel::Colour).size()));
        auto pose = MatrixConversion::toCUDA(s->getPose().cast<float>()); //.inverse());
        ftl::cuda::point_cloud(t, f.createTexture<float>(Channel::Depth), s->parameters(), pose, stream);

        // TODO: Create energy vector texture and clear it
        // Create energy and clear it

        // Convert colour from BGR to BGRA if needed
        if (f.get<GpuMat>(Channel::Colour).type() == CV_8UC3) {
            // Convert to 4 channel colour: move the BGR data into a temporary
            // and write the BGRA result back into the frame's colour channel.
            auto &col = f.get<GpuMat>(Channel::Colour);
            GpuMat tmp(col.size(), CV_8UC4);
            cv::cuda::swap(col, tmp);
            // Run on the caller's stream rather than the default stream.
            // NOTE(review): tmp is released at the end of this scope while the
            // conversion may still be queued; this relies on the GpuMat
            // allocator's release being safe for in-flight work - confirm.
            cv::cuda::cvtColor(tmp, col, cv::COLOR_BGR2BGRA, 0, cvstream);
        }

        f.createTexture<float4>(Channel::EnergyVector, Format<float4>(f.get<GpuMat>(Channel::Colour).size()));
        f.createTexture<float>(Channel::Energy, Format<float>(f.get<GpuMat>(Channel::Colour).size()));
        f.createTexture<uchar4>(Channel::Colour);

        // Energy accumulators start from zero for every new frameset.
        f.get<GpuMat>(Channel::EnergyVector).setTo(cv::Scalar(0.0f,0.0f,0.0f,0.0f), cvstream);
        f.get<GpuMat>(Channel::Energy).setTo(cv::Scalar(0.0f), cvstream);
    }

    return true;
}
/*
 * Phase 1: for every ordered pair of distinct frames (i, j), run the
 * correspondence kernel that accumulates into frame i's EnergyVector and
 * Energy channels using frame j's points and camera. Exceptions of type
 * ftl::exception raised while fetching textures or launching are logged and
 * do not abort the remaining pairs. Always returns true.
 */
bool ILW::_phase1(ftl::rgbd::FrameSet &fs, cudaStream_t stream) {
    const size_t frameCount = fs.frames.size();

    for (size_t i = 0; i < frameCount; ++i) {
        for (size_t j = 0; j < frameCount; ++j) {
            // A frame is never matched against itself.
            if (i != j) {
                LOG(INFO) << "Running phase1";

                auto &f1 = fs.frames[i];
                auto &f2 = fs.frames[j];
                auto s2 = fs.sources[j];

                // Inverse pose of the second source, converted for device use.
                auto pose = MatrixConversion::toCUDA(s2->getPose().cast<float>().inverse());

                try {
                    // Calculate energy vector to best correspondence
                    // TODO: Add normals and other things...
                    auto &points1 = f1.getTexture<float4>(Channel::Points);
                    auto &points2 = f2.getTexture<float4>(Channel::Points);
                    auto &colour1 = f1.getTexture<uchar4>(Channel::Colour);
                    auto &colour2 = f2.getTexture<uchar4>(Channel::Colour);
                    auto &energyVec = f1.getTexture<float4>(Channel::EnergyVector);
                    auto &energy = f1.getTexture<float>(Channel::Energy);

                    ftl::cuda::correspondence_energy_vector(
                        points1, points2,
                        colour1, colour2,
                        energyVec, energy,
                        pose,
                        s2->parameters(),
                        stream
                    );
                } catch (ftl::exception &e) {
                    LOG(ERROR) << "Exception in correspondence: " << e.what();
                }

                LOG(INFO) << "Correspondences done... " << i;
            }
        }
    }

    return true;
}
// Phase 2 placeholder: intended to run the energies and motion kernel, but
// currently does nothing with `fs` and always returns true.
bool ILW::_phase2(ftl::rgbd::FrameSet &fs) {
// Run energies and motion kernel
return true;
}
#include "ilw_cuda.hpp"
using ftl::cuda::TextureObject;
using ftl::rgbd::Camera;
#define WARP_SIZE 32
#define T_PER_BLOCK 8
#define FULL_MASK 0xffffffff
/*
 * Butterfly (XOR-shuffle) maximum across a warp: after the loop every lane
 * holds the largest `e` of all WARP_SIZE lanes. The shuffles use FULL_MASK,
 * so all 32 lanes of the warp must call this together.
 */
__device__ inline float warpMax(float e) {
    int offset = WARP_SIZE >> 1;
    while (offset > 0) {
        // Exchange with the lane whose id differs in the `offset` bit.
        e = max(e, __shfl_xor_sync(FULL_MASK, e, offset, WARP_SIZE));
        offset >>= 1;
    }
    return e;
}
/*
 * Correspondence search between two point textures.
 *
 * Launch layout: blockDim.x must be a multiple of WARP_SIZE. Each warp
 * handles ONE pixel (x,y) of p1: x is the global thread x index divided by
 * WARP_SIZE, y is the global thread y index. The lanes of the warp share the
 * upsample*upsample candidate positions around the projection of that point
 * into the second camera.
 *
 * For each batch of WARP_SIZE candidates, the candidate with the highest
 * confidence (1 / distance between the two points) adds its weighted
 * difference vector to vout(x,y) and its weighted distance to eout(x,y).
 * If p1(x,y).x equals MINF the pixel is treated as having no point and both
 * outputs are reset to zero.
 *
 * Fixes relative to the previous version:
 *  - pixels outside p1 (the grid is rounded up) exit before any access;
 *  - every lane takes part in the full-mask warp reduction, lanes without a
 *    usable candidate vote with a confidence that cannot win;
 *  - when several lanes tie for the maximum, exactly one of them (the lowest
 *    lane) performs the read-modify-write of vout/eout.
 *
 * NOTE(review): a candidate coinciding exactly with world1 gives an infinite
 * confidence and therefore NaN contributions (0 * inf) - decide whether that
 * case needs clamping.
 * NOTE(review): vout/eout are read through tex2D and written through
 * operator() in the same kernel; confirm TextureObject makes the second
 * batch see the first batch's write.
 *
 * c1 and c2 are currently unused.
 */
__global__ void correspondence_energy_vector_kernel(
        TextureObject<float4> p1,
        TextureObject<float4> p2,
        TextureObject<uchar4> c1,
        TextureObject<uchar4> c2,
        TextureObject<float4> vout,
        TextureObject<float> eout,
        float4x4 pose2,  // Inverse
        Camera cam2) {

    // Each warp picks point in p1
    const int tid = (threadIdx.x + threadIdx.y * blockDim.x);
    const int x = (blockIdx.x*blockDim.x + threadIdx.x) / WARP_SIZE;
    const int y = blockIdx.y*blockDim.y + threadIdx.y;

    // x and y are identical for all lanes of a warp, so this exit is
    // warp-uniform and cannot split a warp before the shuffles below.
    if (x >= static_cast<int>(p1.width()) || y >= static_cast<int>(p1.height())) return;

    const float3 world1 = make_float3(p1.tex2D(x, y));
    if (world1.x == MINF) {
        vout(x,y) = make_float4(0.0f);
        eout(x,y) = 0.0f;
        return;
    }

    // Project to p2 using cam2
    const float3 camPos2 = pose2 * world1;
    const uint2 screen2 = cam2.camToScreen<uint2>(camPos2);

    const int upsample = 8;
    const int candidates = upsample*upsample;

    // Each thread takes a possible correspondence and calculates a weighting
    const int lane = tid % WARP_SIZE;

    // Iterate in whole-warp batches so that every lane executes the same
    // number of iterations and always reaches the warp-wide reduction.
    for (int base = 0; base < candidates; base += WARP_SIZE) {
        const int i = base + lane;

        bool valid = (i < candidates);
        float3 world2 = world1;       // Placeholder, only used when valid
        float confidence = -1.0f;     // Loses against any real candidate (>= 0)

        if (valid) {
            const float u = (i % upsample) - (upsample / 2);
            const float v = (i / upsample) - (upsample / 2);

            world2 = make_float3(p2.tex2D(screen2.x+u, screen2.y+v));
            valid = (world2.x != MINF);

            // Determine degree of correspondence
            if (valid) confidence = 1.0f / length(world1 - world2);
        }

        const float maxconf = warpMax(confidence);

        // Elect a single writer among the lanes holding the best confidence.
        const unsigned int winners = __ballot_sync(FULL_MASK, valid && (maxconf == confidence));

        if (winners != 0 && lane == (__ffs(winners) - 1)) {
            vout(x,y) = vout.tex2D(x, y) + make_float4(
                (world1.x - world2.x) * maxconf,
                (world1.y - world2.y) * maxconf,
                (world1.z - world2.z) * maxconf,
                maxconf);
            eout(x,y) = eout.tex2D(x,y) + length(world1 - world2)*maxconf;
        }
    }
}
/*
 * Host-side launcher for correspondence_energy_vector_kernel.
 *
 * Blocks are (2 * WARP_SIZE) x T_PER_BLOCK threads. The grid's x extent is
 * the width of p1 divided by two (rounded up) and its y extent is the height
 * divided by T_PER_BLOCK (rounded up). The kernel is enqueued on `stream`
 * and the launch status is checked.
 */
void ftl::cuda::correspondence_energy_vector(
        TextureObject<float4> &p1,
        TextureObject<float4> &p2,
        TextureObject<uchar4> &c1,
        TextureObject<uchar4> &c2,
        TextureObject<float4> &vout,
        TextureObject<float> &eout,
        float4x4 &pose2,
        const Camera &cam2,
        cudaStream_t stream) {

    // Number of warps laid out along x within one block.
    const int warpsPerBlock = 2;

    const dim3 blockSize(warpsPerBlock*WARP_SIZE, T_PER_BLOCK);
    const dim3 gridSize(
        (p1.width() + warpsPerBlock - 1)/warpsPerBlock,
        (p1.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);

    printf("COR SIZE %d,%d\n", p1.width(), p1.height());

    correspondence_energy_vector_kernel<<<gridSize, blockSize, 0, stream>>>(
        p1, p2,
        c1, c2,
        vout, eout,
        pose2, cam2);

    // Report launch-configuration errors right away.
    cudaSafeCall( cudaGetLastError() );
}
......@@ -30,6 +30,7 @@
#include <opencv2/opencv.hpp>
#include <ftl/net/universe.hpp>
#include "filters/smoothing.hpp"
#include <ftl/registration.hpp>
#include <cuda_profiler_api.h>
......@@ -244,9 +245,11 @@ static void run(ftl::Configurable *root) {
bool busy = false;
auto *filter = ftl::config::create<ftl::Configurable>(root, "filters");
group->setLatency(4);
group->setName("ReconGroup");
group->sync([splat,virt,&busy,&slave,&scene_A,&scene_B,&align,controls](ftl::rgbd::FrameSet &fs) -> bool {
group->sync([splat,virt,&busy,&slave,&scene_A,&scene_B,&align,controls,filter](ftl::rgbd::FrameSet &fs) -> bool {
//cudaSetDevice(scene->getCUDADevice());
//if (slave.isPaused()) return true;
......@@ -261,13 +264,46 @@ static void run(ftl::Configurable *root) {
// Swap the entire frameset to allow rapid return
fs.swapTo(scene_A);
ftl::pool.push([&scene_B,&scene_A,&busy,&slave,&align](int id) {
ftl::pool.push([&scene_B,&scene_A,&busy,&slave,&align, filter](int id) {
//cudaSetDevice(scene->getCUDADevice());
// TODO: Release frameset here...
//cudaSafeCall(cudaStreamSynchronize(scene->getIntegrationStream()));
UNIQUE_LOCK(scene_A.mtx, lk);
cv::cuda::GpuMat tmp;
float factor = filter->value("smooth_factor", 0.4f);
float colour_limit = filter->value("colour_limit", 30.0f);
bool do_smooth = filter->value("pre_smooth", false);
int iters = filter->value("iterations", 3);
int radius = filter->value("radius", 5);
if (do_smooth) {
// Presmooth...
for (int i=0; i<scene_A.frames.size(); ++i) {
auto &f = scene_A.frames[i];
auto s = scene_A.sources[i];
// Convert colour from BGR to BGRA if needed
if (f.get<cv::cuda::GpuMat>(Channel::Colour).type() == CV_8UC3) {
//cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream);
// Convert to 4 channel colour
auto &col = f.get<cv::cuda::GpuMat>(Channel::Colour);
tmp.create(col.size(), CV_8UC4);
cv::cuda::swap(col, tmp);
cv::cuda::cvtColor(tmp,col, cv::COLOR_BGR2BGRA, 0);
}
ftl::cuda::depth_smooth(
f.createTexture<float>(Channel::Depth),
f.createTexture<uchar4>(Channel::Colour),
f.createTexture<float>(Channel::Depth2, ftl::rgbd::Format<float>(f.get<cv::cuda::GpuMat>(Channel::Depth).size())),
s->parameters(),
radius, factor, colour_limit, iters, 0
);
}
}
// Send all frames to GPU, block until done?
//scene_A.upload(Channel::Colour + Channel::Depth); // TODO: (Nick) Add scene stream.
align->process(scene_A);
......
......@@ -45,11 +45,11 @@ __global__ void computeNormals_kernel(ftl::cuda::TextureObject<float4> output,
output(x,y) = make_float4(0, 0, 0, 0);
if(x > 0 && x < input.width()-1 && y > 0 && y < input.height()-1) {
const float3 CC = camera.screenToCam(x+0, y+0, (float)input.tex2D((int)x+0, (int)y+0) / 10000.0f);
const float3 PC = camera.screenToCam(x+0, y+1, (float)input.tex2D((int)x+0, (int)y+1) / 10000.0f);
const float3 CP = camera.screenToCam(x+1, y+0, (float)input.tex2D((int)x+1, (int)y+0) / 10000.0f);
const float3 MC = camera.screenToCam(x+0, y-1, (float)input.tex2D((int)x+0, (int)y-1) / 10000.0f);
const float3 CM = camera.screenToCam(x-1, y+0, (float)input.tex2D((int)x-1, (int)y+0) / 10000.0f);
const float3 CC = camera.screenToCam(x+0, y+0, (float)input.tex2D((int)x+0, (int)y+0) / 100000.0f);
const float3 PC = camera.screenToCam(x+0, y+1, (float)input.tex2D((int)x+0, (int)y+1) / 100000.0f);
const float3 CP = camera.screenToCam(x+1, y+0, (float)input.tex2D((int)x+1, (int)y+0) / 100000.0f);
const float3 MC = camera.screenToCam(x+0, y-1, (float)input.tex2D((int)x+0, (int)y-1) / 100000.0f);
const float3 CM = camera.screenToCam(x-1, y+0, (float)input.tex2D((int)x-1, (int)y+0) / 100000.0f);
//if(CC.z < && PC.x != MINF && CP.x != MINF && MC.x != MINF && CM.x != MINF) {
if (isValid(camera,CC) && isValid(camera,PC) && isValid(camera,CP) && isValid(camera,MC) && isValid(camera,CM)) {
......@@ -118,7 +118,7 @@ __global__ void smooth_normals_kernel(ftl::cuda::TextureObject<float4> norms,
if(x >= depth.width() || y >= depth.height()) return;
const float3 p0 = camera.screenToCam(x,y, (float)depth.tex2D((int)x,(int)y) / 10000.0f);
const float3 p0 = camera.screenToCam(x,y, (float)depth.tex2D((int)x,(int)y) / 100000.0f);
float3 nsum = make_float3(0.0f);
float contrib = 0.0f;
......@@ -128,7 +128,7 @@ __global__ void smooth_normals_kernel(ftl::cuda::TextureObject<float4> norms,
for (int v=-RADIUS; v<=RADIUS; ++v) {
for (int u=-RADIUS; u<=RADIUS; ++u) {
const float3 p = camera.screenToCam(x+u,y+v, (float)depth.tex2D((int)x+u,(int)y+v) / 10000.0f);
const float3 p = camera.screenToCam(x+u,y+v, (float)depth.tex2D((int)x+u,(int)y+v) / 100000.0f);
if (p.z < camera.minDepth || p.z > camera.maxDepth) continue;
const float s = ftl::cuda::spatialWeighting(p0, p, smoothing);
//const float s = 1.0f;
......
......@@ -71,7 +71,7 @@ __global__ void reprojection_kernel(
const int x = (blockIdx.x*blockDim.x + threadIdx.x);
const int y = blockIdx.y*blockDim.y + threadIdx.y;
const float d = (float)depth_in.tex2D((int)x, (int)y) / 10000.0f;
const float d = (float)depth_in.tex2D((int)x, (int)y) / 100000.0f;
if (d < params.camera.minDepth || d > params.camera.maxDepth) return;
const float3 worldPos = params.m_viewMatrixInverse * params.camera.screenToCam(x, y, d);
......@@ -192,7 +192,7 @@ __global__ void reprojection_kernel(
const int x = (blockIdx.x*blockDim.x + threadIdx.x);
const int y = blockIdx.y*blockDim.y + threadIdx.y;
const float d = (float)depth_in.tex2D((int)x, (int)y) / 10000.0f;
const float d = (float)depth_in.tex2D((int)x, (int)y) / 100000.0f;
if (d < params.camera.minDepth || d > params.camera.maxDepth) return;
const float3 worldPos = params.m_viewMatrixInverse * params.camera.screenToCam(x, y, d);
......
......@@ -482,7 +482,7 @@ bool Triangular::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out) {
ftl::cuda::normals(accum_.createTexture<float4>(Channel::Normals, Format<float4>(camera.width, camera.height)),
temp_.createTexture<float4>(Channel::Normals),
temp_.getTexture<int>(Channel::Depth2),
1, 0.02f,
value("normal_radius", 1), value("normal_smoothing", 0.02f),
params_.camera, params_.m_viewMatrix.getFloat3x3(), params_.m_viewMatrixInverse.getFloat3x3(), stream_);
// Reprojection of colours onto surface
......@@ -491,7 +491,7 @@ bool Triangular::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out) {
if (chan == Channel::Depth)
{
// Just convert int depth to float depth
temp_.get<GpuMat>(Channel::Depth2).convertTo(out.get<GpuMat>(Channel::Depth), CV_32F, 1.0f / 10000.0f, cvstream);
temp_.get<GpuMat>(Channel::Depth2).convertTo(out.get<GpuMat>(Channel::Depth), CV_32F, 1.0f / 100000.0f, cvstream);
} else if (chan == Channel::Normals) {
// Visualise normals to RGBA
out.create<GpuMat>(Channel::Normals, Format<uchar4>(camera.width, camera.height)).setTo(cv::Scalar(0,0,0,0), cvstream);
......
......@@ -146,7 +146,7 @@ float getZAtCoordinate(const float3 &barycentricCoord, const float (&tri)[3]) {
if (isBarycentricCoordInBounds(baryCentricCoordinate)) {
float new_depth = getZAtCoordinate(baryCentricCoordinate, d);
atomicMin(&depth_out(sx,sy), int(new_depth*10000.0f));
atomicMin(&depth_out(sx,sy), int(new_depth*100000.0f));
}
}
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment