Skip to content
Snippets Groups Projects
Commit e1bcc046 authored by Nicolas Pope's avatar Nicolas Pope
Browse files

WIP: Initial move points cuda kernel

parent d1e3fc6b
No related branches found
No related tags found
2 merge requests!116Implements #133 point alignment,!114Ongoing #133 improvements
This commit is part of merge request !114. Comments created here will be created in the context of that merge request.
......@@ -31,7 +31,11 @@ __global__ void correspondence_energy_vector_kernel(
const int y = blockIdx.y*blockDim.y + threadIdx.y;
const float3 world1 = make_float3(p1.tex2D(x, y));
if (world1.x == MINF) return;
if (world1.x == MINF) {
vout(x,y) = make_float4(0.0f);
eout(x,y) = 0.0f;
return;
}
const float3 camPos2 = pose2 * world1;
const uint2 screen2 = cam2.camToScreen<uint2>(camPos2);
......
......@@ -27,7 +27,7 @@ bool ILW::process(ftl::rgbd::FrameSet &fs, cudaStream_t stream) {
//for (int i=0; i<2; ++i) {
_phase1(fs, stream);
//for (int j=0; j<3; ++j) {
// _phase2(fs);
_phase2(fs, 0.1f, stream);
//}
// TODO: Break if no time left
......@@ -118,12 +118,21 @@ bool ILW::_phase1(ftl::rgbd::FrameSet &fs, cudaStream_t stream) {
return true;
}
bool ILW::_phase2(ftl::rgbd::FrameSet &fs) {
bool ILW::_phase2(ftl::rgbd::FrameSet &fs, float rate, cudaStream_t stream) {
// Run energies and motion kernel
// Smooth vectors across a window and iteratively
// strongly disagreeing vectors should cancel out
// A weak vector is overriden by a stronger one.
for (auto &f : fs.frames) {
ftl::cuda::move_points(
f.getTexture<float4>(Channel::Points),
f.getTexture<float4>(Channel::EnergyVector),
rate,
stream
);
}
return true;
}
......@@ -15,6 +15,9 @@ __device__ inline float warpMax(float e) {
return e;
}
#define COR_WIN_RADIUS 16
#define COR_WIN_SIZE (COR_WIN_RADIUS * COR_WIN_RADIUS)
__global__ void correspondence_energy_vector_kernel(
TextureObject<float4> p1,
TextureObject<float4> p2,
......@@ -33,34 +36,39 @@ __global__ void correspondence_energy_vector_kernel(
const float3 world1 = make_float3(p1.tex2D(x, y));
if (world1.x == MINF) return;
const float3 camPos2 = pose2 * world1;
const uint2 screen2 = cam2.camToScreen<uint2>(camPos2);
const int upsample = 8;
const uint2 screen2 = cam2.camToScreen<uint2>(camPos2);
float bestconf = 0.0f;
float3 bestpoint;
// Project to p2 using cam2
// Each thread takes a possible correspondence and calculates a weighting
const int lane = tid % WARP_SIZE;
for (int i=lane; i<upsample*upsample; i+=WARP_SIZE) {
const float u = (i % upsample) - (upsample / 2);
const float v = (i / upsample) - (upsample / 2);
for (int i=lane; i<COR_WIN_SIZE; i+=WARP_SIZE) {
const float u = (i % COR_WIN_RADIUS) - (COR_WIN_RADIUS / 2);
const float v = (i / COR_WIN_RADIUS) - (COR_WIN_RADIUS / 2);
const float3 world2 = make_float3(p2.tex2D(screen2.x+u, screen2.y+v));
if (world2.x == MINF) continue;
// Determine degree of correspondence
const float confidence = 1.0f / length(world1 - world2);
const float maxconf = warpMax(confidence);
// This thread has best confidence value
if (maxconf == confidence) {
vout(x,y) = vout.tex2D(x, y) + make_float4(
(world1.x - world2.x) * maxconf,
(world1.y - world2.y) * maxconf,
(world1.z - world2.z) * maxconf,
maxconf);
eout(x,y) = eout.tex2D(x,y) + length(world1 - world2)*maxconf;
if (confidence > bestconf) {
bestpoint = world2;
bestconf = confidence;
}
}
const float maxconf = warpMax(bestconf);
if (maxconf == bestconf && maxconf > 0.0f) {
vout(x,y) = vout.tex2D(x, y) + make_float4(
(world1.x - bestpoint.x) * maxconf,
(world1.y - bestpoint.y) * maxconf,
(world1.z - bestpoint.z) * maxconf,
maxconf);
eout(x,y) = eout.tex2D(x,y) + length(world1 - bestpoint)*maxconf;
}
}
void ftl::cuda::correspondence_energy_vector(
......@@ -84,3 +92,34 @@ void ftl::cuda::correspondence_energy_vector(
);
cudaSafeCall( cudaGetLastError() );
}
//==============================================================================
__global__ void move_points_kernel(
ftl::cuda::TextureObject<float4> p,
ftl::cuda::TextureObject<float4> v,
float rate) {
const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
if (x < p.width() && y < p.height()) {
p(x,y) = p(x,y) + rate * v.tex2D((int)x,(int)y);
}
}
void ftl::cuda::move_points(
ftl::cuda::TextureObject<float4> &p,
ftl::cuda::TextureObject<float4> &v,
float rate,
cudaStream_t stream) {
const dim3 gridSize((p.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (p.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
move_points_kernel<<<gridSize, blockSize, 0, stream>>>(p,v,rate);
cudaSafeCall( cudaGetLastError() );
}
......@@ -56,7 +56,7 @@ class ILW : public ftl::Configurable {
/*
* Calculate energies and move the points.
*/
bool _phase2(ftl::rgbd::FrameSet &fs);
bool _phase2(ftl::rgbd::FrameSet &fs, float rate, cudaStream_t stream);
std::vector<detail::ILWData> data_;
};
......
......@@ -20,6 +20,13 @@ void correspondence_energy_vector(
cudaStream_t stream
);
void move_points(
ftl::cuda::TextureObject<float4> &p,
ftl::cuda::TextureObject<float4> &v,
float rate,
cudaStream_t stream
);
}
}
......
......@@ -214,8 +214,10 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cuda
} else {
if (ftl::rgbd::isFloatChannel(chan)) {
out.create<GpuMat>(chan, Format<float>(camera.width, camera.height));
out.get<GpuMat>(chan).setTo(cv::Scalar(0.0f), cvstream);
} else {
out.create<GpuMat>(chan, Format<uchar4>(camera.width, camera.height));
out.get<GpuMat>(chan).setTo(cv::Scalar(76,76,76,255), cvstream);
}
renderChannel(params, out, chan, stream);
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment