Skip to content
Snippets Groups Projects
Commit e1bcc046 authored by Nicolas Pope's avatar Nicolas Pope
Browse files

WIP: Initial move points cuda kernel

parent d1e3fc6b
No related branches found
No related tags found
2 merge requests!116Implements #133 point alignment,!114Ongoing #133 improvements
This commit is part of merge request !114. Comments created here will be created in the context of that merge request.
...@@ -31,7 +31,11 @@ __global__ void correspondence_energy_vector_kernel( ...@@ -31,7 +31,11 @@ __global__ void correspondence_energy_vector_kernel(
const int y = blockIdx.y*blockDim.y + threadIdx.y; const int y = blockIdx.y*blockDim.y + threadIdx.y;
const float3 world1 = make_float3(p1.tex2D(x, y)); const float3 world1 = make_float3(p1.tex2D(x, y));
if (world1.x == MINF) return; if (world1.x == MINF) {
vout(x,y) = make_float4(0.0f);
eout(x,y) = 0.0f;
return;
}
const float3 camPos2 = pose2 * world1; const float3 camPos2 = pose2 * world1;
const uint2 screen2 = cam2.camToScreen<uint2>(camPos2); const uint2 screen2 = cam2.camToScreen<uint2>(camPos2);
......
...@@ -27,7 +27,7 @@ bool ILW::process(ftl::rgbd::FrameSet &fs, cudaStream_t stream) { ...@@ -27,7 +27,7 @@ bool ILW::process(ftl::rgbd::FrameSet &fs, cudaStream_t stream) {
//for (int i=0; i<2; ++i) { //for (int i=0; i<2; ++i) {
_phase1(fs, stream); _phase1(fs, stream);
//for (int j=0; j<3; ++j) { //for (int j=0; j<3; ++j) {
// _phase2(fs); _phase2(fs, 0.1f, stream);
//} //}
// TODO: Break if no time left // TODO: Break if no time left
...@@ -118,12 +118,21 @@ bool ILW::_phase1(ftl::rgbd::FrameSet &fs, cudaStream_t stream) { ...@@ -118,12 +118,21 @@ bool ILW::_phase1(ftl::rgbd::FrameSet &fs, cudaStream_t stream) {
return true; return true;
} }
bool ILW::_phase2(ftl::rgbd::FrameSet &fs) { bool ILW::_phase2(ftl::rgbd::FrameSet &fs, float rate, cudaStream_t stream) {
// Run energies and motion kernel // Run energies and motion kernel
// Smooth vectors across a window and iteratively // Smooth vectors across a window and iteratively
// strongly disagreeing vectors should cancel out // strongly disagreeing vectors should cancel out
// A weak vector is overriden by a stronger one. // A weak vector is overriden by a stronger one.
for (auto &f : fs.frames) {
ftl::cuda::move_points(
f.getTexture<float4>(Channel::Points),
f.getTexture<float4>(Channel::EnergyVector),
rate,
stream
);
}
return true; return true;
} }
...@@ -15,6 +15,9 @@ __device__ inline float warpMax(float e) { ...@@ -15,6 +15,9 @@ __device__ inline float warpMax(float e) {
return e; return e;
} }
#define COR_WIN_RADIUS 16
#define COR_WIN_SIZE (COR_WIN_RADIUS * COR_WIN_RADIUS)
__global__ void correspondence_energy_vector_kernel( __global__ void correspondence_energy_vector_kernel(
TextureObject<float4> p1, TextureObject<float4> p1,
TextureObject<float4> p2, TextureObject<float4> p2,
...@@ -33,34 +36,39 @@ __global__ void correspondence_energy_vector_kernel( ...@@ -33,34 +36,39 @@ __global__ void correspondence_energy_vector_kernel(
const float3 world1 = make_float3(p1.tex2D(x, y)); const float3 world1 = make_float3(p1.tex2D(x, y));
if (world1.x == MINF) return; if (world1.x == MINF) return;
const float3 camPos2 = pose2 * world1; const float3 camPos2 = pose2 * world1;
const uint2 screen2 = cam2.camToScreen<uint2>(camPos2); const uint2 screen2 = cam2.camToScreen<uint2>(camPos2);
const int upsample = 8; float bestconf = 0.0f;
float3 bestpoint;
// Project to p2 using cam2 // Project to p2 using cam2
// Each thread takes a possible correspondence and calculates a weighting // Each thread takes a possible correspondence and calculates a weighting
const int lane = tid % WARP_SIZE; const int lane = tid % WARP_SIZE;
for (int i=lane; i<upsample*upsample; i+=WARP_SIZE) { for (int i=lane; i<COR_WIN_SIZE; i+=WARP_SIZE) {
const float u = (i % upsample) - (upsample / 2); const float u = (i % COR_WIN_RADIUS) - (COR_WIN_RADIUS / 2);
const float v = (i / upsample) - (upsample / 2); const float v = (i / COR_WIN_RADIUS) - (COR_WIN_RADIUS / 2);
const float3 world2 = make_float3(p2.tex2D(screen2.x+u, screen2.y+v)); const float3 world2 = make_float3(p2.tex2D(screen2.x+u, screen2.y+v));
if (world2.x == MINF) continue; if (world2.x == MINF) continue;
// Determine degree of correspondence // Determine degree of correspondence
const float confidence = 1.0f / length(world1 - world2); const float confidence = 1.0f / length(world1 - world2);
const float maxconf = warpMax(confidence);
if (confidence > bestconf) {
// This thread has best confidence value bestpoint = world2;
if (maxconf == confidence) { bestconf = confidence;
vout(x,y) = vout.tex2D(x, y) + make_float4(
(world1.x - world2.x) * maxconf,
(world1.y - world2.y) * maxconf,
(world1.z - world2.z) * maxconf,
maxconf);
eout(x,y) = eout.tex2D(x,y) + length(world1 - world2)*maxconf;
} }
} }
const float maxconf = warpMax(bestconf);
if (maxconf == bestconf && maxconf > 0.0f) {
vout(x,y) = vout.tex2D(x, y) + make_float4(
(world1.x - bestpoint.x) * maxconf,
(world1.y - bestpoint.y) * maxconf,
(world1.z - bestpoint.z) * maxconf,
maxconf);
eout(x,y) = eout.tex2D(x,y) + length(world1 - bestpoint)*maxconf;
}
} }
void ftl::cuda::correspondence_energy_vector( void ftl::cuda::correspondence_energy_vector(
...@@ -84,3 +92,34 @@ void ftl::cuda::correspondence_energy_vector( ...@@ -84,3 +92,34 @@ void ftl::cuda::correspondence_energy_vector(
); );
cudaSafeCall( cudaGetLastError() ); cudaSafeCall( cudaGetLastError() );
} }
//==============================================================================
__global__ void move_points_kernel(
ftl::cuda::TextureObject<float4> p,
ftl::cuda::TextureObject<float4> v,
float rate) {
const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
if (x < p.width() && y < p.height()) {
p(x,y) = p(x,y) + rate * v.tex2D((int)x,(int)y);
}
}
void ftl::cuda::move_points(
ftl::cuda::TextureObject<float4> &p,
ftl::cuda::TextureObject<float4> &v,
float rate,
cudaStream_t stream) {
const dim3 gridSize((p.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (p.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
move_points_kernel<<<gridSize, blockSize, 0, stream>>>(p,v,rate);
cudaSafeCall( cudaGetLastError() );
}
...@@ -56,7 +56,7 @@ class ILW : public ftl::Configurable { ...@@ -56,7 +56,7 @@ class ILW : public ftl::Configurable {
/* /*
* Calculate energies and move the points. * Calculate energies and move the points.
*/ */
bool _phase2(ftl::rgbd::FrameSet &fs); bool _phase2(ftl::rgbd::FrameSet &fs, float rate, cudaStream_t stream);
std::vector<detail::ILWData> data_; std::vector<detail::ILWData> data_;
}; };
......
...@@ -20,6 +20,13 @@ void correspondence_energy_vector( ...@@ -20,6 +20,13 @@ void correspondence_energy_vector(
cudaStream_t stream cudaStream_t stream
); );
void move_points(
ftl::cuda::TextureObject<float4> &p,
ftl::cuda::TextureObject<float4> &v,
float rate,
cudaStream_t stream
);
} }
} }
......
...@@ -214,8 +214,10 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cuda ...@@ -214,8 +214,10 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cuda
} else { } else {
if (ftl::rgbd::isFloatChannel(chan)) { if (ftl::rgbd::isFloatChannel(chan)) {
out.create<GpuMat>(chan, Format<float>(camera.width, camera.height)); out.create<GpuMat>(chan, Format<float>(camera.width, camera.height));
out.get<GpuMat>(chan).setTo(cv::Scalar(0.0f), cvstream);
} else { } else {
out.create<GpuMat>(chan, Format<uchar4>(camera.width, camera.height)); out.create<GpuMat>(chan, Format<uchar4>(camera.width, camera.height));
out.get<GpuMat>(chan).setTo(cv::Scalar(76,76,76,255), cvstream);
} }
renderChannel(params, out, chan, stream); renderChannel(params, out, chan, stream);
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment