Skip to content
Snippets Groups Projects
Commit 317b2094 authored by Nicolas Pope's avatar Nicolas Pope
Browse files

Use 5x5 proper census for SGM

parent d585ac50
No related branches found
No related tags found
No related merge requests found
......@@ -21,14 +21,15 @@ namespace sgm {
namespace {
static constexpr int WINDOW_WIDTH = 9;
static constexpr int WINDOW_HEIGHT = 7;
static constexpr int WINDOW_WIDTH = 5;
static constexpr int WINDOW_HEIGHT = 5;
static constexpr int BLOCK_SIZE = 128;
static constexpr int LINES_PER_BLOCK = 16;
/* Centre symmetric census */
template <typename T>
__global__ void census_transform_kernel(
__global__ void cs_census_transform_kernel(
feature_type *dest,
const T *src,
int width,
......@@ -103,6 +104,39 @@ __global__ void census_transform_kernel(
}
}
template <typename T>
__global__ void census_transform_kernel(
feature_type* __restrict__ dest,
const T* __restrict__ src,
int width,
int height,
int pitch)
{
static constexpr int RADIUS_X = WINDOW_WIDTH/2;
static constexpr int RADIUS_Y = WINDOW_HEIGHT/2;
const int x = (blockIdx.x*blockDim.x + threadIdx.x);
const int y = blockIdx.y*blockDim.y + threadIdx.y;
feature_type res = 0;
if (x >= RADIUS_X && y >= RADIUS_Y && x < width-RADIUS_X && y < height-RADIUS_Y) {
const T center = src[y*pitch+x];
#pragma unroll
for (int wy = -RADIUS_Y; wy <= RADIUS_Y; ++wy) {
const int i = (y + wy) * pitch + x;
#pragma unroll
for (int wx = -RADIUS_X; wx <= RADIUS_X; ++wx) {
res = (res << 1) | (center < (src[i+wx]) ? 1 : 0);
}
}
}
dest[x+y*width] = res;
}
template <typename T>
void enqueue_census_transform(
feature_type *dest,
......@@ -112,13 +146,23 @@ void enqueue_census_transform(
int pitch,
cudaStream_t stream)
{
const int width_per_block = BLOCK_SIZE - WINDOW_WIDTH + 1;
const int height_per_block = LINES_PER_BLOCK;
const dim3 gdim(
(width + width_per_block - 1) / width_per_block,
(height + height_per_block - 1) / height_per_block);
const dim3 bdim(BLOCK_SIZE);
census_transform_kernel<<<gdim, bdim, 0, stream>>>(dest, src, width, height, pitch);
/* Disable the original center symmetric algorithm */
if (false) {
const int width_per_block = BLOCK_SIZE - WINDOW_WIDTH + 1;
const int height_per_block = LINES_PER_BLOCK;
const dim3 gdim(
(width + width_per_block - 1) / width_per_block,
(height + height_per_block - 1) / height_per_block);
const dim3 bdim(BLOCK_SIZE);
cs_census_transform_kernel<<<gdim, bdim, 0, stream>>>(dest, src, width, height, pitch);
} else {
static constexpr int THREADS_X = 16;
static constexpr int THREADS_Y = 16;
const dim3 gdim((width + THREADS_X - 1)/THREADS_X, (height + THREADS_Y - 1)/THREADS_Y);
const dim3 bdim(THREADS_X, THREADS_Y);
census_transform_kernel<<<gdim, bdim, 0, stream>>>(dest, src, width, height, pitch);
}
}
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment