Skip to content
Snippets Groups Projects
Commit dc8c9637 authored by Nicolas Pope's avatar Nicolas Pope
Browse files

Use 5x5 standard census for SGM

parent d585ac50
No related branches found
No related tags found
1 merge request!349Use 5x5 proper census for SGM
Pipeline #32700 passed
This commit is part of merge request !349. Comments created here will be created in the context of that merge request.
......@@ -21,14 +21,15 @@ namespace sgm {
namespace {
static constexpr int WINDOW_WIDTH = 9;
static constexpr int WINDOW_HEIGHT = 7;
static constexpr int WINDOW_WIDTH = 5;
static constexpr int WINDOW_HEIGHT = 5;
static constexpr int BLOCK_SIZE = 128;
static constexpr int LINES_PER_BLOCK = 16;
/* Centre symmetric census */
template <typename T>
__global__ void census_transform_kernel(
__global__ void cs_census_transform_kernel(
feature_type *dest,
const T *src,
int width,
......@@ -103,6 +104,57 @@ __global__ void census_transform_kernel(
}
}
template <typename T>
__global__ void census_transform_kernel(
feature_type* __restrict__ dest,
const T* __restrict__ src,
int width,
int height,
int pitch)
{
static constexpr int RADIUS_X = WINDOW_WIDTH/2;
static constexpr int RADIUS_Y = WINDOW_HEIGHT/2;
const int x = (blockIdx.x*blockDim.x + threadIdx.x);
const int y = blockIdx.y*blockDim.y + threadIdx.y;
dest[x+y*width] = 0;
if (x >= RADIUS_X && y >= RADIUS_Y && x < width-RADIUS_X && y < height-RADIUS_Y) {
short center = src[y*pitch+x];
uint8_t i = 0; // bit counter for *out
// possible BUG in operator(), gets called more than once per pixel;
// local variable for sub-bitstring to avoid data race (no read
// dependency to out; writes are identical)
feature_type res = 0;
for (int wy = -RADIUS_Y; wy <= RADIUS_Y; wy++) {
for (int wx = -RADIUS_X; wx <= RADIUS_X; wx++) {
const int y_ = y + wy;
const int x_ = x + wx;
if (y == 0 && x == 0) {
continue;
}
// zero if first value, otherwise shift to left
res = (res << 1);
res |= (center < (src[y_*pitch+x_]) ? 1 : 0);
// if all bits set, continue to next element
/*if (++i % 64 == 0) {
*out = res;
out++;
}*/
}
}
//if ((i - 1)%64 != 0) {
// write remaining bits
dest[x+y*width] = res;
//}
}
}
template <typename T>
void enqueue_census_transform(
feature_type *dest,
......@@ -112,13 +164,23 @@ void enqueue_census_transform(
int pitch,
cudaStream_t stream)
{
const int width_per_block = BLOCK_SIZE - WINDOW_WIDTH + 1;
const int height_per_block = LINES_PER_BLOCK;
const dim3 gdim(
(width + width_per_block - 1) / width_per_block,
(height + height_per_block - 1) / height_per_block);
const dim3 bdim(BLOCK_SIZE);
census_transform_kernel<<<gdim, bdim, 0, stream>>>(dest, src, width, height, pitch);
/* Disable the original center symmetric algorithm */
if (false) {
const int width_per_block = BLOCK_SIZE - WINDOW_WIDTH + 1;
const int height_per_block = LINES_PER_BLOCK;
const dim3 gdim(
(width + width_per_block - 1) / width_per_block,
(height + height_per_block - 1) / height_per_block);
const dim3 bdim(BLOCK_SIZE);
cs_census_transform_kernel<<<gdim, bdim, 0, stream>>>(dest, src, width, height, pitch);
} else {
static constexpr int THREADS_X = 16;
static constexpr int THREADS_Y = 16;
const dim3 gdim((width + THREADS_X - 1)/THREADS_X, (height + THREADS_Y - 1)/THREADS_Y);
const dim3 bdim(THREADS_X, THREADS_Y);
census_transform_kernel<<<gdim, bdim, 0, stream>>>(dest, src, width, height, pitch);
}
}
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment