From dc8c96375515b7aad2af739e55a565223ddd6c88 Mon Sep 17 00:00:00 2001 From: Nicolas Pope <nwpope@utu.fi> Date: Thu, 22 Oct 2020 15:53:34 +0300 Subject: [PATCH] Use 5x5 standard census for SGM --- lib/libsgm/src/census_transform.cu | 82 ++++++++++++++++++++++++++---- 1 file changed, 72 insertions(+), 10 deletions(-) diff --git a/lib/libsgm/src/census_transform.cu b/lib/libsgm/src/census_transform.cu index 70f67bcff..2687bc1bf 100644 --- a/lib/libsgm/src/census_transform.cu +++ b/lib/libsgm/src/census_transform.cu @@ -21,14 +21,15 @@ namespace sgm { namespace { -static constexpr int WINDOW_WIDTH = 9; -static constexpr int WINDOW_HEIGHT = 7; +static constexpr int WINDOW_WIDTH = 5; +static constexpr int WINDOW_HEIGHT = 5; static constexpr int BLOCK_SIZE = 128; static constexpr int LINES_PER_BLOCK = 16; +/* Centre symmetric census */ template <typename T> -__global__ void census_transform_kernel( +__global__ void cs_census_transform_kernel( feature_type *dest, const T *src, int width, @@ -103,6 +104,57 @@ __global__ void census_transform_kernel( } } +template <typename T> +__global__ void census_transform_kernel( + feature_type* __restrict__ dest, + const T* __restrict__ src, + int width, + int height, + int pitch) +{ + static constexpr int RADIUS_X = WINDOW_WIDTH/2; + static constexpr int RADIUS_Y = WINDOW_HEIGHT/2; + + const int x = (blockIdx.x*blockDim.x + threadIdx.x); + const int y = blockIdx.y*blockDim.y + threadIdx.y; + + dest[x+y*width] = 0; + + if (x >= RADIUS_X && y >= RADIUS_Y && x < width-RADIUS_X && y < height-RADIUS_Y) { + short center = src[y*pitch+x]; + uint8_t i = 0; // bit counter for *out + // possible BUG in operator(), gets called more than once per pixel; + // local variable for sub-bitstring to avoid data race (no read + // dependency to out; writes are identical) + feature_type res = 0; + + for (int wy = -RADIUS_Y; wy <= RADIUS_Y; wy++) { + for (int wx = -RADIUS_X; wx <= RADIUS_X; wx++) { + const int y_ = y + wy; + const int x_ = x + wx; + + if (y == 0 && x == 0) { + continue; + } + + // zero if first value, otherwise shift to left + res = (res << 1); + res |= (center < (src[y_*pitch+x_]) ? 1 : 0); + + // if all bits set, continue to next element + /*if (++i % 64 == 0) { + *out = res; + out++; + }*/ + } + } + //if ((i - 1)%64 != 0) { + // write remaining bits + dest[x+y*width] = res; + //} + } +} + template <typename T> void enqueue_census_transform( feature_type *dest, @@ -112,13 +164,23 @@ void enqueue_census_transform( int pitch, cudaStream_t stream) { - const int width_per_block = BLOCK_SIZE - WINDOW_WIDTH + 1; - const int height_per_block = LINES_PER_BLOCK; - const dim3 gdim( - (width + width_per_block - 1) / width_per_block, - (height + height_per_block - 1) / height_per_block); - const dim3 bdim(BLOCK_SIZE); - census_transform_kernel<<<gdim, bdim, 0, stream>>>(dest, src, width, height, pitch); + /* Disable the original center symmetric algorithm */ + if (false) { + const int width_per_block = BLOCK_SIZE - WINDOW_WIDTH + 1; + const int height_per_block = LINES_PER_BLOCK; + const dim3 gdim( + (width + width_per_block - 1) / width_per_block, + (height + height_per_block - 1) / height_per_block); + const dim3 bdim(BLOCK_SIZE); + cs_census_transform_kernel<<<gdim, bdim, 0, stream>>>(dest, src, width, height, pitch); + } else { + static constexpr int THREADS_X = 16; + static constexpr int THREADS_Y = 16; + + const dim3 gdim((width + THREADS_X - 1)/THREADS_X, (height + THREADS_Y - 1)/THREADS_Y); + const dim3 bdim(THREADS_X, THREADS_Y); + census_transform_kernel<<<gdim, bdim, 0, stream>>>(dest, src, width, height, pitch); + } } } -- GitLab