From a7da228c47d5bf692c292d3b21f15e414d7dda29 Mon Sep 17 00:00:00 2001 From: Nicolas Pope <nwpope@utu.fi> Date: Thu, 22 Oct 2020 16:37:50 +0300 Subject: [PATCH] Tune CT a little --- lib/libsgm/src/census_transform.cu | 40 ++++++++---------------------- 1 file changed, 11 insertions(+), 29 deletions(-) diff --git a/lib/libsgm/src/census_transform.cu b/lib/libsgm/src/census_transform.cu index 2687bc1bf..df3888618 100644 --- a/lib/libsgm/src/census_transform.cu +++ b/lib/libsgm/src/census_transform.cu @@ -116,43 +116,25 @@ __global__ void census_transform_kernel( static constexpr int RADIUS_Y = WINDOW_HEIGHT/2; const int x = (blockIdx.x*blockDim.x + threadIdx.x); - const int y = blockIdx.y*blockDim.y + threadIdx.y; + const int y = blockIdx.y*blockDim.y + threadIdx.y; - dest[x+y*width] = 0; + feature_type res = 0; if (x >= RADIUS_X && y >= RADIUS_Y && x < width-RADIUS_X && y < height-RADIUS_Y) { - short center = src[y*pitch+x]; - uint8_t i = 0; // bit counter for *out - // possible BUG in operator(), gets called more than once per pixel; - // local variable for sub-bitstring to avoid data race (no read - // dependency to out; writes are identical) - feature_type res = 0; - - for (int wy = -RADIUS_Y; wy <= RADIUS_Y; wy++) { - for (int wx = -RADIUS_X; wx <= RADIUS_X; wx++) { - const int y_ = y + wy; - const int x_ = x + wx; - - if (y == 0 && x == 0) { - continue; - } + const T center = src[y*pitch+x]; - // zero if first value, otherwise shift to left - res = (res << 1); - res |= (center < (src[y_*pitch+x_]) ? 1 : 0); + #pragma unroll + for (int wy = -RADIUS_Y; wy <= RADIUS_Y; ++wy) { + const int i = (y + wy) * pitch + x; - // if all bits set, continue to next element - /*if (++i % 64 == 0) { - *out = res; - out++; - }*/ + #pragma unroll + for (int wx = -RADIUS_X; wx <= RADIUS_X; ++wx) { + res = (res << 1) | (center < (src[i+wx]) ? 1 : 0); } } - //if ((i - 1)%64 != 0) { - // write remaining bits - dest[x+y*width] = res; - //} } + + dest[x+y*width] = res; } template <typename T> -- GitLab