From a7da228c47d5bf692c292d3b21f15e414d7dda29 Mon Sep 17 00:00:00 2001
From: Nicolas Pope <nwpope@utu.fi>
Date: Thu, 22 Oct 2020 16:37:50 +0300
Subject: [PATCH] Tune census transform (CT) kernel a little

---
 lib/libsgm/src/census_transform.cu | 40 ++++++++----------------------
 1 file changed, 11 insertions(+), 29 deletions(-)

diff --git a/lib/libsgm/src/census_transform.cu b/lib/libsgm/src/census_transform.cu
index 2687bc1bf..df3888618 100644
--- a/lib/libsgm/src/census_transform.cu
+++ b/lib/libsgm/src/census_transform.cu
@@ -116,43 +116,25 @@ __global__ void census_transform_kernel(
 	static constexpr int RADIUS_Y = WINDOW_HEIGHT/2;
 
 	const int x = (blockIdx.x*blockDim.x + threadIdx.x);
-	const int y = blockIdx.y*blockDim.y + threadIdx.y;	
+	const int y = blockIdx.y*blockDim.y + threadIdx.y;
 
-	dest[x+y*width] = 0;
+	feature_type res = 0;
 
 	if (x >= RADIUS_X && y >= RADIUS_Y && x < width-RADIUS_X && y < height-RADIUS_Y) {
-		short center = src[y*pitch+x];
-		uint8_t i = 0; // bit counter for *out
-		// possible BUG in operator(), gets called more than once per pixel;
-		// local variable for sub-bitstring to avoid data race (no read
-		// dependency to out; writes are identical)
-		feature_type res = 0;
-
-		for (int wy = -RADIUS_Y; wy <= RADIUS_Y; wy++) {
-			for (int wx = -RADIUS_X; wx <= RADIUS_X; wx++) {
-				const int y_ = y + wy;
-				const int x_ = x + wx;
-
-				if (y == 0 && x == 0) {
-					continue;
-				}
+		const T center = src[y*pitch+x];
 
-				// zero if first value, otherwise shift to left
-				res = (res << 1);
-				res |= (center < (src[y_*pitch+x_]) ? 1 : 0);
+		#pragma unroll
+		for (int wy = -RADIUS_Y; wy <= RADIUS_Y; ++wy) {
+			const int i = (y + wy) * pitch + x;
 
-				// if all bits set, continue to next element
-				/*if (++i % 64 == 0) {
-					*out = res;
-					out++;
-				}*/
+			#pragma unroll
+			for (int wx = -RADIUS_X; wx <= RADIUS_X; ++wx) {
+				res = (res << 1) | (center < (src[i+wx]) ? 1 : 0);
 			}
 		}
-		//if ((i - 1)%64 != 0) {
-			// write remaining bits
-			dest[x+y*width] = res;
-		//}
 	}
+
+	dest[x+y*width] = res;
 }
 
 template <typename T>
-- 
GitLab