diff --git a/cv-node/src/algorithms/rtcensus.cu b/cv-node/src/algorithms/rtcensus.cu index 71d7e5378d8da4291fd1d0cfc64afe34b57e61ce..ff0e489a5ddcb32e5578da8f79032cac0e8191e7 100644 --- a/cv-node/src/algorithms/rtcensus.cu +++ b/cv-node/src/algorithms/rtcensus.cu @@ -16,10 +16,10 @@ using namespace cv::cuda; using namespace cv; -#define BLOCK_W 128 +#define BLOCK_W 60 #define RADIUS 7 #define RADIUS2 2 -#define ROWSperTHREAD 20 +#define ROWSperTHREAD 2 #define XHI(P1,P2) ((P1 <= P2) ? 0 : 1) @@ -80,28 +80,38 @@ __global__ void census_kernel(PtrStepSzb l, PtrStepSzb r, uint64_t *census) { } __global__ void disp_kernel(float *disp_l, float *disp_r, size_t width, size_t height, uint64_t *census, size_t ds) { - //extern __shared__ uint64_t census[]; + //extern __shared__ uint64_t cache[]; size_t u = (blockIdx.x * BLOCK_W) + threadIdx.x + RADIUS2; size_t v_start = (blockIdx.y * ROWSperTHREAD) + RADIUS2; size_t v_end = v_start + ROWSperTHREAD; + + // Prepare the cache load + //const int cache_thread_width = (BLOCK_W+ds / BLOCK_W + RADIUS2*2 + 1)*2; + //uint64_t *cache_ptr = cache + (threadIdx.x * cache_thread_width); if (v_end >= height) v_end = height; //if (u >= width-ds) return; for (size_t v=v_start; v<v_end; v++) { - //for (size_t u=7; u<width-7; u++) { - //const size_t eu = (sign>0) ? w-2-ds : w-2; - - //for (size_t v=7; v<height-7; v++) { - //for (size_t u=7; u<width-7; u++) { - //const size_t ix = v*w*ds+u*ds; + /*const int cache_start = v*width*2 + cache_thread_width*blockIdx.x; + for (int i=0; i<cache_thread_width; i+=2) { + cache_ptr[i] = census[cache_start+i]; + cache_ptr[i+1] = census[cache_start+i+1]; + } - uint16_t last_ham[2] = {65535,65535}; - uint16_t min_disp[2] = {65535,65535}; - uint16_t min_before[2] = {0,0}; - uint16_t min_after[2] = {0,0}; - size_t dix[2] = {0,0}; + __syncthreads();*/ + + uint16_t last_ham1 = 65535; + uint16_t last_ham2 = 65535; + uint16_t min_disp1 = 65535; + uint16_t min_disp2 = 65535; + uint16_t min_before1 = 0; + uint16_t min_before2 = 0; + uint16_t min_after1 = 0; + uint16_t min_after2 = 0; + int dix1 = 0; + int dix2 = 0; for (size_t d=0; d<ds; d++) { uint16_t hamming1 = 0; @@ -109,18 +119,18 @@ __global__ void disp_kernel(float *disp_l, float *disp_r, size_t width, size_t h //if (u+2+ds >= width) break; - for (int n=-2; n<=2; n++) { - const auto u_ = u + n; + for (int m=-2; m<=2; m++) { + const auto v_ = (v + m)*width; + for (int n=-2; n<=2; n++) { + const auto u_ = u + n; - for (int m=-2; m<=2; m++) { - const auto v_ = (v + m)*width; + + - // Correct for disp_R + auto l2 = census[(u_+v_)*2]; auto l1 = census[(u_+v_)*2+1]; - auto r1 = census[(v_+(u_+d))*2]; - // Correct for disp_L - auto l2 = census[(u_+v_)*2]; + auto r1 = census[(v_+(u_+d))*2]; auto r2 = census[(v_+(u_-d))*2+1]; hamming1 += __popcll(r1^l1); @@ -128,31 +138,30 @@ __global__ void disp_kernel(float *disp_l, float *disp_r, size_t width, size_t h } } - if (hamming1 < min_disp[0]) { - min_before[0] = last_ham[0]; - min_disp[0] = hamming1; - dix[0] = d; + if (hamming1 < min_disp1) { + min_before1 = last_ham1; + min_disp1 = hamming1; + dix1 = d; } - if (dix[0] == d) min_after[0] = hamming1; - last_ham[0] = hamming1; + if (dix1 == d) min_after1 = hamming1; + last_ham1 = hamming1; - if (hamming2 < min_disp[1]) { - min_before[1] = last_ham[1]; - min_disp[1] = hamming2; - dix[1] = d; + if (hamming2 < min_disp2) { + min_before2 = last_ham2; + min_disp2 = hamming2; + dix2 = d; } - if (dix[1] == d) min_after[1] = hamming2; - last_ham[1] = hamming2; + if (dix2 == d) min_after2 = hamming2; + last_ham2 = hamming2; } - float d1 = (dix[0] == 0 || dix[0] == ds-1) ? (float)dix[0] : fit_parabola(dix[0], min_disp[0], min_before[0], min_after[0]); - float d2 = (dix[1] == 0 || dix[1] == ds-1) ? (float)dix[1] : fit_parabola(dix[1], min_disp[1], min_before[1], min_after[1]); + //float d1 = (dix1 == 0 || dix1 == ds-1) ? (float)dix1 : fit_parabola(dix1, min_disp1, min_before1, min_after1); + //float d2 = (dix2 == 0 || dix2 == ds-1) ? (float)dix2 : fit_parabola(dix2, min_disp2, min_before2, min_after2); + + float d1 = fit_parabola(dix1, min_disp1, min_before1, min_after1); + float d2 = fit_parabola(dix2, min_disp2, min_before2, min_after2); - //if (abs(d1-d2) <= 1.0) disp(v,u) = abs((d1+d2)/2); - //else disp(v,u) = 0.0f; - - //disp(v,u) = d1; disp_l[v*width+u] = d2; disp_r[v*width+u] = d1; @@ -185,17 +194,6 @@ __global__ void consistency_kernel(float *d_sub_l, float *d_sub_r, PtrStepSz<flo } -/*__global__ void test_kernel(const PtrStepSzb l, const PtrStepSzb r, PtrStepSz<float> disp) -{ - int x = threadIdx.x + blockIdx.x * blockDim.x; - int y = threadIdx.y + blockIdx.y * blockDim.y; - if (x < l.cols && y < l.rows) { - const unsigned char lv = l(y, x); - const unsigned char rv = r(y, x); - disp(y, x) = (float)lv - (float)rv; //make_uchar1(v.z, v.y, v.x); - } -}*/ - void rtcensus_call(const PtrStepSzb &l, const PtrStepSzb &r, const PtrStepSz<float> &disp, size_t num_disp, const int &stream) { dim3 grid(1,1,1); dim3 threads(BLOCK_W, 1, 1); diff --git a/cv-node/src/main.cpp b/cv-node/src/main.cpp index f70b92fb60e6ce360981adb2577ea477f71ad3e8..b35b6e31ec17b07d10cb641c227be798b848cac3 100644 --- a/cv-node/src/main.cpp +++ b/cv-node/src/main.cpp @@ -124,6 +124,7 @@ int main(int argc, char **argv) { Mat l, r, disparity32F, depth32F, lbw, rbw; cv::viz::Viz3d myWindow("FTL"); + myWindow.setBackgroundColor(cv::viz::Color::white()); float base_line = (float)config["camera"]["base_line"]; float focal = (float)(config["camera"]["focal_length"]) / (float)(config["camera"]["sensor_width"]); @@ -150,9 +151,16 @@ int main(int argc, char **argv) { disparity32F.convertTo(disparity32F, CV_32F); disparity32F += 10.0f; - //Rect rect((int)config["disparity"]["maximum"],7,disparity32F.cols-(int)config["disparity"]["maximum"],disparity32F.rows-14); - //disparity32F = disparity32F(rect); - //l = l(rect); + + // Clip the left edge + Rect rect((int)config["disparity"]["maximum"],7,disparity32F.cols-(int)config["disparity"]["maximum"],disparity32F.rows-14); + disparity32F = disparity32F(rect); + l = l(rect); + + // HACK to make bad pixels invisible. + normalize(disparity32F, depth32F, 0, 255, NORM_MINMAX, CV_8U); + r = Mat(l.size(), CV_8UC3, Vec3i(255,255,255)); + l.copyTo(r,depth32F); // TODO Send RGB+D data somewhere @@ -173,7 +181,7 @@ int main(int argc, char **argv) { //cv::imshow("Points",XYZ); - cv::viz::WCloud cloud_widget = cv::viz::WCloud( XYZ, l ); + cv::viz::WCloud cloud_widget = cv::viz::WCloud( XYZ, r ); cloud_widget.setRenderingProperty( cv::viz::POINT_SIZE, 2 ); /* Rotation using rodrigues */ @@ -196,6 +204,7 @@ int main(int argc, char **argv) { if (config["display"]["depth"]) { depth32F = (focal * (float)l.cols * base_line) / disparity32F; + normalize(depth32F, depth32F, 0, 255, NORM_MINMAX, CV_8U); cv::imshow("Depth", depth32F); if(cv::waitKey(10) == 27){ //exit if ESC is pressed