diff --git a/cv-node/CMakeLists.txt b/cv-node/CMakeLists.txt index 2b2d24cd36476860793585e05deb82d42a2ba884..dab94af564545322531eb4935bb736220e454a19 100644 --- a/cv-node/CMakeLists.txt +++ b/cv-node/CMakeLists.txt @@ -31,6 +31,7 @@ set(CMAKE_CUDA_FLAGS "-Xcompiler -Wall") set(CMAKE_CUDA_FLAGS_DEBUG "-g -DDEBUG -D_DEBUG -Wall") set(CMAKE_CUDA_FLAGS_RELEASE "") add_definitions(-DHAVE_CUDA) +include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) endif (CUDA_FOUND) # Need to include staged files and libs diff --git a/cv-node/include/ftl/algorithms/rtcensus.hpp b/cv-node/include/ftl/algorithms/rtcensus.hpp index 2981b8d2e1a6aac2456142acfd2d0fd82eebacfd..aed1b122b86a69673bec711855c399d2d4594ebb 100644 --- a/cv-node/include/ftl/algorithms/rtcensus.hpp +++ b/cv-node/include/ftl/algorithms/rtcensus.hpp @@ -27,11 +27,13 @@ class RTCensus : public ftl::Disparity { float gamma_; float tau_; bool use_cuda_; + bool alternate_; #if defined HAVE_CUDA cv::cuda::GpuMat disp_; cv::cuda::GpuMat filtered_; cv::cuda::GpuMat left_; + cv::cuda::GpuMat left2_; cv::cuda::GpuMat right_; #endif diff --git a/cv-node/src/algorithms/rtcensus.cpp b/cv-node/src/algorithms/rtcensus.cpp index 47f1acc96039996ab2e1e78925bdc28af475bb0a..a2c323ea32239fc93157e05f255e4ea33a4c4ca7 100644 --- a/cv-node/src/algorithms/rtcensus.cpp +++ b/cv-node/src/algorithms/rtcensus.cpp @@ -183,7 +183,8 @@ RTCensus::RTCensus(nlohmann::json &config) : Disparity(config), gamma_(0.0f), tau_(0.0f), - use_cuda_(config.value("use_cuda",true)) {} + use_cuda_(config.value("use_cuda",true)), + alternate_(false) {} /* * Choose the implementation and perform disparity calculation. @@ -231,25 +232,31 @@ void RTCensus::computeCPU(const cv::Mat &l, const cv::Mat &r, cv::Mat &disp) { using namespace cv::cuda; using namespace cv; +#include <vector_types.h> + namespace ftl { namespace gpu { -void rtcensus_call(const PtrStepSzb &l, const PtrStepSzb &r, const PtrStepSz<float> &disp, size_t num_disp, const int &s=0); +void rtcensus_call(const PtrStepSz<uchar4> &l, const PtrStepSz<uchar4> &r, const PtrStepSz<float> &disp, size_t num_disp, const int &s=0); }} void RTCensus::computeCUDA(const cv::Mat &l, const cv::Mat &r, cv::Mat &disp) { // Initialise gpu memory here because we need image size if (disp_.empty()) disp_ = cuda::GpuMat(l.size(), CV_32FC1); - if (left_.empty()) left_ = cuda::GpuMat(l.size(), CV_8U); - if (right_.empty()) right_ = cuda::GpuMat(l.size(), CV_8U); + if (left_.empty()) left_ = cuda::GpuMat(l.size(), CV_8UC4); + if (left2_.empty()) left2_ = cuda::GpuMat(l.size(), CV_8UC4); + if (right_.empty()) right_ = cuda::GpuMat(l.size(), CV_8UC4); // Send images to GPU - left_.upload(l); + if (alternate_) left_.upload(l); + else left2_.upload(l); right_.upload(r); auto start = std::chrono::high_resolution_clock::now(); - ftl::gpu::rtcensus_call(left_, right_, disp_, max_disp_); + ftl::gpu::rtcensus_call((alternate_)?left_:left2_, right_, disp_, max_disp_); std::chrono::duration<double> elapsed = std::chrono::high_resolution_clock::now() - start; LOG(INFO) << "CUDA census in " << elapsed.count() << "s"; + alternate_ = !alternate_; + // Read disparity from GPU disp_.download(disp); } diff --git a/cv-node/src/algorithms/rtcensus.cu b/cv-node/src/algorithms/rtcensus.cu index d008e06e61d4a41a59e6223ed1a43a6fa8c08c9c..583a823876bf089d832a2263921011ec339b56d1 100644 --- a/cv-node/src/algorithms/rtcensus.cu +++ b/cv-node/src/algorithms/rtcensus.cu @@ -75,13 +75,13 @@ cudaTextureObject_t makeTexture2D(void *ptr, int pitch, int width, int height) { __device__ uint64_t sparse_census(cudaTextureObject_t tex, int u, int v) { uint64_t r = 0; - unsigned char t = tex2D<unsigned char>(tex, u,v); + unsigned char t = tex2D<uchar4>(tex, u,v).z; for (int m=-7; m<=7; m+=2) { //auto start_ix = (v + m)*w + u; for (int n=-7; n<=7; n+=2) { r <<= 1; - r |= XHI(t, tex2D<unsigned char>(tex, u+n, v+m)); + r |= XHI(t, tex2D<uchar4>(tex, u+n, v+m).z); } } @@ -141,7 +141,7 @@ __global__ void disp_kernel(float *disp_l, float *disp_r, size_t ds) { //extern __shared__ uint64_t cache[]; - const int gamma = 100; + const int gamma = 20; int u = (blockIdx.x * BLOCK_W) + threadIdx.x + RADIUS2; int v_start = (blockIdx.y * ROWSperTHREAD) + RADIUS2; @@ -275,7 +275,7 @@ __global__ void consistency_kernel(cudaTextureObject_t d_sub_l, //disp(v,u) = a; //abs((a+b)/2); - if (abs(a-b) <= 1.0) disp[v*pitch+u] = abs((a+b)/2); // was 1.0 + if (abs(a-b) <= 1.0) disp[v*pitch+u] = (u < 300) ? b : a;//abs((a+b)/2); // was 1.0 else disp[v*pitch+u] = NAN; //} } @@ -284,8 +284,8 @@ __global__ void consistency_kernel(cudaTextureObject_t d_sub_l, #define FILTER_WINDOW 11 #define FILTER_WINDOW_R 5 -#define FILTER_SIM_THRESH 20 -#define FILTER_DISP_THRESH 10.0f +#define FILTER_SIM_THRESH 5 +#define FILTER_DISP_THRESH 50.0f __global__ void filter_kernel(cudaTextureObject_t t, cudaTextureObject_t d, cudaTextureObject_t prevD, @@ -293,44 +293,51 @@ __global__ void filter_kernel(cudaTextureObject_t t, cudaTextureObject_t d, size_t u = (blockIdx.x * BLOCK_W) + threadIdx.x + RADIUS; size_t v = blockIdx.y + RADIUS; - if (u+num_disp > f.cols) { + /*if (u+num_disp > f.cols) { f(v,u) = NAN; return; - } + }*/ float disp = tex2D<float>(d,u,v); - /*if (!isnan(disp)) { - f(v,u) = disp; - return; - }*/ - //if (isnan(disp)) disp = 100000.0f; //tex2D<float>(prev, u, v); cudaTextureObject_t nTex = (prevT) ? prevT : t; cudaTextureObject_t nDisp = (prevD) ? prevD : d; float pdisp = tex2D<float>(nDisp,u,v); if (isnan(pdisp)) pdisp = disp; - //if (isnan(disp)) disp = pdisp; - int pixel = tex2D<unsigned char>(t, u, v); - int ppixel = tex2D<unsigned char>(nTex, u, v); + if (isnan(disp)) disp = pdisp; + int pixel = tex2D<uchar4>(t, u, v).x; + int ppixel = tex2D<uchar4>(nTex, u, v).x; float est = 0.0f; //(isnan(disp)) ? tex2D<float>(prev, u, v) : disp; int nn = 0; //(isnan(disp)) ? 0 : 1; int neigh_sq = 0; int neigh_sum = 0; + + /*if (abs(ppixel-pixel) <= FILTER_SIM_THRESH) { + f(v,u) = disp; + } else { + f(v,u) = NAN; + } + return;*/ + + if (!isnan(pdisp) && isnan(disp) && abs(pixel-ppixel) <= FILTER_SIM_THRESH) { + disp = pdisp; + } for (int m=-FILTER_WINDOW_R; m<=FILTER_WINDOW_R; m++) { for (int n=-FILTER_WINDOW_R; n<=FILTER_WINDOW_R; n++) { - int neigh = tex2D<unsigned char>(t, u+n, v+m); + int neigh = tex2D<uchar4>(t, u+n, v+m).x; neigh_sq += neigh*neigh; neigh_sum += neigh; float ndisp = tex2D<float>(d,u+n,v+m); - if (isnan(ndisp)) { + /*if (isnan(ndisp)) { ndisp = tex2D<float>(nDisp,u+n,v+m); - neigh = tex2D<unsigned char>(nTex, u+n, v+m); - } + neigh = tex2D<uchar4>(nTex, u+n, v+m).x; + }*/ - if (m+n == 0) continue; + //if (isnan(tex2D<float>(nDisp,u+n,v+m))) continue; + if (m == 0 && n == 0) continue; if (ndisp > 1.0f && !isnan(ndisp) && (abs(neigh-pixel) <= FILTER_SIM_THRESH)) { // && (isnan(disp) || abs(ndisp-disp) < FILTER_DISP_THRESH)) { est += ndisp; @@ -341,21 +348,25 @@ __global__ void filter_kernel(cudaTextureObject_t t, cudaTextureObject_t d, // Texture map filtering int tm = (neigh_sq / (FILTER_WINDOW*FILTER_WINDOW)) - ((neigh_sum*neigh_sum) / (FILTER_WINDOW*FILTER_WINDOW)); - if (tm >= -9000000) { + //if (tm >= -9000000) { // ) { - if (!isnan(disp) && (abs(ppixel-pixel) > FILTER_SIM_THRESH || abs(pdisp - disp) <= FILTER_DISP_THRESH)) { + /*if (!isnan(disp) && disp > 1.0f) { // && (abs(ppixel-pixel) > FILTER_SIM_THRESH || abs(pdisp - disp) <= FILTER_DISP_THRESH)) { f(v,u) = disp; - } else if (nn > 2) f(v,u) = (nn==0) ? NAN : est / nn; - else f(v,u) = NAN; - } else { - f(v,u) = NAN; - } + } else*/ + if (nn > 4) { + f(v,u) = (est+disp) / (nn+1); + } else if (!isnan(pdisp) && abs(pixel-ppixel) <= FILTER_SIM_THRESH) { + f(v,u) = pdisp; + } else f(v,u) = NAN; + //} else { + // f(v,u) = NAN; + //} } cudaTextureObject_t prevDisp = 0; cudaTextureObject_t prevImage = 0; -void rtcensus_call(const PtrStepSzb &l, const PtrStepSzb &r, const PtrStepSz<float> &disp, size_t num_disp, const int &stream) { +void rtcensus_call(const PtrStepSz<uchar4> &l, const PtrStepSz<uchar4> &r, const PtrStepSz<float> &disp, size_t num_disp, const int &stream) { dim3 grid(1,1,1); dim3 threads(BLOCK_W, 1, 1); @@ -389,8 +400,8 @@ void rtcensus_call(const PtrStepSzb &l, const PtrStepSzb &r, const PtrStepSz<flo memset(&texDesc, 0, sizeof(texDesc)); texDesc.readMode = cudaReadModeElementType; - cudaTextureObject_t texLeft = makeTexture2D<unsigned char>(l); - cudaTextureObject_t texRight = makeTexture2D<unsigned char>(r); + cudaTextureObject_t texLeft = makeTexture2D<uchar4>(l); + cudaTextureObject_t texRight = makeTexture2D<uchar4>(r); //size_t smem_size = (2 * l.cols * l.rows) * sizeof(uint64_t); diff --git a/cv-node/src/main.cpp b/cv-node/src/main.cpp index ad055b968e111d7e352b88f153867f2d3e85a5ee..157a1d65be95af2f3f549063c25381b0ea26c7b2 100644 --- a/cv-node/src/main.cpp +++ b/cv-node/src/main.cpp @@ -134,19 +134,24 @@ static void run(const string &file) { sync->get(RIGHT,r); // Black and white - cvtColor(l, lbw, COLOR_BGR2GRAY); - cvtColor(r, rbw, COLOR_BGR2GRAY); + cvtColor(l, lbw, COLOR_BGR2HSV); + cvtColor(r, rbw, COLOR_BGR2HSV); + int from_to[] = {0,0,1,1,2,2,-1,3}; + Mat hsval(lbw.size(), CV_8UC4); + Mat hsvar(lbw.size(), CV_8UC4); + mixChannels(&lbw, 1, &hsval, 1, from_to, 4); + mixChannels(&rbw, 1, &hsvar, 1, from_to, 4); - disparity->compute(lbw,rbw,disparity32F); + disparity->compute(hsval,hsvar,disparity32F); //LOG(INFO) << "Disparity complete "; disparity32F.convertTo(disparity32F, CV_32F); - disparity32F += 50.0f; // TODO REMOVE + //disparity32F += 10.0f; // TODO REMOVE // Clip the left edge Rect rect((int)config["disparity"]["maximum"],7,disparity32F.cols-(int)config["disparity"]["maximum"],disparity32F.rows-14); - disparity32F = disparity32F(rect); - l = l(rect); + //disparity32F = disparity32F(rect); + //l = l(rect); // HACK to make bad pixels invisible. //normalize(disparity32F, depth32F, 0, 255, NORM_MINMAX, CV_8U); @@ -202,6 +207,7 @@ static void run(const string &file) { break; } } else if (config["display"]["disparity"]) { + disparity32F = disparity32F / (float)config["disparity"]["maximum"]; //normalize(disparity32F, disparity32F, 0, 255, NORM_MINMAX, CV_8U); cv::imshow("Disparity", disparity32F); if(cv::waitKey(10) == 27){