diff --git a/cv-node/CMakeLists.txt b/cv-node/CMakeLists.txt index 55a3dc0cc276c4d21dff2c859e5405c08552bcdb..5d27aa10a28cb39a6df1c7c0bb7df46358ce5102 100644 --- a/cv-node/CMakeLists.txt +++ b/cv-node/CMakeLists.txt @@ -15,16 +15,16 @@ set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/") if (WIN32) include_directories(${glog_DIR}) endif (WIN32) +find_package( CUDA ) find_package( OpenCV REQUIRED ) find_package( Threads REQUIRED ) find_package( LibSGM ) -#find_package( CUDA ) #find_package(PkgConfig) #pkg_check_modules(GTKMM gtkmm-3.0) message("Cuda at ${CMAKE_CUDA_COMPILER}") -check_language(CUDA) +#check_language(CUDA) if (CUDA_FOUND) enable_language(CUDA) set(CMAKE_CUDA_FLAGS "-Xcompiler -Wall") diff --git a/cv-node/include/ftl/cuda_common.hpp b/cv-node/include/ftl/cuda_common.hpp index f57c4c0ba081e35e82d58774263a9569019d6a9f..1adeed475a5b2df72493f602c7ad4128790c73be 100644 --- a/cv-node/include/ftl/cuda_common.hpp +++ b/cv-node/include/ftl/cuda_common.hpp @@ -13,6 +13,15 @@ namespace ftl { namespace cuda { +/*template <typename T> +class HisteresisTexture { + public: + HisteresisTexture(); + ~HisteresisTexture(); + + HisteresisTexture<T> &operator=(TextureObject<T> &t); +};*/ + template <typename T> class TextureObject { public: diff --git a/cv-node/src/algorithms/fixstars_sgm.cpp b/cv-node/src/algorithms/fixstars_sgm.cpp index f36c7a16e0f46fae35ee6cd91a9ab6c3bfc91527..6300162e2fb691110bf00e5f63a9f55136b72ff0 100644 --- a/cv-node/src/algorithms/fixstars_sgm.cpp +++ b/cv-node/src/algorithms/fixstars_sgm.cpp @@ -1,4 +1,5 @@ #include <ftl/algorithms/fixstars_sgm.hpp> +#include <glog/logging.h> using ftl::algorithms::FixstarsSGM; using namespace cv; @@ -18,18 +19,22 @@ void FixstarsSGM::compute(const cv::Mat &l, const cv::Mat &r, cv::Mat &disp) { cv::cvtColor(r, rbw, cv::COLOR_BGR2GRAY); if (!ssgm_) { - ssgm_ = new sgm::StereoSGM(l.cols, l.rows, max_disp_, 8, 8, sgm::EXECUTE_INOUT_HOST2HOST); + ssgm_ = new sgm::StereoSGM(l.cols, l.rows, max_disp_, 8, 16, sgm::EXECUTE_INOUT_HOST2HOST, + sgm::StereoSGM::Parameters(10,120,0.95f,true)); } //disp = Mat(); //if (disp.cols != l.cols || disp.rows != l.rows) { - disp = Mat(cv::Size(l.cols, l.rows), CV_8UC1); + disp = Mat(cv::Size(l.cols, l.rows), CV_16UC1); //} + auto start = std::chrono::high_resolution_clock::now(); ssgm_->execute(lbw.data, rbw.data, disp.data); + std::chrono::duration<double> elapsed = std::chrono::high_resolution_clock::now() - start; + LOG(INFO) << "CUDA sgm in " << elapsed.count() << "s"; - disp.convertTo(disp, CV_32F, 1.0f); + disp.convertTo(disp, CV_32F, 1.0f/16.0f); } diff --git a/cv-node/src/algorithms/nick1.cu b/cv-node/src/algorithms/nick1.cu index 44de52257b61d4be752265c8d92d6ed65fe4c458..034ae81afd20efd16e49d2b8418c5539a211be36 100644 --- a/cv-node/src/algorithms/nick1.cu +++ b/cv-node/src/algorithms/nick1.cu @@ -191,7 +191,7 @@ __global__ void edge_invar1_kernel(cudaTextureObject_t t, cudaTextureObject_t p, if (g > 1.0f) { float2 n = tex2D<float2>(p, u, v); - float avg = (n.x > g && abs(n.y-a) < 0.2) ? (g+n.x) / 2.0f : g; + float avg = (n.x > g && abs(n.y-a) < 0.1f) ? n.x : g; o(u,v) = make_float2(avg,abs(a)); } else { o(u,v) = make_float2(NAN,NAN); @@ -207,7 +207,7 @@ __device__ void edge_follow(float &sum, int &count, cudaTextureObject_t i1, int float sumchange = 0.0f; float2 pixel_i1 = tex2D<float2>(i1,u,v); - for (int j=0; j<50; j++) { + for (int j=0; j<5; j++) { // Vertical edge = 0, so to follow it don't move in x int dx = ((pixel_i1.y >= 0.785 && pixel_i1.y <= 2.356) ) ? 0 : 1; int dy = (dx == 1) ? 0 : 1; @@ -226,11 +226,11 @@ __device__ void edge_follow(float &sum, int &count, cudaTextureObject_t i1, int float diff = 10000.0f; int nu, nv; - for (int i=-2; i<=2; i++) { + for (int i=-5; i<=5; i++) { float2 pix = tex2D<float2>(i1,u2+dx*i+dy*sign, v2+dy*i+dx*sign); if (isnan(pix.x)) continue; - float d = abs(pix.x-pixel_i1.x)*abs(pix.y-pixel_i1.y); + float d = abs(pix.x-pixel_i1.x); //*abs(pix.y-pixel_i1.y); if (d < diff) { nu = u2+dx*i+dy*sign; nv = v2+dy*i+dx*sign; @@ -244,11 +244,12 @@ __device__ void edge_follow(float &sum, int &count, cudaTextureObject_t i1, int // Corner or edge change. //if (change > 0.785f) break; - if (change > 1.0f) break; + if (change > 2.0f) break; + + sumchange += (nu-u) / (nv-v); u2 = nu; v2 = nv; - sumchange += change; pixel_i1 = next_pix; n++; } else { @@ -290,10 +291,11 @@ __global__ void edge_invar2_kernel(cudaTextureObject_t i1, ftl::cuda::TextureObj // Output curvature of edge - if (count_a+count_b > 10) { + if (count_a+count_b > 3) { float curvature = ((sum_a+sum_b) / (float)(count_a+count_b)); - //o(u,v) = curvature * 300.0f + 50.0f; - o(u,v) = (count_a+count_b) * 3.0f; + o(u,v) = curvature * 150.0f + 50.0f; + //o(u,v) = (count_a+count_b) * 3.0f; + //o(u,v) = pixel_i1.y*81.0f; } else { o(u,v) = NAN; } @@ -306,7 +308,21 @@ __global__ void edge_invar2_kernel(cudaTextureObject_t i1, ftl::cuda::TextureObj } } +__global__ void disparity_kernel(cudaTextureObject_t l, cudaTextureObject_t r, ftl::cuda::TextureObject<float> o) { + for (STRIDE_Y(v,o.height())) { + for (STRIDE_X(u,o.width())) { + float dl = tex2D<float>(l,u,v); + float dr = tex2D<float>(r,u,v); + if (isnan(dl)) o(u,v) = dr; + else if (isnan(dr)) o(u,v) = dl; + else o(u,v) = max(dl,dr); + + } + } +} + ftl::cuda::TextureObject<float2> prevEdge1; +ftl::cuda::TextureObject<float2> prevEdge2; ftl::cuda::TextureObject<float> prevDisp; ftl::cuda::TextureObject<uchar4> prevImage; @@ -318,7 +334,10 @@ void nick1_call(const PtrStepSz<uchar4> &l, const PtrStepSz<uchar4> &r, const Pt // TODO Could reduce re-allocations by caching these ftl::cuda::TextureObject<uchar4> texLeft(l); ftl::cuda::TextureObject<uchar4> texRight(r); - ftl::cuda::TextureObject<float2> inv1(l.cols, l.rows); + ftl::cuda::TextureObject<float2> invl1(l.cols, l.rows); + ftl::cuda::TextureObject<float2> invr1(r.cols, r.rows); + ftl::cuda::TextureObject<float> invl2(l.cols, l.rows); + ftl::cuda::TextureObject<float> invr2(r.cols, r.rows); ftl::cuda::TextureObject<float> output(disp); dim3 grid(1,1,1); @@ -326,14 +345,24 @@ void nick1_call(const PtrStepSz<uchar4> &l, const PtrStepSz<uchar4> &r, const Pt grid.x = cv::cuda::device::divUp(l.cols - 2 * RADIUS2, BLOCK_W); grid.y = cv::cuda::device::divUp(l.rows - 2 * RADIUS2, ROWSperTHREAD); - edge_invar1_kernel<<<grid,threads>>>(texLeft.cudaTexture(), prevEdge1.cudaTexture(), inv1); + edge_invar1_kernel<<<grid,threads>>>(texLeft.cudaTexture(), prevEdge1.cudaTexture(), invl1); + cudaSafeCall( cudaGetLastError() ); + + edge_invar1_kernel<<<grid,threads>>>(texRight.cudaTexture(), prevEdge2.cudaTexture(), invr1); cudaSafeCall( cudaGetLastError() ); - edge_invar2_kernel<<<grid,threads>>>(inv1.cudaTexture(), output); + edge_invar2_kernel<<<grid,threads>>>(invl1.cudaTexture(), invl2); cudaSafeCall( cudaGetLastError() ); + edge_invar2_kernel<<<grid,threads>>>(invr1.cudaTexture(), invr2); + cudaSafeCall( cudaGetLastError() ); + + disparity_kernel<<<grid,threads>>>(invl2.cudaTexture(), invr2.cudaTexture(), output); + prevEdge1.free(); - prevEdge1 = inv1; + prevEdge1 = invl1; + prevEdge2.free(); + prevEdge2 = invr1; //if (&stream == Stream::Null()) cudaSafeCall( cudaDeviceSynchronize() ); @@ -341,6 +370,8 @@ void nick1_call(const PtrStepSz<uchar4> &l, const PtrStepSz<uchar4> &r, const Pt texLeft.free(); texRight.free(); //inv1.free(); + invl2.free(); + invr2.free(); output.free(); } diff --git a/cv-node/src/main.cpp b/cv-node/src/main.cpp index ed2aa9bc25f3bdc5a456ba813c026b98d6be6fc6..cb260e116df741358b5245acfd5f2216ed15028e 100644 --- a/cv-node/src/main.cpp +++ b/cv-node/src/main.cpp @@ -140,10 +140,6 @@ static void run(const string &file) { Display display(calibrate, config["display"]); - float base_line = (float)config["camera"]["base_line"]; - float focal = (float)(config["camera"]["focal_length"]) / (float)(config["camera"]["sensor_width"]); - Mat rot_vec = Mat::zeros(1,3,CV_32F); - while (display.active()) { // Read calibrated images. calibrate.undistort(l,r);