diff --git a/cv-node/CMakeLists.txt b/cv-node/CMakeLists.txt index 1531cc233eee7057a9e1a06eb7746fdcf7452788..f0678311f69a4e4574933651723fb0da87db3194 100644 --- a/cv-node/CMakeLists.txt +++ b/cv-node/CMakeLists.txt @@ -94,7 +94,8 @@ if (CUDA_FOUND) "src/algorithms/opencv_cuda_bp.cpp" "src/algorithms/rtcensus.cu" "src/algorithms/consistency.cu" - "src/algorithms/sparse_census.cu") + "src/algorithms/sparse_census.cu" + "src/algorithms/tex_filter.cu") endif (CUDA_FOUND) add_executable(cv-node ${CVNODESRC}) diff --git a/cv-node/include/ftl/cuda_algorithms.hpp b/cv-node/include/ftl/cuda_algorithms.hpp index f4c4298ba5efcfdf3377911c092eb77676a25c17..38d1129ef2644f32c361e8af1908cd7728a925cd 100644 --- a/cv-node/include/ftl/cuda_algorithms.hpp +++ b/cv-node/include/ftl/cuda_algorithms.hpp @@ -12,6 +12,9 @@ namespace cuda { void sparse_census(const TextureObject<uchar4> &l, const TextureObject<uchar4> &r, TextureObject<uint2> &cl, TextureObject<uint2> &cr); + void texture_filter(const TextureObject<uchar4> &t, const TextureObject<float> &d, + TextureObject<float> &f, int num_disp, double thresh); + } } diff --git a/cv-node/include/ftl/cuda_common.hpp b/cv-node/include/ftl/cuda_common.hpp index 834e2af362b2fa74ab31c2de23a33c14ac23d45d..f57c4c0ba081e35e82d58774263a9569019d6a9f 100644 --- a/cv-node/include/ftl/cuda_common.hpp +++ b/cv-node/include/ftl/cuda_common.hpp @@ -25,14 +25,15 @@ class TextureObject { int pitch() const { return pitch_; } T *devicePtr() { return ptr_; }; - int width() const { return width_; } - int height() const { return height_; } + __host__ __device__ T *devicePtr(int v) { return &ptr_[v*pitch2_]; } + __host__ __device__ int width() const { return width_; } + __host__ __device__ int height() const { return height_; } cudaTextureObject_t cudaTexture() const { return texobj_; } __device__ inline T tex2D(int u, int v) { return ::tex2D<T>(texobj_, u, v); } __device__ inline T tex2D(float u, float v) { return ::tex2D<T>(texobj_, u, v); } - inline const T &operator()(int u, int v) const { return ptr_[u+v*pitch_]; } - inline T &operator()(int u, int v) { return ptr_[u+v*pitch_]; } + __host__ __device__ inline const T &operator()(int u, int v) const { return ptr_[u+v*pitch2_]; } + __host__ __device__ inline T &operator()(int u, int v) { return ptr_[u+v*pitch2_]; } void free() { if (texobj_ != 0) cudaSafeCall( cudaDestroyTextureObject (texobj_) ); @@ -44,6 +45,7 @@ class TextureObject { private: cudaTextureObject_t texobj_; size_t pitch_; + size_t pitch2_; // in T units int width_; int height_; T *ptr_; @@ -73,6 +75,7 @@ TextureObject<T>::TextureObject(const cv::cuda::PtrStepSz<T> &d) { cudaCreateTextureObject(&tex, &resDesc, &texDesc, NULL); texobj_ = tex; pitch_ = d.step; + pitch2_ = pitch_ / sizeof(T); ptr_ = d.data; width_ = d.cols; height_ = d.rows; @@ -103,6 +106,7 @@ TextureObject<T>::TextureObject(T *ptr, int pitch, int width, int height) { cudaCreateTextureObject(&tex, &resDesc, &texDesc, NULL); texobj_ = tex; pitch_ = pitch; + pitch2_ = pitch_ / sizeof(T); ptr_ = ptr; width_ = width; height_ = height; @@ -133,6 +137,7 @@ TextureObject<T>::TextureObject(size_t width, size_t height) { width_ = width; height_ = height; needsfree_ = true; + pitch2_ = pitch_ / sizeof(T); //needsdestroy_ = true; } @@ -143,6 +148,7 @@ TextureObject<T>::TextureObject(const TextureObject &p) { width_ = p.width_; height_ = p.height_; pitch_ = p.pitch_; + pitch2_ = pitch_ / sizeof(T); needsfree_ = p.needsfree_; //needsdestroy_ = false; } diff --git a/cv-node/src/algorithms/rtcensus.cu b/cv-node/src/algorithms/rtcensus.cu index 0fc81808888101fbb51bcf2b022ca7f9c2c254b5..d0432668d1b81c750c3eb01a0ede62dea6dc8cf9 100644 --- a/cv-node/src/algorithms/rtcensus.cu +++ b/cv-node/src/algorithms/rtcensus.cu @@ -58,7 +58,7 @@ __global__ void disp_kernel(float *disp_l, float *disp_r, size_t ds) { //extern __shared__ uint64_t cache[]; - const int gamma = 20; + const int gamma = 35; int u = (blockIdx.x * BLOCK_W) + threadIdx.x + RADIUS2; int v_start = (blockIdx.y * ROWSperTHREAD) + RADIUS2; @@ -331,7 +331,8 @@ void rtcensus_call(const PtrStepSz<uchar4> &l, const PtrStepSz<uchar4> &r, const ftl::cuda::TextureObject<uint2> censusTexRight(r.cols, r.rows); ftl::cuda::TextureObject<float> dispTexLeft(l.cols, l.rows); ftl::cuda::TextureObject<float> dispTexRight(r.cols, r.rows); - ftl::cuda::TextureObject<float> dispTex(disp); //r.cols, r.rows); + ftl::cuda::TextureObject<float> dispTex(r.cols, r.rows); + ftl::cuda::TextureObject<float> output(disp); // Calculate the census for left and right ftl::cuda::sparse_census(texLeft, texRight, censusTexLeft, censusTexRight); @@ -353,7 +354,7 @@ void rtcensus_call(const PtrStepSz<uchar4> &l, const PtrStepSz<uchar4> &r, const // Check consistency between L and R disparities. consistency(dispTexLeft, dispTexRight, dispTex); - + texture_filter(texLeft, dispTex, output, num_disp, 20.0); /*grid.x = 4; grid.y = l.rows; @@ -378,6 +379,7 @@ void rtcensus_call(const PtrStepSz<uchar4> &l, const PtrStepSz<uchar4> &r, const dispTexLeft.free(); dispTexRight.free(); dispTex.free(); + output.free(); } }; diff --git a/cv-node/src/algorithms/tex_filter.cu b/cv-node/src/algorithms/tex_filter.cu index b5934c6f3292b9ea897a1913cb92f5843f6f36d8..3962d1263fff7fe2af6e741f02ac26177f42f9dc 100644 --- a/cv-node/src/algorithms/tex_filter.cu +++ b/cv-node/src/algorithms/tex_filter.cu @@ -1,47 +1,48 @@ #include <ftl/cuda_common.hpp> -#define FILTER_WINDOW 11 +#define FILTER_WINDOW 11.0 #define FILTER_WINDOW_R 5 __global__ void texture_filter_kernel(cudaTextureObject_t t, cudaTextureObject_t d, - ftl::cuda::TextureObject<float> f, int num_disp, int thresh) { // Thresh = -5000000 + ftl::cuda::TextureObject<float> f, int num_disp, double thresh) { // Thresh = -5000000 - float disp = tex2D<float>(d,u,v); - int neigh_sq = 0; - int neigh_sum = 0; + for (STRIDE_Y(v,f.height())) { + for (STRIDE_X(u,f.width())) { + float disp = tex2D<float>(d,u,v); + double neigh_sq = 0.0; + double neigh_sum = 0.0; - for (STRIDE_Y(v,h)) { - for (STRIDE_X(u,w)) { for (int m=-FILTER_WINDOW_R; m<=FILTER_WINDOW_R; m++) { for (int n=-FILTER_WINDOW_R; n<=FILTER_WINDOW_R; n++) { uchar4 neigh = tex2D<uchar4>(t, u+n, v+m); - neigh_sq += neigh*neigh; - neigh_sum += neigh; + neigh_sq += (double)(neigh.z*neigh.z); + neigh_sum += (double)neigh.z; } } - } - } - // Texture map filtering - int tm = (neigh_sq / (FILTER_WINDOW*FILTER_WINDOW)) - - ((neigh_sum*neigh_sum) / (FILTER_WINDOW*FILTER_WINDOW)); + // Texture map filtering + double tm = (neigh_sq / (FILTER_WINDOW*FILTER_WINDOW)) - + //((neigh_sum*neigh_sum) / (FILTER_WINDOW*FILTER_WINDOW)); + ((neigh_sum / (FILTER_WINDOW*FILTER_WINDOW)) * (neigh_sum / (FILTER_WINDOW*FILTER_WINDOW))); - if (tm < thesh) { - f(u,v) = disp; - } else { - f(u,v) = NAN; + if (tm >= thresh) { + f(u,v) = disp; + } else { + f(u,v) = NAN; + } + } } } namespace ftl { namespace cuda { void texture_filter(const TextureObject<uchar4> &t, const TextureObject<float> &d, - TextureObject<float> &f, int num_disp, int thresh) { + TextureObject<float> &f, int num_disp, double thresh) { dim3 grid(1,1,1); dim3 threads(128, 1, 1); - grid.x = cv::cuda::device::divUp(disp.width(), 128); - grid.y = cv::cuda::device::divUp(disp.height(), 1); - texture_filter_kernel<<<grid, threads>>> + grid.x = cv::cuda::device::divUp(d.width(), 128); + grid.y = cv::cuda::device::divUp(d.height(), 1); + texture_filter_kernel<<<grid, threads>>>( t.cudaTexture(), d.cudaTexture(), f,