From f775b14866838c9cb5a99c5ec6f784df237a4a5f Mon Sep 17 00:00:00 2001 From: Nicolas Pope <nwpope@utu.fi> Date: Wed, 27 Mar 2019 10:35:58 +0200 Subject: [PATCH] Early attempts at my own algorithm --- cv-node/CMakeLists.txt | 4 +- cv-node/include/ftl/algorithms/nick.hpp | 29 +++ cv-node/include/ftl/cuda_algorithms.hpp | 3 + cv-node/src/algorithms/consistency.cu | 4 + cv-node/src/algorithms/nick.cpp | 41 ++++ cv-node/src/algorithms/nick1.cu | 264 ++++++++++++++++++++++++ cv-node/src/algorithms/rtcensus.cpp | 2 + cv-node/src/algorithms/rtcensus.cu | 209 ++----------------- cv-node/src/algorithms/tex_filter.cu | 50 ++++- 9 files changed, 408 insertions(+), 198 deletions(-) create mode 100644 cv-node/include/ftl/algorithms/nick.hpp create mode 100644 cv-node/src/algorithms/nick.cpp create mode 100644 cv-node/src/algorithms/nick1.cu diff --git a/cv-node/CMakeLists.txt b/cv-node/CMakeLists.txt index f0678311f..2830734c8 100644 --- a/cv-node/CMakeLists.txt +++ b/cv-node/CMakeLists.txt @@ -95,7 +95,9 @@ if (CUDA_FOUND) "src/algorithms/rtcensus.cu" "src/algorithms/consistency.cu" "src/algorithms/sparse_census.cu" - "src/algorithms/tex_filter.cu") + "src/algorithms/tex_filter.cu" + "src/algorithms/nick1.cu" + "src/algorithms/nick.cpp") endif (CUDA_FOUND) add_executable(cv-node ${CVNODESRC}) diff --git a/cv-node/include/ftl/algorithms/nick.hpp b/cv-node/include/ftl/algorithms/nick.hpp new file mode 100644 index 000000000..685882ba0 --- /dev/null +++ b/cv-node/include/ftl/algorithms/nick.hpp @@ -0,0 +1,29 @@ +#ifndef _FTL_ALGORITHMS_NICK_HPP_ +#define _FTL_ALGORITHMS_NICK_HPP_ + +#include <opencv2/core.hpp> +#include <opencv2/opencv.hpp> +#include <opencv2/cudastereo.hpp> +#include <ftl/disparity.hpp> + +namespace ftl { +namespace algorithms { +class NickCuda : public ftl::Disparity { + public: + NickCuda(nlohmann::json &config); + + void compute(const cv::Mat &l, const cv::Mat &r, cv::Mat &disp); + + static inline Disparity *create(nlohmann::json &config) { return new NickCuda(config); } + + private: + cv::cuda::GpuMat disp_; + //cv::cuda::GpuMat filtered_; + cv::cuda::GpuMat left_; + cv::cuda::GpuMat right_; +}; +}; +}; + +#endif // _FTL_ALGORITHMS_NICK_HPP_ + diff --git a/cv-node/include/ftl/cuda_algorithms.hpp b/cv-node/include/ftl/cuda_algorithms.hpp index 38d1129ef..a7f341239 100644 --- a/cv-node/include/ftl/cuda_algorithms.hpp +++ b/cv-node/include/ftl/cuda_algorithms.hpp @@ -14,6 +14,9 @@ namespace cuda { void texture_filter(const TextureObject<uchar4> &t, const TextureObject<float> &d, TextureObject<float> &f, int num_disp, double thresh); + + void texture_map(const TextureObject<uchar4> &t, + TextureObject<float> &f); } } diff --git a/cv-node/src/algorithms/consistency.cu b/cv-node/src/algorithms/consistency.cu index 3924ace27..ca9a3df22 100644 --- a/cv-node/src/algorithms/consistency.cu +++ b/cv-node/src/algorithms/consistency.cu @@ -1,3 +1,7 @@ +/* + * Algorithms for checking the consistency of two disparity maps. + */ + #include <ftl/cuda_common.hpp> #define CONSISTENCY_THRESHOLD 1.0f diff --git a/cv-node/src/algorithms/nick.cpp b/cv-node/src/algorithms/nick.cpp new file mode 100644 index 000000000..5699023e7 --- /dev/null +++ b/cv-node/src/algorithms/nick.cpp @@ -0,0 +1,41 @@ +#include <ftl/algorithms/nick.hpp> +#include <vector_types.h> + +using ftl::algorithms::NickCuda; +using namespace cv; +using namespace cv::cuda; + +static ftl::Disparity::Register nickcuda("nick", NickCuda::create); + +NickCuda::NickCuda(nlohmann::json &config) : Disparity(config) { + +} + +namespace ftl { namespace gpu { +void nick1_call(const PtrStepSz<uchar4> &l, const PtrStepSz<uchar4> &r, const PtrStepSz<float> &disp, size_t num_disp); +}} + +void NickCuda::compute(const cv::Mat &l, const cv::Mat &r, cv::Mat &disp) { + if (disp_.empty()) disp_ = cuda::GpuMat(l.size(), CV_32FC1); + if (left_.empty()) left_ = cuda::GpuMat(l.size(), CV_8UC4); + if (right_.empty()) right_ = cuda::GpuMat(l.size(), CV_8UC4); + + Mat lhsv, rhsv; + cv::cvtColor(l, lhsv, COLOR_BGR2HSV); + cv::cvtColor(r, rhsv, COLOR_BGR2HSV); + int from_to[] = {0,0,1,1,2,2,-1,3}; + Mat hsval(lhsv.size(), CV_8UC4); + Mat hsvar(rhsv.size(), CV_8UC4); + mixChannels(&lhsv, 1, &hsval, 1, from_to, 4); + mixChannels(&rhsv, 1, &hsvar, 1, from_to, 4); + + left_.upload(hsval); + right_.upload(hsvar); + + ftl::gpu::nick1_call(left_, right_, disp_, 200); + + disp_.download(disp); +} + + + diff --git a/cv-node/src/algorithms/nick1.cu b/cv-node/src/algorithms/nick1.cu new file mode 100644 index 000000000..7c7702b7f --- /dev/null +++ b/cv-node/src/algorithms/nick1.cu @@ -0,0 +1,264 @@ +/* + * Author: Nicolas Pope + * Based initially on rtcensus + * + */ + +#include <ftl/cuda_common.hpp> +#include <ftl/cuda_algorithms.hpp> + +using namespace cv::cuda; +using namespace cv; + + +#define BLOCK_W 60 +#define RADIUS 7 +#define RADIUS2 2 +#define ROWSperTHREAD 1 + +template <typename T> +__host__ __device__ +inline T lerp(T v0, T v1, T t) { + return fma(t, v1, fma(-t, v0, v0)); +} + +#define FILTER_WINDOW 21 +#define FILTER_WINDOW_R 10 +#define EDGE_SENSITIVITY 10.0f + +__device__ float calculate_edge_disp(cudaTextureObject_t t, cudaTextureObject_t d, cudaTextureObject_t pT, cudaTextureObject_t pD, uchar4 pixel, int u, int v) { + float est = 0.0; + int nn = 0; + //float pest = 0.0; + //int pnn = 0; + + //cudaTextureObject_t nTex = (pT) ? pT : t; + //cudaTextureObject_t nDisp = (pD) ? pD : d; + + for (int m=-FILTER_WINDOW_R; m<=FILTER_WINDOW_R; m++) { + for (int n=-FILTER_WINDOW_R; n<=FILTER_WINDOW_R; n++) { + uchar4 neigh = tex2D<uchar4>(t, u+n, v+m); + float ndisp = tex2D<float>(d,u+n,v+m); + + //uchar4 pneigh = tex2D<uchar4>(nTex, u+n, v+m); + //float pndisp = tex2D<float>(nDisp,u+n,v+m); + + //if (isnan(tex2D<float>(nDisp,u+n,v+m))) continue; + //if (m == 0 && n == 0) continue; + + if (!isnan(ndisp) && (abs(neigh.z-pixel.z) <= EDGE_SENSITIVITY)) { // && (isnan(disp) || abs(ndisp-disp) < FILTER_DISP_THRESH)) { + est += ndisp; + nn++; + } + + //if (!isnan(pndisp) && (abs(pneigh.z-pixel.z) <= EDGE_SENSITIVITY)) { // && (isnan(disp) || abs(ndisp-disp) < FILTER_DISP_THRESH)) { + // pest += pndisp; + // pnn++; + //} + } + } + + est = (nn > 0) ? est/nn : NAN; + //pest = (pnn > 0) ? pest/pnn : NAN; + + return est; +} + +__device__ float colour_error(uchar4 v1, uchar4 v2) { + float dx = 0.05*abs(v1.x-v2.x); + float dy = 0.1*abs(v1.y-v2.y); + float dz = 0.85*abs(v1.z-v2.z); + return dx + dz + dy; +} + +// TODO Use HUE also and perhaps increase window? +// Or use more complex notion of texture? + +/* Just crossed and currently on edge */ +__device__ bool is_edge_left(uchar4 *line, int x, int n) { + if (x < 1 || x >= n-1) return false; + return (colour_error(line[x-1],line[x]) > EDGE_SENSITIVITY && colour_error(line[x],line[x+1]) <= EDGE_SENSITIVITY); +} + +/* Just crossed but not on edge now */ +__device__ bool is_edge_right(uchar4 *line, int x, int n) { + if (x < 1 || x >= n-1) return false; + return (colour_error(line[x-1],line[x]) <= EDGE_SENSITIVITY && colour_error(line[x],line[x+1]) > EDGE_SENSITIVITY); +} + +__global__ void filter_kernel(cudaTextureObject_t t, cudaTextureObject_t d, + cudaTextureObject_t prevD, + cudaTextureObject_t prevT, PtrStepSz<float> f, int num_disp) { + + + extern __shared__ uchar4 line[]; // One entire line of hsv image + + for (STRIDE_Y(v,f.rows)) { + for (STRIDE_X(u,f.cols)) { + line[u] = tex2D<uchar4>(t, u, v); + } + __syncthreads(); + + for (STRIDE_X(u,f.cols)) { + if (is_edge_right(line, u, f.cols)) { + float edge_disp = calculate_edge_disp(t,d,prevT,prevD,line[u],u+2,v); // tex2D<float>(d, u, v); + f(v,u) = edge_disp; + continue; + + float est = 0.0f; + int nn = 0; + + if (!isnan(edge_disp)) { + est += edge_disp; + nn++; + } + //f(v,u) = edge_disp; + + // TODO, find other edge first to get disparity + // Use middle disparities to: + // estimate curve or linear (choose equation) + // or ignore as noise if impossible + + // TODO For edge disparity, use a window to: + // a) find a missing disparity + // b) make sure disparity has some consensus (above or below mostly) + + // TODO Use past values? + // Another way to fill blanks and gain concensus + + // TODO Maintain a disparity stack to pop back to background? + // Issue of background disparity not being detected. + // Only if hsv also matches previous background + + // TODO Edge prediction (in vertical direction at least) could + // help fill both edge and disparity gaps. Propagate disparity + // along edges + + float last_disp = edge_disp; + + int i; + for (i=1; u+i<f.cols; i++) { + if (is_edge_right(line, u+i, f.cols)) { + //float end_disp = calculate_edge_disp(t,d,prevT,prevD,line[u+i-1],u+i-3,v); + //if (!isnan(end_disp)) last_disp = end_disp; + break; + } + + float di = tex2D<float>(d,u+i,v); + if (!isnan(di)) { + est += di; + nn++; + } + //f(v,u+i) = edge_disp; + } + + est = (nn > 0) ? est / nn : NAN; + //for (int j=1; j<i; j++) { + // f(v,u+j) = est; //lerp(edge_disp, last_disp, (float)j / (float)(i-1)); + //} + } else f(v,u) = NAN; + } + } +} + +__global__ void edge_invar1_kernel(cudaTextureObject_t t, ftl::cuda::TextureObject<float2> o) { + for (STRIDE_Y(v,o.height())) { + for (STRIDE_X(u,o.width())) { + float gx = ((tex2D<uchar4>(t, u-1, v-1).z - tex2D<uchar4>(t, u+1, v-1).z) + + (tex2D<uchar4>(t, u-1, v).z - tex2D<uchar4>(t, u+1, v).z) + + (tex2D<uchar4>(t, u-1, v+1).z - tex2D<uchar4>(t, u+1, v+1).z)) / 3; + float gy = ((tex2D<uchar4>(t, u-1, v-1).z - tex2D<uchar4>(t, u-1, v+1).z) + + (tex2D<uchar4>(t, u, v-1).z - tex2D<uchar4>(t, u, v+1).z) + + (tex2D<uchar4>(t, u+1, v-1).z - tex2D<uchar4>(t, u+1, v+1).z)) / 3; + + float g = sqrt(gx*gx+gy*gy); + float a = atan2(gy,gx); + + // TODO adapt threshold using histeresis + o(u,v) = (g > 10.0f) ? make_float2(g,a) : make_float2(NAN,NAN); + } + } +} + +__global__ void edge_invar2_kernel(cudaTextureObject_t i1, ftl::cuda::TextureObject<float> o) { + for (STRIDE_Y(v,o.height())) { + for (STRIDE_X(u,o.width())) { + float2 pixel_i1 = tex2D<float2>(i1,u,v); + + if (isnan(pixel_i1.x)) { + o(u,v) = NAN; + continue; + } + + int dx = ((pixel_i1.y >= 0.785 && pixel_i1.y <= 2.356) ) ? 0 : 1; + int dy = (dx == 1) ? 0 : 1; + + float2 next_pix; + next_pix.x = NAN; + next_pix.y = NAN; + float diff = 10000.0f; + + for (int i=-10; i<=10; i++) { + float2 pix = tex2D<float2>(i1,u+dx*i+dy*2, v+dy*i+dx*2); + if (isnan(pix.x)) continue; + + float d = abs(pix.x-pixel_i1.x)*abs(pix.y-pixel_i1.y); + if (d < diff) { + next_pix = pix; + diff = d; + } + } + + if (!isnan(next_pix.x) && diff < 2.0f) { + o(u,v) = abs(pixel_i1.y - next_pix.y)*81.0f; + } else { + o(u,v) = NAN; + } + + // Search in two directions for next edge pixel + // Calculate curvature by monitoring gradient angle change + // Calculate length by stopping when change exceeds threshold + } + } +} + +ftl::cuda::TextureObject<float> prevDisp; +ftl::cuda::TextureObject<uchar4> prevImage; + +namespace ftl { +namespace gpu { + +void nick1_call(const PtrStepSz<uchar4> &l, const PtrStepSz<uchar4> &r, const PtrStepSz<float> &disp, size_t num_disp) { + // Make all the required texture steps + // TODO Could reduce re-allocations by caching these + ftl::cuda::TextureObject<uchar4> texLeft(l); + ftl::cuda::TextureObject<uchar4> texRight(r); + ftl::cuda::TextureObject<float2> inv1(l.cols, l.rows); + ftl::cuda::TextureObject<float> output(disp); + + dim3 grid(1,1,1); + dim3 threads(BLOCK_W, 1, 1); + grid.x = cv::cuda::device::divUp(l.cols - 2 * RADIUS2, BLOCK_W); + grid.y = cv::cuda::device::divUp(l.rows - 2 * RADIUS2, ROWSperTHREAD); + + edge_invar1_kernel<<<grid,threads>>>(texLeft.cudaTexture(), inv1); + cudaSafeCall( cudaGetLastError() ); + + edge_invar2_kernel<<<grid,threads>>>(inv1.cudaTexture(), output); + cudaSafeCall( cudaGetLastError() ); + + //prevImage.free(); + //prevImage = texLeft; + + //if (&stream == Stream::Null()) + cudaSafeCall( cudaDeviceSynchronize() ); + + texLeft.free(); + texRight.free(); + inv1.free(); + output.free(); +} + +} +} + diff --git a/cv-node/src/algorithms/rtcensus.cpp b/cv-node/src/algorithms/rtcensus.cpp index ee7650cfa..2c3222552 100644 --- a/cv-node/src/algorithms/rtcensus.cpp +++ b/cv-node/src/algorithms/rtcensus.cpp @@ -6,6 +6,8 @@ * for embedded real-time systems * [2] Humenberger, Zinner, Kubinger: Performance Evaluation of Census-Based * Stereo Matching Algorithm on Embedded and Multi-Core Hardware + * [3] Humenberger, Engelke, Kubinger: A Census-Based Stereo Vision Algorithm Using Modified Semi-Global Matching + * and Plane Fitting to Improve Matching Quality. * * Equation numbering uses [1] unless otherwise stated */ diff --git a/cv-node/src/algorithms/rtcensus.cu b/cv-node/src/algorithms/rtcensus.cu index d0432668d..d2aa073eb 100644 --- a/cv-node/src/algorithms/rtcensus.cu +++ b/cv-node/src/algorithms/rtcensus.cu @@ -6,6 +6,8 @@ * for embedded real-time systems * [2] Humenberger, Zinner, Kubinger: Performance Evaluation of Census-Based * Stereo Matching Algorithm on Embedded and Multi-Core Hardware + * [3] Humenberger, Engelke, Kubinger: A Census-Based Stereo Vision Algorithm Using Modified Semi-Global Matching + * and Plane Fitting to Improve Matching Quality. * * Equation numbering uses [1] unless otherwise stated * @@ -49,7 +51,7 @@ __forceinline__ __device__ uint64_t uint2asull (uint2 a) { } /* - * Generate left and right disparity images from census data. (19) + * Generate left and right disparity images from census data. (18)(19)(25) */ __global__ void disp_kernel(float *disp_l, float *disp_r, int pitchL, int pitchR, @@ -68,25 +70,11 @@ __global__ void disp_kernel(float *disp_l, float *disp_r, // Local cache uint64_t l_cache_l1[5][5]; uint64_t l_cache_l2[5][5]; - - // Prepare the cache load - //const int cache_thread_width = (BLOCK_W+ds / BLOCK_W + RADIUS2*2 + 1)*2; - //uint64_t *cache_ptr = cache + (threadIdx.x * cache_thread_width); if (v_end >= height) v_end = height; if (u+maxdisp >= width) maxdisp = width-u; for (int v=v_start; v<v_end; v++) { - /*const int cache_start = v*width*2 + cache_thread_width*blockIdx.x; - for (int i=0; i<cache_thread_width; i+=2) { - cache_ptr[i] = census[cache_start+i]; - cache_ptr[i+1] = census[cache_start+i+1]; - } - - __syncthreads();*/ - - // Fill local cache for window 5x5 - // TODO Use shared memory? for (int m=-2; m<=2; m++) { for (int n=-2; n<=2; n++) { l_cache_l2[m+2][n+2] = uint2asull(tex2D<uint2>(censusL,u+n,v+m)); @@ -107,7 +95,7 @@ __global__ void disp_kernel(float *disp_l, float *disp_r, int dix1 = 0; int dix2 = 0; - // TODO Use prediction textures to narrow range + // (19) for (int d=0; d<maxdisp; d++) { uint16_t hamming1 = 0; uint16_t hamming2 = 0; @@ -119,18 +107,17 @@ __global__ void disp_kernel(float *disp_l, float *disp_r, for (int n=-2; n<=2; n++) { const auto u_ = u + n; + // (18) auto l1 = l_cache_l1[m+2][n+2]; auto l2 = l_cache_l2[m+2][n+2]; - - // TODO Somehow might use shared memory auto r1 = uint2asull(tex2D<uint2>(censusL, u_+d, v_)); auto r2 = uint2asull(tex2D<uint2>(censusR, u_-d, v_)); - hamming1 += __popcll(r1^l1); hamming2 += __popcll(r2^l2); } } + // Find the two minimum costs if (hamming1 < min_disp1) { min_before1 = last_ham1; min_disp1 = hamming1; @@ -157,171 +144,16 @@ __global__ void disp_kernel(float *disp_l, float *disp_r, //float d2 = (dix2 == 0 || dix2 == ds-1) ? (float)dix2 : fit_parabola(dix2, min_disp2, min_before2, min_after2); // TODO Allow for discontinuities with threshold + // Subpixel disparity (20) float d1 = fit_parabola(dix1, min_disp1, min_before1, min_after1); float d2 = fit_parabola(dix2, min_disp2, min_before2, min_after2); - // Confidence filter (25) - // TODO choice of gamma to depend on disparity variance - // Variance with next option, variance with neighbours, variance with past value + // Confidence filter based on (25) disp_l[v*pitchL+u] = ((min_disp2b - min_disp2) >= gamma) ? d2 : NAN; disp_r[v*pitchR+u] = ((min_disp1b - min_disp1) >= gamma) ? d1 : NAN; - - // TODO If disparity is 0.0f, perhaps - // Use previous value unless it conflicts with present - // Use neighbour values if texture matches - } -} - - - -template <typename T> -__host__ __device__ -inline T lerp(T v0, T v1, T t) { - return fma(t, v1, fma(-t, v0, v0)); -} - -#define FILTER_WINDOW 21 -#define FILTER_WINDOW_R 10 -#define EDGE_SENSITIVITY 10.0f - -__device__ float calculate_edge_disp(cudaTextureObject_t t, cudaTextureObject_t d, cudaTextureObject_t pT, cudaTextureObject_t pD, uchar4 pixel, int u, int v) { - float est = 0.0; - int nn = 0; - //float pest = 0.0; - //int pnn = 0; - - //cudaTextureObject_t nTex = (pT) ? pT : t; - //cudaTextureObject_t nDisp = (pD) ? pD : d; - - for (int m=-FILTER_WINDOW_R; m<=FILTER_WINDOW_R; m++) { - for (int n=-FILTER_WINDOW_R; n<=FILTER_WINDOW_R; n++) { - uchar4 neigh = tex2D<uchar4>(t, u+n, v+m); - float ndisp = tex2D<float>(d,u+n,v+m); - - //uchar4 pneigh = tex2D<uchar4>(nTex, u+n, v+m); - //float pndisp = tex2D<float>(nDisp,u+n,v+m); - - //if (isnan(tex2D<float>(nDisp,u+n,v+m))) continue; - //if (m == 0 && n == 0) continue; - - if (!isnan(ndisp) && (abs(neigh.z-pixel.z) <= EDGE_SENSITIVITY)) { // && (isnan(disp) || abs(ndisp-disp) < FILTER_DISP_THRESH)) { - est += ndisp; - nn++; - } - - //if (!isnan(pndisp) && (abs(pneigh.z-pixel.z) <= EDGE_SENSITIVITY)) { // && (isnan(disp) || abs(ndisp-disp) < FILTER_DISP_THRESH)) { - // pest += pndisp; - // pnn++; - //} - } } - - est = (nn > 0) ? est/nn : NAN; - //pest = (pnn > 0) ? pest/pnn : NAN; - - return est; } -__device__ float colour_error(uchar4 v1, uchar4 v2) { - float dx = 0.05*abs(v1.x-v2.x); - float dy = 0.1*abs(v1.y-v2.y); - float dz = 0.85*abs(v1.z-v2.z); - return dx + dz + dy; -} - -// TODO Use HUE also and perhaps increase window? -// Or use more complex notion of texture? - -/* Just crossed and currently on edge */ -__device__ bool is_edge_left(uchar4 *line, int x, int n) { - if (x < 1 || x >= n-1) return false; - return (colour_error(line[x-1],line[x]) > EDGE_SENSITIVITY && colour_error(line[x],line[x+1]) <= EDGE_SENSITIVITY); -} - -/* Just crossed but not on edge now */ -__device__ bool is_edge_right(uchar4 *line, int x, int n) { - if (x < 1 || x >= n-1) return false; - return (colour_error(line[x-1],line[x]) <= EDGE_SENSITIVITY && colour_error(line[x],line[x+1]) > EDGE_SENSITIVITY); -} - -__global__ void filter_kernel(cudaTextureObject_t t, cudaTextureObject_t d, - cudaTextureObject_t prevD, - cudaTextureObject_t prevT, PtrStepSz<float> f, int num_disp) { - - - extern __shared__ uchar4 line[]; // One entire line of hsv image - - for (STRIDE_Y(v,f.rows)) { - for (STRIDE_X(u,f.cols)) { - line[u] = tex2D<uchar4>(t, u, v); - } - __syncthreads(); - - for (STRIDE_X(u,f.cols)) { - if (is_edge_right(line, u, f.cols)) { - float edge_disp = calculate_edge_disp(t,d,prevT,prevD,line[u],u+2,v); // tex2D<float>(d, u, v); - f(v,u) = edge_disp; - continue; - - float est = 0.0f; - int nn = 0; - - if (!isnan(edge_disp)) { - est += edge_disp; - nn++; - } - //f(v,u) = edge_disp; - - // TODO, find other edge first to get disparity - // Use middle disparities to: - // estimate curve or linear (choose equation) - // or ignore as noise if impossible - - // TODO For edge disparity, use a window to: - // a) find a missing disparity - // b) make sure disparity has some consensus (above or below mostly) - - // TODO Use past values? - // Another way to fill blanks and gain concensus - - // TODO Maintain a disparity stack to pop back to background? - // Issue of background disparity not being detected. - // Only if hsv also matches previous background - - // TODO Edge prediction (in vertical direction at least) could - // help fill both edge and disparity gaps. Propagate disparity - // along edges - - float last_disp = edge_disp; - - int i; - for (i=1; u+i<f.cols; i++) { - if (is_edge_right(line, u+i, f.cols)) { - //float end_disp = calculate_edge_disp(t,d,prevT,prevD,line[u+i-1],u+i-3,v); - //if (!isnan(end_disp)) last_disp = end_disp; - break; - } - - float di = tex2D<float>(d,u+i,v); - if (!isnan(di)) { - est += di; - nn++; - } - //f(v,u+i) = edge_disp; - } - - est = (nn > 0) ? est / nn : NAN; - //for (int j=1; j<i; j++) { - // f(v,u+j) = est; //lerp(edge_disp, last_disp, (float)j / (float)(i-1)); - //} - } else f(v,u) = NAN; - } - } -} - -ftl::cuda::TextureObject<float> prevDisp; -ftl::cuda::TextureObject<uchar4> prevImage; - void rtcensus_call(const PtrStepSz<uchar4> &l, const PtrStepSz<uchar4> &r, const PtrStepSz<float> &disp, size_t num_disp, const int &stream) { // Make all the required texture steps // TODO Could reduce re-allocations by caching these @@ -334,7 +166,7 @@ void rtcensus_call(const PtrStepSz<uchar4> &l, const PtrStepSz<uchar4> &r, const ftl::cuda::TextureObject<float> dispTex(r.cols, r.rows); ftl::cuda::TextureObject<float> output(disp); - // Calculate the census for left and right + // Calculate the census for left and right (14)(15)(16) ftl::cuda::sparse_census(texLeft, texRight, censusTexLeft, censusTexRight); dim3 grid(1,1,1); @@ -342,7 +174,7 @@ void rtcensus_call(const PtrStepSz<uchar4> &l, const PtrStepSz<uchar4> &r, const grid.x = cv::cuda::device::divUp(l.cols - 2 * RADIUS2, BLOCK_W); grid.y = cv::cuda::device::divUp(l.rows - 2 * RADIUS2, ROWSperTHREAD); - // Calculate L and R disparities + // Calculate L and R disparities (18)(19)(20)(21)(22)(25) disp_kernel<<<grid, threads>>>( dispTexLeft.devicePtr(), dispTexRight.devicePtr(), dispTexLeft.pitch()/sizeof(float), dispTexRight.pitch()/sizeof(float), @@ -351,28 +183,15 @@ void rtcensus_call(const PtrStepSz<uchar4> &l, const PtrStepSz<uchar4> &r, const num_disp); cudaSafeCall( cudaGetLastError() ); - // Check consistency between L and R disparities. + // Check consistency between L and R disparities. (23)(24) consistency(dispTexLeft, dispTexRight, dispTex); - texture_filter(texLeft, dispTex, output, num_disp, 20.0); - - /*grid.x = 4; - grid.y = l.rows; - threads.x = l.cols; - size_t filter_smem = sizeof(uchar4) * l.cols; - filter_kernel<<<grid, threads, filter_smem>>>(texLeft.cudaTexture(), dispTex.cudaTexture(), prevDisp.cudaTexture(), prevImage.cudaTexture(), disp, num_disp); - cudaSafeCall( cudaGetLastError() );*/ + // TM in (7) of paper [3]. Eq (26) in [1] is wrong. + texture_filter(texLeft, dispTex, output, num_disp, 10.0); - //prevDisp.free(); - //prevDisp = disp; - prevImage.free(); - prevImage = texLeft; - - //if (&stream == Stream::Null()) cudaSafeCall( cudaDeviceSynchronize() ); - - //cudaSafeCall( cudaDestroyTextureObject (texLeft) ); + texLeft.free(); texRight.free(); censusTexLeft.free(); censusTexRight.free(); diff --git a/cv-node/src/algorithms/tex_filter.cu b/cv-node/src/algorithms/tex_filter.cu index 3962d1263..12c915726 100644 --- a/cv-node/src/algorithms/tex_filter.cu +++ b/cv-node/src/algorithms/tex_filter.cu @@ -3,6 +3,8 @@ #define FILTER_WINDOW 11.0 #define FILTER_WINDOW_R 5 +// --- Filter by texture complexity -------------------------------------------- + __global__ void texture_filter_kernel(cudaTextureObject_t t, cudaTextureObject_t d, ftl::cuda::TextureObject<float> f, int num_disp, double thresh) { // Thresh = -5000000 @@ -22,14 +24,15 @@ __global__ void texture_filter_kernel(cudaTextureObject_t t, cudaTextureObject_t // Texture map filtering double tm = (neigh_sq / (FILTER_WINDOW*FILTER_WINDOW)) - - //((neigh_sum*neigh_sum) / (FILTER_WINDOW*FILTER_WINDOW)); ((neigh_sum / (FILTER_WINDOW*FILTER_WINDOW)) * (neigh_sum / (FILTER_WINDOW*FILTER_WINDOW))); if (tm >= thresh) { f(u,v) = disp; } else { - f(u,v) = NAN; + f(u,v) = NAN; //(tm <= 200.0) ? 0 : 200.0f;; } + + //f(u,v) = tm; } } } @@ -53,3 +56,46 @@ namespace cuda { } } +// --- Generate a texture map -------------------------------------------------- + +__global__ void texture_map_kernel(cudaTextureObject_t t, + ftl::cuda::TextureObject<float> f) { + + for (STRIDE_Y(v,f.height())) { + for (STRIDE_X(u,f.width())) { + double neigh_sq = 0.0; + double neigh_sum = 0.0; + + for (int m=-FILTER_WINDOW_R; m<=FILTER_WINDOW_R; m++) { + for (int n=-FILTER_WINDOW_R; n<=FILTER_WINDOW_R; n++) { + uchar4 neigh = tex2D<uchar4>(t, u+n, v+m); + neigh_sq += (double)(neigh.z*neigh.z); + neigh_sum += (double)neigh.z; + } + } + + // Texture map filtering + double tm = (neigh_sq / (FILTER_WINDOW*FILTER_WINDOW)) - + ((neigh_sum / (FILTER_WINDOW*FILTER_WINDOW)) * (neigh_sum / (FILTER_WINDOW*FILTER_WINDOW))); + + f(u,v) = tm; + } + } +} + +namespace ftl { +namespace cuda { + void texture_map(const TextureObject<uchar4> &t, + TextureObject<float> &f) { + dim3 grid(1,1,1); + dim3 threads(128, 1, 1); + grid.x = cv::cuda::device::divUp(f.width(), 128); + grid.y = cv::cuda::device::divUp(f.height(), 1); + texture_map_kernel<<<grid, threads>>>( + t.cudaTexture(), + f); + cudaSafeCall( cudaGetLastError() ); + } +} +} + -- GitLab