Skip to content
Snippets Groups Projects
Commit 7ae2627c authored by Nicolas Pope
Browse files

Use correct texture filtering in rtcensus

parent 85b6b8b0
No related branches found
No related tags found
No related merge requests found
......@@ -94,7 +94,8 @@ if (CUDA_FOUND)
"src/algorithms/opencv_cuda_bp.cpp"
"src/algorithms/rtcensus.cu"
"src/algorithms/consistency.cu"
"src/algorithms/sparse_census.cu")
"src/algorithms/sparse_census.cu"
"src/algorithms/tex_filter.cu")
endif (CUDA_FOUND)
add_executable(cv-node ${CVNODESRC})
......
......@@ -12,6 +12,9 @@ namespace cuda {
void sparse_census(const TextureObject<uchar4> &l, const TextureObject<uchar4> &r,
TextureObject<uint2> &cl, TextureObject<uint2> &cr);
void texture_filter(const TextureObject<uchar4> &t, const TextureObject<float> &d,
TextureObject<float> &f, int num_disp, double thresh);
}
}
......
......@@ -25,14 +25,15 @@ class TextureObject {
int pitch() const { return pitch_; }
T *devicePtr() { return ptr_; };
int width() const { return width_; }
int height() const { return height_; }
__host__ __device__ T *devicePtr(int v) { return &ptr_[v*pitch2_]; }
__host__ __device__ int width() const { return width_; }
__host__ __device__ int height() const { return height_; }
cudaTextureObject_t cudaTexture() const { return texobj_; }
__device__ inline T tex2D(int u, int v) { return ::tex2D<T>(texobj_, u, v); }
__device__ inline T tex2D(float u, float v) { return ::tex2D<T>(texobj_, u, v); }
inline const T &operator()(int u, int v) const { return ptr_[u+v*pitch_]; }
inline T &operator()(int u, int v) { return ptr_[u+v*pitch_]; }
__host__ __device__ inline const T &operator()(int u, int v) const { return ptr_[u+v*pitch2_]; }
__host__ __device__ inline T &operator()(int u, int v) { return ptr_[u+v*pitch2_]; }
void free() {
if (texobj_ != 0) cudaSafeCall( cudaDestroyTextureObject (texobj_) );
......@@ -44,6 +45,7 @@ class TextureObject {
private:
cudaTextureObject_t texobj_;
size_t pitch_;
size_t pitch2_; // in T units
int width_;
int height_;
T *ptr_;
......@@ -73,6 +75,7 @@ TextureObject<T>::TextureObject(const cv::cuda::PtrStepSz<T> &d) {
cudaCreateTextureObject(&tex, &resDesc, &texDesc, NULL);
texobj_ = tex;
pitch_ = d.step;
pitch2_ = pitch_ / sizeof(T);
ptr_ = d.data;
width_ = d.cols;
height_ = d.rows;
......@@ -103,6 +106,7 @@ TextureObject<T>::TextureObject(T *ptr, int pitch, int width, int height) {
cudaCreateTextureObject(&tex, &resDesc, &texDesc, NULL);
texobj_ = tex;
pitch_ = pitch;
pitch2_ = pitch_ / sizeof(T);
ptr_ = ptr;
width_ = width;
height_ = height;
......@@ -133,6 +137,7 @@ TextureObject<T>::TextureObject(size_t width, size_t height) {
width_ = width;
height_ = height;
needsfree_ = true;
pitch2_ = pitch_ / sizeof(T);
//needsdestroy_ = true;
}
......@@ -143,6 +148,7 @@ TextureObject<T>::TextureObject(const TextureObject &p) {
width_ = p.width_;
height_ = p.height_;
pitch_ = p.pitch_;
pitch2_ = pitch_ / sizeof(T);
needsfree_ = p.needsfree_;
//needsdestroy_ = false;
}
......
......@@ -58,7 +58,7 @@ __global__ void disp_kernel(float *disp_l, float *disp_r,
size_t ds) {
//extern __shared__ uint64_t cache[];
const int gamma = 20;
const int gamma = 35;
int u = (blockIdx.x * BLOCK_W) + threadIdx.x + RADIUS2;
int v_start = (blockIdx.y * ROWSperTHREAD) + RADIUS2;
......@@ -331,7 +331,8 @@ void rtcensus_call(const PtrStepSz<uchar4> &l, const PtrStepSz<uchar4> &r, const
ftl::cuda::TextureObject<uint2> censusTexRight(r.cols, r.rows);
ftl::cuda::TextureObject<float> dispTexLeft(l.cols, l.rows);
ftl::cuda::TextureObject<float> dispTexRight(r.cols, r.rows);
ftl::cuda::TextureObject<float> dispTex(disp); //r.cols, r.rows);
ftl::cuda::TextureObject<float> dispTex(r.cols, r.rows);
ftl::cuda::TextureObject<float> output(disp);
// Calculate the census for left and right
ftl::cuda::sparse_census(texLeft, texRight, censusTexLeft, censusTexRight);
......@@ -353,7 +354,7 @@ void rtcensus_call(const PtrStepSz<uchar4> &l, const PtrStepSz<uchar4> &r, const
// Check consistency between L and R disparities.
consistency(dispTexLeft, dispTexRight, dispTex);
texture_filter(texLeft, dispTex, output, num_disp, 20.0);
/*grid.x = 4;
grid.y = l.rows;
......@@ -378,6 +379,7 @@ void rtcensus_call(const PtrStepSz<uchar4> &l, const PtrStepSz<uchar4> &r, const
dispTexLeft.free();
dispTexRight.free();
dispTex.free();
output.free();
}
};
......
#include <ftl/cuda_common.hpp>
#define FILTER_WINDOW 11
#define FILTER_WINDOW 11.0
#define FILTER_WINDOW_R 5
__global__ void texture_filter_kernel(cudaTextureObject_t t, cudaTextureObject_t d,
ftl::cuda::TextureObject<float> f, int num_disp, int thresh) { // Thresh = -5000000
ftl::cuda::TextureObject<float> f, int num_disp, double thresh) { // Thresh = -5000000
float disp = tex2D<float>(d,u,v);
int neigh_sq = 0;
int neigh_sum = 0;
for (STRIDE_Y(v,f.height())) {
for (STRIDE_X(u,f.width())) {
float disp = tex2D<float>(d,u,v);
double neigh_sq = 0.0;
double neigh_sum = 0.0;
for (STRIDE_Y(v,h)) {
for (STRIDE_X(u,w)) {
for (int m=-FILTER_WINDOW_R; m<=FILTER_WINDOW_R; m++) {
for (int n=-FILTER_WINDOW_R; n<=FILTER_WINDOW_R; n++) {
uchar4 neigh = tex2D<uchar4>(t, u+n, v+m);
neigh_sq += neigh*neigh;
neigh_sum += neigh;
neigh_sq += (double)(neigh.z*neigh.z);
neigh_sum += (double)neigh.z;
}
}
}
}
// Texture map filtering
int tm = (neigh_sq / (FILTER_WINDOW*FILTER_WINDOW)) -
((neigh_sum*neigh_sum) / (FILTER_WINDOW*FILTER_WINDOW));
// Texture map filtering
double tm = (neigh_sq / (FILTER_WINDOW*FILTER_WINDOW)) -
//((neigh_sum*neigh_sum) / (FILTER_WINDOW*FILTER_WINDOW));
((neigh_sum / (FILTER_WINDOW*FILTER_WINDOW)) * (neigh_sum / (FILTER_WINDOW*FILTER_WINDOW)));
if (tm < thesh) {
f(u,v) = disp;
} else {
f(u,v) = NAN;
if (tm >= thresh) {
f(u,v) = disp;
} else {
f(u,v) = NAN;
}
}
}
}
namespace ftl {
namespace cuda {
void texture_filter(const TextureObject<uchar4> &t, const TextureObject<float> &d,
TextureObject<float> &f, int num_disp, int thresh) {
TextureObject<float> &f, int num_disp, double thresh) {
dim3 grid(1,1,1);
dim3 threads(128, 1, 1);
grid.x = cv::cuda::device::divUp(disp.width(), 128);
grid.y = cv::cuda::device::divUp(disp.height(), 1);
texture_filter_kernel<<<grid, threads>>>
grid.x = cv::cuda::device::divUp(d.width(), 128);
grid.y = cv::cuda::device::divUp(d.height(), 1);
texture_filter_kernel<<<grid, threads>>>(
t.cudaTexture(),
d.cudaTexture(),
f,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment