Commit f775b148 authored by Nicolas Pope

Early attempts at my own algorithm

parent 7ae2627c
......@@ -95,7 +95,9 @@ if (CUDA_FOUND)
"src/algorithms/rtcensus.cu"
"src/algorithms/consistency.cu"
"src/algorithms/sparse_census.cu"
"src/algorithms/tex_filter.cu")
"src/algorithms/tex_filter.cu"
"src/algorithms/nick1.cu"
"src/algorithms/nick.cpp")
endif (CUDA_FOUND)
add_executable(cv-node ${CVNODESRC})
......
#ifndef _FTL_ALGORITHMS_NICK_HPP_
#define _FTL_ALGORITHMS_NICK_HPP_
#include <opencv2/core.hpp>
#include <opencv2/opencv.hpp>
#include <opencv2/cudastereo.hpp>
#include <ftl/disparity.hpp>
namespace ftl {
namespace algorithms {
class NickCuda : public ftl::Disparity {
public:
NickCuda(nlohmann::json &config);
void compute(const cv::Mat &l, const cv::Mat &r, cv::Mat &disp);
static inline Disparity *create(nlohmann::json &config) { return new NickCuda(config); }
private:
cv::cuda::GpuMat disp_;
//cv::cuda::GpuMat filtered_;
cv::cuda::GpuMat left_;
cv::cuda::GpuMat right_;
};
}  // namespace algorithms
}  // namespace ftl
#endif // _FTL_ALGORITHMS_NICK_HPP_
......@@ -14,6 +14,9 @@ namespace cuda {
void texture_filter(const TextureObject<uchar4> &t, const TextureObject<float> &d,
TextureObject<float> &f, int num_disp, double thresh);
void texture_map(const TextureObject<uchar4> &t,
TextureObject<float> &f);
}
}
......
/*
* Algorithms for checking the consistency of two disparity maps.
*/
#include <ftl/cuda_common.hpp>
#define CONSISTENCY_THRESHOLD 1.0f
......
#include <ftl/algorithms/nick.hpp>
#include <vector_types.h>
using ftl::algorithms::NickCuda;
using namespace cv;
using namespace cv::cuda;
static ftl::Disparity::Register nickcuda("nick", NickCuda::create);
NickCuda::NickCuda(nlohmann::json &config) : Disparity(config) {
}
namespace ftl { namespace gpu {
void nick1_call(const PtrStepSz<uchar4> &l, const PtrStepSz<uchar4> &r, const PtrStepSz<float> &disp, size_t num_disp);
}}
void NickCuda::compute(const cv::Mat &l, const cv::Mat &r, cv::Mat &disp) {
if (disp_.empty()) disp_ = cuda::GpuMat(l.size(), CV_32FC1);
if (left_.empty()) left_ = cuda::GpuMat(l.size(), CV_8UC4);
if (right_.empty()) right_ = cuda::GpuMat(l.size(), CV_8UC4);
Mat lhsv, rhsv;
cv::cvtColor(l, lhsv, COLOR_BGR2HSV);
cv::cvtColor(r, rhsv, COLOR_BGR2HSV);
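// Pad the 3-channel HSV images to 4 channels for uchar4 texture upload;
// the negative source index in from_to zero-fills the new 4th channel.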
int from_to[] = {0,0,1,1,2,2,-1,3};
Mat hsval(lhsv.size(), CV_8UC4);
Mat hsvar(rhsv.size(), CV_8UC4);
mixChannels(&lhsv, 1, &hsval, 1, from_to, 4);
mixChannels(&rhsv, 1, &hsvar, 1, from_to, 4);
left_.upload(hsval);
right_.upload(hsvar);
ftl::gpu::nick1_call(left_, right_, disp_, 200);
disp_.download(disp);
}
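For reference, a minimal host-side sketch of exercising the new class directly (the static Register line above also exposes it under the name "nick" to whatever factory ftl::Disparity provides); the file names and JSON contents are illustrative only, and the only API assumed is what appears in this diff:
#include <ftl/algorithms/nick.hpp>
#include <opencv2/opencv.hpp>
#include <nlohmann/json.hpp>

int main() {
	nlohmann::json config = nlohmann::json::object();  // hypothetical: contents depend on ftl::Disparity
	ftl::algorithms::NickCuda algo(config);

	cv::Mat left = cv::imread("left.png");    // rectified BGR pair, paths illustrative
	cv::Mat right = cv::imread("right.png");
	cv::Mat disp;
	algo.compute(left, right, disp);          // CV_32FC1 output; NAN marks invalid pixels
	return 0;
}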
/*
* Author: Nicolas Pope
* Based initially on rtcensus
*
*/
#include <ftl/cuda_common.hpp>
#include <ftl/cuda_algorithms.hpp>
using namespace cv::cuda;
using namespace cv;
#define BLOCK_W 60
#define RADIUS 7
#define RADIUS2 2
#define ROWSperTHREAD 1
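// Precision note: fma(t, v1, fma(-t, v0, v0)) evaluates v0 + t*(v1 - v0) with
// fused rounding, returning exactly v0 at t == 0 and exactly v1 at t == 1.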
template <typename T>
__host__ __device__
inline T lerp(T v0, T v1, T t) {
return fma(t, v1, fma(-t, v0, v0));
}
#define FILTER_WINDOW 21
#define FILTER_WINDOW_R 10
#define EDGE_SENSITIVITY 10.0f
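// Average the valid disparities of neighbouring pixels whose V channel is within
// EDGE_SENSITIVITY of the given pixel; returns NAN if no neighbour qualifies.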
__device__ float calculate_edge_disp(cudaTextureObject_t t, cudaTextureObject_t d, cudaTextureObject_t pT, cudaTextureObject_t pD, uchar4 pixel, int u, int v) {
float est = 0.0;
int nn = 0;
//float pest = 0.0;
//int pnn = 0;
//cudaTextureObject_t nTex = (pT) ? pT : t;
//cudaTextureObject_t nDisp = (pD) ? pD : d;
for (int m=-FILTER_WINDOW_R; m<=FILTER_WINDOW_R; m++) {
for (int n=-FILTER_WINDOW_R; n<=FILTER_WINDOW_R; n++) {
uchar4 neigh = tex2D<uchar4>(t, u+n, v+m);
float ndisp = tex2D<float>(d,u+n,v+m);
//uchar4 pneigh = tex2D<uchar4>(nTex, u+n, v+m);
//float pndisp = tex2D<float>(nDisp,u+n,v+m);
//if (isnan(tex2D<float>(nDisp,u+n,v+m))) continue;
//if (m == 0 && n == 0) continue;
if (!isnan(ndisp) && (abs(neigh.z-pixel.z) <= EDGE_SENSITIVITY)) { // && (isnan(disp) || abs(ndisp-disp) < FILTER_DISP_THRESH)) {
est += ndisp;
nn++;
}
//if (!isnan(pndisp) && (abs(pneigh.z-pixel.z) <= EDGE_SENSITIVITY)) { // && (isnan(disp) || abs(ndisp-disp) < FILTER_DISP_THRESH)) {
// pest += pndisp;
// pnn++;
//}
}
}
est = (nn > 0) ? est/nn : NAN;
//pest = (pnn > 0) ? pest/pnn : NAN;
return est;
}
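// Weighted per-channel difference of two HSV pixels (x=H, y=S, z=V);
// the V (value) channel dominates the error.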
__device__ float colour_error(uchar4 v1, uchar4 v2) {
float dx = 0.05*abs(v1.x-v2.x);
float dy = 0.1*abs(v1.y-v2.y);
float dz = 0.85*abs(v1.z-v2.z);
return dx + dz + dy;
}
// TODO Use HUE also and perhaps increase window?
// Or use more complex notion of texture?
/* Just crossed and currently on edge */
__device__ bool is_edge_left(uchar4 *line, int x, int n) {
if (x < 1 || x >= n-1) return false;
return (colour_error(line[x-1],line[x]) > EDGE_SENSITIVITY && colour_error(line[x],line[x+1]) <= EDGE_SENSITIVITY);
}
/* Just crossed but not on edge now */
__device__ bool is_edge_right(uchar4 *line, int x, int n) {
if (x < 1 || x >= n-1) return false;
return (colour_error(line[x-1],line[x]) <= EDGE_SENSITIVITY && colour_error(line[x],line[x+1]) > EDGE_SENSITIVITY);
}
__global__ void filter_kernel(cudaTextureObject_t t, cudaTextureObject_t d,
cudaTextureObject_t prevD,
cudaTextureObject_t prevT, PtrStepSz<float> f, int num_disp) {
extern __shared__ uchar4 line[]; // One entire line of hsv image
for (STRIDE_Y(v,f.rows)) {
for (STRIDE_X(u,f.cols)) {
line[u] = tex2D<uchar4>(t, u, v);
}
__syncthreads();
for (STRIDE_X(u,f.cols)) {
if (is_edge_right(line, u, f.cols)) {
float edge_disp = calculate_edge_disp(t,d,prevT,prevD,line[u],u+2,v); // tex2D<float>(d, u, v);
f(v,u) = edge_disp;
continue; // everything below in this branch is currently disabled by this early continue
float est = 0.0f;
int nn = 0;
if (!isnan(edge_disp)) {
est += edge_disp;
nn++;
}
//f(v,u) = edge_disp;
// TODO, find other edge first to get disparity
// Use middle disparities to:
// estimate curve or linear (choose equation)
// or ignore as noise if impossible
// TODO For edge disparity, use a window to:
// a) find a missing disparity
// b) make sure disparity has some consensus (above or below mostly)
// TODO Use past values?
// Another way to fill blanks and gain consensus
// TODO Maintain a disparity stack to pop back to background?
// Issue of background disparity not being detected.
// Only if hsv also matches previous background
// TODO Edge prediction (in vertical direction at least) could
// help fill both edge and disparity gaps. Propagate disparity
// along edges
float last_disp = edge_disp;
int i;
for (i=1; u+i<f.cols; i++) {
if (is_edge_right(line, u+i, f.cols)) {
//float end_disp = calculate_edge_disp(t,d,prevT,prevD,line[u+i-1],u+i-3,v);
//if (!isnan(end_disp)) last_disp = end_disp;
break;
}
float di = tex2D<float>(d,u+i,v);
if (!isnan(di)) {
est += di;
nn++;
}
//f(v,u+i) = edge_disp;
}
est = (nn > 0) ? est / nn : NAN;
//for (int j=1; j<i; j++) {
// f(v,u+j) = est; //lerp(edge_disp, last_disp, (float)j / (float)(i-1));
//}
} else f(v,u) = NAN;
}
}
}
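// Edge invariant, stage 1: 3-row/3-column averaged differences of the V channel
// give a per-pixel gradient estimate; output is (magnitude, angle), or
// (NAN, NAN) where the magnitude is below the fixed threshold.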
__global__ void edge_invar1_kernel(cudaTextureObject_t t, ftl::cuda::TextureObject<float2> o) {
for (STRIDE_Y(v,o.height())) {
for (STRIDE_X(u,o.width())) {
float gx = ((tex2D<uchar4>(t, u-1, v-1).z - tex2D<uchar4>(t, u+1, v-1).z) +
(tex2D<uchar4>(t, u-1, v).z - tex2D<uchar4>(t, u+1, v).z) +
(tex2D<uchar4>(t, u-1, v+1).z - tex2D<uchar4>(t, u+1, v+1).z)) / 3;
float gy = ((tex2D<uchar4>(t, u-1, v-1).z - tex2D<uchar4>(t, u-1, v+1).z) +
(tex2D<uchar4>(t, u, v-1).z - tex2D<uchar4>(t, u, v+1).z) +
(tex2D<uchar4>(t, u+1, v-1).z - tex2D<uchar4>(t, u+1, v+1).z)) / 3;
float g = sqrt(gx*gx+gy*gy);
float a = atan2(gy,gx);
// TODO adapt threshold using hysteresis
o(u,v) = (g > 10.0f) ? make_float2(g,a) : make_float2(NAN,NAN);
}
}
}
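// Edge invariant, stage 2: from each edge pixel, search +/-10 steps along one
// image axis (chosen from the gradient angle, offset two pixels on the other axis)
// for the most similar edge pixel, then output the scaled change in gradient
// angle; NAN if nothing similar enough is found.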
__global__ void edge_invar2_kernel(cudaTextureObject_t i1, ftl::cuda::TextureObject<float> o) {
for (STRIDE_Y(v,o.height())) {
for (STRIDE_X(u,o.width())) {
float2 pixel_i1 = tex2D<float2>(i1,u,v);
if (isnan(pixel_i1.x)) {
o(u,v) = NAN;
continue;
}
int dx = (pixel_i1.y >= 0.785 && pixel_i1.y <= 2.356) ? 0 : 1; // angle within ~[pi/4, 3pi/4]: step vertically, otherwise horizontally
int dy = (dx == 1) ? 0 : 1;
float2 next_pix;
next_pix.x = NAN;
next_pix.y = NAN;
float diff = 10000.0f;
for (int i=-10; i<=10; i++) {
float2 pix = tex2D<float2>(i1,u+dx*i+dy*2, v+dy*i+dx*2);
if (isnan(pix.x)) continue;
float d = abs(pix.x-pixel_i1.x)*abs(pix.y-pixel_i1.y);
if (d < diff) {
next_pix = pix;
diff = d;
}
}
if (!isnan(next_pix.x) && diff < 2.0f) {
o(u,v) = abs(pixel_i1.y - next_pix.y)*81.0f;
} else {
o(u,v) = NAN;
}
// Search in two directions for next edge pixel
// Calculate curvature by monitoring gradient angle change
// Calculate length by stopping when change exceeds threshold
}
}
}
ftl::cuda::TextureObject<float> prevDisp;
ftl::cuda::TextureObject<uchar4> prevImage;
namespace ftl {
namespace gpu {
void nick1_call(const PtrStepSz<uchar4> &l, const PtrStepSz<uchar4> &r, const PtrStepSz<float> &disp, size_t num_disp) {
// Make all the required texture steps
// TODO Could reduce re-allocations by caching these
ftl::cuda::TextureObject<uchar4> texLeft(l);
ftl::cuda::TextureObject<uchar4> texRight(r);
ftl::cuda::TextureObject<float2> inv1(l.cols, l.rows);
ftl::cuda::TextureObject<float> output(disp);
dim3 grid(1,1,1);
dim3 threads(BLOCK_W, 1, 1);
grid.x = cv::cuda::device::divUp(l.cols - 2 * RADIUS2, BLOCK_W);
grid.y = cv::cuda::device::divUp(l.rows - 2 * RADIUS2, ROWSperTHREAD);
edge_invar1_kernel<<<grid,threads>>>(texLeft.cudaTexture(), inv1);
cudaSafeCall( cudaGetLastError() );
edge_invar2_kernel<<<grid,threads>>>(inv1.cudaTexture(), output);
cudaSafeCall( cudaGetLastError() );
//prevImage.free();
//prevImage = texLeft;
//if (&stream == Stream::Null())
cudaSafeCall( cudaDeviceSynchronize() );
texLeft.free();
texRight.free();
inv1.free();
output.free();
}
}
}
......@@ -6,6 +6,8 @@
* for embedded real-time systems
* [2] Humenberger, Zinner, Kubinger: Performance Evaluation of Census-Based
* Stereo Matching Algorithm on Embedded and Multi-Core Hardware
* [3] Humenberger, Engelke, Kubinger: A Census-Based Stereo Vision Algorithm Using Modified Semi-Global Matching
* and Plane Fitting to Improve Matching Quality.
*
* Equation numbering uses [1] unless otherwise stated
*/
......
......@@ -6,6 +6,8 @@
* for embedded real-time systems
* [2] Humenberger, Zinner, Kubinger: Performance Evaluation of Census-Based
* Stereo Matching Algorithm on Embedded and Multi-Core Hardware
* [3] Humenberger, Engelke, Kubinger: A Census-Based Stereo Vision Algorithm Using Modified Semi-Global Matching
* and Plane Fitting to Improve Matching Quality.
*
* Equation numbering uses [1] unless otherwise stated
*
......@@ -49,7 +51,7 @@ __forceinline__ __device__ uint64_t uint2asull (uint2 a) {
}
/*
* Generate left and right disparity images from census data. (19)
* Generate left and right disparity images from census data. (18)(19)(25)
*/
__global__ void disp_kernel(float *disp_l, float *disp_r,
int pitchL, int pitchR,
......@@ -68,25 +70,11 @@ __global__ void disp_kernel(float *disp_l, float *disp_r,
// Local cache
uint64_t l_cache_l1[5][5];
uint64_t l_cache_l2[5][5];
// Prepare the cache load
//const int cache_thread_width = (BLOCK_W+ds / BLOCK_W + RADIUS2*2 + 1)*2;
//uint64_t *cache_ptr = cache + (threadIdx.x * cache_thread_width);
if (v_end >= height) v_end = height;
if (u+maxdisp >= width) maxdisp = width-u;
for (int v=v_start; v<v_end; v++) {
/*const int cache_start = v*width*2 + cache_thread_width*blockIdx.x;
for (int i=0; i<cache_thread_width; i+=2) {
cache_ptr[i] = census[cache_start+i];
cache_ptr[i+1] = census[cache_start+i+1];
}
__syncthreads();*/
// Fill local cache for window 5x5
// TODO Use shared memory?
for (int m=-2; m<=2; m++) {
for (int n=-2; n<=2; n++) {
l_cache_l2[m+2][n+2] = uint2asull(tex2D<uint2>(censusL,u+n,v+m));
......@@ -107,7 +95,7 @@ __global__ void disp_kernel(float *disp_l, float *disp_r,
int dix1 = 0;
int dix2 = 0;
// TODO Use prediction textures to narrow range
// (19)
for (int d=0; d<maxdisp; d++) {
uint16_t hamming1 = 0;
uint16_t hamming2 = 0;
......@@ -119,18 +107,17 @@ __global__ void disp_kernel(float *disp_l, float *disp_r,
for (int n=-2; n<=2; n++) {
const auto u_ = u + n;
// (18)
auto l1 = l_cache_l1[m+2][n+2];
auto l2 = l_cache_l2[m+2][n+2];
// TODO Somehow might use shared memory
auto r1 = uint2asull(tex2D<uint2>(censusL, u_+d, v_));
auto r2 = uint2asull(tex2D<uint2>(censusR, u_-d, v_));
hamming1 += __popcll(r1^l1);
hamming2 += __popcll(r2^l2);
}
}
// Find the two minimum costs
if (hamming1 < min_disp1) {
min_before1 = last_ham1;
min_disp1 = hamming1;
......@@ -157,171 +144,16 @@ __global__ void disp_kernel(float *disp_l, float *disp_r,
//float d2 = (dix2 == 0 || dix2 == ds-1) ? (float)dix2 : fit_parabola(dix2, min_disp2, min_before2, min_after2);
// TODO Allow for discontinuities with threshold
// Subpixel disparity (20)
float d1 = fit_parabola(dix1, min_disp1, min_before1, min_after1);
float d2 = fit_parabola(dix2, min_disp2, min_before2, min_after2);
// Confidence filter (25)
// TODO choice of gamma to depend on disparity variance
// Variance with next option, variance with neighbours, variance with past value
// Confidence filter based on (25): keep a match only when the second-best cost exceeds the best by at least gamma
disp_l[v*pitchL+u] = ((min_disp2b - min_disp2) >= gamma) ? d2 : NAN;
disp_r[v*pitchR+u] = ((min_disp1b - min_disp1) >= gamma) ? d1 : NAN;
// TODO If disparity is 0.0f, perhaps
// Use previous value unless it conflicts with present
// Use neighbour values if texture matches
}
}
template <typename T>
__host__ __device__
inline T lerp(T v0, T v1, T t) {
return fma(t, v1, fma(-t, v0, v0));
}
#define FILTER_WINDOW 21
#define FILTER_WINDOW_R 10
#define EDGE_SENSITIVITY 10.0f
__device__ float calculate_edge_disp(cudaTextureObject_t t, cudaTextureObject_t d, cudaTextureObject_t pT, cudaTextureObject_t pD, uchar4 pixel, int u, int v) {
float est = 0.0;
int nn = 0;
//float pest = 0.0;
//int pnn = 0;
//cudaTextureObject_t nTex = (pT) ? pT : t;
//cudaTextureObject_t nDisp = (pD) ? pD : d;
for (int m=-FILTER_WINDOW_R; m<=FILTER_WINDOW_R; m++) {
for (int n=-FILTER_WINDOW_R; n<=FILTER_WINDOW_R; n++) {
uchar4 neigh = tex2D<uchar4>(t, u+n, v+m);
float ndisp = tex2D<float>(d,u+n,v+m);
//uchar4 pneigh = tex2D<uchar4>(nTex, u+n, v+m);
//float pndisp = tex2D<float>(nDisp,u+n,v+m);
//if (isnan(tex2D<float>(nDisp,u+n,v+m))) continue;
//if (m == 0 && n == 0) continue;
if (!isnan(ndisp) && (abs(neigh.z-pixel.z) <= EDGE_SENSITIVITY)) { // && (isnan(disp) || abs(ndisp-disp) < FILTER_DISP_THRESH)) {
est += ndisp;
nn++;
}
//if (!isnan(pndisp) && (abs(pneigh.z-pixel.z) <= EDGE_SENSITIVITY)) { // && (isnan(disp) || abs(ndisp-disp) < FILTER_DISP_THRESH)) {
// pest += pndisp;
// pnn++;
//}
}
}
est = (nn > 0) ? est/nn : NAN;
//pest = (pnn > 0) ? pest/pnn : NAN;
return est;
}
__device__ float colour_error(uchar4 v1, uchar4 v2) {
float dx = 0.05*abs(v1.x-v2.x);
float dy = 0.1*abs(v1.y-v2.y);
float dz = 0.85*abs(v1.z-v2.z);
return dx + dz + dy;
}
// TODO Use HUE also and perhaps increase window?
// Or use more complex notion of texture?
/* Just crossed and currently on edge */
__device__ bool is_edge_left(uchar4 *line, int x, int n) {
if (x < 1 || x >= n-1) return false;
return (colour_error(line[x-1],line[x]) > EDGE_SENSITIVITY && colour_error(line[x],line[x+1]) <= EDGE_SENSITIVITY);
}
/* Just crossed but not on edge now */
__device__ bool is_edge_right(uchar4 *line, int x, int n) {
if (x < 1 || x >= n-1) return false;
return (colour_error(line[x-1],line[x]) <= EDGE_SENSITIVITY && colour_error(line[x],line[x+1]) > EDGE_SENSITIVITY);
}
__global__ void filter_kernel(cudaTextureObject_t t, cudaTextureObject_t d,
cudaTextureObject_t prevD,
cudaTextureObject_t prevT, PtrStepSz<float> f, int num_disp) {
extern __shared__ uchar4 line[]; // One entire line of hsv image
for (STRIDE_Y(v,f.rows)) {
for (STRIDE_X(u,f.cols)) {
line[u] = tex2D<uchar4>(t, u, v);
}
__syncthreads();
for (STRIDE_X(u,f.cols)) {
if (is_edge_right(line, u, f.cols)) {
float edge_disp = calculate_edge_disp(t,d,prevT,prevD,line[u],u+2,v); // tex2D<float>(d, u, v);
f(v,u) = edge_disp;
continue;
float est = 0.0f;
int nn = 0;
if (!isnan(edge_disp)) {
est += edge_disp;
nn++;
}
//f(v,u) = edge_disp;
// TODO, find other edge first to get disparity
// Use middle disparities to:
// estimate curve or linear (choose equation)
// or ignore as noise if impossible
// TODO For edge disparity, use a window to:
// a) find a missing disparity
// b) make sure disparity has some consensus (above or below mostly)
// TODO Use past values?
// Another way to fill blanks and gain consensus
// TODO Maintain a disparity stack to pop back to background?
// Issue of background disparity not being detected.
// Only if hsv also matches previous background
// TODO Edge prediction (in vertical direction at least) could
// help fill both edge and disparity gaps. Propagate disparity
// along edges
float last_disp = edge_disp;
int i;
for (i=1; u+i<f.cols; i++) {
if (is_edge_right(line, u+i, f.cols)) {
//float end_disp = calculate_edge_disp(t,d,prevT,prevD,line[u+i-1],u+i-3,v);
//if (!isnan(end_disp)) last_disp = end_disp;
break;
}
float di = tex2D<float>(d,u+i,v);
if (!isnan(di)) {
est += di;
nn++;
}
//f(v,u+i) = edge_disp;
}
est = (nn > 0) ? est / nn : NAN;
//for (int j=1; j<i; j++) {
// f(v,u+j) = est; //lerp(edge_disp, last_disp, (float)j / (float)(i-1));
//}
} else f(v,u) = NAN;
}
}
}
ftl::cuda::TextureObject<float> prevDisp;
ftl::cuda::TextureObject<uchar4> prevImage;
void rtcensus_call(const PtrStepSz<uchar4> &l, const PtrStepSz<uchar4> &r, const PtrStepSz<float> &disp, size_t num_disp, const int &stream) {
// Make all the required texture steps
// TODO Could reduce re-allocations by caching these
......@@ -334,7 +166,7 @@ void rtcensus_call(const PtrStepSz<uchar4> &l, const PtrStepSz<uchar4> &r, const
ftl::cuda::TextureObject<float> dispTex(r.cols, r.rows);
ftl::cuda::TextureObject<float> output(disp);
// Calculate the census for left and right
// Calculate the census for left and right (14)(15)(16)
ftl::cuda::sparse_census(texLeft, texRight, censusTexLeft, censusTexRight);
dim3 grid(1,1,1);
......@@ -342,7 +174,7 @@ void rtcensus_call(const PtrStepSz<uchar4> &l, const PtrStepSz<uchar4> &r, const
grid.x = cv::cuda::device::divUp(l.cols - 2 * RADIUS2, BLOCK_W);
grid.y = cv::cuda::device::divUp(l.rows - 2 * RADIUS2, ROWSperTHREAD);
// Calculate L and R disparities
// Calculate L and R disparities (18)(19)(20)(21)(22)(25)
disp_kernel<<<grid, threads>>>(
dispTexLeft.devicePtr(), dispTexRight.devicePtr(),
dispTexLeft.pitch()/sizeof(float), dispTexRight.pitch()/sizeof(float),
......@@ -351,28 +183,15 @@ void rtcensus_call(const PtrStepSz<uchar4> &l, const PtrStepSz<uchar4> &r, const
num_disp);
cudaSafeCall( cudaGetLastError() );
// Check consistency between L and R disparities.
// Check consistency between L and R disparities. (23)(24)
consistency(dispTexLeft, dispTexRight, dispTex);
texture_filter(texLeft, dispTex, output, num_disp, 20.0);
/*grid.x = 4;
grid.y = l.rows;
threads.x = l.cols;
size_t filter_smem = sizeof(uchar4) * l.cols;
filter_kernel<<<grid, threads, filter_smem>>>(texLeft.cudaTexture(), dispTex.cudaTexture(), prevDisp.cudaTexture(), prevImage.cudaTexture(), disp, num_disp);
cudaSafeCall( cudaGetLastError() );*/
// TM in (7) of paper [3]. Eq (26) in [1] is wrong.
texture_filter(texLeft, dispTex, output, num_disp, 10.0);
//prevDisp.free();
//prevDisp = disp;
prevImage.free();
prevImage = texLeft;
//if (&stream == Stream::Null())
cudaSafeCall( cudaDeviceSynchronize() );
//cudaSafeCall( cudaDestroyTextureObject (texLeft) );
texLeft.free();
texRight.free();
censusTexLeft.free();
censusTexRight.free();
......
......@@ -3,6 +3,8 @@
#define FILTER_WINDOW 11.0
#define FILTER_WINDOW_R 5
// --- Filter by texture complexity --------------------------------------------
__global__ void texture_filter_kernel(cudaTextureObject_t t, cudaTextureObject_t d,
ftl::cuda::TextureObject<float> f, int num_disp, double thresh) { // Thresh = -5000000
......@@ -22,14 +24,15 @@ __global__ void texture_filter_kernel(cudaTextureObject_t t, cudaTextureObject_t
// Texture map filtering: variance of the windowed values, E[x^2] - (E[x])^2
double tm = (neigh_sq / (FILTER_WINDOW*FILTER_WINDOW)) -
//((neigh_sum*neigh_sum) / (FILTER_WINDOW*FILTER_WINDOW));
((neigh_sum / (FILTER_WINDOW*FILTER_WINDOW)) * (neigh_sum / (FILTER_WINDOW*FILTER_WINDOW)));
if (tm >= thresh) {
f(u,v) = disp;
} else {
f(u,v) = NAN;
f(u,v) = NAN; //(tm <= 200.0) ? 0 : 200.0f;;
}
//f(u,v) = tm;
}
}
}
......@@ -53,3 +56,46 @@ namespace cuda {
}
}
// --- Generate a texture map --------------------------------------------------
__global__ void texture_map_kernel(cudaTextureObject_t t,
ftl::cuda::TextureObject<float> f) {
for (STRIDE_Y(v,f.height())) {
for (STRIDE_X(u,f.width())) {
double neigh_sq = 0.0;
double neigh_sum = 0.0;
for (int m=-FILTER_WINDOW_R; m<=FILTER_WINDOW_R; m++) {
for (int n=-FILTER_WINDOW_R; n<=FILTER_WINDOW_R; n++) {
uchar4 neigh = tex2D<uchar4>(t, u+n, v+m);
neigh_sq += (double)(neigh.z*neigh.z);
neigh_sum += (double)neigh.z;
}
}
// Texture map: variance of the V channel over the window, E[V^2] - (E[V])^2
double tm = (neigh_sq / (FILTER_WINDOW*FILTER_WINDOW)) -
((neigh_sum / (FILTER_WINDOW*FILTER_WINDOW)) * (neigh_sum / (FILTER_WINDOW*FILTER_WINDOW)));
f(u,v) = tm;
}
}
}
namespace ftl {
namespace cuda {
void texture_map(const TextureObject<uchar4> &t,
TextureObject<float> &f) {
dim3 grid(1,1,1);
dim3 threads(128, 1, 1);
grid.x = cv::cuda::device::divUp(f.width(), 128);
grid.y = cv::cuda::device::divUp(f.height(), 1);
texture_map_kernel<<<grid, threads>>>(
t.cudaTexture(),
f);
cudaSafeCall( cudaGetLastError() );
}
}
}
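A minimal host-side sketch of driving the new texture_map entry point, mirroring how nick.cpp prepares a CV_8UC4 HSV image; the function name and GpuMat variables are illustrative, and the implicit GpuMat-to-PtrStepSz conversion is assumed to work as it already does in nick.cpp:
// Sketch (assumes the same headers as tex_filter.cu: ftl/cuda_common.hpp and
// ftl/cuda_algorithms.hpp, plus opencv2/core/cuda.hpp).
static void compute_texture_map(const cv::cuda::GpuMat &hsv_gpu, cv::cuda::GpuMat &tex_gpu) {
	tex_gpu.create(hsv_gpu.size(), CV_32FC1);

	ftl::cuda::TextureObject<uchar4> texIn(hsv_gpu);   // wraps the CV_8UC4 HSV image
	ftl::cuda::TextureObject<float> texOut(tex_gpu);   // kernel writes into tex_gpu memory
	ftl::cuda::texture_map(texIn, texOut);
	cudaSafeCall( cudaGetLastError() );
	cudaSafeCall( cudaDeviceSynchronize() );

	texIn.free();
	texOut.free();
	// tex_gpu now holds E[V^2] - (E[V])^2 for each pixel's window.
}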