diff --git a/lib/libstereo/src/algorithms/clustersf.cu b/lib/libstereo/src/algorithms/clustersf.cu index cacbe1e40cbb7da52d90590cc249aef191971adb..273025d75485fdf9a535445762633de9e53c35b6 100644 --- a/lib/libstereo/src/algorithms/clustersf.cu +++ b/lib/libstereo/src/algorithms/clustersf.cu @@ -11,12 +11,13 @@ struct StereoCSF::Impl { Array2D<uchar> gl; Array2D<uchar> gr; Array2D<uchar> temp; - Bucket2D<ushort, 64> buckets_l; + Bucket1D<short2, 64> buckets_l; + Bucket2D<ushort, 64> buckets_r; Impl(int width, int height) : l(width, height), r(width, height), gl(width, height), gr(width, height), temp(width, height), - buckets_l(16, height) {} + buckets_l(height), buckets_r(16, height) {} }; StereoCSF::StereoCSF() : impl_(nullptr) { @@ -37,15 +38,15 @@ void StereoCSF::compute(cv::InputArray l, cv::InputArray r, cv::OutputArray disp SalientGradient sgl = {impl_->l.data(), impl_->gl.data(), impl_->temp.data(), impl_->buckets_l.data(), impl_->l.width, impl_->l.height}; parallel1DWarpSM(sgl, l.rows(), l.cols()); - //SalientGradient sgr = {impl_->r.data(), impl_->gr.data(), impl_->temp.data(), impl_->r.width, impl_->r.height}; - //parallel1DWarpSM(sgr, r.rows(), r.cols()); + SalientGradientGrouped sgr = {impl_->r.data(), impl_->gr.data(), impl_->temp.data(), impl_->buckets_r.data(), impl_->r.width, impl_->r.height}; + parallel1DWarpSM(sgr, r.rows(), r.cols()); cv::Mat tmp; - impl_->buckets_l.toGpuMat().download(tmp); + impl_->buckets_r.toGpuMat().download(tmp); tmp.convertTo(tmp, CV_8UC1, 4.0); cv::resize(tmp,tmp, cv::Size(tmp.cols*40, tmp.rows/2)); cv::applyColorMap(tmp, tmp, cv::COLORMAP_TURBO); - cv::imshow("Gradients Left", tmp); + cv::imshow("Gradients Right", tmp); //impl_->gr.toGpuMat().download(tmp); //cv::resize(tmp,tmp, cv::Size(tmp.cols/2, tmp.rows/2)); @@ -53,6 +54,7 @@ void StereoCSF::compute(cv::InputArray l, cv::InputArray r, cv::OutputArray disp cudaSafeCall(cudaDeviceSynchronize()); disparity.create(l.rows(), l.cols(), CV_32F); + cv::waitKey(10000); } StereoCSF::~StereoCSF() { diff --git a/lib/libstereo/src/bucket1d.hpp b/lib/libstereo/src/bucket1d.hpp new file mode 100644 index 0000000000000000000000000000000000000000..77f8ff9621112c0adbfb19eb1fbb093ac4e768c8 --- /dev/null +++ b/lib/libstereo/src/bucket1d.hpp @@ -0,0 +1,130 @@ +#ifndef _FTL_LIBSTEREO_BUCKET1D_HPP_ +#define _FTL_LIBSTEREO_BUCKET1D_HPP_ + +#include "memory.hpp" + +template<typename T, int DEPTH> +class Bucket1D { +public: + Bucket1D() : width(0), needs_free_(false) { + data_.buckets = nullptr; + data_.counters = nullptr; + } + + Bucket1D(int w) : width(w), needs_free_(true) { + data_.counters = allocateMemory<uint>(w); + data_.buckets = allocateMemory2D<T>(DEPTH, w, data_.bucket_pitch); + } + + ~Bucket1D() { + free(); + } + + void free() { + if (needs_free_ && data_.buckets) { + freeMemory(data_.counters); + freeMemory(data_.buckets); + } + } + + Bucket1D<T, DEPTH> &operator=(const Bucket1D<T, DEPTH> &c) { + data_ = c.data_; + width = c.width; + needs_free_ = false; + return *this; + } + + struct Data { + __host__ __device__ inline uint& operator() (const int y) { + return counters[y]; + } + + __host__ __device__ inline const uint& operator() (const int y) const { + return counters[y]; + } + + __host__ __device__ inline const T& operator() (const int y, int d) const { + return buckets[d + y*bucket_pitch]; + } + + __host__ __device__ inline void add (const int y, T v) { + uint ix = atomicInc(&counters[y], DEPTH); + buckets[ix + y*bucket_pitch] = v; + } + + __host__ __device__ inline const T *ptr(const int y) const { return &buckets[y*bucket_pitch]; } + + uint bucket_pitch; + T *buckets; + uint *counters; + }; + + void create(int w) { + if (w == width) return; + width = w; + free(); + needs_free_ = true; + data_.counters = allocateMemory<uint>(w); + data_.buckets = allocateMemory2D<T>(DEPTH, w, data_.bucket_pitch); + } + + inline Data &data() { return data_; } + inline const Data &data() const { return data_; } + + void toMat(cv::Mat &m) { + #ifdef USE_GPU + cv::cuda::GpuMat gm; + toGpuMat(gm); + gm.download(m); + #else + m = cv::Mat(1, width, cv::traits::Type<int>::value, data_.counters); + #endif + } + + cv::Mat toMat() { + #ifdef USE_GPU + cv::cuda::GpuMat gm; + toGpuMat(gm); + cv::Mat m; + gm.download(m); + return m; + #else + return cv::Mat(1, width, cv::traits::Type<int>::value, data_.counters); + #endif + } + + const cv::Mat toMat() const { + #ifdef USE_GPU + cv::cuda::GpuMat gm(1, width, cv::traits::Type<int>::value, (void*)data_.counters); + cv::Mat m; + gm.download(m); + return m; + #else + return cv::Mat(height, width, cv::traits::Type<int>::value, data_.counters); + #endif + } + + void toGpuMat(cv::cuda::GpuMat &m) { + #ifdef USE_GPU + m = cv::cuda::GpuMat(1, width, cv::traits::Type<int>::value, (void*)data_.counters); + #else + // TODO + #endif + } + + cv::cuda::GpuMat toGpuMat() { + #ifdef USE_GPU + return cv::cuda::GpuMat(1, width, cv::traits::Type<int>::value, (void*)data_.counters); + #else + return cv::cuda::GpuMat(1, width, cv::traits::Type<int>::value); + #endif + } + + int width; + +private: + Data data_; + bool needs_free_; +}; + +#endif diff --git a/lib/libstereo/src/filters/focal_cluster.hpp b/lib/libstereo/src/filters/focal_cluster.hpp new file mode 100644 index 0000000000000000000000000000000000000000..6aec3128a2f8db6ef94158fd9bfbba9219d5ad06 --- /dev/null +++ b/lib/libstereo/src/filters/focal_cluster.hpp @@ -0,0 +1,23 @@ +#ifndef _FTL_LIBSTEREO_FILTERS_CLUSTER_HPP_ +#define _FTL_LIBSTEREO_FILTERS_CLUSTER_HPP_ + +struct FocalCluster { + short2 focal_pt; + Bucket2D<ushort, 64>::Data left; + Bucket2D<ushort, 64>::Data right; + Array2D<float>::Data histogram; + + __device__ void operator()(ushort2 thread, ushort2 stride, ushort2 size) { + for (int y=thread.y; y<size.y; y+=stride.y) { + for (int f=thread.x; f<) + // For each feature or features near to focal point + + + // - Calc distance to focal in X + // - For each feature in right image that matches + // - Add focal dist to feature X and add to histogram + } + } +}; + +#endif diff --git a/lib/libstereo/src/filters/salient_gradient.hpp b/lib/libstereo/src/filters/salient_gradient.hpp index 872e8c9a658b4511348952e4028a857180aff3e6..c473de6217a7f3e601b9734b9173fd5f4f2c401e 100644 --- a/lib/libstereo/src/filters/salient_gradient.hpp +++ b/lib/libstereo/src/filters/salient_gradient.hpp @@ -4,8 +4,99 @@ #include "../util.hpp" #include "../array2d.hpp" #include "../bucket2d.hpp" +#include "../bucket1d.hpp" +/** + * Select salient gradient features and gather into scanline buckets that + * includes the x-coordinate and feature orientation. Not grouped by orientation. + */ struct SalientGradient { + Array2D<uchar>::Data image; + Array2D<uchar>::Data angle; + Array2D<uchar>::Data magnitude; + Bucket1D<short2, 64>::Data buckets; + + int width, height; + + __cuda__ inline float2 calculateGradient(int x, int y) { + if (x < 1 || y < 1 || x >= width-1 || y >= height-1) return make_float2(0,0); + + float dx = -1.0f*float(image(y-1,x-1)) + -2.0f*float(image(y, x-1)) + -1*float(image(y+1, x-1)) + + float(image(y-1, x+1)) + 2.0f*float(image(y, x+1)) + float(image(y+1, x+1)); + float dy = float(image(y-1, x-1)) + 2.0f*float(image(y-1, x)) + float(image(y-1, x+1)) + + -1.0f*float(image(y+1, x-1)) + -2.0f*float(image(y+1, x)) + -1.0f*float(image(y+1, x+1)); + + float g = sqrt( (dx*dx) + (dy*dy) ); + float a = atan2(dy, dx); + return make_float2(g,a); + } + + struct WarpSharedMemory { + int gradient_histogram[32]; + }; + + inline __device__ int scan(volatile int *s_Data, int thread, int threshold) { + for (uint offset = 1; offset < 32; offset <<= 1) { + __syncwarp(); + uint t = (thread >= offset) ? s_Data[thread] + s_Data[thread - offset] : s_Data[thread]; + __syncwarp(); + s_Data[thread] = t; + } + + uint t = __ballot_sync(0xFFFFFFFF, s_Data[thread] > threshold); + return __ffs(t); + } + + __device__ void operator()(ushort2 thread, ushort2 stride, ushort2 size, WarpSharedMemory &wsm) { + static const float PI = 3.14f; + static constexpr float PI2 = PI/2.0f; + + for (int y=thread.y; y<size.y; y+=stride.y) { + // Reset histogram + //for (int i=thread.x; i < 32; i+=32) wsm.gradient_histogram[i] = 0; + wsm.gradient_histogram[thread.x] = 0; + //int maxmag = 0; + + for (int x=thread.x; x<size.x; x+=stride.x) { + auto g = calculateGradient(x,y); + angle(y,x) = uchar(min(15,int((g.y+PI2) / PI * 16.0f))); + magnitude(y,x) = uchar(g.x); + + //maxmag = max(maxmag,int(g.x)); + atomicAdd(&wsm.gradient_histogram[min(31,int(g.x / 361.0f * 32.0f))], 1); + //atomicAdd(&wsm.gradient_histogram[0], 1); + } + + //maxmag = int(float(warpMax(maxmag)) * 0.8f); + + uchar gthresh = min(255, scan(wsm.gradient_histogram, thread.x, float(width)*0.95f) * (256/32)); + + // Apply threshold + for (int x=thread.x; x<size.x; x+=stride.x) { + uchar thresh = gthresh; + for (int u=-3; u<=3; ++u) { + thresh = (x+u >= 0 && x+u < width) ? max(thresh, magnitude(y,x+u)) : thresh; + } + + uchar m = magnitude(y,x); + //if (m < thresh) angle(y,x) = 0; + //output(y,x) = (m < thresh) ? 0 : 255; + if (m >= thresh) { + int a = angle(y,x); + buckets.add(y, make_short2(x, a)); + } + } + } + } +}; + +/** + * Find salient gradient features and gather in orientation groups scanline + * buckets. Adds features to orientations either side of actual orientation for + * a degree of tolerance to exact orientation. This allows fast search for + * features based upon scanline and orientation. + */ +struct SalientGradientGrouped { Array2D<uchar>::Data image; Array2D<uchar>::Data angle; Array2D<uchar>::Data magnitude; @@ -83,8 +174,6 @@ struct SalientGradient { buckets.add(y, (a < 15) ? a+1 : 0, ushort(x)); } } - - // Next step would be to bucket the results } } };