diff --git a/components/operators/include/ftl/operators/disparity.hpp b/components/operators/include/ftl/operators/disparity.hpp index 4aba9bbd8e179c86ddc0d6305fb14b3579a6fb23..17993f24c5e1778957cc7f3f29762ccaa611b0cc 100644 --- a/components/operators/include/ftl/operators/disparity.hpp +++ b/components/operators/include/ftl/operators/disparity.hpp @@ -12,6 +12,7 @@ #ifdef HAVE_LIBSGM #include <libsgm.h> +#include <libsgm_parameters.hpp> #include <opencv2/cudaimgproc.hpp> #endif @@ -85,6 +86,7 @@ class FixstarsSGM : public ftl::operators::Operator { int max_disp_; float uniqueness_; bool use_P2_map_; + sgm::CensusShape ct_shape_; }; #endif diff --git a/components/operators/src/disparity/fixstars_sgm.cpp b/components/operators/src/disparity/fixstars_sgm.cpp index acd468f249afbed33b83ab36422dd8bede9ea8b2..60f4332dc5bc59d6e5cd0b0084542e9b1b66adda 100644 --- a/components/operators/src/disparity/fixstars_sgm.cpp +++ b/components/operators/src/disparity/fixstars_sgm.cpp @@ -64,6 +64,7 @@ FixstarsSGM::FixstarsSGM(ftl::operators::Graph *g, ftl::Configurable* cfg) : P1_ = cfg->value("P1", 10); P2_ = cfg->value("P2", 120); max_disp_ = cfg->value("num_disp", 256); + ct_shape_ = static_cast<sgm::CensusShape>(cfg->value("ct_shape", 2)); if (uniqueness_ < 0.0 || uniqueness_ > 1.0) { uniqueness_ = 1.0; @@ -118,6 +119,11 @@ FixstarsSGM::FixstarsSGM(ftl::operators::Graph *g, ftl::Configurable* cfg) : } }); + cfg->on("ct_shape", [this, cfg]() { + ct_shape_ = static_cast<sgm::CensusShape>(cfg->value("ct_shape", 2)); + updateParameters(); + }); + updateP2Parameters(); cfg->on("canny_low", [this, cfg]() { @@ -159,7 +165,7 @@ bool FixstarsSGM::init() { bool FixstarsSGM::updateParameters() { if (ssgm_ == nullptr) { return false; } return this->ssgm_->updateParameters( - sgm::StereoSGM::Parameters(P1_, P2_, uniqueness_, true)); + sgm::StereoSGM::Parameters(P1_, P2_, uniqueness_, true, ct_shape_)); } bool FixstarsSGM::updateP2Parameters() { diff --git a/lib/libsgm/include/libsgm.h b/lib/libsgm/include/libsgm.h index bf2c58ea759181d3294d9cb717fd7e65b2b6d60a..653a2a5df38d6957e2942b16313538120becc9d7 100644 --- a/lib/libsgm/include/libsgm.h +++ b/lib/libsgm/include/libsgm.h @@ -28,6 +28,7 @@ limitations under the License. #include <stdint.h> #include "libsgm_config.h" +#include "libsgm_parameters.hpp" #include <cuda_runtime.h> #if defined(LIBSGM_SHARED) @@ -71,7 +72,8 @@ namespace sgm { int P2; float uniqueness; bool subpixel; - Parameters(int P1 = 10, int P2 = 120, float uniqueness = 0.95f, bool subpixel = false) : P1(P1), P2(P2), uniqueness(uniqueness), subpixel(subpixel) {} + CensusShape ct_shape; + Parameters(int P1 = 10, int P2 = 120, float uniqueness = 0.95f, bool subpixel = false, CensusShape ct_shape = CensusShape::CIRCLE_3) : P1(P1), P2(P2), uniqueness(uniqueness), subpixel(subpixel), ct_shape(ct_shape) {} }; /** diff --git a/lib/libsgm/include/libsgm_parameters.hpp b/lib/libsgm/include/libsgm_parameters.hpp new file mode 100644 index 0000000000000000000000000000000000000000..23113a213fc1663cf83777db611bfbb07afc308a --- /dev/null +++ b/lib/libsgm/include/libsgm_parameters.hpp @@ -0,0 +1,9 @@ +#pragma once + +namespace sgm { + enum class CensusShape { + CT_5X5=0, + CS_CT_9X7, + CIRCLE_3 + }; +} diff --git a/lib/libsgm/src/census_transform.cu b/lib/libsgm/src/census_transform.cu index d437f978e76300587e4b06e2d7954420a35dd1a2..f46a985577bb7070052eaff1af67fc8e85696781 100644 --- a/lib/libsgm/src/census_transform.cu +++ b/lib/libsgm/src/census_transform.cu @@ -21,14 +21,11 @@ namespace sgm { namespace { -static constexpr int WINDOW_WIDTH = 5; -static constexpr int WINDOW_HEIGHT = 5; - static constexpr int BLOCK_SIZE = 128; static constexpr int LINES_PER_BLOCK = 16; /* Centre symmetric census */ -template <typename T> +template <typename T, int WINDOW_WIDTH, int WINDOW_HEIGHT> __global__ void cs_census_transform_kernel( feature_type *dest, const T *src, @@ -104,7 +101,7 @@ __global__ void cs_census_transform_kernel( } } -template <typename T> +template <typename T, int WINDOW_WIDTH, int WINDOW_HEIGHT> __global__ void census_transform_kernel( feature_type* __restrict__ dest, const T* __restrict__ src, @@ -138,6 +135,72 @@ __global__ void census_transform_kernel( if (x < width && y < height) dest[x+y*width] = res; } +template <typename T> +__global__ void circle_ct_3_kernel( + feature_type* __restrict__ dest, + const T* __restrict__ src, + int width, + int height, + int pitch) +{ + static constexpr int RADIUS_X = 3; + static constexpr int RADIUS_Y = 3; + + const int x = (blockIdx.x*blockDim.x + threadIdx.x); + const int y = blockIdx.y*blockDim.y + threadIdx.y; + + feature_type res = 0; + + if (x >= RADIUS_X && y >= RADIUS_Y && x < width-RADIUS_X && y < height-RADIUS_Y) { + const T center = src[y*pitch+x]; + + int yix = y*pitch+x; + res = (res << 1) | (center < (src[yix-3]) ? 1 : 0); + res = (res << 1) | (center < (src[yix-2]) ? 1 : 0); + res = (res << 1) | (center < (src[yix-1]) ? 1 : 0); + res = (res << 1) | (center < (src[yix+1]) ? 1 : 0); + res = (res << 1) | (center < (src[yix+2]) ? 1 : 0); + res = (res << 1) | (center < (src[yix+3]) ? 1 : 0); + + yix = (y-1)*pitch+x; + res = (res << 1) | (center < (src[yix-2]) ? 1 : 0); + res = (res << 1) | (center < (src[yix-1]) ? 1 : 0); + res = (res << 1) | (center < (src[yix]) ? 1 : 0); + res = (res << 1) | (center < (src[yix+1]) ? 1 : 0); + res = (res << 1) | (center < (src[yix+2]) ? 1 : 0); + + yix = (y-2)*pitch+x; + res = (res << 1) | (center < (src[yix-2]) ? 1 : 0); + res = (res << 1) | (center < (src[yix-1]) ? 1 : 0); + res = (res << 1) | (center < (src[yix]) ? 1 : 0); + res = (res << 1) | (center < (src[yix+1]) ? 1 : 0); + res = (res << 1) | (center < (src[yix+2]) ? 1 : 0); + + yix = (y-3)*pitch+x; + res = (res << 1) | (center < (src[yix]) ? 1 : 0); + + yix = (y+1)*pitch+x; + res = (res << 1) | (center < (src[yix-2]) ? 1 : 0); + res = (res << 1) | (center < (src[yix-1]) ? 1 : 0); + res = (res << 1) | (center < (src[yix]) ? 1 : 0); + res = (res << 1) | (center < (src[yix+1]) ? 1 : 0); + res = (res << 1) | (center < (src[yix+2]) ? 1 : 0); + + yix = (y+2)*pitch+x; + res = (res << 1) | (center < (src[yix-2]) ? 1 : 0); + res = (res << 1) | (center < (src[yix-1]) ? 1 : 0); + res = (res << 1) | (center < (src[yix]) ? 1 : 0); + res = (res << 1) | (center < (src[yix+1]) ? 1 : 0); + res = (res << 1) | (center < (src[yix+2]) ? 1 : 0); + + yix = (y+3)*pitch+x; + res = (res << 1) | (center < (src[yix]) ? 1 : 0); + } + + // FIXME: Should use feature pitch, not width. + if (x < width && y < height) dest[x+y*width] = res; +} + template <typename T> void enqueue_census_transform( feature_type *dest, @@ -145,24 +208,32 @@ void enqueue_census_transform( int width, int height, int pitch, + sgm::CensusShape ct_shape, cudaStream_t stream) { /* Disable the original center symmetric algorithm */ - if (false) { - const int width_per_block = BLOCK_SIZE - WINDOW_WIDTH + 1; + if (ct_shape == sgm::CensusShape::CS_CT_9X7) { + const int width_per_block = BLOCK_SIZE - 9 + 1; const int height_per_block = LINES_PER_BLOCK; const dim3 gdim( (width + width_per_block - 1) / width_per_block, (height + height_per_block - 1) / height_per_block); const dim3 bdim(BLOCK_SIZE); - cs_census_transform_kernel<<<gdim, bdim, 0, stream>>>(dest, src, width, height, pitch); - } else { + cs_census_transform_kernel<T, 9, 7><<<gdim, bdim, 0, stream>>>(dest, src, width, height, pitch); + } else if (ct_shape == sgm::CensusShape::CT_5X5) { + static constexpr int THREADS_X = 16; + static constexpr int THREADS_Y = 16; + + const dim3 gdim((width + THREADS_X - 1)/THREADS_X, (height + THREADS_Y - 1)/THREADS_Y); + const dim3 bdim(THREADS_X, THREADS_Y); + census_transform_kernel<T, 5, 5><<<gdim, bdim, 0, stream>>>(dest, src, width, height, pitch); + } else if (ct_shape == sgm::CensusShape::CIRCLE_3) { static constexpr int THREADS_X = 16; static constexpr int THREADS_Y = 16; const dim3 gdim((width + THREADS_X - 1)/THREADS_X, (height + THREADS_Y - 1)/THREADS_Y); const dim3 bdim(THREADS_X, THREADS_Y); - census_transform_kernel<<<gdim, bdim, 0, stream>>>(dest, src, width, height, pitch); + circle_ct_3_kernel<<<gdim, bdim, 0, stream>>>(dest, src, width, height, pitch); } } @@ -180,13 +251,14 @@ void CensusTransform<T>::enqueue( int width, int height, int pitch, + sgm::CensusShape ct_shape, cudaStream_t stream) { if(m_feature_buffer.size() < static_cast<size_t>(width * height)){ m_feature_buffer = DeviceBuffer<feature_type>(width * height); } enqueue_census_transform( - m_feature_buffer.data(), src, width, height, pitch, stream); + m_feature_buffer.data(), src, width, height, pitch, ct_shape, stream); } template class CensusTransform<uint8_t>; diff --git a/lib/libsgm/src/census_transform.hpp b/lib/libsgm/src/census_transform.hpp index 8a80b903b4b20152c0c8fad7fada8032d60a1565..23c1ebd3fb4d1147a8d5775c5120dcfd1ffbf467 100644 --- a/lib/libsgm/src/census_transform.hpp +++ b/lib/libsgm/src/census_transform.hpp @@ -19,6 +19,7 @@ limitations under the License. #include "device_buffer.hpp" #include "types.hpp" +#include "libsgm_parameters.hpp" namespace sgm { @@ -43,6 +44,7 @@ public: int width, int height, int pitch, + sgm::CensusShape ct_shape, cudaStream_t stream); }; diff --git a/lib/libsgm/src/sgm.cu b/lib/libsgm/src/sgm.cu index eb5d4179e0a11b964e8238bbdcfac40598917396..0adf22301a9c9a8784475b2e881c7ab5e07f72c5 100644 --- a/lib/libsgm/src/sgm.cu +++ b/lib/libsgm/src/sgm.cu @@ -58,12 +58,13 @@ public: float uniqueness, bool subpixel, int min_disp, + sgm::CensusShape ct_shape, cudaStream_t stream) { m_census_left.enqueue( - src_left, width, height, src_pitch, stream); + src_left, width, height, src_pitch, ct_shape, stream); m_census_right.enqueue( - src_right, width, height, src_pitch, stream); + src_right, width, height, src_pitch, ct_shape, stream); m_path_aggregation.enqueue( m_census_left.get_output(), m_census_right.get_output(), @@ -109,6 +110,7 @@ void SemiGlobalMatching<T, MAX_DISPARITY>::execute( float uniqueness, bool subpixel, int min_disp, + sgm::CensusShape ct_shape, cudaStream_t stream) { m_impl->enqueue( @@ -119,7 +121,7 @@ void SemiGlobalMatching<T, MAX_DISPARITY>::execute( penalty1, penalty2, weights, weights_pitch, uniqueness, subpixel, - min_disp, + min_disp, ct_shape, stream); //cudaStreamSynchronize(0); } @@ -141,6 +143,7 @@ void SemiGlobalMatching<T, MAX_DISPARITY>::enqueue( float uniqueness, bool subpixel, int min_disp, + sgm::CensusShape ct_shape, cudaStream_t stream) { m_impl->enqueue( @@ -151,7 +154,7 @@ void SemiGlobalMatching<T, MAX_DISPARITY>::enqueue( penalty1, penalty2, weights, weights_pitch, uniqueness, subpixel, - min_disp, + min_disp, ct_shape, stream); } diff --git a/lib/libsgm/src/sgm.hpp b/lib/libsgm/src/sgm.hpp index 9aa2cd387782465c3398aab5cda6a8b6a292018d..c792c4a6ebb690708f5dd8fe99b78205411962d8 100644 --- a/lib/libsgm/src/sgm.hpp +++ b/lib/libsgm/src/sgm.hpp @@ -20,6 +20,7 @@ limitations under the License. #include <memory> #include <cstdint> #include "types.hpp" +#include "libsgm_parameters.hpp" namespace sgm { @@ -54,6 +55,7 @@ public: float uniqueness, bool subpixel, int min_disp, + sgm::CensusShape ct_shape, cudaStream_t stream); void enqueue( @@ -72,6 +74,7 @@ public: float uniqueness, bool subpixel, int min_disp, + sgm::CensusShape ct_shape, cudaStream_t stream); }; diff --git a/lib/libsgm/src/stereo_sgm.cpp b/lib/libsgm/src/stereo_sgm.cpp index 70e1263147fbe3ab4eefb948d591e8506a3be40b..6e7c0bf18b99de45a5d5351cb6bb62c45a639fe5 100644 --- a/lib/libsgm/src/stereo_sgm.cpp +++ b/lib/libsgm/src/stereo_sgm.cpp @@ -29,7 +29,7 @@ namespace sgm { public: using output_type = sgm::output_type; virtual void execute(output_type* dst_L, output_type* dst_R, const void* src_L, const void* src_R, - int w, int h, int sp, int dp, unsigned int P1, const uint8_t *P2, const uint8_t *weights, int weights_pitch, float uniqueness, bool subpixel, int min_disp, cudaStream_t stream) = 0; + int w, int h, int sp, int dp, unsigned int P1, const uint8_t *P2, const uint8_t *weights, int weights_pitch, float uniqueness, bool subpixel, int min_disp, sgm::CensusShape ct_shape, cudaStream_t stream) = 0; virtual ~SemiGlobalMatchingBase() {} }; @@ -38,9 +38,9 @@ namespace sgm { class SemiGlobalMatchingImpl : public SemiGlobalMatchingBase { public: void execute(output_type* dst_L, output_type* dst_R, const void* src_L, const void* src_R, - int w, int h, int sp, int dp, unsigned int P1, const uint8_t *P2, const uint8_t *weights, int weights_pitch, float uniqueness, bool subpixel, int min_disp, cudaStream_t stream) override + int w, int h, int sp, int dp, unsigned int P1, const uint8_t *P2, const uint8_t *weights, int weights_pitch, float uniqueness, bool subpixel, int min_disp, sgm::CensusShape ct_shape, cudaStream_t stream) override { - sgm_engine_.execute(dst_L, dst_R, (const input_type*)src_L, (const input_type*)src_R, w, h, sp, dp, P1, P2, weights, weights_pitch, uniqueness, subpixel, min_disp, stream); + sgm_engine_.execute(dst_L, dst_R, (const input_type*)src_L, (const input_type*)src_R, w, h, sp, dp, P1, P2, weights, weights_pitch, uniqueness, subpixel, min_disp, ct_shape, stream); } private: SemiGlobalMatching<input_type, DISP_SIZE> sgm_engine_; @@ -176,7 +176,7 @@ namespace sgm { d_left_disp = dst; // when threre is no device-host copy or type conversion, use passed buffer cu_res_->sgm_engine->execute((uint16_t*)d_tmp_left_disp, (uint16_t*)d_tmp_right_disp, - d_input_left, d_input_right, width, height, src_pitch, dst_pitch, param_.P1, P2, weights, weights_pitch, param_.uniqueness, param_.subpixel, min_disp, stream); + d_input_left, d_input_right, width, height, src_pitch, dst_pitch, param_.P1, P2, weights, weights_pitch, param_.uniqueness, param_.subpixel, min_disp, param_.ct_shape, stream); sgm::details::median_filter((uint16_t*)d_tmp_left_disp, (uint16_t*)d_left_disp, width, height, dst_pitch, stream); sgm::details::median_filter((uint16_t*)d_tmp_right_disp, (uint16_t*)d_right_disp, width, height, dst_pitch, stream);