diff --git a/components/operators/src/disparity/fixstars_sgm.cpp b/components/operators/src/disparity/fixstars_sgm.cpp index 453b0b0fe5ad09fcbf37435ede3b5aa11f829abb..9a904f9c7b1fa35a92f92e20b7880a09304a7ab1 100644 --- a/components/operators/src/disparity/fixstars_sgm.cpp +++ b/components/operators/src/disparity/fixstars_sgm.cpp @@ -62,7 +62,7 @@ FixstarsSGM::FixstarsSGM(ftl::operators::Graph *g, ftl::Configurable* cfg) : uniqueness_ = cfg->value("uniqueness", 0.95f); P1_ = cfg->value("P1", 10); P2_ = cfg->value("P2", 120); - max_disp_ = cfg->value("max_disp", 128); + max_disp_ = cfg->value("max_disp", 192); if (uniqueness_ < 0.0 || uniqueness_ > 1.0) { uniqueness_ = 1.0; @@ -79,7 +79,7 @@ FixstarsSGM::FixstarsSGM(ftl::operators::Graph *g, ftl::Configurable* cfg) : LOG(WARNING) << "Invalid value for P2, using value of P1 (" << P1_ << ")"; } - if (!(max_disp_ == 256 || max_disp_ == 128)) { + if (!(max_disp_ == 256 || max_disp_ == 128 || max_disp_ == 192)) { max_disp_ = 256; LOG(WARNING) << "Invalid value for max_disp, using default value (256)"; } diff --git a/lib/libsgm/src/horizontal_path_aggregation.cu b/lib/libsgm/src/horizontal_path_aggregation.cu index 11a94bc07081bbea4d29967b5390d9037c7319e7..8047354b4a5ac07d7a6a2087caa876bd420b5422 100644 --- a/lib/libsgm/src/horizontal_path_aggregation.cu +++ b/lib/libsgm/src/horizontal_path_aggregation.cu @@ -239,6 +239,20 @@ template void enqueue_aggregate_left2right_path<128u>( int min_disp, cudaStream_t stream); +template void enqueue_aggregate_left2right_path<192u>( + cost_type *dest, + const feature_type *left, + const feature_type *right, + int width, + int height, + unsigned int p1, + const uint8_t *p2, + int p2_pitch, + const uint8_t* w, + int w_pitch, + int min_disp, + cudaStream_t stream); + template void enqueue_aggregate_left2right_path<256u>( cost_type *dest, const feature_type *left, @@ -281,6 +295,20 @@ template void enqueue_aggregate_right2left_path<128u>( int min_disp, cudaStream_t stream); +template void enqueue_aggregate_right2left_path<192u>( + cost_type *dest, + const feature_type *left, + const feature_type *right, + int width, + int height, + unsigned int p1, + const uint8_t *p2, + int p2_pitch, + const uint8_t* w, + int w_pitch, + int min_disp, + cudaStream_t stream); + template void enqueue_aggregate_right2left_path<256u>( cost_type *dest, const feature_type *left, diff --git a/lib/libsgm/src/oblique_path_aggregation.cu b/lib/libsgm/src/oblique_path_aggregation.cu index 64dd9f947922bfbb250e6d47a4cdbfbf88de90fb..3734ebae8922a370ea51165c431bc6396b30f51b 100644 --- a/lib/libsgm/src/oblique_path_aggregation.cu +++ b/lib/libsgm/src/oblique_path_aggregation.cu @@ -243,6 +243,20 @@ template void enqueue_aggregate_upleft2downright_path<128u>( int min_disp, cudaStream_t stream); +template void enqueue_aggregate_upleft2downright_path<192u>( + cost_type *dest, + const feature_type *left, + const feature_type *right, + int width, + int height, + unsigned int p1, + const uint8_t *p2, + int p2_pitch, + const uint8_t* w, + int w_pitch, + int min_disp, + cudaStream_t stream); + template void enqueue_aggregate_upleft2downright_path<256u>( cost_type *dest, const feature_type *left, @@ -285,6 +299,20 @@ template void enqueue_aggregate_upright2downleft_path<128u>( int min_disp, cudaStream_t stream); +template void enqueue_aggregate_upright2downleft_path<192u>( + cost_type *dest, + const feature_type *left, + const feature_type *right, + int width, + int height, + unsigned int p1, + const uint8_t *p2, + int p2_pitch, + const uint8_t* w, + int w_pitch, + int min_disp, + cudaStream_t stream); + template void enqueue_aggregate_upright2downleft_path<256u>( cost_type *dest, const feature_type *left, @@ -327,6 +355,20 @@ template void enqueue_aggregate_downright2upleft_path<128u>( int min_disp, cudaStream_t stream); +template void enqueue_aggregate_downright2upleft_path<192u>( + cost_type *dest, + const feature_type *left, + const feature_type *right, + int width, + int height, + unsigned int p1, + const uint8_t *p2, + int p2_pitch, + const uint8_t* w, + int w_pitch, + int min_disp, + cudaStream_t stream); + template void enqueue_aggregate_downright2upleft_path<256u>( cost_type *dest, const feature_type *left, @@ -369,6 +411,20 @@ template void enqueue_aggregate_downleft2upright_path<128u>( int min_disp, cudaStream_t stream); +template void enqueue_aggregate_downleft2upright_path<192u>( + cost_type *dest, + const feature_type *left, + const feature_type *right, + int width, + int height, + unsigned int p1, + const uint8_t *p2, + int p2_pitch, + const uint8_t* w, + int w_pitch, + int min_disp, + cudaStream_t stream); + template void enqueue_aggregate_downleft2upright_path<256u>( cost_type *dest, const feature_type *left, diff --git a/lib/libsgm/src/path_aggregation.cu b/lib/libsgm/src/path_aggregation.cu index ed7cdbee7878328c49649a6cbc3c002e91dbdef7..5661d7bee452eb6d28486152c8dfa7bd75e5ecd1 100644 --- a/lib/libsgm/src/path_aggregation.cu +++ b/lib/libsgm/src/path_aggregation.cu @@ -101,6 +101,7 @@ void PathAggregation<MAX_DISPARITY>::enqueue( template class PathAggregation< 64>; template class PathAggregation<128>; +template class PathAggregation<192>; template class PathAggregation<256>; } diff --git a/lib/libsgm/src/path_aggregation_common.hpp b/lib/libsgm/src/path_aggregation_common.hpp index c897684e7003512ccd0c9e17d7a084a39319766b..5211c98103c26a27af51fbd91111043e16858d23 100644 --- a/lib/libsgm/src/path_aggregation_common.hpp +++ b/lib/libsgm/src/path_aggregation_common.hpp @@ -31,9 +31,9 @@ struct DynamicProgramming { static_assert( DP_BLOCK_SIZE >= 2, "DP_BLOCK_SIZE must be greater than or equal to 2"); - static_assert( + /*static_assert( (SUBGROUP_SIZE & (SUBGROUP_SIZE - 1)) == 0, - "SUBGROUP_SIZE must be a power of 2"); + "SUBGROUP_SIZE must be a power of 2");*/ uint32_t last_min; uint32_t dp[DP_BLOCK_SIZE]; diff --git a/lib/libsgm/src/sgm.cu b/lib/libsgm/src/sgm.cu index b13d13e0734874628d469bcb02dd9772ef08c50a..eb5d4179e0a11b964e8238bbdcfac40598917396 100644 --- a/lib/libsgm/src/sgm.cu +++ b/lib/libsgm/src/sgm.cu @@ -158,6 +158,7 @@ void SemiGlobalMatching<T, MAX_DISPARITY>::enqueue( template class SemiGlobalMatching<uint8_t, 64>; template class SemiGlobalMatching<uint8_t, 128>; +template class SemiGlobalMatching<uint8_t, 192>; template class SemiGlobalMatching<uint8_t, 256>; template class SemiGlobalMatching<uint16_t, 64>; template class SemiGlobalMatching<uint16_t, 128>; diff --git a/lib/libsgm/src/stereo_sgm.cpp b/lib/libsgm/src/stereo_sgm.cpp index 58ba52284b1caa5c3d50b3e85e084a91fb7cb9c3..70e1263147fbe3ab4eefb948d591e8506a3be40b 100644 --- a/lib/libsgm/src/stereo_sgm.cpp +++ b/lib/libsgm/src/stereo_sgm.cpp @@ -63,6 +63,8 @@ namespace sgm { sgm_engine = new SemiGlobalMatchingImpl<uint8_t, 64>(); else if (input_depth_bits_ == 8 && disparity_size_ == 128) sgm_engine = new SemiGlobalMatchingImpl<uint8_t, 128>(); + else if (input_depth_bits_ == 8 && disparity_size_ == 192) + sgm_engine = new SemiGlobalMatchingImpl<uint8_t, 192>(); else if (input_depth_bits_ == 8 && disparity_size_ == 256) sgm_engine = new SemiGlobalMatchingImpl<uint8_t, 256>(); else if (input_depth_bits_ == 16 && disparity_size_ == 64) @@ -133,9 +135,9 @@ namespace sgm { width_ = height_ = input_depth_bits_ = output_depth_bits_ = disparity_size_ = 0; throw std::logic_error("depth bits must be 8 or 16"); } - if (disparity_size_ != 64 && disparity_size_ != 128 && disparity_size_ != 256) { + if (disparity_size_ != 64 && disparity_size_ != 128 && disparity_size_ != 192 && disparity_size_ != 256) { width_ = height_ = input_depth_bits_ = output_depth_bits_ = disparity_size_ = 0; - throw std::logic_error("disparity size must be 64, 128 or 256"); + throw std::logic_error("disparity size must be 64, 128, 192 or 256"); } if (param.subpixel && output_depth_bits != 16) { width_ = height_ = input_depth_bits_ = output_depth_bits_ = disparity_size_ = 0; diff --git a/lib/libsgm/src/utility.hpp b/lib/libsgm/src/utility.hpp index 17b9f45f5375e1756f581a2d7fd3e72e5f588d60..bbd5dc328a878c241c9ceb4b3a57ff3162239794 100644 --- a/lib/libsgm/src/utility.hpp +++ b/lib/libsgm/src/utility.hpp @@ -189,6 +189,14 @@ __device__ inline void load_uint16_vector<4u>(uint32_t *dest, const uint16_t *pt dest[0] = uint16x4.x; dest[1] = uint16x4.y; dest[2] = uint16x4.z; dest[3] = uint16x4.w; } +template <> +__device__ inline void load_uint16_vector<6u>(uint32_t *dest, const uint16_t *ptr){ + const auto uint32x3 = load_as<uint3>(ptr); + load_uint16_vector<2u>(dest + 0, reinterpret_cast<const uint16_t *>(&uint32x3.x)); + load_uint16_vector<2u>(dest + 2, reinterpret_cast<const uint16_t *>(&uint32x3.y)); + load_uint16_vector<2u>(dest + 4, reinterpret_cast<const uint16_t *>(&uint32x3.z)); +} + template <> __device__ inline void load_uint16_vector<8u>(uint32_t *dest, const uint16_t *ptr){ const auto uint32x4 = load_as<uint4>(ptr); diff --git a/lib/libsgm/src/vertical_path_aggregation.cu b/lib/libsgm/src/vertical_path_aggregation.cu index 9bf52977431228dca2b063a3c8201bbb380c9e3c..7f705646039d5a223ca967ecebdcdd70bb6b9667 100644 --- a/lib/libsgm/src/vertical_path_aggregation.cu +++ b/lib/libsgm/src/vertical_path_aggregation.cu @@ -206,6 +206,20 @@ template void enqueue_aggregate_up2down_path<256u>( int min_disp, cudaStream_t stream); +template void enqueue_aggregate_up2down_path<192u>( + cost_type *dest, + const feature_type *left, + const feature_type *right, + int width, + int height, + unsigned int p1, + const uint8_t *p2, + int p2_pitch, + const uint8_t* w, + int w_pitch, + int min_disp, + cudaStream_t stream); + template void enqueue_aggregate_down2up_path<64u>( cost_type *dest, const feature_type *left, @@ -234,6 +248,20 @@ template void enqueue_aggregate_down2up_path<128u>( int min_disp, cudaStream_t stream); +template void enqueue_aggregate_down2up_path<192u>( + cost_type *dest, + const feature_type *left, + const feature_type *right, + int width, + int height, + unsigned int p1, + const uint8_t *p2, + int p2_pitch, + const uint8_t* w, + int w_pitch, + int min_disp, + cudaStream_t stream); + template void enqueue_aggregate_down2up_path<256u>( cost_type *dest, const feature_type *left, diff --git a/lib/libsgm/src/winner_takes_all.cu b/lib/libsgm/src/winner_takes_all.cu index 1e82fcb65f0f7c139d8bcfeea6b931c0e0fd63ff..ea53af6c330d18bb203bfa94ce6a59e3a45c7159 100644 --- a/lib/libsgm/src/winner_takes_all.cu +++ b/lib/libsgm/src/winner_takes_all.cu @@ -303,6 +303,7 @@ void WinnerTakesAll<MAX_DISPARITY>::enqueue( template class WinnerTakesAll< 64>; template class WinnerTakesAll<128>; +template class WinnerTakesAll<192>; template class WinnerTakesAll<256>; }