diff --git a/components/operators/src/disparity/fixstars_sgm.cpp b/components/operators/src/disparity/fixstars_sgm.cpp index 176dc8d10bf23ba91ebc3cf1976b7e2e08a96766..453b0b0fe5ad09fcbf37435ede3b5aa11f829abb 100644 --- a/components/operators/src/disparity/fixstars_sgm.cpp +++ b/components/operators/src/disparity/fixstars_sgm.cpp @@ -62,7 +62,7 @@ FixstarsSGM::FixstarsSGM(ftl::operators::Graph *g, ftl::Configurable* cfg) : uniqueness_ = cfg->value("uniqueness", 0.95f); P1_ = cfg->value("P1", 10); P2_ = cfg->value("P2", 120); - max_disp_ = cfg->value("max_disp", 256); + max_disp_ = cfg->value("max_disp", 128); if (uniqueness_ < 0.0 || uniqueness_ > 1.0) { uniqueness_ = 1.0; @@ -216,9 +216,9 @@ bool FixstarsSGM::apply(Frame &in, Frame &out, cudaStream_t stream) { weightsF_.convertTo(weights_, CV_8UC1, 255.0f, cvstream); //if ((int)P2_map_.step != P2_map_.cols) LOG(ERROR) << "P2 map step error: " << P2_map_.cols << "," << P2_map_.step; - ssgm_->execute(lbw_.data, rbw_.data, disp_int_.data, P2_map_.data, (uint8_t*) weights_.data, weights_.step1(), stream); + ssgm_->execute(lbw_.data, rbw_.data, disp_int_.data, P2_map_.data, (uint8_t*) weights_.data, weights_.step1(), config()->value("min_disp", 0), stream); } else { - ssgm_->execute(lbw_.data, rbw_.data, disp_int_.data, P2_map_.data, nullptr, 0, stream); + ssgm_->execute(lbw_.data, rbw_.data, disp_int_.data, P2_map_.data, nullptr, 0, config()->value("min_disp", 0), stream); } // GpuMat left_pixels(dispt_, cv::Rect(0, 0, max_disp_, dispt_.rows)); diff --git a/lib/libsgm/include/libsgm.h b/lib/libsgm/include/libsgm.h index 279b3e5af2557fc2cbb93e660c96edfca6c1a006..bf2c58ea759181d3294d9cb717fd7e65b2b6d60a 100644 --- a/lib/libsgm/include/libsgm.h +++ b/lib/libsgm/include/libsgm.h @@ -114,13 +114,13 @@ namespace sgm { * The element_type is uint8_t for output_depth_bits == 8 and uint16_t for output_depth_bits == 16. * Note that dst element value would be multiplied StereoSGM::SUBPIXEL_SCALE if subpixel option was enabled. */ - LIBSGM_API void execute(const void* left_pixels, const void* right_pixels, void* dst, const uint8_t *P2, const uint8_t *weights, int weights_pitch, cudaStream_t stream); + LIBSGM_API void execute(const void* left_pixels, const void* right_pixels, void* dst, const uint8_t *P2, const uint8_t *weights, int weights_pitch, int min_disp, cudaStream_t stream); /** * Same as execute(left_pixels, right_pixels, dst) with image size parameters. * Dimensions must be smaller or equal to dimensions provided in constructor. */ - LIBSGM_API void execute(const void* left_pixels, const void* right_pixels, void* dst, const int width, const int height, const int src_pitch, const int dst_pitch, const uint8_t *P2, const uint8_t *weights, int weights_pitch, cudaStream_t stream); + LIBSGM_API void execute(const void* left_pixels, const void* right_pixels, void* dst, const int width, const int height, const int src_pitch, const int dst_pitch, const uint8_t *P2, const uint8_t *weights, int weights_pitch, int min_disp, cudaStream_t stream); /** * Mask for invalid pixels. Must have same shape and pitch as src. Pixels which have non-zero values diff --git a/lib/libsgm/src/horizontal_path_aggregation.cu b/lib/libsgm/src/horizontal_path_aggregation.cu index 5eba5372c3d67e9396aaf9569c8e7240c0c98759..acd53e1147746dc64b2d7fec7cf6da5e9f2c3883 100644 --- a/lib/libsgm/src/horizontal_path_aggregation.cu +++ b/lib/libsgm/src/horizontal_path_aggregation.cu @@ -39,7 +39,8 @@ __global__ void aggregate_horizontal_path_kernel( const uint8_t* __restrict__ p2, int p2_pitch, const uint8_t* __restrict__ w, - int w_pitch) + int w_pitch, + int min_disp) { static const unsigned int SUBGROUP_SIZE = MAX_DISPARITY / DP_BLOCK_SIZE; static const unsigned int SUBGROUPS_PER_WARP = WARP_SIZE / SUBGROUP_SIZE; @@ -87,7 +88,7 @@ __global__ void aggregate_horizontal_path_kernel( }else{ for(unsigned int i = 0; i < DP_BLOCKS_PER_THREAD; ++i){ for(unsigned int j = 0; j < DP_BLOCK_SIZE; ++j){ - const int x = static_cast<int>(width - (j + dp_offset)); + const int x = static_cast<int>(width - min_disp - (j + dp_offset)); if(0 <= x && x < static_cast<int>(width)){ right_buffer[i][j] = __ldg(&right[i * feature_step + x]); }else{ @@ -171,6 +172,7 @@ void enqueue_aggregate_left2right_path( int p2_pitch, const uint8_t* w, int w_pitch, + int min_disp, cudaStream_t stream) { static const unsigned int SUBGROUP_SIZE = MAX_DISPARITY / DP_BLOCK_SIZE; @@ -180,7 +182,7 @@ void enqueue_aggregate_left2right_path( const int gdim = (height + PATHS_PER_BLOCK - 1) / PATHS_PER_BLOCK; const int bdim = BLOCK_SIZE; aggregate_horizontal_path_kernel<1, MAX_DISPARITY><<<gdim, bdim, 0, stream>>>( - dest, left, right, width, height, p1, p2, p2_pitch, w, w_pitch); + dest, left, right, width, height, p1, p2, p2_pitch, w, w_pitch, min_disp); } template <unsigned int MAX_DISPARITY> @@ -195,6 +197,7 @@ void enqueue_aggregate_right2left_path( int p2_pitch, const uint8_t* w, int w_pitch, + int min_disp, cudaStream_t stream) { static const unsigned int SUBGROUP_SIZE = MAX_DISPARITY / DP_BLOCK_SIZE; @@ -204,7 +207,7 @@ void enqueue_aggregate_right2left_path( const int gdim = (height + PATHS_PER_BLOCK - 1) / PATHS_PER_BLOCK; const int bdim = BLOCK_SIZE; aggregate_horizontal_path_kernel<-1, MAX_DISPARITY><<<gdim, bdim, 0, stream>>>( - dest, left, right, width, height, p1, p2, p2_pitch, w, w_pitch); + dest, left, right, width, height, p1, p2, p2_pitch, w, w_pitch, min_disp); } @@ -219,6 +222,7 @@ template void enqueue_aggregate_left2right_path<64u>( int p2_pitch, const uint8_t* w, int w_pitch, + int min_disp, cudaStream_t stream); template void enqueue_aggregate_left2right_path<128u>( @@ -232,6 +236,7 @@ template void enqueue_aggregate_left2right_path<128u>( int p2_pitch, const uint8_t* w, int w_pitch, + int min_disp, cudaStream_t stream); template void enqueue_aggregate_left2right_path<256u>( @@ -245,6 +250,7 @@ template void enqueue_aggregate_left2right_path<256u>( int p2_pitch, const uint8_t* w, int w_pitch, + int min_disp, cudaStream_t stream); template void enqueue_aggregate_right2left_path<64u>( @@ -258,6 +264,7 @@ template void enqueue_aggregate_right2left_path<64u>( int p2_pitch, const uint8_t* w, int w_pitch, + int min_disp, cudaStream_t stream); template void enqueue_aggregate_right2left_path<128u>( @@ -271,6 +278,7 @@ template void enqueue_aggregate_right2left_path<128u>( int p2_pitch, const uint8_t* w, int w_pitch, + int min_disp, cudaStream_t stream); template void enqueue_aggregate_right2left_path<256u>( @@ -284,6 +292,7 @@ template void enqueue_aggregate_right2left_path<256u>( int p2_pitch, const uint8_t* w, int w_pitch, + int min_disp, cudaStream_t stream); } diff --git a/lib/libsgm/src/horizontal_path_aggregation.hpp b/lib/libsgm/src/horizontal_path_aggregation.hpp index 1b7a7e088f5d4dac793694ba7f4a0a876de2538b..4626dd68f91909b2aab893fea906df5586acef72 100644 --- a/lib/libsgm/src/horizontal_path_aggregation.hpp +++ b/lib/libsgm/src/horizontal_path_aggregation.hpp @@ -34,6 +34,7 @@ void enqueue_aggregate_left2right_path( int p2_pitch, const uint8_t* w, int w_pitch, + int min_disp, cudaStream_t stream); template <unsigned int MAX_DISPARITY> @@ -48,6 +49,7 @@ void enqueue_aggregate_right2left_path( int p2_pitch, const uint8_t* w, int w_pitch, + int min_disp, cudaStream_t stream); } diff --git a/lib/libsgm/src/oblique_path_aggregation.cu b/lib/libsgm/src/oblique_path_aggregation.cu index 97d9b1493c32e54a7bb1195f9a9261243b801444..64dd9f947922bfbb250e6d47a4cdbfbf88de90fb 100644 --- a/lib/libsgm/src/oblique_path_aggregation.cu +++ b/lib/libsgm/src/oblique_path_aggregation.cu @@ -35,7 +35,8 @@ __global__ void aggregate_oblique_path_kernel( const uint8_t* __restrict__ p2, int p2_pitch, const uint8_t* __restrict__ w, - int w_pitch) + int w_pitch, + int min_disp) { static const unsigned int SUBGROUP_SIZE = MAX_DISPARITY / DP_BLOCK_SIZE; static const unsigned int PATHS_PER_WARP = WARP_SIZE / SUBGROUP_SIZE; @@ -77,7 +78,7 @@ __global__ void aggregate_oblique_path_kernel( for(unsigned int iter = 0; iter < height; ++iter){ const int y = static_cast<int>(Y_DIRECTION > 0 ? iter : height - 1 - iter); const int x = x0 + static_cast<int>(iter) * X_DIRECTION; - const int right_x0 = right_x00 + static_cast<int>(iter) * X_DIRECTION; + const int right_x0 = right_x00 + static_cast<int>(iter) * X_DIRECTION - min_disp; // Load right to smem for(unsigned int i0 = 0; i0 < RIGHT_BUFFER_SIZE; i0 += BLOCK_SIZE){ const unsigned int i = i0 + threadIdx.x; @@ -129,6 +130,7 @@ void enqueue_aggregate_upleft2downright_path( int p2_pitch, const uint8_t* w, int w_pitch, + int min_disp, cudaStream_t stream) { static const unsigned int SUBGROUP_SIZE = MAX_DISPARITY / DP_BLOCK_SIZE; @@ -137,7 +139,7 @@ void enqueue_aggregate_upleft2downright_path( const int gdim = (width + height + PATHS_PER_BLOCK - 2) / PATHS_PER_BLOCK; const int bdim = BLOCK_SIZE; aggregate_oblique_path_kernel<1, 1, MAX_DISPARITY><<<gdim, bdim, 0, stream>>>( - dest, left, right, width, height, p1, p2, p2_pitch, w, w_pitch); + dest, left, right, width, height, p1, p2, p2_pitch, w, w_pitch, min_disp); } template <unsigned int MAX_DISPARITY> @@ -152,6 +154,7 @@ void enqueue_aggregate_upright2downleft_path( int p2_pitch, const uint8_t* w, int w_pitch, + int min_disp, cudaStream_t stream) { static const unsigned int SUBGROUP_SIZE = MAX_DISPARITY / DP_BLOCK_SIZE; @@ -160,7 +163,7 @@ void enqueue_aggregate_upright2downleft_path( const int gdim = (width + height + PATHS_PER_BLOCK - 2) / PATHS_PER_BLOCK; const int bdim = BLOCK_SIZE; aggregate_oblique_path_kernel<-1, 1, MAX_DISPARITY><<<gdim, bdim, 0, stream>>>( - dest, left, right, width, height, p1, p2, p2_pitch, w, w_pitch); + dest, left, right, width, height, p1, p2, p2_pitch, w, w_pitch, min_disp); } template <unsigned int MAX_DISPARITY> @@ -175,6 +178,7 @@ void enqueue_aggregate_downright2upleft_path( int p2_pitch, const uint8_t* w, int w_pitch, + int min_disp, cudaStream_t stream) { static const unsigned int SUBGROUP_SIZE = MAX_DISPARITY / DP_BLOCK_SIZE; @@ -183,7 +187,7 @@ void enqueue_aggregate_downright2upleft_path( const int gdim = (width + height + PATHS_PER_BLOCK - 2) / PATHS_PER_BLOCK; const int bdim = BLOCK_SIZE; aggregate_oblique_path_kernel<-1, -1, MAX_DISPARITY><<<gdim, bdim, 0, stream>>>( - dest, left, right, width, height, p1, p2, p2_pitch, w, w_pitch); + dest, left, right, width, height, p1, p2, p2_pitch, w, w_pitch, min_disp); } template <unsigned int MAX_DISPARITY> @@ -198,6 +202,7 @@ void enqueue_aggregate_downleft2upright_path( int p2_pitch, const uint8_t* w, int w_pitch, + int min_disp, cudaStream_t stream) { static const unsigned int SUBGROUP_SIZE = MAX_DISPARITY / DP_BLOCK_SIZE; @@ -206,7 +211,7 @@ void enqueue_aggregate_downleft2upright_path( const int gdim = (width + height + PATHS_PER_BLOCK - 2) / PATHS_PER_BLOCK; const int bdim = BLOCK_SIZE; aggregate_oblique_path_kernel<1, -1, MAX_DISPARITY><<<gdim, bdim, 0, stream>>>( - dest, left, right, width, height, p1, p2, p2_pitch, w, w_pitch); + dest, left, right, width, height, p1, p2, p2_pitch, w, w_pitch, min_disp); } @@ -221,6 +226,7 @@ template void enqueue_aggregate_upleft2downright_path<64u>( int p2_pitch, const uint8_t* w, int w_pitch, + int min_disp, cudaStream_t stream); template void enqueue_aggregate_upleft2downright_path<128u>( @@ -234,6 +240,7 @@ template void enqueue_aggregate_upleft2downright_path<128u>( int p2_pitch, const uint8_t* w, int w_pitch, + int min_disp, cudaStream_t stream); template void enqueue_aggregate_upleft2downright_path<256u>( @@ -247,6 +254,7 @@ template void enqueue_aggregate_upleft2downright_path<256u>( int p2_pitch, const uint8_t* w, int w_pitch, + int min_disp, cudaStream_t stream); template void enqueue_aggregate_upright2downleft_path<64u>( @@ -260,6 +268,7 @@ template void enqueue_aggregate_upright2downleft_path<64u>( int p2_pitch, const uint8_t* w, int w_pitch, + int min_disp, cudaStream_t stream); template void enqueue_aggregate_upright2downleft_path<128u>( @@ -273,6 +282,7 @@ template void enqueue_aggregate_upright2downleft_path<128u>( int p2_pitch, const uint8_t* w, int w_pitch, + int min_disp, cudaStream_t stream); template void enqueue_aggregate_upright2downleft_path<256u>( @@ -286,6 +296,7 @@ template void enqueue_aggregate_upright2downleft_path<256u>( int p2_pitch, const uint8_t* w, int w_pitch, + int min_disp, cudaStream_t stream); template void enqueue_aggregate_downright2upleft_path<64u>( @@ -299,6 +310,7 @@ template void enqueue_aggregate_downright2upleft_path<64u>( int p2_pitch, const uint8_t* w, int w_pitch, + int min_disp, cudaStream_t stream); template void enqueue_aggregate_downright2upleft_path<128u>( @@ -312,6 +324,7 @@ template void enqueue_aggregate_downright2upleft_path<128u>( int p2_pitch, const uint8_t* w, int w_pitch, + int min_disp, cudaStream_t stream); template void enqueue_aggregate_downright2upleft_path<256u>( @@ -325,6 +338,7 @@ template void enqueue_aggregate_downright2upleft_path<256u>( int p2_pitch, const uint8_t* w, int w_pitch, + int min_disp, cudaStream_t stream); template void enqueue_aggregate_downleft2upright_path<64u>( @@ -338,6 +352,7 @@ template void enqueue_aggregate_downleft2upright_path<64u>( int p2_pitch, const uint8_t* w, int w_pitch, + int min_disp, cudaStream_t stream); template void enqueue_aggregate_downleft2upright_path<128u>( @@ -351,6 +366,7 @@ template void enqueue_aggregate_downleft2upright_path<128u>( int p2_pitch, const uint8_t* w, int w_pitch, + int min_disp, cudaStream_t stream); template void enqueue_aggregate_downleft2upright_path<256u>( @@ -364,6 +380,7 @@ template void enqueue_aggregate_downleft2upright_path<256u>( int p2_pitch, const uint8_t* w, int w_pitch, + int min_disp, cudaStream_t stream); } diff --git a/lib/libsgm/src/oblique_path_aggregation.hpp b/lib/libsgm/src/oblique_path_aggregation.hpp index 6504f37a2715ef64d7698b00afb5f6b0614b26af..882afed2affc344ea35bfda20c619e7521738ab3 100644 --- a/lib/libsgm/src/oblique_path_aggregation.hpp +++ b/lib/libsgm/src/oblique_path_aggregation.hpp @@ -34,6 +34,7 @@ void enqueue_aggregate_upleft2downright_path( int p2_pitch, const uint8_t* w, int w_pitch, + int min_disp, cudaStream_t stream); template <unsigned int MAX_DISPARITY> @@ -48,6 +49,7 @@ void enqueue_aggregate_upright2downleft_path( int p2_pitch, const uint8_t* w, int w_pitch, + int min_disp, cudaStream_t stream); template <unsigned int MAX_DISPARITY> @@ -62,6 +64,7 @@ void enqueue_aggregate_downright2upleft_path( int p2_pitch, const uint8_t* w, int w_pitch, + int min_disp, cudaStream_t stream); template <unsigned int MAX_DISPARITY> @@ -76,6 +79,7 @@ void enqueue_aggregate_downleft2upright_path( int p2_pitch, const uint8_t* w, int w_pitch, + int min_disp, cudaStream_t stream); } diff --git a/lib/libsgm/src/path_aggregation.cu b/lib/libsgm/src/path_aggregation.cu index e5019ec3a67ddb1113ce2a7f1a873983359e5fb4..ed7cdbee7878328c49649a6cbc3c002e91dbdef7 100644 --- a/lib/libsgm/src/path_aggregation.cu +++ b/lib/libsgm/src/path_aggregation.cu @@ -53,6 +53,7 @@ void PathAggregation<MAX_DISPARITY>::enqueue( int p2_pitch, const uint8_t* w, int w_pitch, + int min_disp, cudaStream_t stream) { const size_t buffer_size = width * height * MAX_DISPARITY * NUM_PATHS; @@ -69,28 +70,28 @@ void PathAggregation<MAX_DISPARITY>::enqueue( path_aggregation::enqueue_aggregate_up2down_path<MAX_DISPARITY>( m_cost_buffer.data() + 0 * buffer_step, - left, right, width, height, p1, p2, p2_pitch, w, w_pitch, m_streams[0]); + left, right, width, height, p1, p2, p2_pitch, w, w_pitch, min_disp, m_streams[0]); path_aggregation::enqueue_aggregate_down2up_path<MAX_DISPARITY>( m_cost_buffer.data() + 1 * buffer_step, - left, right, width, height, p1, p2, p2_pitch, w, w_pitch, m_streams[1]); + left, right, width, height, p1, p2, p2_pitch, w, w_pitch, min_disp, m_streams[1]); path_aggregation::enqueue_aggregate_left2right_path<MAX_DISPARITY>( m_cost_buffer.data() + 2 * buffer_step, - left, right, width, height, p1, p2, p2_pitch, w, w_pitch, m_streams[2]); + left, right, width, height, p1, p2, p2_pitch, w, w_pitch, min_disp, m_streams[2]); path_aggregation::enqueue_aggregate_right2left_path<MAX_DISPARITY>( m_cost_buffer.data() + 3 * buffer_step, - left, right, width, height, p1, p2, p2_pitch, w, w_pitch, m_streams[3]); + left, right, width, height, p1, p2, p2_pitch, w, w_pitch, min_disp, m_streams[3]); path_aggregation::enqueue_aggregate_upleft2downright_path<MAX_DISPARITY>( m_cost_buffer.data() + 4 * buffer_step, - left, right, width, height, p1, p2, p2_pitch, w, w_pitch, m_streams[4]); + left, right, width, height, p1, p2, p2_pitch, w, w_pitch, min_disp, m_streams[4]); path_aggregation::enqueue_aggregate_upright2downleft_path<MAX_DISPARITY>( m_cost_buffer.data() + 5 * buffer_step, - left, right, width, height, p1, p2, p2_pitch, w, w_pitch, m_streams[5]); + left, right, width, height, p1, p2, p2_pitch, w, w_pitch, min_disp, m_streams[5]); path_aggregation::enqueue_aggregate_downright2upleft_path<MAX_DISPARITY>( m_cost_buffer.data() + 6 * buffer_step, - left, right, width, height, p1, p2, p2_pitch, w, w_pitch, m_streams[6]); + left, right, width, height, p1, p2, p2_pitch, w, w_pitch, min_disp, m_streams[6]); path_aggregation::enqueue_aggregate_downleft2upright_path<MAX_DISPARITY>( m_cost_buffer.data() + 7 * buffer_step, - left, right, width, height, p1, p2, p2_pitch, w, w_pitch, m_streams[7]); + left, right, width, height, p1, p2, p2_pitch, w, w_pitch, min_disp, m_streams[7]); for(unsigned int i = 0; i < NUM_PATHS; ++i){ cudaEventRecord(m_events[i], m_streams[i]); cudaStreamWaitEvent(stream, m_events[i], 0); diff --git a/lib/libsgm/src/path_aggregation.hpp b/lib/libsgm/src/path_aggregation.hpp index 7df96996da46711f7ddcb76dcbc44132ca9f194c..c8abf740df09162ea65a01bc5bf0f041d2f2c6e7 100644 --- a/lib/libsgm/src/path_aggregation.hpp +++ b/lib/libsgm/src/path_aggregation.hpp @@ -51,6 +51,7 @@ public: int p2_pitch, const uint8_t* w, int w_pitch, + int min_disp, cudaStream_t stream); }; diff --git a/lib/libsgm/src/sgm.cu b/lib/libsgm/src/sgm.cu index 2b350da9562962aa9e10b4554bb6b6c4517e1023..5a8b4aa70a0d1ee65b0839bb09198b5729d8f239 100644 --- a/lib/libsgm/src/sgm.cu +++ b/lib/libsgm/src/sgm.cu @@ -57,6 +57,7 @@ public: int weights_pitch, float uniqueness, bool subpixel, + int min_disp, cudaStream_t stream) { m_census_left.enqueue( @@ -70,6 +71,7 @@ public: penalty1, penalty2, src_pitch, // bug? weights, weights_pitch, + min_disp, stream); m_winner_takes_all.enqueue( dest_left, dest_right, @@ -106,6 +108,7 @@ void SemiGlobalMatching<T, MAX_DISPARITY>::execute( int weights_pitch, float uniqueness, bool subpixel, + int min_disp, cudaStream_t stream) { m_impl->enqueue( @@ -116,6 +119,7 @@ void SemiGlobalMatching<T, MAX_DISPARITY>::execute( penalty1, penalty2, weights, weights_pitch, uniqueness, subpixel, + min_disp, stream); //cudaStreamSynchronize(0); } @@ -136,6 +140,7 @@ void SemiGlobalMatching<T, MAX_DISPARITY>::enqueue( int weights_pitch, float uniqueness, bool subpixel, + int min_disp, cudaStream_t stream) { m_impl->enqueue( @@ -146,6 +151,7 @@ void SemiGlobalMatching<T, MAX_DISPARITY>::enqueue( penalty1, penalty2, weights, weights_pitch, uniqueness, subpixel, + min_disp, stream); } diff --git a/lib/libsgm/src/sgm.hpp b/lib/libsgm/src/sgm.hpp index f0c3c80ee107b95890d6dc8578c72768a70abdd4..9aa2cd387782465c3398aab5cda6a8b6a292018d 100644 --- a/lib/libsgm/src/sgm.hpp +++ b/lib/libsgm/src/sgm.hpp @@ -53,6 +53,7 @@ public: int weights_pitch, float uniqueness, bool subpixel, + int min_disp, cudaStream_t stream); void enqueue( @@ -70,6 +71,7 @@ public: int weights_pitch, float uniqueness, bool subpixel, + int min_disp, cudaStream_t stream); }; diff --git a/lib/libsgm/src/stereo_sgm.cpp b/lib/libsgm/src/stereo_sgm.cpp index a07f2daea087e38f4102c0cb92bf7d1664655c3a..34f686fcb1d98f677a44675c3ef12408f230cda1 100644 --- a/lib/libsgm/src/stereo_sgm.cpp +++ b/lib/libsgm/src/stereo_sgm.cpp @@ -29,7 +29,7 @@ namespace sgm { public: using output_type = sgm::output_type; virtual void execute(output_type* dst_L, output_type* dst_R, const void* src_L, const void* src_R, - int w, int h, int sp, int dp, unsigned int P1, const uint8_t *P2, const uint8_t *weights, int weights_pitch, float uniqueness, bool subpixel, cudaStream_t stream) = 0; + int w, int h, int sp, int dp, unsigned int P1, const uint8_t *P2, const uint8_t *weights, int weights_pitch, float uniqueness, bool subpixel, int min_disp, cudaStream_t stream) = 0; virtual ~SemiGlobalMatchingBase() {} }; @@ -38,9 +38,9 @@ namespace sgm { class SemiGlobalMatchingImpl : public SemiGlobalMatchingBase { public: void execute(output_type* dst_L, output_type* dst_R, const void* src_L, const void* src_R, - int w, int h, int sp, int dp, unsigned int P1, const uint8_t *P2, const uint8_t *weights, int weights_pitch, float uniqueness, bool subpixel, cudaStream_t stream) override + int w, int h, int sp, int dp, unsigned int P1, const uint8_t *P2, const uint8_t *weights, int weights_pitch, float uniqueness, bool subpixel, int min_disp, cudaStream_t stream) override { - sgm_engine_.execute(dst_L, dst_R, (const input_type*)src_L, (const input_type*)src_R, w, h, sp, dp, P1, P2, weights, weights_pitch, uniqueness, subpixel, stream); + sgm_engine_.execute(dst_L, dst_R, (const input_type*)src_L, (const input_type*)src_R, w, h, sp, dp, P1, P2, weights, weights_pitch, uniqueness, subpixel, min_disp, stream); } private: SemiGlobalMatching<input_type, DISP_SIZE> sgm_engine_; @@ -150,7 +150,7 @@ namespace sgm { } void StereoSGM::execute(const void* left_pixels, const void* right_pixels, void* dst, const int width, const int height, const int src_pitch, const int dst_pitch, - const uint8_t *P2, const uint8_t *weights, int weights_pitch, cudaStream_t stream) { + const uint8_t *P2, const uint8_t *weights, int weights_pitch, int min_disp, cudaStream_t stream) { const void *d_input_left, *d_input_right; @@ -174,7 +174,7 @@ namespace sgm { d_left_disp = dst; // when threre is no device-host copy or type conversion, use passed buffer cu_res_->sgm_engine->execute((uint16_t*)d_tmp_left_disp, (uint16_t*)d_tmp_right_disp, - d_input_left, d_input_right, width, height, src_pitch, dst_pitch, param_.P1, P2, weights, weights_pitch, param_.uniqueness, param_.subpixel, stream); + d_input_left, d_input_right, width, height, src_pitch, dst_pitch, param_.P1, P2, weights, weights_pitch, param_.uniqueness, param_.subpixel, min_disp, stream); sgm::details::median_filter((uint16_t*)d_tmp_left_disp, (uint16_t*)d_left_disp, width, height, dst_pitch, stream); sgm::details::median_filter((uint16_t*)d_tmp_right_disp, (uint16_t*)d_right_disp, width, height, dst_pitch, stream); @@ -198,8 +198,8 @@ namespace sgm { } } - void StereoSGM::execute(const void* left_pixels, const void* right_pixels, void* dst, const uint8_t *P2, const uint8_t *weights, int weights_pitch, cudaStream_t stream) { - execute(left_pixels, right_pixels, dst, width_, height_, src_pitch_, dst_pitch_, P2, weights, weights_pitch, stream); + void StereoSGM::execute(const void* left_pixels, const void* right_pixels, void* dst, const uint8_t *P2, const uint8_t *weights, int weights_pitch, int min_disp, cudaStream_t stream) { + execute(left_pixels, right_pixels, dst, width_, height_, src_pitch_, dst_pitch_, P2, weights, weights_pitch, min_disp, stream); } bool StereoSGM::updateParameters(const Parameters ¶ms) { diff --git a/lib/libsgm/src/vertical_path_aggregation.cu b/lib/libsgm/src/vertical_path_aggregation.cu index 6fee96892cf038a0583c576688e8456e0c3ec7d7..9bf52977431228dca2b063a3c8201bbb380c9e3c 100644 --- a/lib/libsgm/src/vertical_path_aggregation.cu +++ b/lib/libsgm/src/vertical_path_aggregation.cu @@ -35,7 +35,8 @@ __global__ void aggregate_vertical_path_kernel( const uint8_t* __restrict__ p2, int p2_pitch, const uint8_t* __restrict__ w, - int w_pitch) + int w_pitch, + int min_disp) { static const unsigned int SUBGROUP_SIZE = MAX_DISPARITY / DP_BLOCK_SIZE; static const unsigned int PATHS_PER_WARP = WARP_SIZE / SUBGROUP_SIZE; @@ -81,7 +82,7 @@ __global__ void aggregate_vertical_path_kernel( for(unsigned int i0 = 0; i0 < RIGHT_BUFFER_SIZE; i0 += BLOCK_SIZE){ const unsigned int i = i0 + threadIdx.x; if(i < RIGHT_BUFFER_SIZE){ - const int x = static_cast<int>(right_x0 + PATHS_PER_BLOCK - 1 - i); + const int x = static_cast<int>(right_x0 + PATHS_PER_BLOCK - 1 - i) - min_disp; feature_type right_value = 0; if(0 <= x && x < static_cast<int>(width)){ right_value = right[x + y * width]; @@ -126,6 +127,7 @@ void enqueue_aggregate_up2down_path( int p2_pitch, const uint8_t* w, int w_pitch, + int min_disp, cudaStream_t stream) { static const unsigned int SUBGROUP_SIZE = MAX_DISPARITY / DP_BLOCK_SIZE; @@ -134,7 +136,7 @@ void enqueue_aggregate_up2down_path( const int gdim = (width + PATHS_PER_BLOCK - 1) / PATHS_PER_BLOCK; const int bdim = BLOCK_SIZE; aggregate_vertical_path_kernel<1, MAX_DISPARITY><<<gdim, bdim, 0, stream>>>( - dest, left, right, width, height, p1, p2, p2_pitch, w, w_pitch); + dest, left, right, width, height, p1, p2, p2_pitch, w, w_pitch, min_disp); } template <unsigned int MAX_DISPARITY> @@ -149,6 +151,7 @@ void enqueue_aggregate_down2up_path( int p2_pitch, const uint8_t* w, int w_pitch, + int min_disp, cudaStream_t stream) { static const unsigned int SUBGROUP_SIZE = MAX_DISPARITY / DP_BLOCK_SIZE; @@ -157,7 +160,7 @@ void enqueue_aggregate_down2up_path( const int gdim = (width + PATHS_PER_BLOCK - 1) / PATHS_PER_BLOCK; const int bdim = BLOCK_SIZE; aggregate_vertical_path_kernel<-1, MAX_DISPARITY><<<gdim, bdim, 0, stream>>>( - dest, left, right, width, height, p1, p2, p2_pitch, w, w_pitch); + dest, left, right, width, height, p1, p2, p2_pitch, w, w_pitch, min_disp); } @@ -172,6 +175,7 @@ template void enqueue_aggregate_up2down_path<64u>( int p2_pitch, const uint8_t* w, int w_pitch, + int min_disp, cudaStream_t stream); template void enqueue_aggregate_up2down_path<128u>( @@ -185,6 +189,7 @@ template void enqueue_aggregate_up2down_path<128u>( int p2_pitch, const uint8_t* w, int w_pitch, + int min_disp, cudaStream_t stream); template void enqueue_aggregate_up2down_path<256u>( @@ -198,6 +203,7 @@ template void enqueue_aggregate_up2down_path<256u>( int p2_pitch, const uint8_t* w, int w_pitch, + int min_disp, cudaStream_t stream); template void enqueue_aggregate_down2up_path<64u>( @@ -211,6 +217,7 @@ template void enqueue_aggregate_down2up_path<64u>( int p2_pitch, const uint8_t* w, int w_pitch, + int min_disp, cudaStream_t stream); template void enqueue_aggregate_down2up_path<128u>( @@ -224,6 +231,7 @@ template void enqueue_aggregate_down2up_path<128u>( int p2_pitch, const uint8_t* w, int w_pitch, + int min_disp, cudaStream_t stream); template void enqueue_aggregate_down2up_path<256u>( @@ -237,6 +245,7 @@ template void enqueue_aggregate_down2up_path<256u>( int p2_pitch, const uint8_t* w, int w_pitch, + int min_disp, cudaStream_t stream); } diff --git a/lib/libsgm/src/vertical_path_aggregation.hpp b/lib/libsgm/src/vertical_path_aggregation.hpp index 434dd9231614b821ae1dfcbb439e648a9dd10b9a..18240cf264862e8d7417200c51d1e8584ec49d43 100644 --- a/lib/libsgm/src/vertical_path_aggregation.hpp +++ b/lib/libsgm/src/vertical_path_aggregation.hpp @@ -34,6 +34,7 @@ void enqueue_aggregate_up2down_path( int p2_pitch, const uint8_t* w, int w_pitch, + int min_disp, cudaStream_t stream); template <unsigned int MAX_DISPARITY> @@ -48,6 +49,7 @@ void enqueue_aggregate_down2up_path( int p2_pitch, const uint8_t* w, int w_pitch, + int min_disp, cudaStream_t stream); }