diff --git a/components/operators/src/disparity/fixstars_sgm.cpp b/components/operators/src/disparity/fixstars_sgm.cpp
index 176dc8d10bf23ba91ebc3cf1976b7e2e08a96766..453b0b0fe5ad09fcbf37435ede3b5aa11f829abb 100644
--- a/components/operators/src/disparity/fixstars_sgm.cpp
+++ b/components/operators/src/disparity/fixstars_sgm.cpp
@@ -62,7 +62,7 @@ FixstarsSGM::FixstarsSGM(ftl::operators::Graph *g, ftl::Configurable* cfg) :
 	uniqueness_ = cfg->value("uniqueness", 0.95f);
 	P1_ = cfg->value("P1", 10);
 	P2_ = cfg->value("P2", 120);
-	max_disp_ = cfg->value("max_disp", 256);
+	max_disp_ = cfg->value("max_disp", 128);
 
 	if (uniqueness_ < 0.0 || uniqueness_ > 1.0) {
 		uniqueness_ = 1.0;
@@ -216,9 +216,9 @@ bool FixstarsSGM::apply(Frame &in, Frame &out, cudaStream_t stream) {
 		weightsF_.convertTo(weights_, CV_8UC1, 255.0f, cvstream);
 
 		//if ((int)P2_map_.step != P2_map_.cols) LOG(ERROR) << "P2 map step error: " << P2_map_.cols << "," << P2_map_.step;
-		ssgm_->execute(lbw_.data, rbw_.data, disp_int_.data, P2_map_.data, (uint8_t*) weights_.data, weights_.step1(), stream);
+		ssgm_->execute(lbw_.data, rbw_.data, disp_int_.data, P2_map_.data, (uint8_t*) weights_.data, weights_.step1(), config()->value("min_disp", 0), stream);
 	} else {
-		ssgm_->execute(lbw_.data, rbw_.data, disp_int_.data, P2_map_.data, nullptr, 0, stream);
+		ssgm_->execute(lbw_.data, rbw_.data, disp_int_.data, P2_map_.data, nullptr, 0, config()->value("min_disp", 0), stream);
 	}
 
 	// GpuMat left_pixels(dispt_, cv::Rect(0, 0, max_disp_, dispt_.rows));
diff --git a/lib/libsgm/include/libsgm.h b/lib/libsgm/include/libsgm.h
index 279b3e5af2557fc2cbb93e660c96edfca6c1a006..bf2c58ea759181d3294d9cb717fd7e65b2b6d60a 100644
--- a/lib/libsgm/include/libsgm.h
+++ b/lib/libsgm/include/libsgm.h
@@ -114,13 +114,13 @@ namespace sgm {
 		* The element_type is uint8_t for output_depth_bits == 8 and uint16_t for output_depth_bits == 16.
 		* Note that dst element values are multiplied by StereoSGM::SUBPIXEL_SCALE if the subpixel option is enabled.
 		*/
-		LIBSGM_API void execute(const void* left_pixels, const void* right_pixels, void* dst, const uint8_t *P2, const uint8_t *weights, int weights_pitch, cudaStream_t stream);
+		LIBSGM_API void execute(const void* left_pixels, const void* right_pixels, void* dst, const uint8_t *P2, const uint8_t *weights, int weights_pitch, int min_disp, cudaStream_t stream);
 
 		/**
 		 * Same as execute(left_pixels, right_pixels, dst) with image size parameters.
 		 * Dimensions must be smaller than or equal to the dimensions provided in the constructor.
 		 */
-		LIBSGM_API void execute(const void* left_pixels, const void* right_pixels, void* dst, const int width, const int height, const int src_pitch, const int dst_pitch, const uint8_t *P2, const uint8_t *weights, int weights_pitch, cudaStream_t stream);
+		LIBSGM_API void execute(const void* left_pixels, const void* right_pixels, void* dst, const int width, const int height, const int src_pitch, const int dst_pitch, const uint8_t *P2, const uint8_t *weights, int weights_pitch, int min_disp, cudaStream_t stream);
 
 		/**
 		 * Mask for invalid pixels. Must have same shape and pitch as src. Pixels which have non-zero values
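
For reference, a minimal caller sketch for the patched overload declared above. The helper name and buffer arguments are hypothetical; the call itself follows the signature added in libsgm.h and the pattern used by FixstarsSGM::apply, where a null weights pointer with zero pitch disables per-pixel weighting.

#include <libsgm.h>
#include <cstdint>
#include <cuda_runtime.h>

// Hypothetical helper: the StereoSGM instance and the device buffers are
// assumed to be created elsewhere with matching dimensions and pitches.
void compute_disparity(sgm::StereoSGM &ssgm,
                       const uint8_t *d_left, const uint8_t *d_right,
                       uint16_t *d_disp, const uint8_t *d_P2_map,
                       int min_disp, cudaStream_t stream) {
	// Pass nullptr/0 for the weights, as FixstarsSGM does when no weight
	// map is supplied; min_disp shifts the disparity search window.
	ssgm.execute(d_left, d_right, d_disp, d_P2_map,
	             nullptr, 0, min_disp, stream);
}
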
diff --git a/lib/libsgm/src/horizontal_path_aggregation.cu b/lib/libsgm/src/horizontal_path_aggregation.cu
index 5eba5372c3d67e9396aaf9569c8e7240c0c98759..acd53e1147746dc64b2d7fec7cf6da5e9f2c3883 100644
--- a/lib/libsgm/src/horizontal_path_aggregation.cu
+++ b/lib/libsgm/src/horizontal_path_aggregation.cu
@@ -39,7 +39,8 @@ __global__ void aggregate_horizontal_path_kernel(
 	const uint8_t* __restrict__ p2,
 	int p2_pitch,
 	const uint8_t* __restrict__ w,
-	int w_pitch)
+	int w_pitch,
+	int min_disp)
 {
 	static const unsigned int SUBGROUP_SIZE = MAX_DISPARITY / DP_BLOCK_SIZE;
 	static const unsigned int SUBGROUPS_PER_WARP = WARP_SIZE / SUBGROUP_SIZE;
@@ -87,7 +88,7 @@ __global__ void aggregate_horizontal_path_kernel(
 	}else{
 		for(unsigned int i = 0; i < DP_BLOCKS_PER_THREAD; ++i){
 			for(unsigned int j = 0; j < DP_BLOCK_SIZE; ++j){
-				const int x = static_cast<int>(width - (j + dp_offset));
+				const int x = static_cast<int>(width - min_disp - (j + dp_offset));
 				if(0 <= x && x < static_cast<int>(width)){
 					right_buffer[i][j] = __ldg(&right[i * feature_step + x]);
 				}else{
@@ -171,6 +172,7 @@ void enqueue_aggregate_left2right_path(
 	int p2_pitch,
 	const uint8_t* w,
 	int w_pitch,
+	int min_disp,
 	cudaStream_t stream)
 {
 	static const unsigned int SUBGROUP_SIZE = MAX_DISPARITY / DP_BLOCK_SIZE;
@@ -180,7 +182,7 @@ void enqueue_aggregate_left2right_path(
 	const int gdim = (height + PATHS_PER_BLOCK - 1) / PATHS_PER_BLOCK;
 	const int bdim = BLOCK_SIZE;
 	aggregate_horizontal_path_kernel<1, MAX_DISPARITY><<<gdim, bdim, 0, stream>>>(
-		dest, left, right, width, height, p1, p2, p2_pitch, w, w_pitch);
+		dest, left, right, width, height, p1, p2, p2_pitch, w, w_pitch, min_disp);
 }
 
 template <unsigned int MAX_DISPARITY>
@@ -195,6 +197,7 @@ void enqueue_aggregate_right2left_path(
 	int p2_pitch,
 	const uint8_t* w,
 	int w_pitch,
+	int min_disp,
 	cudaStream_t stream)
 {
 	static const unsigned int SUBGROUP_SIZE = MAX_DISPARITY / DP_BLOCK_SIZE;
@@ -204,7 +207,7 @@ void enqueue_aggregate_right2left_path(
 	const int gdim = (height + PATHS_PER_BLOCK - 1) / PATHS_PER_BLOCK;
 	const int bdim = BLOCK_SIZE;
 	aggregate_horizontal_path_kernel<-1, MAX_DISPARITY><<<gdim, bdim, 0, stream>>>(
-		dest, left, right, width, height, p1, p2, p2_pitch, w, w_pitch);
+		dest, left, right, width, height, p1, p2, p2_pitch, w, w_pitch, min_disp);
 }
 
 
@@ -219,6 +222,7 @@ template void enqueue_aggregate_left2right_path<64u>(
 	int p2_pitch,
 	const uint8_t* w,
 	int w_pitch,
+	int min_disp,
 	cudaStream_t stream);
 
 template void enqueue_aggregate_left2right_path<128u>(
@@ -232,6 +236,7 @@ template void enqueue_aggregate_left2right_path<128u>(
 	int p2_pitch,
 	const uint8_t* w,
 	int w_pitch,
+	int min_disp,
 	cudaStream_t stream);
 
 template void enqueue_aggregate_left2right_path<256u>(
@@ -245,6 +250,7 @@ template void enqueue_aggregate_left2right_path<256u>(
 	int p2_pitch,
 	const uint8_t* w,
 	int w_pitch,
+	int min_disp,
 	cudaStream_t stream);
 
 template void enqueue_aggregate_right2left_path<64u>(
@@ -258,6 +264,7 @@ template void enqueue_aggregate_right2left_path<64u>(
 	int p2_pitch,
 	const uint8_t* w,
 	int w_pitch,
+	int min_disp,
 	cudaStream_t stream);
 
 template void enqueue_aggregate_right2left_path<128u>(
@@ -271,6 +278,7 @@ template void enqueue_aggregate_right2left_path<128u>(
 	int p2_pitch,
 	const uint8_t* w,
 	int w_pitch,
+	int min_disp,
 	cudaStream_t stream);
 
 template void enqueue_aggregate_right2left_path<256u>(
@@ -284,6 +292,7 @@ template void enqueue_aggregate_right2left_path<256u>(
 	int p2_pitch,
 	const uint8_t* w,
 	int w_pitch,
+	int min_disp,
 	cudaStream_t stream);
 
 }
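
The effect of the min_disp shift in the horizontal kernel above (the vertical and oblique kernels below apply the same shift) can be summarised with a small CPU sketch. This is illustrative only: a plain absolute difference stands in for the census/Hamming cost that libsgm actually aggregates. For a left pixel at column x and candidate index d, the right image is now sampled at x - d - min_disp, so index d corresponds to a true disparity of d + min_disp and the search window becomes [min_disp, min_disp + MAX_DISPARITY).

#include <cstdint>
#include <cstdlib>
#include <vector>

// CPU sketch of the indexing convention only (not part of libsgm).
std::vector<int> cost_slice(const std::vector<uint8_t> &left_row,
                            const std::vector<uint8_t> &right_row,
                            int x, int max_disparity, int min_disp) {
	const int width = static_cast<int>(left_row.size());
	std::vector<int> cost(max_disparity);
	for (int d = 0; d < max_disparity; ++d) {
		const int rx = x - d - min_disp;  // shifted right-image column
		// Out-of-range columns fall back to 0, mirroring the kernels.
		const int r = (0 <= rx && rx < width) ? right_row[rx] : 0;
		cost[d] = std::abs(static_cast<int>(left_row[x]) - r);
	}
	return cost;
}
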
diff --git a/lib/libsgm/src/horizontal_path_aggregation.hpp b/lib/libsgm/src/horizontal_path_aggregation.hpp
index 1b7a7e088f5d4dac793694ba7f4a0a876de2538b..4626dd68f91909b2aab893fea906df5586acef72 100644
--- a/lib/libsgm/src/horizontal_path_aggregation.hpp
+++ b/lib/libsgm/src/horizontal_path_aggregation.hpp
@@ -34,6 +34,7 @@ void enqueue_aggregate_left2right_path(
 	int p2_pitch,
 	const uint8_t* w,
 	int w_pitch,
+	int min_disp,
 	cudaStream_t stream);
 
 template <unsigned int MAX_DISPARITY>
@@ -48,6 +49,7 @@ void enqueue_aggregate_right2left_path(
 	int p2_pitch,
 	const uint8_t* w,
 	int w_pitch,
+	int min_disp,
 	cudaStream_t stream);
 
 }
diff --git a/lib/libsgm/src/oblique_path_aggregation.cu b/lib/libsgm/src/oblique_path_aggregation.cu
index 97d9b1493c32e54a7bb1195f9a9261243b801444..64dd9f947922bfbb250e6d47a4cdbfbf88de90fb 100644
--- a/lib/libsgm/src/oblique_path_aggregation.cu
+++ b/lib/libsgm/src/oblique_path_aggregation.cu
@@ -35,7 +35,8 @@ __global__ void aggregate_oblique_path_kernel(
 	const uint8_t* __restrict__ p2,
 	int p2_pitch,
 	const uint8_t* __restrict__ w,
-	int w_pitch)
+	int w_pitch,
+	int min_disp)
 {
 	static const unsigned int SUBGROUP_SIZE = MAX_DISPARITY / DP_BLOCK_SIZE;
 	static const unsigned int PATHS_PER_WARP = WARP_SIZE / SUBGROUP_SIZE;
@@ -77,7 +78,7 @@ __global__ void aggregate_oblique_path_kernel(
 	for(unsigned int iter = 0; iter < height; ++iter){
 		const int y = static_cast<int>(Y_DIRECTION > 0 ? iter : height - 1 - iter);
 		const int x = x0 + static_cast<int>(iter) * X_DIRECTION;
-		const int right_x0 = right_x00 + static_cast<int>(iter) * X_DIRECTION;
+		const int right_x0 = right_x00 + static_cast<int>(iter) * X_DIRECTION - min_disp;
 		// Load right to smem
 		for(unsigned int i0 = 0; i0 < RIGHT_BUFFER_SIZE; i0 += BLOCK_SIZE){
 			const unsigned int i = i0 + threadIdx.x;
@@ -129,6 +130,7 @@ void enqueue_aggregate_upleft2downright_path(
 	int p2_pitch,
 	const uint8_t* w,
 	int w_pitch,
+	int min_disp,
 	cudaStream_t stream)
 {
 	static const unsigned int SUBGROUP_SIZE = MAX_DISPARITY / DP_BLOCK_SIZE;
@@ -137,7 +139,7 @@ void enqueue_aggregate_upleft2downright_path(
 	const int gdim = (width + height + PATHS_PER_BLOCK - 2) / PATHS_PER_BLOCK;
 	const int bdim = BLOCK_SIZE;
 	aggregate_oblique_path_kernel<1, 1, MAX_DISPARITY><<<gdim, bdim, 0, stream>>>(
-		dest, left, right, width, height, p1, p2, p2_pitch, w, w_pitch);
+		dest, left, right, width, height, p1, p2, p2_pitch, w, w_pitch, min_disp);
 }
 
 template <unsigned int MAX_DISPARITY>
@@ -152,6 +154,7 @@ void enqueue_aggregate_upright2downleft_path(
 	int p2_pitch,
 	const uint8_t* w,
 	int w_pitch,
+	int min_disp,
 	cudaStream_t stream)
 {
 	static const unsigned int SUBGROUP_SIZE = MAX_DISPARITY / DP_BLOCK_SIZE;
@@ -160,7 +163,7 @@ void enqueue_aggregate_upright2downleft_path(
 	const int gdim = (width + height + PATHS_PER_BLOCK - 2) / PATHS_PER_BLOCK;
 	const int bdim = BLOCK_SIZE;
 	aggregate_oblique_path_kernel<-1, 1, MAX_DISPARITY><<<gdim, bdim, 0, stream>>>(
-		dest, left, right, width, height, p1, p2, p2_pitch, w, w_pitch);
+		dest, left, right, width, height, p1, p2, p2_pitch, w, w_pitch, min_disp);
 }
 
 template <unsigned int MAX_DISPARITY>
@@ -175,6 +178,7 @@ void enqueue_aggregate_downright2upleft_path(
 	int p2_pitch,
 	const uint8_t* w,
 	int w_pitch,
+	int min_disp,
 	cudaStream_t stream)
 {
 	static const unsigned int SUBGROUP_SIZE = MAX_DISPARITY / DP_BLOCK_SIZE;
@@ -183,7 +187,7 @@ void enqueue_aggregate_downright2upleft_path(
 	const int gdim = (width + height + PATHS_PER_BLOCK - 2) / PATHS_PER_BLOCK;
 	const int bdim = BLOCK_SIZE;
 	aggregate_oblique_path_kernel<-1, -1, MAX_DISPARITY><<<gdim, bdim, 0, stream>>>(
-		dest, left, right, width, height, p1, p2, p2_pitch, w, w_pitch);
+		dest, left, right, width, height, p1, p2, p2_pitch, w, w_pitch, min_disp);
 }
 
 template <unsigned int MAX_DISPARITY>
@@ -198,6 +202,7 @@ void enqueue_aggregate_downleft2upright_path(
 	int p2_pitch,
 	const uint8_t* w,
 	int w_pitch,
+	int min_disp,
 	cudaStream_t stream)
 {
 	static const unsigned int SUBGROUP_SIZE = MAX_DISPARITY / DP_BLOCK_SIZE;
@@ -206,7 +211,7 @@ void enqueue_aggregate_downleft2upright_path(
 	const int gdim = (width + height + PATHS_PER_BLOCK - 2) / PATHS_PER_BLOCK;
 	const int bdim = BLOCK_SIZE;
 	aggregate_oblique_path_kernel<1, -1, MAX_DISPARITY><<<gdim, bdim, 0, stream>>>(
-		dest, left, right, width, height, p1, p2, p2_pitch, w, w_pitch);
+		dest, left, right, width, height, p1, p2, p2_pitch, w, w_pitch, min_disp);
 }
 
 
@@ -221,6 +226,7 @@ template void enqueue_aggregate_upleft2downright_path<64u>(
 	int p2_pitch,
 	const uint8_t* w,
 	int w_pitch,
+	int min_disp,
 	cudaStream_t stream);
 
 template void enqueue_aggregate_upleft2downright_path<128u>(
@@ -234,6 +240,7 @@ template void enqueue_aggregate_upleft2downright_path<128u>(
 	int p2_pitch,
 	const uint8_t* w,
 	int w_pitch,
+	int min_disp,
 	cudaStream_t stream);
 
 template void enqueue_aggregate_upleft2downright_path<256u>(
@@ -247,6 +254,7 @@ template void enqueue_aggregate_upleft2downright_path<256u>(
 	int p2_pitch,
 	const uint8_t* w,
 	int w_pitch,
+	int min_disp,
 	cudaStream_t stream);
 
 template void enqueue_aggregate_upright2downleft_path<64u>(
@@ -260,6 +268,7 @@ template void enqueue_aggregate_upright2downleft_path<64u>(
 	int p2_pitch,
 	const uint8_t* w,
 	int w_pitch,
+	int min_disp,
 	cudaStream_t stream);
 
 template void enqueue_aggregate_upright2downleft_path<128u>(
@@ -273,6 +282,7 @@ template void enqueue_aggregate_upright2downleft_path<128u>(
 	int p2_pitch,
 	const uint8_t* w,
 	int w_pitch,
+	int min_disp,
 	cudaStream_t stream);
 
 template void enqueue_aggregate_upright2downleft_path<256u>(
@@ -286,6 +296,7 @@ template void enqueue_aggregate_upright2downleft_path<256u>(
 	int p2_pitch,
 	const uint8_t* w,
 	int w_pitch,
+	int min_disp,
 	cudaStream_t stream);
 
 template void enqueue_aggregate_downright2upleft_path<64u>(
@@ -299,6 +310,7 @@ template void enqueue_aggregate_downright2upleft_path<64u>(
 	int p2_pitch,
 	const uint8_t* w,
 	int w_pitch,
+	int min_disp,
 	cudaStream_t stream);
 
 template void enqueue_aggregate_downright2upleft_path<128u>(
@@ -312,6 +324,7 @@ template void enqueue_aggregate_downright2upleft_path<128u>(
 	int p2_pitch,
 	const uint8_t* w,
 	int w_pitch,
+	int min_disp,
 	cudaStream_t stream);
 
 template void enqueue_aggregate_downright2upleft_path<256u>(
@@ -325,6 +338,7 @@ template void enqueue_aggregate_downright2upleft_path<256u>(
 	int p2_pitch,
 	const uint8_t* w,
 	int w_pitch,
+	int min_disp,
 	cudaStream_t stream);
 
 template void enqueue_aggregate_downleft2upright_path<64u>(
@@ -338,6 +352,7 @@ template void enqueue_aggregate_downleft2upright_path<64u>(
 	int p2_pitch,
 	const uint8_t* w,
 	int w_pitch,
+	int min_disp,
 	cudaStream_t stream);
 
 template void enqueue_aggregate_downleft2upright_path<128u>(
@@ -351,6 +366,7 @@ template void enqueue_aggregate_downleft2upright_path<128u>(
 	int p2_pitch,
 	const uint8_t* w,
 	int w_pitch,
+	int min_disp,
 	cudaStream_t stream);
 
 template void enqueue_aggregate_downleft2upright_path<256u>(
@@ -364,6 +380,7 @@ template void enqueue_aggregate_downleft2upright_path<256u>(
 	int p2_pitch,
 	const uint8_t* w,
 	int w_pitch,
+	int min_disp,
 	cudaStream_t stream);
 
 }
diff --git a/lib/libsgm/src/oblique_path_aggregation.hpp b/lib/libsgm/src/oblique_path_aggregation.hpp
index 6504f37a2715ef64d7698b00afb5f6b0614b26af..882afed2affc344ea35bfda20c619e7521738ab3 100644
--- a/lib/libsgm/src/oblique_path_aggregation.hpp
+++ b/lib/libsgm/src/oblique_path_aggregation.hpp
@@ -34,6 +34,7 @@ void enqueue_aggregate_upleft2downright_path(
 	int p2_pitch,
 	const uint8_t* w,
 	int w_pitch,
+	int min_disp,
 	cudaStream_t stream);
 
 template <unsigned int MAX_DISPARITY>
@@ -48,6 +49,7 @@ void enqueue_aggregate_upright2downleft_path(
 	int p2_pitch,
 	const uint8_t* w,
 	int w_pitch,
+	int min_disp,
 	cudaStream_t stream);
 
 template <unsigned int MAX_DISPARITY>
@@ -62,6 +64,7 @@ void enqueue_aggregate_downright2upleft_path(
 	int p2_pitch,
 	const uint8_t* w,
 	int w_pitch,
+	int min_disp,
 	cudaStream_t stream);
 
 template <unsigned int MAX_DISPARITY>
@@ -76,6 +79,7 @@ void enqueue_aggregate_downleft2upright_path(
 	int p2_pitch,
 	const uint8_t* w,
 	int w_pitch,
+	int min_disp,
 	cudaStream_t stream);
 
 }
diff --git a/lib/libsgm/src/path_aggregation.cu b/lib/libsgm/src/path_aggregation.cu
index e5019ec3a67ddb1113ce2a7f1a873983359e5fb4..ed7cdbee7878328c49649a6cbc3c002e91dbdef7 100644
--- a/lib/libsgm/src/path_aggregation.cu
+++ b/lib/libsgm/src/path_aggregation.cu
@@ -53,6 +53,7 @@ void PathAggregation<MAX_DISPARITY>::enqueue(
 	int p2_pitch,
 	const uint8_t* w,
 	int w_pitch,
+	int min_disp,
 	cudaStream_t stream)
 {
 	const size_t buffer_size = width * height * MAX_DISPARITY * NUM_PATHS;
@@ -69,28 +70,28 @@ void PathAggregation<MAX_DISPARITY>::enqueue(
 
 	path_aggregation::enqueue_aggregate_up2down_path<MAX_DISPARITY>(
 		m_cost_buffer.data() + 0 * buffer_step,
-		left, right, width, height, p1, p2, p2_pitch, w, w_pitch, m_streams[0]);
+		left, right, width, height, p1, p2, p2_pitch, w, w_pitch, min_disp, m_streams[0]);
 	path_aggregation::enqueue_aggregate_down2up_path<MAX_DISPARITY>(
 		m_cost_buffer.data() + 1 * buffer_step,
-		left, right, width, height, p1, p2, p2_pitch, w, w_pitch, m_streams[1]);
+		left, right, width, height, p1, p2, p2_pitch, w, w_pitch, min_disp, m_streams[1]);
 	path_aggregation::enqueue_aggregate_left2right_path<MAX_DISPARITY>(
 		m_cost_buffer.data() + 2 * buffer_step,
-		left, right, width, height, p1, p2, p2_pitch, w, w_pitch, m_streams[2]);
+		left, right, width, height, p1, p2, p2_pitch, w, w_pitch, min_disp, m_streams[2]);
 	path_aggregation::enqueue_aggregate_right2left_path<MAX_DISPARITY>(
 		m_cost_buffer.data() + 3 * buffer_step,
-		left, right, width, height, p1, p2, p2_pitch, w, w_pitch, m_streams[3]);
+		left, right, width, height, p1, p2, p2_pitch, w, w_pitch, min_disp, m_streams[3]);
 	path_aggregation::enqueue_aggregate_upleft2downright_path<MAX_DISPARITY>(
 		m_cost_buffer.data() + 4 * buffer_step,
-		left, right, width, height, p1, p2, p2_pitch, w, w_pitch, m_streams[4]);
+		left, right, width, height, p1, p2, p2_pitch, w, w_pitch, min_disp, m_streams[4]);
 	path_aggregation::enqueue_aggregate_upright2downleft_path<MAX_DISPARITY>(
 		m_cost_buffer.data() + 5 * buffer_step,
-		left, right, width, height, p1, p2, p2_pitch, w, w_pitch, m_streams[5]);
+		left, right, width, height, p1, p2, p2_pitch, w, w_pitch, min_disp, m_streams[5]);
 	path_aggregation::enqueue_aggregate_downright2upleft_path<MAX_DISPARITY>(
 		m_cost_buffer.data() + 6 * buffer_step,
-		left, right, width, height, p1, p2, p2_pitch, w, w_pitch, m_streams[6]);
+		left, right, width, height, p1, p2, p2_pitch, w, w_pitch, min_disp, m_streams[6]);
 	path_aggregation::enqueue_aggregate_downleft2upright_path<MAX_DISPARITY>(
 		m_cost_buffer.data() + 7 * buffer_step,
-		left, right, width, height, p1, p2, p2_pitch, w, w_pitch, m_streams[7]);
+		left, right, width, height, p1, p2, p2_pitch, w, w_pitch, min_disp, m_streams[7]);
 	for(unsigned int i = 0; i < NUM_PATHS; ++i){
 		cudaEventRecord(m_events[i], m_streams[i]);
 		cudaStreamWaitEvent(stream, m_events[i], 0);
diff --git a/lib/libsgm/src/path_aggregation.hpp b/lib/libsgm/src/path_aggregation.hpp
index 7df96996da46711f7ddcb76dcbc44132ca9f194c..c8abf740df09162ea65a01bc5bf0f041d2f2c6e7 100644
--- a/lib/libsgm/src/path_aggregation.hpp
+++ b/lib/libsgm/src/path_aggregation.hpp
@@ -51,6 +51,7 @@ public:
 		int p2_pitch,
 		const uint8_t* w,
 		int w_pitch,
+		int min_disp,
 		cudaStream_t stream);
 
 };
diff --git a/lib/libsgm/src/sgm.cu b/lib/libsgm/src/sgm.cu
index 2b350da9562962aa9e10b4554bb6b6c4517e1023..5a8b4aa70a0d1ee65b0839bb09198b5729d8f239 100644
--- a/lib/libsgm/src/sgm.cu
+++ b/lib/libsgm/src/sgm.cu
@@ -57,6 +57,7 @@ public:
 		int weights_pitch,
 		float uniqueness,
 		bool subpixel,
+		int min_disp,
 		cudaStream_t stream)
 	{
 		m_census_left.enqueue(
@@ -70,6 +71,7 @@ public:
 			penalty1, penalty2,
 			src_pitch, // bug?
 			weights, weights_pitch,
+			min_disp,
 			stream);
 		m_winner_takes_all.enqueue(
 			dest_left, dest_right,
@@ -106,6 +108,7 @@ void SemiGlobalMatching<T, MAX_DISPARITY>::execute(
 	int weights_pitch,
 	float uniqueness,
 	bool subpixel,
+	int min_disp,
 	cudaStream_t stream)
 {
 	m_impl->enqueue(
@@ -116,6 +119,7 @@ void SemiGlobalMatching<T, MAX_DISPARITY>::execute(
 		penalty1, penalty2,
 		weights, weights_pitch,
 		uniqueness, subpixel,
+		min_disp,
 		stream);
 	//cudaStreamSynchronize(0);
 }
@@ -136,6 +140,7 @@ void SemiGlobalMatching<T, MAX_DISPARITY>::enqueue(
 	int weights_pitch,
 	float uniqueness,
 	bool subpixel,
+	int min_disp,
 	cudaStream_t stream)
 {
 	m_impl->enqueue(
@@ -146,6 +151,7 @@ void SemiGlobalMatching<T, MAX_DISPARITY>::enqueue(
 		penalty1, penalty2,
 		weights, weights_pitch,
 		uniqueness, subpixel,
+		min_disp,
 		stream);
 }
 
diff --git a/lib/libsgm/src/sgm.hpp b/lib/libsgm/src/sgm.hpp
index f0c3c80ee107b95890d6dc8578c72768a70abdd4..9aa2cd387782465c3398aab5cda6a8b6a292018d 100644
--- a/lib/libsgm/src/sgm.hpp
+++ b/lib/libsgm/src/sgm.hpp
@@ -53,6 +53,7 @@ public:
 		int weights_pitch,
 		float uniqueness,
 		bool subpixel,
+		int min_disp,
 		cudaStream_t stream);
 
 	void enqueue(
@@ -70,6 +71,7 @@ public:
 		int weights_pitch,
 		float uniqueness,
 		bool subpixel,
+		int min_disp,
 		cudaStream_t stream);
 
 };
diff --git a/lib/libsgm/src/stereo_sgm.cpp b/lib/libsgm/src/stereo_sgm.cpp
index a07f2daea087e38f4102c0cb92bf7d1664655c3a..34f686fcb1d98f677a44675c3ef12408f230cda1 100644
--- a/lib/libsgm/src/stereo_sgm.cpp
+++ b/lib/libsgm/src/stereo_sgm.cpp
@@ -29,7 +29,7 @@ namespace sgm {
 	public:
 		using output_type = sgm::output_type;
 		virtual void execute(output_type* dst_L, output_type* dst_R, const void* src_L, const void* src_R,
-			int w, int h, int sp, int dp, unsigned int P1, const uint8_t *P2, const uint8_t *weights, int weights_pitch, float uniqueness, bool subpixel, cudaStream_t stream) = 0;
+			int w, int h, int sp, int dp, unsigned int P1, const uint8_t *P2, const uint8_t *weights, int weights_pitch, float uniqueness, bool subpixel, int min_disp, cudaStream_t stream) = 0;
 
 		virtual ~SemiGlobalMatchingBase() {}
 	};
@@ -38,9 +38,9 @@ namespace sgm {
 	class SemiGlobalMatchingImpl : public SemiGlobalMatchingBase {
 	public:
 		void execute(output_type* dst_L, output_type* dst_R, const void* src_L, const void* src_R,
-			int w, int h, int sp, int dp, unsigned int P1, const uint8_t *P2, const uint8_t *weights, int weights_pitch, float uniqueness, bool subpixel, cudaStream_t stream) override
+			int w, int h, int sp, int dp, unsigned int P1, const uint8_t *P2, const uint8_t *weights, int weights_pitch, float uniqueness, bool subpixel, int min_disp, cudaStream_t stream) override
 		{
-			sgm_engine_.execute(dst_L, dst_R, (const input_type*)src_L, (const input_type*)src_R, w, h, sp, dp, P1, P2, weights, weights_pitch, uniqueness, subpixel, stream);
+			sgm_engine_.execute(dst_L, dst_R, (const input_type*)src_L, (const input_type*)src_R, w, h, sp, dp, P1, P2, weights, weights_pitch, uniqueness, subpixel, min_disp, stream);
 		}
 	private:
 		SemiGlobalMatching<input_type, DISP_SIZE> sgm_engine_;
@@ -150,7 +150,7 @@ namespace sgm {
 	}
 
 	void StereoSGM::execute(const void* left_pixels, const void* right_pixels, void* dst, const int width, const int height, const int src_pitch, const int dst_pitch,
-		const uint8_t *P2, const uint8_t *weights, int weights_pitch, cudaStream_t stream) {
+		const uint8_t *P2, const uint8_t *weights, int weights_pitch, int min_disp, cudaStream_t stream) {
 
 		const void *d_input_left, *d_input_right;
 
@@ -174,7 +174,7 @@ namespace sgm {
 			d_left_disp = dst; // when there is no device-host copy or type conversion, use passed buffer
 
 		cu_res_->sgm_engine->execute((uint16_t*)d_tmp_left_disp, (uint16_t*)d_tmp_right_disp,
-			d_input_left, d_input_right, width, height, src_pitch, dst_pitch, param_.P1, P2, weights, weights_pitch, param_.uniqueness, param_.subpixel, stream);
+			d_input_left, d_input_right, width, height, src_pitch, dst_pitch, param_.P1, P2, weights, weights_pitch, param_.uniqueness, param_.subpixel, min_disp, stream);
 
 		sgm::details::median_filter((uint16_t*)d_tmp_left_disp, (uint16_t*)d_left_disp, width, height, dst_pitch, stream);
 		sgm::details::median_filter((uint16_t*)d_tmp_right_disp, (uint16_t*)d_right_disp, width, height, dst_pitch, stream);
@@ -198,8 +198,8 @@ namespace sgm {
 		}
 	}
 
-	void StereoSGM::execute(const void* left_pixels, const void* right_pixels, void* dst, const uint8_t *P2, const uint8_t *weights, int weights_pitch, cudaStream_t stream) {
-		execute(left_pixels, right_pixels, dst, width_, height_, src_pitch_, dst_pitch_, P2, weights, weights_pitch, stream);
+	void StereoSGM::execute(const void* left_pixels, const void* right_pixels, void* dst, const uint8_t *P2, const uint8_t *weights, int weights_pitch, int min_disp, cudaStream_t stream) {
+		execute(left_pixels, right_pixels, dst, width_, height_, src_pitch_, dst_pitch_, P2, weights, weights_pitch, min_disp, stream);
 	}
 
 	bool StereoSGM::updateParameters(const Parameters &params) {
diff --git a/lib/libsgm/src/vertical_path_aggregation.cu b/lib/libsgm/src/vertical_path_aggregation.cu
index 6fee96892cf038a0583c576688e8456e0c3ec7d7..9bf52977431228dca2b063a3c8201bbb380c9e3c 100644
--- a/lib/libsgm/src/vertical_path_aggregation.cu
+++ b/lib/libsgm/src/vertical_path_aggregation.cu
@@ -35,7 +35,8 @@ __global__ void aggregate_vertical_path_kernel(
 	const uint8_t* __restrict__ p2,
 	int p2_pitch,
 	const uint8_t* __restrict__ w,
-	int w_pitch)
+	int w_pitch,
+	int min_disp)
 {
 	static const unsigned int SUBGROUP_SIZE = MAX_DISPARITY / DP_BLOCK_SIZE;
 	static const unsigned int PATHS_PER_WARP = WARP_SIZE / SUBGROUP_SIZE;
@@ -81,7 +82,7 @@ __global__ void aggregate_vertical_path_kernel(
 		for(unsigned int i0 = 0; i0 < RIGHT_BUFFER_SIZE; i0 += BLOCK_SIZE){
 			const unsigned int i = i0 + threadIdx.x;
 			if(i < RIGHT_BUFFER_SIZE){
-				const int x = static_cast<int>(right_x0 + PATHS_PER_BLOCK - 1 - i);
+				const int x = static_cast<int>(right_x0 + PATHS_PER_BLOCK - 1 - i) - min_disp;
 				feature_type right_value = 0;
 				if(0 <= x && x < static_cast<int>(width)){
 					right_value = right[x + y * width];
@@ -126,6 +127,7 @@ void enqueue_aggregate_up2down_path(
 	int p2_pitch,
 	const uint8_t* w,
 	int w_pitch,
+	int min_disp,
 	cudaStream_t stream)
 {
 	static const unsigned int SUBGROUP_SIZE = MAX_DISPARITY / DP_BLOCK_SIZE;
@@ -134,7 +136,7 @@ void enqueue_aggregate_up2down_path(
 	const int gdim = (width + PATHS_PER_BLOCK - 1) / PATHS_PER_BLOCK;
 	const int bdim = BLOCK_SIZE;
 	aggregate_vertical_path_kernel<1, MAX_DISPARITY><<<gdim, bdim, 0, stream>>>(
-		dest, left, right, width, height, p1, p2, p2_pitch, w, w_pitch);
+		dest, left, right, width, height, p1, p2, p2_pitch, w, w_pitch, min_disp);
 }
 
 template <unsigned int MAX_DISPARITY>
@@ -149,6 +151,7 @@ void enqueue_aggregate_down2up_path(
 	int p2_pitch,
 	const uint8_t* w,
 	int w_pitch,
+	int min_disp,
 	cudaStream_t stream)
 {
 	static const unsigned int SUBGROUP_SIZE = MAX_DISPARITY / DP_BLOCK_SIZE;
@@ -157,7 +160,7 @@ void enqueue_aggregate_down2up_path(
 	const int gdim = (width + PATHS_PER_BLOCK - 1) / PATHS_PER_BLOCK;
 	const int bdim = BLOCK_SIZE;
 	aggregate_vertical_path_kernel<-1, MAX_DISPARITY><<<gdim, bdim, 0, stream>>>(
-		dest, left, right, width, height, p1, p2, p2_pitch, w, w_pitch);
+		dest, left, right, width, height, p1, p2, p2_pitch, w, w_pitch, min_disp);
 }
 
 
@@ -172,6 +175,7 @@ template void enqueue_aggregate_up2down_path<64u>(
 	int p2_pitch,
 	const uint8_t* w,
 	int w_pitch,
+	int min_disp,
 	cudaStream_t stream);
 
 template void enqueue_aggregate_up2down_path<128u>(
@@ -185,6 +189,7 @@ template void enqueue_aggregate_up2down_path<128u>(
 	int p2_pitch,
 	const uint8_t* w,
 	int w_pitch,
+	int min_disp,
 	cudaStream_t stream);
 
 template void enqueue_aggregate_up2down_path<256u>(
@@ -198,6 +203,7 @@ template void enqueue_aggregate_up2down_path<256u>(
 	int p2_pitch,
 	const uint8_t* w,
 	int w_pitch,
+	int min_disp,
 	cudaStream_t stream);
 
 template void enqueue_aggregate_down2up_path<64u>(
@@ -211,6 +217,7 @@ template void enqueue_aggregate_down2up_path<64u>(
 	int p2_pitch,
 	const uint8_t* w,
 	int w_pitch,
+	int min_disp,
 	cudaStream_t stream);
 
 template void enqueue_aggregate_down2up_path<128u>(
@@ -224,6 +231,7 @@ template void enqueue_aggregate_down2up_path<128u>(
 	int p2_pitch,
 	const uint8_t* w,
 	int w_pitch,
+	int min_disp,
 	cudaStream_t stream);
 
 template void enqueue_aggregate_down2up_path<256u>(
@@ -237,6 +245,7 @@ template void enqueue_aggregate_down2up_path<256u>(
 	int p2_pitch,
 	const uint8_t* w,
 	int w_pitch,
+	int min_disp,
 	cudaStream_t stream);
 
 }
diff --git a/lib/libsgm/src/vertical_path_aggregation.hpp b/lib/libsgm/src/vertical_path_aggregation.hpp
index 434dd9231614b821ae1dfcbb439e648a9dd10b9a..18240cf264862e8d7417200c51d1e8584ec49d43 100644
--- a/lib/libsgm/src/vertical_path_aggregation.hpp
+++ b/lib/libsgm/src/vertical_path_aggregation.hpp
@@ -34,6 +34,7 @@ void enqueue_aggregate_up2down_path(
 	int p2_pitch,
 	const uint8_t* w,
 	int w_pitch,
+	int min_disp,
 	cudaStream_t stream);
 
 template <unsigned int MAX_DISPARITY>
@@ -48,6 +49,7 @@ void enqueue_aggregate_down2up_path(
 	int p2_pitch,
 	const uint8_t* w,
 	int w_pitch,
+	int min_disp,
 	cudaStream_t stream);
 
 }