diff --git a/lib/libstereo/middlebury/algorithms.hpp b/lib/libstereo/middlebury/algorithms.hpp
index 88d711e2a19ac6f6e635c0979f8837235e45cddf..b5bbca40782a9ccf3ca565040ff534d7b882fc74 100644
--- a/lib/libstereo/middlebury/algorithms.hpp
+++ b/lib/libstereo/middlebury/algorithms.hpp
@@ -67,7 +67,7 @@ namespace Impl {
 	};
 
 	struct StableSGM : public Algorithm {
-		StableSGM() { P1 = 8.0f; P2 = 24.0f; }
+		StableSGM() { P1 = 1.0f; P2 = 8.0f; }
 
 		virtual void run(const MiddleburyData &data, cv::Mat &disparity) override {
 			StereoStableSgm stereo;
@@ -84,7 +84,7 @@ namespace Impl {
 	};
 
 	struct HStableSGM : public Algorithm {
-		HStableSGM() { P1 = 36.0f; P2 = 96.0f; }
+		HStableSGM() { P1 = 3.0f; P2 = 24.0f; }
 
 		virtual void run(const MiddleburyData &data, cv::Mat &disparity) override {
 			StereoHStableSgm stereo;
@@ -92,7 +92,7 @@ namespace Impl {
 			stereo.params.P2 = P2;
 			stereo.params.subpixel = subpixel;
 			stereo.params.lr_consistency = lr_consistency;
-			stereo.params.wsize = 9;
+			stereo.params.wsize = 7;
 			stereo.params.d_min = data.calib.vmin;
 			stereo.params.d_max = data.calib.vmax;
 			stereo.params.debug = false;
diff --git a/lib/libstereo/src/algorithms/hstablesgm.cu b/lib/libstereo/src/algorithms/hstablesgm.cu
index 870277b61cb229242aaaaa00f8a1a9f207a3ffdc..f7e9732ea8e331e558ad33333e799f4507ed3808 100644
--- a/lib/libstereo/src/algorithms/hstablesgm.cu
+++ b/lib/libstereo/src/algorithms/hstablesgm.cu
@@ -112,11 +112,11 @@ void StereoHStableSgm::compute(cv::InputArray l, cv::InputArray r, cv::OutputArr
     cv::imshow("Var", tmp);
 
     impl_->cost_fine.generateFilterMask(params.wsize, 16);
-    impl_->cost_medium.generateFilterMask(params.wsize, 16);
-    impl_->cost_coarse.generateFilterMask(params.wsize, 16);
+    impl_->cost_medium.setFilter(impl_->cost_fine.getFilter());
+    impl_->cost_coarse.setFilter(impl_->cost_fine.getFilter());
     impl_->cost_fine.set(impl_->l, impl_->r);
-    impl_->cost_medium.set(medium_l, medium_r);
-    impl_->cost_coarse.set(coarse_l, coarse_r);
+    impl_->cost_medium.set(medium_l, medium_r, l.cols(), l.rows());
+    impl_->cost_coarse.set(coarse_l, coarse_r, l.cols(), l.rows());
     impl_->cost.set();
 	impl_->compute(disparity);
 }
diff --git a/lib/libstereo/src/costs/stable.cu b/lib/libstereo/src/costs/stable.cu
index 845f99147efd2b6803da275b5640cbd73de73b09..25111afb3b2004b555925de6fecb0ecda0cf517c 100644
--- a/lib/libstereo/src/costs/stable.cu
+++ b/lib/libstereo/src/costs/stable.cu
@@ -19,7 +19,7 @@ namespace algorithms {
 			uint16_t i = 0;
 			for (int wy = -WINY/2; wy <= WINY/2; wy++) {
 				for (int wx = -WINX/2; wx <= WINX/2; wx++) {
-					const int16_t value = im(y + wy, x + wx);
+					const int16_t value = im(min(height - 1, max(0, int(float(y)*scaleY) + wy)), min(width - 1, max(0, int(float(x)*scaleX) + wx)));  // scaled sample position, clamped to [0, height-1] x [0, width-1]; an index equal to height/width is one past the last row/column
 					const int16_t filter = filter_mask(0, i++);
 					const int16_t sign = filter > 0 ? 1 : -1;
 					// NOTE: indexing starts from 1
@@ -54,6 +54,10 @@ namespace algorithms {
 
 		const int WINX;
 		const int WINY;
+		float scaleX;
+		float scaleY;
+		const int width;
+		const int height;
 
 		// number of uint64_t values for each window
 		const int WSTEP = (NBITS - 1)/(sizeof(uint64_t)*8) + 1;
@@ -93,8 +97,15 @@ void StableMatchingCost::generateFilterMask(const int wsize, const int bits) {
 }
 
 void StableMatchingCost::set(const Array2D<uchar> &l, const Array2D<uchar> &r) {
-	parallel2D<algorithms::Stable<16>>({l.data(), filter_mask_.data(), stable_l_.data(), wsize_, wsize_}, l.width, l.height);
-	parallel2D<algorithms::Stable<16>>({r.data(), filter_mask_.data(), stable_r_.data(), wsize_, wsize_}, r.width, r.height);
+	parallel2D<algorithms::Stable<16>>({l.data(), filter_mask_.data(), stable_l_.data(), wsize_, wsize_, 1.0f, 1.0f, l.width, l.height}, l.width, l.height);  // left image -> stable_l_; scaleX = scaleY = 1, so sample coordinates equal the launch coordinates
+	parallel2D<algorithms::Stable<16>>({r.data(), filter_mask_.data(), stable_r_.data(), wsize_, wsize_, 1.0f, 1.0f, r.width, r.height}, r.width, r.height);  // right image -> stable_r_; same filter mask and window size as the left
+}
+
+void StableMatchingCost::set(const Array2D<uchar> &l, const Array2D<uchar> &r, size_t w, size_t h) {  // w, h: extent passed to parallel2D; inputs are read at scaled coordinates
+	const float sx = static_cast<float>(l.width) / static_cast<float>(w);   // horizontal ratio: left input width over w
+	const float sy = static_cast<float>(l.height) / static_cast<float>(h);  // vertical ratio: left input height over h
+	parallel2D<algorithms::Stable<16>>({l.data(), filter_mask_.data(), stable_l_.data(), wsize_, wsize_, sx, sy, l.width, l.height}, w, h);
+	parallel2D<algorithms::Stable<16>>({r.data(), filter_mask_.data(), stable_r_.data(), wsize_, wsize_, sx, sy, r.width, r.height}, w, h);
 }
 
 void StableMatchingCost::set(cv::InputArray l, cv::InputArray r) {
diff --git a/lib/libstereo/src/costs/stable.hpp b/lib/libstereo/src/costs/stable.hpp
index ef07929e4d8097dff4acf4471d1da7f733e30667..7da102753505e668ec2e6dccd08af15cf92d8c24 100644
--- a/lib/libstereo/src/costs/stable.hpp
+++ b/lib/libstereo/src/costs/stable.hpp
@@ -25,8 +25,12 @@ public:
 	void generateFilterMask(const int wsize, const int bits);
 	void set(cv::InputArray l, cv::InputArray r);
 	void set(const Array2D<uchar>& l, const Array2D<uchar>& r);
+	void set(const Array2D<uchar>& l, const Array2D<uchar>& r, size_t w, size_t h);
 	static constexpr Type COST_MAX = DataType::COST_MAX;
 
+	Array2D<int16_t> &getFilter() { return filter_mask_; }  // non-const reference to this object's filter mask (e.g. to hand it to another instance via setFilter)
+	void setFilter(const Array2D<int16_t> &f) { filter_mask_ = f; }  // NOTE(review): replaces only filter_mask_; wsize_ (used as the window size by set()) is not updated here — confirm it matches the mask
+
 protected:
 	int wsize_;
 	Array2D<int16_t> filter_mask_;