diff --git a/lib/libstereo/middlebury/algorithms.hpp b/lib/libstereo/middlebury/algorithms.hpp
index 8cf989142c135515461f2105d137b567f4983dca..a85b5622d5733d8e0629fd6bb5a65b702f9bdf3a 100644
--- a/lib/libstereo/middlebury/algorithms.hpp
+++ b/lib/libstereo/middlebury/algorithms.hpp
@@ -75,7 +75,7 @@ namespace Impl {
 			stereo.params.P2 = P2;
 			stereo.params.subpixel = subpixel;
 			stereo.params.lr_consistency = lr_consistency;
-
+			stereo.params.wsize = 9;
 			stereo.params.d_min = data.calib.vmin;
 			stereo.params.d_max = data.calib.vmax;
 			stereo.params.debug = false;
diff --git a/lib/libstereo/src/algorithms/stablesgm.cu b/lib/libstereo/src/algorithms/stablesgm.cu
index 335ee20d8495eb15e87ff2bc68cfb462ae027b93..3aeb33a528e483d4bc5885f863bcb78549faa9d9 100644
--- a/lib/libstereo/src/algorithms/stablesgm.cu
+++ b/lib/libstereo/src/algorithms/stablesgm.cu
@@ -25,7 +25,7 @@ void StereoStableSgm::compute(cv::InputArray l, cv::InputArray r, cv::OutputArra
 
 	mat2gray(l, impl_->l);
 	mat2gray(r, impl_->r);
-	impl_->cost.generateFilterMask(params.wsize, 127); // hardcoded in implementation
+	impl_->cost.generateFilterMask(params.wsize, 16); // bit count must match the Stable<16> template argument hardcoded in StableMatchingCost::set()
 	impl_->cost.set(impl_->l, impl_->r);
 
 	cudaSafeCall(cudaDeviceSynchronize());
diff --git a/lib/libstereo/src/costs/stable.cu b/lib/libstereo/src/costs/stable.cu
index 21db97678804887c4d8f3de469045c9eaab3603f..845f99147efd2b6803da275b5640cbd73de73b09 100644
--- a/lib/libstereo/src/costs/stable.cu
+++ b/lib/libstereo/src/costs/stable.cu
@@ -93,8 +93,8 @@ void StableMatchingCost::generateFilterMask(const int wsize, const int bits) {
 }
 
 void StableMatchingCost::set(const Array2D<uchar> &l, const Array2D<uchar> &r) {
-	parallel2D<algorithms::Stable<127>>({l.data(), filter_mask_.data(), stable_l_.data(), wsize_, wsize_}, l.width, l.height);
-	parallel2D<algorithms::Stable<127>>({r.data(), filter_mask_.data(), stable_r_.data(), wsize_, wsize_}, r.width, r.height);
+	parallel2D<algorithms::Stable<16>>({l.data(), filter_mask_.data(), stable_l_.data(), wsize_, wsize_}, l.width, l.height);
+	parallel2D<algorithms::Stable<16>>({r.data(), filter_mask_.data(), stable_r_.data(), wsize_, wsize_}, r.width, r.height);
 }
 
 void StableMatchingCost::set(cv::InputArray l, cv::InputArray r) {
diff --git a/lib/libstereo/src/costs/stable.hpp b/lib/libstereo/src/costs/stable.hpp
index bf5ac2b147760c3f93332b80f632a4eea0e7a506..ef07929e4d8097dff4acf4471d1da7f733e30667 100644
--- a/lib/libstereo/src/costs/stable.hpp
+++ b/lib/libstereo/src/costs/stable.hpp
@@ -6,9 +6,9 @@
 /**
  * STABLE descriptor matching cost
  */
-class StableMatchingCost : public DSBase<impl::HammingCost<127>> {
+class StableMatchingCost : public DSBase<impl::HammingCost<16>> {
 public:
-	typedef impl::HammingCost<127> DataType;
+	typedef impl::HammingCost<16> DataType;
 	typedef DataType::Type Type;
 
 	StableMatchingCost() : DSBase<DataType>(0, 0, 0, 0) {};