diff --git a/components/operators/src/disparity/fixstars_sgm.cpp b/components/operators/src/disparity/fixstars_sgm.cpp
index 453b0b0fe5ad09fcbf37435ede3b5aa11f829abb..9a904f9c7b1fa35a92f92e20b7880a09304a7ab1 100644
--- a/components/operators/src/disparity/fixstars_sgm.cpp
+++ b/components/operators/src/disparity/fixstars_sgm.cpp
@@ -62,7 +62,7 @@ FixstarsSGM::FixstarsSGM(ftl::operators::Graph *g, ftl::Configurable* cfg) :
 	uniqueness_ = cfg->value("uniqueness", 0.95f);
 	P1_ = cfg->value("P1", 10);
 	P2_ = cfg->value("P2", 120);
-	max_disp_ = cfg->value("max_disp", 128);
+	max_disp_ = cfg->value("max_disp", 192);
 
 	if (uniqueness_ < 0.0 || uniqueness_ > 1.0) {
 		uniqueness_ = 1.0;
@@ -79,7 +79,7 @@ FixstarsSGM::FixstarsSGM(ftl::operators::Graph *g, ftl::Configurable* cfg) :
 		LOG(WARNING) << "Invalid value for P2, using value of P1 (" << P1_ << ")";
 	}
 
-	if (!(max_disp_ == 256 || max_disp_ == 128)) {
+	if (!(max_disp_ == 256 || max_disp_ == 128 || max_disp_ == 192)) {
 		max_disp_ = 256;
 		LOG(WARNING) << "Invalid value for max_disp, using default value (256)";
 	}
diff --git a/lib/libsgm/src/horizontal_path_aggregation.cu b/lib/libsgm/src/horizontal_path_aggregation.cu
index 11a94bc07081bbea4d29967b5390d9037c7319e7..8047354b4a5ac07d7a6a2087caa876bd420b5422 100644
--- a/lib/libsgm/src/horizontal_path_aggregation.cu
+++ b/lib/libsgm/src/horizontal_path_aggregation.cu
@@ -239,6 +239,20 @@ template void enqueue_aggregate_left2right_path<128u>(
 	int min_disp,
 	cudaStream_t stream);
 
+template void enqueue_aggregate_left2right_path<192u>(
+	cost_type *dest,
+	const feature_type *left,
+	const feature_type *right,
+	int width,
+	int height,
+	unsigned int p1,
+	const uint8_t *p2,
+	int p2_pitch,
+	const uint8_t* w,
+	int w_pitch,
+	int min_disp,
+	cudaStream_t stream);
+
 template void enqueue_aggregate_left2right_path<256u>(
 	cost_type *dest,
 	const feature_type *left,
@@ -281,6 +295,20 @@ template void enqueue_aggregate_right2left_path<128u>(
 	int min_disp,
 	cudaStream_t stream);
 
+template void enqueue_aggregate_right2left_path<192u>(
+	cost_type *dest,
+	const feature_type *left,
+	const feature_type *right,
+	int width,
+	int height,
+	unsigned int p1,
+	const uint8_t *p2,
+	int p2_pitch,
+	const uint8_t* w,
+	int w_pitch,
+	int min_disp,
+	cudaStream_t stream);
+
 template void enqueue_aggregate_right2left_path<256u>(
 	cost_type *dest,
 	const feature_type *left,
diff --git a/lib/libsgm/src/oblique_path_aggregation.cu b/lib/libsgm/src/oblique_path_aggregation.cu
index 64dd9f947922bfbb250e6d47a4cdbfbf88de90fb..3734ebae8922a370ea51165c431bc6396b30f51b 100644
--- a/lib/libsgm/src/oblique_path_aggregation.cu
+++ b/lib/libsgm/src/oblique_path_aggregation.cu
@@ -243,6 +243,20 @@ template void enqueue_aggregate_upleft2downright_path<128u>(
 	int min_disp,
 	cudaStream_t stream);
 
+template void enqueue_aggregate_upleft2downright_path<192u>(
+	cost_type *dest,
+	const feature_type *left,
+	const feature_type *right,
+	int width,
+	int height,
+	unsigned int p1,
+	const uint8_t *p2,
+	int p2_pitch,
+	const uint8_t* w,
+	int w_pitch,
+	int min_disp,
+	cudaStream_t stream);
+
 template void enqueue_aggregate_upleft2downright_path<256u>(
 	cost_type *dest,
 	const feature_type *left,
@@ -285,6 +299,20 @@ template void enqueue_aggregate_upright2downleft_path<128u>(
 	int min_disp,
 	cudaStream_t stream);
 
+template void enqueue_aggregate_upright2downleft_path<192u>(
+	cost_type *dest,
+	const feature_type *left,
+	const feature_type *right,
+	int width,
+	int height,
+	unsigned int p1,
+	const uint8_t *p2,
+	int p2_pitch,
+	const uint8_t* w,
+	int w_pitch,
+	int min_disp,
+	cudaStream_t stream);
+
 template void enqueue_aggregate_upright2downleft_path<256u>(
 	cost_type *dest,
 	const feature_type *left,
@@ -327,6 +355,20 @@ template void enqueue_aggregate_downright2upleft_path<128u>(
 	int min_disp,
 	cudaStream_t stream);
 
+template void enqueue_aggregate_downright2upleft_path<192u>(
+	cost_type *dest,
+	const feature_type *left,
+	const feature_type *right,
+	int width,
+	int height,
+	unsigned int p1,
+	const uint8_t *p2,
+	int p2_pitch,
+	const uint8_t* w,
+	int w_pitch,
+	int min_disp,
+	cudaStream_t stream);
+
 template void enqueue_aggregate_downright2upleft_path<256u>(
 	cost_type *dest,
 	const feature_type *left,
@@ -369,6 +411,20 @@ template void enqueue_aggregate_downleft2upright_path<128u>(
 	int min_disp,
 	cudaStream_t stream);
 
+template void enqueue_aggregate_downleft2upright_path<192u>(
+	cost_type *dest,
+	const feature_type *left,
+	const feature_type *right,
+	int width,
+	int height,
+	unsigned int p1,
+	const uint8_t *p2,
+	int p2_pitch,
+	const uint8_t* w,
+	int w_pitch,
+	int min_disp,
+	cudaStream_t stream);
+
 template void enqueue_aggregate_downleft2upright_path<256u>(
 	cost_type *dest,
 	const feature_type *left,
diff --git a/lib/libsgm/src/path_aggregation.cu b/lib/libsgm/src/path_aggregation.cu
index ed7cdbee7878328c49649a6cbc3c002e91dbdef7..5661d7bee452eb6d28486152c8dfa7bd75e5ecd1 100644
--- a/lib/libsgm/src/path_aggregation.cu
+++ b/lib/libsgm/src/path_aggregation.cu
@@ -101,6 +101,7 @@ void PathAggregation<MAX_DISPARITY>::enqueue(
 
 template class PathAggregation< 64>;
 template class PathAggregation<128>;
+template class PathAggregation<192>;
 template class PathAggregation<256>;
 
 }
diff --git a/lib/libsgm/src/path_aggregation_common.hpp b/lib/libsgm/src/path_aggregation_common.hpp
index c897684e7003512ccd0c9e17d7a084a39319766b..5211c98103c26a27af51fbd91111043e16858d23 100644
--- a/lib/libsgm/src/path_aggregation_common.hpp
+++ b/lib/libsgm/src/path_aggregation_common.hpp
@@ -31,9 +31,9 @@ struct DynamicProgramming {
 	static_assert(
 		DP_BLOCK_SIZE >= 2,
 		"DP_BLOCK_SIZE must be greater than or equal to 2");
-	static_assert(
+	/*static_assert(
 		(SUBGROUP_SIZE & (SUBGROUP_SIZE - 1)) == 0,
-		"SUBGROUP_SIZE must be a power of 2");
+		"SUBGROUP_SIZE must be a power of 2");*/
 
 	uint32_t last_min;
 	uint32_t dp[DP_BLOCK_SIZE];
diff --git a/lib/libsgm/src/sgm.cu b/lib/libsgm/src/sgm.cu
index b13d13e0734874628d469bcb02dd9772ef08c50a..eb5d4179e0a11b964e8238bbdcfac40598917396 100644
--- a/lib/libsgm/src/sgm.cu
+++ b/lib/libsgm/src/sgm.cu
@@ -158,6 +158,7 @@ void SemiGlobalMatching<T, MAX_DISPARITY>::enqueue(
 
 template class SemiGlobalMatching<uint8_t,   64>;
 template class SemiGlobalMatching<uint8_t,  128>;
+template class SemiGlobalMatching<uint8_t,  192>;
 template class SemiGlobalMatching<uint8_t,  256>;
 template class SemiGlobalMatching<uint16_t,  64>;
 template class SemiGlobalMatching<uint16_t, 128>;
diff --git a/lib/libsgm/src/stereo_sgm.cpp b/lib/libsgm/src/stereo_sgm.cpp
index 58ba52284b1caa5c3d50b3e85e084a91fb7cb9c3..70e1263147fbe3ab4eefb948d591e8506a3be40b 100644
--- a/lib/libsgm/src/stereo_sgm.cpp
+++ b/lib/libsgm/src/stereo_sgm.cpp
@@ -63,6 +63,8 @@ namespace sgm {
 				sgm_engine = new SemiGlobalMatchingImpl<uint8_t, 64>();
 			else if (input_depth_bits_ == 8 && disparity_size_ == 128)
 				sgm_engine = new SemiGlobalMatchingImpl<uint8_t, 128>();
+			else if (input_depth_bits_ == 8 && disparity_size_ == 192)
+				sgm_engine = new SemiGlobalMatchingImpl<uint8_t, 192>();
 			else if (input_depth_bits_ == 8 && disparity_size_ == 256)
 				sgm_engine = new SemiGlobalMatchingImpl<uint8_t, 256>();
 			else if (input_depth_bits_ == 16 && disparity_size_ == 64)
@@ -133,9 +135,9 @@ namespace sgm {
 			width_ = height_ = input_depth_bits_ = output_depth_bits_ = disparity_size_ = 0;
 			throw std::logic_error("depth bits must be 8 or 16");
 		}
-		if (disparity_size_ != 64 && disparity_size_ != 128 && disparity_size_ != 256) {
+		if (disparity_size_ != 64 && disparity_size_ != 128 && disparity_size_ != 192 && disparity_size_ != 256) {
 			width_ = height_ = input_depth_bits_ = output_depth_bits_ = disparity_size_ = 0;
-			throw std::logic_error("disparity size must be 64, 128 or 256");
+			throw std::logic_error("disparity size must be 64, 128, 192 or 256");
 		}
 		if (param.subpixel && output_depth_bits != 16) {
 			width_ = height_ = input_depth_bits_ = output_depth_bits_ = disparity_size_ = 0;
diff --git a/lib/libsgm/src/utility.hpp b/lib/libsgm/src/utility.hpp
index 17b9f45f5375e1756f581a2d7fd3e72e5f588d60..bbd5dc328a878c241c9ceb4b3a57ff3162239794 100644
--- a/lib/libsgm/src/utility.hpp
+++ b/lib/libsgm/src/utility.hpp
@@ -189,6 +189,14 @@ __device__ inline void load_uint16_vector<4u>(uint32_t *dest, const uint16_t *pt
 	dest[0] = uint16x4.x; dest[1] = uint16x4.y; dest[2] = uint16x4.z; dest[3] = uint16x4.w;
 }
 
+template <>
+__device__ inline void load_uint16_vector<6u>(uint32_t *dest, const uint16_t *ptr){
+	const auto uint32x3 = load_as<uint3>(ptr);
+	load_uint16_vector<2u>(dest + 0, reinterpret_cast<const uint16_t *>(&uint32x3.x));
+	load_uint16_vector<2u>(dest + 2, reinterpret_cast<const uint16_t *>(&uint32x3.y));
+	load_uint16_vector<2u>(dest + 4, reinterpret_cast<const uint16_t *>(&uint32x3.z));
+}
+
 template <>
 __device__ inline void load_uint16_vector<8u>(uint32_t *dest, const uint16_t *ptr){
 	const auto uint32x4 = load_as<uint4>(ptr);
diff --git a/lib/libsgm/src/vertical_path_aggregation.cu b/lib/libsgm/src/vertical_path_aggregation.cu
index 9bf52977431228dca2b063a3c8201bbb380c9e3c..7f705646039d5a223ca967ecebdcdd70bb6b9667 100644
--- a/lib/libsgm/src/vertical_path_aggregation.cu
+++ b/lib/libsgm/src/vertical_path_aggregation.cu
@@ -206,6 +206,20 @@ template void enqueue_aggregate_up2down_path<256u>(
 	int min_disp,
 	cudaStream_t stream);
 
+template void enqueue_aggregate_up2down_path<192u>(
+	cost_type *dest,
+	const feature_type *left,
+	const feature_type *right,
+	int width,
+	int height,
+	unsigned int p1,
+	const uint8_t *p2,
+	int p2_pitch,
+	const uint8_t* w,
+	int w_pitch,
+	int min_disp,
+	cudaStream_t stream);
+
 template void enqueue_aggregate_down2up_path<64u>(
 	cost_type *dest,
 	const feature_type *left,
@@ -234,6 +248,20 @@ template void enqueue_aggregate_down2up_path<128u>(
 	int min_disp,
 	cudaStream_t stream);
 
+template void enqueue_aggregate_down2up_path<192u>(
+	cost_type *dest,
+	const feature_type *left,
+	const feature_type *right,
+	int width,
+	int height,
+	unsigned int p1,
+	const uint8_t *p2,
+	int p2_pitch,
+	const uint8_t* w,
+	int w_pitch,
+	int min_disp,
+	cudaStream_t stream);
+
 template void enqueue_aggregate_down2up_path<256u>(
 	cost_type *dest,
 	const feature_type *left,
diff --git a/lib/libsgm/src/winner_takes_all.cu b/lib/libsgm/src/winner_takes_all.cu
index 1e82fcb65f0f7c139d8bcfeea6b931c0e0fd63ff..ea53af6c330d18bb203bfa94ce6a59e3a45c7159 100644
--- a/lib/libsgm/src/winner_takes_all.cu
+++ b/lib/libsgm/src/winner_takes_all.cu
@@ -303,6 +303,7 @@ void WinnerTakesAll<MAX_DISPARITY>::enqueue(
 
 template class WinnerTakesAll< 64>;
 template class WinnerTakesAll<128>;
+template class WinnerTakesAll<192>;
 template class WinnerTakesAll<256>;
 
 }