diff --git a/components/operators/include/ftl/operators/disparity.hpp b/components/operators/include/ftl/operators/disparity.hpp
index 4aba9bbd8e179c86ddc0d6305fb14b3579a6fb23..17993f24c5e1778957cc7f3f29762ccaa611b0cc 100644
--- a/components/operators/include/ftl/operators/disparity.hpp
+++ b/components/operators/include/ftl/operators/disparity.hpp
@@ -12,6 +12,7 @@
 
 #ifdef HAVE_LIBSGM
 #include <libsgm.h>
+#include <libsgm_parameters.hpp>
 #include <opencv2/cudaimgproc.hpp>
 #endif
 
@@ -85,6 +86,7 @@ class FixstarsSGM : public ftl::operators::Operator {
 	int max_disp_;
 	float uniqueness_;
 	bool use_P2_map_;
+	sgm::CensusShape ct_shape_;
 };
 #endif
 
diff --git a/components/operators/src/disparity/fixstars_sgm.cpp b/components/operators/src/disparity/fixstars_sgm.cpp
index acd468f249afbed33b83ab36422dd8bede9ea8b2..60f4332dc5bc59d6e5cd0b0084542e9b1b66adda 100644
--- a/components/operators/src/disparity/fixstars_sgm.cpp
+++ b/components/operators/src/disparity/fixstars_sgm.cpp
@@ -64,6 +64,7 @@ FixstarsSGM::FixstarsSGM(ftl::operators::Graph *g, ftl::Configurable* cfg) :
 	P1_ = cfg->value("P1", 10);
 	P2_ = cfg->value("P2", 120);
 	max_disp_ = cfg->value("num_disp", 256);
+	ct_shape_ = static_cast<sgm::CensusShape>(cfg->value("ct_shape", 2));
 
 	if (uniqueness_ < 0.0 || uniqueness_ > 1.0) {
 		uniqueness_ = 1.0;
@@ -118,6 +119,11 @@ FixstarsSGM::FixstarsSGM(ftl::operators::Graph *g, ftl::Configurable* cfg) :
 		}
 	});
 
+	cfg->on("ct_shape", [this, cfg]() {
+		ct_shape_ = static_cast<sgm::CensusShape>(cfg->value("ct_shape", 2));
+		updateParameters();
+	});
+
 	updateP2Parameters();
 
 	cfg->on("canny_low", [this, cfg]() {
@@ -159,7 +165,7 @@ bool FixstarsSGM::init() {
 bool FixstarsSGM::updateParameters() {
 	if (ssgm_ == nullptr) { return false; }
 	return this->ssgm_->updateParameters(
-		sgm::StereoSGM::Parameters(P1_, P2_, uniqueness_, true));
+		sgm::StereoSGM::Parameters(P1_, P2_, uniqueness_, true, ct_shape_));
 }
 
 bool FixstarsSGM::updateP2Parameters() {
diff --git a/lib/libsgm/include/libsgm.h b/lib/libsgm/include/libsgm.h
index bf2c58ea759181d3294d9cb717fd7e65b2b6d60a..653a2a5df38d6957e2942b16313538120becc9d7 100644
--- a/lib/libsgm/include/libsgm.h
+++ b/lib/libsgm/include/libsgm.h
@@ -28,6 +28,7 @@ limitations under the License.
 
 #include <stdint.h>
 #include "libsgm_config.h"
+#include "libsgm_parameters.hpp"
 #include <cuda_runtime.h>
 
 #if defined(LIBSGM_SHARED)
@@ -71,7 +72,8 @@ namespace sgm {
 			int P2;
 			float uniqueness;
 			bool subpixel;
-			Parameters(int P1 = 10, int P2 = 120, float uniqueness = 0.95f, bool subpixel = false) : P1(P1), P2(P2), uniqueness(uniqueness), subpixel(subpixel) {}
+			CensusShape ct_shape;
+			Parameters(int P1 = 10, int P2 = 120, float uniqueness = 0.95f, bool subpixel = false, CensusShape ct_shape = CensusShape::CIRCLE_3) : P1(P1), P2(P2), uniqueness(uniqueness), subpixel(subpixel), ct_shape(ct_shape) {}
 		};
 
 		/**
diff --git a/lib/libsgm/include/libsgm_parameters.hpp b/lib/libsgm/include/libsgm_parameters.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..23113a213fc1663cf83777db611bfbb07afc308a
--- /dev/null
+++ b/lib/libsgm/include/libsgm_parameters.hpp
@@ -0,0 +1,9 @@
+#pragma once
+
+namespace sgm {
+	enum class CensusShape {
+		CT_5X5=0,
+		CS_CT_9X7,
+		CIRCLE_3
+	};
+}
diff --git a/lib/libsgm/src/census_transform.cu b/lib/libsgm/src/census_transform.cu
index d437f978e76300587e4b06e2d7954420a35dd1a2..f46a985577bb7070052eaff1af67fc8e85696781 100644
--- a/lib/libsgm/src/census_transform.cu
+++ b/lib/libsgm/src/census_transform.cu
@@ -21,14 +21,11 @@ namespace sgm {
 
 namespace {
 
-static constexpr int WINDOW_WIDTH  = 5;
-static constexpr int WINDOW_HEIGHT = 5;
-
 static constexpr int BLOCK_SIZE = 128;
 static constexpr int LINES_PER_BLOCK = 16;
 
 /* Centre symmetric census */
-template <typename T>
+template <typename T, int WINDOW_WIDTH, int WINDOW_HEIGHT>
 __global__ void cs_census_transform_kernel(
 	feature_type *dest,
 	const T *src,
@@ -104,7 +101,7 @@ __global__ void cs_census_transform_kernel(
 	}
 }
 
-template <typename T>
+template <typename T, int WINDOW_WIDTH, int WINDOW_HEIGHT>
 __global__ void census_transform_kernel(
 	feature_type* __restrict__ dest,
 	const T* __restrict__ src,
@@ -138,6 +135,72 @@ __global__ void census_transform_kernel(
 	if (x < width && y < height) dest[x+y*width] = res;
 }
 
+template <typename T>
+__global__ void circle_ct_3_kernel(
+	feature_type* __restrict__ dest,
+	const T* __restrict__ src,
+	int width,
+	int height,
+	int pitch)
+{
+	static constexpr int RADIUS_X = 3;
+	static constexpr int RADIUS_Y = 3;
+
+	const int x = (blockIdx.x*blockDim.x + threadIdx.x);
+	const int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	feature_type res = 0;
+
+	if (x >= RADIUS_X && y >= RADIUS_Y && x < width-RADIUS_X && y < height-RADIUS_Y) {
+		const T center = src[y*pitch+x];
+
+		int yix = y*pitch+x;
+		res = (res << 1) | (center < (src[yix-3]) ? 1 : 0);
+		res = (res << 1) | (center < (src[yix-2]) ? 1 : 0);
+		res = (res << 1) | (center < (src[yix-1]) ? 1 : 0);
+		res = (res << 1) | (center < (src[yix+1]) ? 1 : 0);
+		res = (res << 1) | (center < (src[yix+2]) ? 1 : 0);
+		res = (res << 1) | (center < (src[yix+3]) ? 1 : 0);
+
+		yix = (y-1)*pitch+x;
+		res = (res << 1) | (center < (src[yix-2]) ? 1 : 0);
+		res = (res << 1) | (center < (src[yix-1]) ? 1 : 0);
+		res = (res << 1) | (center < (src[yix]) ? 1 : 0);
+		res = (res << 1) | (center < (src[yix+1]) ? 1 : 0);
+		res = (res << 1) | (center < (src[yix+2]) ? 1 : 0);
+
+		yix = (y-2)*pitch+x;
+		res = (res << 1) | (center < (src[yix-2]) ? 1 : 0);
+		res = (res << 1) | (center < (src[yix-1]) ? 1 : 0);
+		res = (res << 1) | (center < (src[yix]) ? 1 : 0);
+		res = (res << 1) | (center < (src[yix+1]) ? 1 : 0);
+		res = (res << 1) | (center < (src[yix+2]) ? 1 : 0);
+
+		yix = (y-3)*pitch+x;
+		res = (res << 1) | (center < (src[yix]) ? 1 : 0);
+
+		yix = (y+1)*pitch+x;
+		res = (res << 1) | (center < (src[yix-2]) ? 1 : 0);
+		res = (res << 1) | (center < (src[yix-1]) ? 1 : 0);
+		res = (res << 1) | (center < (src[yix]) ? 1 : 0);
+		res = (res << 1) | (center < (src[yix+1]) ? 1 : 0);
+		res = (res << 1) | (center < (src[yix+2]) ? 1 : 0);
+
+		yix = (y+2)*pitch+x;
+		res = (res << 1) | (center < (src[yix-2]) ? 1 : 0);
+		res = (res << 1) | (center < (src[yix-1]) ? 1 : 0);
+		res = (res << 1) | (center < (src[yix]) ? 1 : 0);
+		res = (res << 1) | (center < (src[yix+1]) ? 1 : 0);
+		res = (res << 1) | (center < (src[yix+2]) ? 1 : 0);
+
+		yix = (y+3)*pitch+x;
+		res = (res << 1) | (center < (src[yix]) ? 1 : 0);
+	}
+
+	// FIXME: Should use feature pitch, not width.
+	if (x < width && y < height) dest[x+y*width] = res;
+}
+
 template <typename T>
 void enqueue_census_transform(
 	feature_type *dest,
@@ -145,24 +208,32 @@ void enqueue_census_transform(
 	int width,
 	int height,
 	int pitch,
+	sgm::CensusShape ct_shape,
 	cudaStream_t stream)
 {
 	/* Disable the original center symmetric algorithm */
-	if (false) {
-		const int width_per_block = BLOCK_SIZE - WINDOW_WIDTH + 1;
+	if (ct_shape == sgm::CensusShape::CS_CT_9X7) {
+		const int width_per_block = BLOCK_SIZE - 9 + 1;
 		const int height_per_block = LINES_PER_BLOCK;
 		const dim3 gdim(
 			(width  + width_per_block  - 1) / width_per_block,
 			(height + height_per_block - 1) / height_per_block);
 		const dim3 bdim(BLOCK_SIZE);
-		cs_census_transform_kernel<<<gdim, bdim, 0, stream>>>(dest, src, width, height, pitch);
-	} else {
+		cs_census_transform_kernel<T, 9, 7><<<gdim, bdim, 0, stream>>>(dest, src, width, height, pitch);
+	} else if (ct_shape == sgm::CensusShape::CT_5X5) {
+		static constexpr int THREADS_X = 16;
+		static constexpr int THREADS_Y = 16;
+
+		const dim3 gdim((width + THREADS_X - 1)/THREADS_X, (height + THREADS_Y - 1)/THREADS_Y);
+		const dim3 bdim(THREADS_X, THREADS_Y);
+		census_transform_kernel<T, 5, 5><<<gdim, bdim, 0, stream>>>(dest, src, width, height, pitch);
+	} else if (ct_shape == sgm::CensusShape::CIRCLE_3) {
 		static constexpr int THREADS_X = 16;
 		static constexpr int THREADS_Y = 16;
 
 		const dim3 gdim((width + THREADS_X - 1)/THREADS_X, (height + THREADS_Y - 1)/THREADS_Y);
 		const dim3 bdim(THREADS_X, THREADS_Y);
-		census_transform_kernel<<<gdim, bdim, 0, stream>>>(dest, src, width, height, pitch);
+		circle_ct_3_kernel<<<gdim, bdim, 0, stream>>>(dest, src, width, height, pitch);
 	}
 }
 
@@ -180,13 +251,14 @@ void CensusTransform<T>::enqueue(
 	int width,
 	int height,
 	int pitch,
+	sgm::CensusShape ct_shape,
 	cudaStream_t stream)
 {
 	if(m_feature_buffer.size() < static_cast<size_t>(width * height)){
 		m_feature_buffer = DeviceBuffer<feature_type>(width * height);
 	}
 	enqueue_census_transform(
-		m_feature_buffer.data(), src, width, height, pitch, stream);
+		m_feature_buffer.data(), src, width, height, pitch, ct_shape, stream);
 }
 
 template class CensusTransform<uint8_t>;
diff --git a/lib/libsgm/src/census_transform.hpp b/lib/libsgm/src/census_transform.hpp
index 8a80b903b4b20152c0c8fad7fada8032d60a1565..23c1ebd3fb4d1147a8d5775c5120dcfd1ffbf467 100644
--- a/lib/libsgm/src/census_transform.hpp
+++ b/lib/libsgm/src/census_transform.hpp
@@ -19,6 +19,7 @@ limitations under the License.
 
 #include "device_buffer.hpp"
 #include "types.hpp"
+#include "libsgm_parameters.hpp"
 
 namespace sgm {
 
@@ -43,6 +44,7 @@ public:
 		int width,
 		int height,
 		int pitch,
+		sgm::CensusShape ct_shape,
 		cudaStream_t stream);
 
 };
diff --git a/lib/libsgm/src/sgm.cu b/lib/libsgm/src/sgm.cu
index eb5d4179e0a11b964e8238bbdcfac40598917396..0adf22301a9c9a8784475b2e881c7ab5e07f72c5 100644
--- a/lib/libsgm/src/sgm.cu
+++ b/lib/libsgm/src/sgm.cu
@@ -58,12 +58,13 @@ public:
 		float uniqueness,
 		bool subpixel,
 		int min_disp,
+		sgm::CensusShape ct_shape,
 		cudaStream_t stream)
 	{
 		m_census_left.enqueue(
-			src_left, width, height, src_pitch, stream);
+			src_left, width, height, src_pitch, ct_shape, stream);
 		m_census_right.enqueue(
-			src_right, width, height, src_pitch, stream);
+			src_right, width, height, src_pitch, ct_shape, stream);
 		m_path_aggregation.enqueue(
 			m_census_left.get_output(),
 			m_census_right.get_output(),
@@ -109,6 +110,7 @@ void SemiGlobalMatching<T, MAX_DISPARITY>::execute(
 	float uniqueness,
 	bool subpixel,
 	int min_disp,
+	sgm::CensusShape ct_shape,
 	cudaStream_t stream)
 {
 	m_impl->enqueue(
@@ -119,7 +121,7 @@ void SemiGlobalMatching<T, MAX_DISPARITY>::execute(
 		penalty1, penalty2,
 		weights, weights_pitch,
 		uniqueness, subpixel,
-		min_disp,
+		min_disp, ct_shape,
 		stream);
 	//cudaStreamSynchronize(0);
 }
@@ -141,6 +143,7 @@ void SemiGlobalMatching<T, MAX_DISPARITY>::enqueue(
 	float uniqueness,
 	bool subpixel,
 	int min_disp,
+	sgm::CensusShape ct_shape,
 	cudaStream_t stream)
 {
 	m_impl->enqueue(
@@ -151,7 +154,7 @@ void SemiGlobalMatching<T, MAX_DISPARITY>::enqueue(
 		penalty1, penalty2,
 		weights, weights_pitch,
 		uniqueness, subpixel,
-		min_disp,
+		min_disp, ct_shape,
 		stream);
 }
 
diff --git a/lib/libsgm/src/sgm.hpp b/lib/libsgm/src/sgm.hpp
index 9aa2cd387782465c3398aab5cda6a8b6a292018d..c792c4a6ebb690708f5dd8fe99b78205411962d8 100644
--- a/lib/libsgm/src/sgm.hpp
+++ b/lib/libsgm/src/sgm.hpp
@@ -20,6 +20,7 @@ limitations under the License.
 #include <memory>
 #include <cstdint>
 #include "types.hpp"
+#include "libsgm_parameters.hpp"
 
 namespace sgm {
 
@@ -54,6 +55,7 @@ public:
 		float uniqueness,
 		bool subpixel,
 		int min_disp,
+		sgm::CensusShape ct_shape,
 		cudaStream_t stream);
 
 	void enqueue(
@@ -72,6 +74,7 @@ public:
 		float uniqueness,
 		bool subpixel,
 		int min_disp,
+		sgm::CensusShape ct_shape,
 		cudaStream_t stream);
 
 };
diff --git a/lib/libsgm/src/stereo_sgm.cpp b/lib/libsgm/src/stereo_sgm.cpp
index 70e1263147fbe3ab4eefb948d591e8506a3be40b..6e7c0bf18b99de45a5d5351cb6bb62c45a639fe5 100644
--- a/lib/libsgm/src/stereo_sgm.cpp
+++ b/lib/libsgm/src/stereo_sgm.cpp
@@ -29,7 +29,7 @@ namespace sgm {
 	public:
 		using output_type = sgm::output_type;
 		virtual void execute(output_type* dst_L, output_type* dst_R, const void* src_L, const void* src_R,
-			int w, int h, int sp, int dp, unsigned int P1, const uint8_t *P2, const uint8_t *weights, int weights_pitch, float uniqueness, bool subpixel, int min_disp, cudaStream_t stream) = 0;
+			int w, int h, int sp, int dp, unsigned int P1, const uint8_t *P2, const uint8_t *weights, int weights_pitch, float uniqueness, bool subpixel, int min_disp, sgm::CensusShape ct_shape, cudaStream_t stream) = 0;
 
 		virtual ~SemiGlobalMatchingBase() {}
 	};
@@ -38,9 +38,9 @@ namespace sgm {
 	class SemiGlobalMatchingImpl : public SemiGlobalMatchingBase {
 	public:
 		void execute(output_type* dst_L, output_type* dst_R, const void* src_L, const void* src_R,
-			int w, int h, int sp, int dp, unsigned int P1, const uint8_t *P2, const uint8_t *weights, int weights_pitch, float uniqueness, bool subpixel, int min_disp, cudaStream_t stream) override
+			int w, int h, int sp, int dp, unsigned int P1, const uint8_t *P2, const uint8_t *weights, int weights_pitch, float uniqueness, bool subpixel, int min_disp, sgm::CensusShape ct_shape, cudaStream_t stream) override
 		{
-			sgm_engine_.execute(dst_L, dst_R, (const input_type*)src_L, (const input_type*)src_R, w, h, sp, dp, P1, P2, weights, weights_pitch, uniqueness, subpixel, min_disp, stream);
+			sgm_engine_.execute(dst_L, dst_R, (const input_type*)src_L, (const input_type*)src_R, w, h, sp, dp, P1, P2, weights, weights_pitch, uniqueness, subpixel, min_disp, ct_shape, stream);
 		}
 	private:
 		SemiGlobalMatching<input_type, DISP_SIZE> sgm_engine_;
@@ -176,7 +176,7 @@ namespace sgm {
 			d_left_disp = dst; // when threre is no device-host copy or type conversion, use passed buffer
 
 		cu_res_->sgm_engine->execute((uint16_t*)d_tmp_left_disp, (uint16_t*)d_tmp_right_disp,
-			d_input_left, d_input_right, width, height, src_pitch, dst_pitch, param_.P1, P2, weights, weights_pitch, param_.uniqueness, param_.subpixel, min_disp, stream);
+			d_input_left, d_input_right, width, height, src_pitch, dst_pitch, param_.P1, P2, weights, weights_pitch, param_.uniqueness, param_.subpixel, min_disp, param_.ct_shape, stream);
 
 		sgm::details::median_filter((uint16_t*)d_tmp_left_disp, (uint16_t*)d_left_disp, width, height, dst_pitch, stream);
 		sgm::details::median_filter((uint16_t*)d_tmp_right_disp, (uint16_t*)d_right_disp, width, height, dst_pitch, stream);