diff --git a/lib/libstereo/CMakeLists.txt b/lib/libstereo/CMakeLists.txt
index 64e19cd84f2ec398fad46ab9c1c64e751bbeab17..47a6aa69fe65fc5c1d542c47ea5f4721658fb2ff 100644
--- a/lib/libstereo/CMakeLists.txt
+++ b/lib/libstereo/CMakeLists.txt
@@ -46,7 +46,8 @@ if (LIBSTEREO_SHARED)
                 src/algorithms/hwcensussgm.cu
                 src/algorithms/brefcensussgm.cu
                 src/algorithms/meancensussgm.cu
-                src/algorithms/hstablesgm.cu
+				src/algorithms/hstablesgm.cu
+				src/algorithms/clustersf.cu
                 #src/stereo_hier_census.cu
                 src/stereo_wcensussgm.cu
                 src/stereo_census_adaptive.cu
@@ -82,7 +83,8 @@ else()
                 src/algorithms/hwcensussgm.cu
                 src/algorithms/brefcensussgm.cu
                 src/algorithms/meancensussgm.cu
-                src/algorithms/hstablesgm.cu
+				src/algorithms/hstablesgm.cu
+				src/algorithms/clustersf.cu
                 #src/stereo_hier_census.cu
                 src/stereo_wcensussgm.cu
                 src/stereo_census_adaptive.cu
diff --git a/lib/libstereo/include/stereo.hpp b/lib/libstereo/include/stereo.hpp
index c596986b02bf3243f15a4f00db4b919a2fa062db..d0a2e6e928ea07be880027ab73f885c549746b6c 100644
--- a/lib/libstereo/include/stereo.hpp
+++ b/lib/libstereo/include/stereo.hpp
@@ -188,6 +188,25 @@ private:
 	Impl *impl_;
 };
 
+/* Clustered Salient Features with focal point */
+class StereoCSF {
+public:
+	StereoCSF();
+	~StereoCSF();
+
+	void compute(cv::InputArray l, cv::InputArray r, cv::OutputArray disparity);
+	void setPrior(cv::InputArray disp) {};
+
+	struct Parameters {
+		bool debug = false;
+	};
+	Parameters params;
+
+private:
+	struct Impl;
+	Impl *impl_;
+};
+
 /**
  * STABLE Binary descriptor. This is a general implementation.
  * 
diff --git a/lib/libstereo/middlebury/algorithms.hpp b/lib/libstereo/middlebury/algorithms.hpp
index 604f0f57c73529ffa5ae6b88b9a42465f6e51a3f..7c0d2ca4bf9a7f51d6ece71629dd9ab754fd0be1 100644
--- a/lib/libstereo/middlebury/algorithms.hpp
+++ b/lib/libstereo/middlebury/algorithms.hpp
@@ -32,6 +32,15 @@ namespace Impl {
 		}
 	};
 
+	struct ClusterSF : public Algorithm {
+		ClusterSF() {  }
+
+		virtual void run(const MiddleburyData &data, cv::Mat &disparity) override {
+			StereoCSF stereo;
+			stereo.compute(data.imL, data.imR, disparity);
+		}
+	};
+
 	struct ECensusSGM : public Algorithm {
 		ECensusSGM() { P1 = 8.0f; P2 = 40.0f; }  // Tuned to total error 2.0
 
@@ -341,6 +350,7 @@ namespace Impl {
 
 static const std::map<std::string, Algorithm*> algorithms = {
 	{ "censussgm", new Impl::CensusSGM() },
+	{ "cluster", new Impl::ClusterSF() },
 	{ "mcensussgm", new Impl::MeanCensusSGM() },
 	{ "gcensussgm", new Impl::GCensusSGM() },
 	{ "ecensussgm", new Impl::ECensusSGM() },
diff --git a/lib/libstereo/src/algorithms/clustersf.cu b/lib/libstereo/src/algorithms/clustersf.cu
new file mode 100644
index 0000000000000000000000000000000000000000..454a42b3cb3db9b5f579160b3255c2b638dee260
--- /dev/null
+++ b/lib/libstereo/src/algorithms/clustersf.cu
@@ -0,0 +1,52 @@
+#include "stereo.hpp"
+#include "stereosgm.hpp"
+#include "../filters/salient_gradient.hpp"
+
+#include <opencv2/highgui.hpp>
+
+struct StereoCSF::Impl {
+	Array2D<uchar> l;
+	Array2D<uchar> r;
+	Array2D<uchar> gl;
+	Array2D<uchar> gr;
+	Array2D<uchar> temp;
+
+	Impl(int width, int height) :
+		l(width, height), r(width, height),
+		gl(width, height), gr(width, height), temp(width, height) {}
+};
+
+StereoCSF::StereoCSF() : impl_(nullptr) {
+	impl_ = new Impl(0, 0);
+}
+
+void StereoCSF::compute(cv::InputArray l, cv::InputArray r, cv::OutputArray disparity) {
+
+	//cudaSetDevice(0);
+
+	if (l.rows() != impl_->l.height || r.cols() != impl_->l.width) {
+		delete impl_; impl_ = nullptr;
+		impl_ = new Impl(l.cols(), l.rows());
+	}
+
+	mat2gray(l, impl_->l);
+	mat2gray(r, impl_->r);
+
+	SalientGradient sg = {impl_->l.data(), impl_->gl.data(), impl_->temp.data(), impl_->l.width, impl_->l.height};
+	parallel1DWarp(sg, l.rows(), l.cols());
+
+	cv::Mat tmp;
+	impl_->gl.toGpuMat().download(tmp);
+	cv::imshow("Gradients", tmp);
+
+	cudaSafeCall(cudaDeviceSynchronize());
+	disparity.create(l.rows(), l.cols(), CV_32F);
+}
+
+StereoCSF::~StereoCSF() {
+	if (impl_) {
+		delete impl_;
+		impl_ = nullptr;
+	}
+}
+
diff --git a/lib/libstereo/src/filters/salient_gradient.hpp b/lib/libstereo/src/filters/salient_gradient.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..fd521d1742d886c2a26ce7552dd9473ea72e8d2d
--- /dev/null
+++ b/lib/libstereo/src/filters/salient_gradient.hpp
@@ -0,0 +1,56 @@
+#ifndef _FTL_LIBSTEREO_FILTERS_SALIENT_GRADIENT_HPP_
+#define _FTL_LIBSTEREO_FILTERS_SALIENT_GRADIENT_HPP_
+
+#include "../util.hpp"
+#include "../array2d.hpp"
+
+struct SalientGradient {
+	Array2D<uchar>::Data image;
+	Array2D<uchar>::Data output;
+	Array2D<uchar>::Data magnitude;
+
+	int width, height;
+
+	__cuda__ inline float2 calculateGradient(int x, int y) {
+		if (x < 1 || y < 1 || x >= width-1 || y >= height-1) return make_float2(0,0);
+
+		float dx = -1.0f*float(image(y-1,x-1)) + -2.0f*float(image(y, x-1)) + -1*float(image(y+1, x-1)) +
+             float(image(y-1, x+1)) + 2.0f*float(image(y, x+1)) + float(image(y+1, x+1));
+        float dy = float(image(y-1, x-1)) + 2.0f*float(image(y-1, x)) + float(image(y-1, x+1)) +
+             -1.0f*float(image(y+1, x-1)) + -2.0f*float(image(y+1, x)) + -1.0f*float(image(y+1, x+1));
+
+        float g = sqrt( (dx*dx) + (dy*dy) );
+		float a = atan2(dy, dx);
+		return make_float2(g,a);
+	}
+
+	__device__ void operator()(ushort2 thread, ushort2 stride, ushort2 size) {
+		static const float PI = 3.14f;
+		static constexpr float PI2 = PI/2.0f;
+
+		for (int y=thread.y; y<size.y; y+=stride.y) {
+			for (int x=thread.x; x<size.x; x+=stride.x) {
+				auto g = calculateGradient(x,y);
+				output(y,x) = uchar((g.y+PI2) / PI * 255.0f);
+				magnitude(y,x) = uchar(g.x);
+			}
+
+			__syncwarp();
+
+			// Apply threshold
+			for (int x=thread.x; x<size.x; x+=stride.x) {
+				uchar thresh = 0;
+				for (int u=-15; u<=15; ++u) {
+					thresh = (x+u >= 0 && x+u < width) ? max(thresh, magnitude(y,x+u)-abs(u)) : thresh;
+				}
+
+				uchar m = magnitude(y,x);
+				if (m < thresh) output(y,x) = 0;
+			}
+
+			// Next step would be to bucket the results
+		}
+	}
+};
+
+#endif