diff --git a/lib/libstereo/src/algorithms/clustersf.cu b/lib/libstereo/src/algorithms/clustersf.cu
index 273025d75485fdf9a535445762633de9e53c35b6..a00a1d6dacb56111c2bdd42bad75c63e10525c4f 100644
--- a/lib/libstereo/src/algorithms/clustersf.cu
+++ b/lib/libstereo/src/algorithms/clustersf.cu
@@ -1,6 +1,7 @@
 #include "stereo.hpp"
 #include "stereosgm.hpp"
 #include "../filters/salient_gradient.hpp"
+#include "../filters/focal_cluster.hpp"
 
 #include <opencv2/highgui.hpp>
 #include <opencv2/imgproc.hpp>
@@ -13,11 +14,12 @@ struct StereoCSF::Impl {
 	Array2D<uchar> temp;
 	Bucket1D<short2, 64> buckets_l;
 	Bucket2D<ushort, 64> buckets_r;
+	Array1D<int> focal;
 
 	Impl(int width, int height) :
 		l(width, height), r(width, height),
 		gl(width, height), gr(width, height), temp(width, height),
-		buckets_l(height), buckets_r(16, height) {}
+		buckets_l(height), buckets_r(16, height), focal(1024) {}
 };
 
 StereoCSF::StereoCSF() : impl_(nullptr) {
@@ -41,10 +43,13 @@ void StereoCSF::compute(cv::InputArray l, cv::InputArray r, cv::OutputArray disp
 	SalientGradientGrouped sgr = {impl_->r.data(), impl_->gr.data(), impl_->temp.data(), impl_->buckets_r.data(), impl_->r.width, impl_->r.height};
 	parallel1DWarpSM(sgr, r.rows(), r.cols());
 
+	FocalCluster fc = {make_short2(300, 300), impl_->buckets_l.data(), impl_->buckets_r.data(), impl_->focal.data(), 1024};
+	parallel1DWarp(fc, l.rows(), 1);
+
 	cv::Mat tmp;
-	impl_->buckets_r.toGpuMat().download(tmp);
-	tmp.convertTo(tmp, CV_8UC1, 4.0);
-	cv::resize(tmp,tmp, cv::Size(tmp.cols*40, tmp.rows/2));
+	impl_->focal.toGpuMat().download(tmp);
+	tmp.convertTo(tmp, CV_8UC1, 0.1);
+	cv::resize(tmp,tmp, cv::Size(tmp.cols, 100));
 	cv::applyColorMap(tmp, tmp, cv::COLORMAP_TURBO);
 	cv::imshow("Gradients Right", tmp);
 
diff --git a/lib/libstereo/src/array1d.hpp b/lib/libstereo/src/array1d.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..a2bdb80e3aa295df98505fe32814493e1e67ed55
--- /dev/null
+++ b/lib/libstereo/src/array1d.hpp
@@ -0,0 +1,187 @@
+#ifndef _FTL_LIBSTEREO_ARRAY1D_HPP_
+#define _FTL_LIBSTEREO_ARRAY1D_HPP_
+
+#include "memory.hpp"
+
+/**
+ * Flat buffer of `width` elements of T, allocated with allocateMemory<T>()
+ * and released with freeMemory().
+ *
+ * An instance either owns its buffer (and releases it) or is a non-owning
+ * view of a buffer held elsewhere. Copies are always views, so the source
+ * must outlive them; moves hand over ownership.
+ */
+template<typename T>
+class Array1D {
+public:
+	/** Creates an empty array with no storage. */
+	Array1D() : width(0), needs_free_(false) {
+		data_.data = nullptr;
+	}
+
+	/** Allocates an owning buffer of w elements; this constructor does not clear it. */
+	Array1D(int w) : width(w), needs_free_(true) {
+		data_.data = allocateMemory<T>(w);
+	}
+
+	/**
+	 * Copying yields a non-owning view of c's buffer, like copy assignment.
+	 * (The implicit copy would duplicate the ownership flag, so both objects
+	 * would release the same pointer.)
+	 */
+	Array1D(const Array1D<T> &c) : width(c.width), data_(c.data_), needs_free_(false) {}
+
+	/** Takes over c's buffer and ownership flag; c is left empty. */
+	Array1D(Array1D<T> &&c) noexcept : width(c.width), data_(c.data_), needs_free_(c.needs_free_) {
+		c.data_.data = nullptr;
+		c.width = 0;
+		c.needs_free_ = false;
+	}
+
+	// TODO: construction from cv::Mat / cv::cuda::GpuMat is not implemented.
+	// The earlier commented-out draft relied on pitch/height members that this
+	// class does not have.
+
+	~Array1D() {
+		free();
+	}
+
+	/**
+	 * Releases the buffer if this instance owns it, then resets to the empty
+	 * state so that repeated calls (and the destructor) cannot free twice.
+	 */
+	void free() {
+		if (needs_free_ && data_.data) freeMemory(data_.data);
+		data_.data = nullptr;
+		width = 0;
+		needs_free_ = false;
+	}
+
+	/**
+	 * Makes this a non-owning view of c's buffer, releasing any buffer owned
+	 * beforehand. If both already refer to the same buffer (self-assignment,
+	 * or c is a view of this one) ownership is left untouched.
+	 */
+	Array1D<T> &operator=(const Array1D<T> &c) {
+		if (data_.data != c.data_.data) {
+			free();
+			data_ = c.data_;
+			needs_free_ = false;
+		}
+		width = c.width;
+		return *this;
+	}
+
+	/**
+	 * Takes over c's buffer and ownership flag, releasing any different buffer
+	 * owned beforehand; c is left empty.
+	 */
+	Array1D<T> &operator=(Array1D<T> &&c) {
+		if (this == &c) return *this;
+		const bool same_buffer = (data_.data == c.data_.data);
+		// When both refer to one buffer, keep ownership if either side had it.
+		const bool owns = c.needs_free_ || (same_buffer && needs_free_);
+		if (!same_buffer) free();
+		data_ = c.data_;
+		width = c.width;
+		needs_free_ = owns;
+		c.data_.data = nullptr;
+		c.width = 0;
+		c.needs_free_ = false;
+		return *this;
+	}
+
+	/**
+	 * Raw-pointer handle whose operator() is callable from host and device
+	 * code. It performs no bounds checking and does not own the memory.
+	 */
+	struct Data {
+		__host__ __device__ inline T& operator() (const int x) {
+			return data[x];
+		}
+
+		__host__ __device__ inline const T& operator() (const int x) const {
+			return data[x];
+		}
+
+		T *data;
+	};
+
+	/**
+	 * Resizes to w elements. Does nothing when width already equals w;
+	 * otherwise the current buffer is released and a new owning buffer is
+	 * allocated (previous contents are not copied).
+	 */
+	void create(int w) {
+		if (w == width) return;
+		free();  // resets width, so it is assigned afterwards
+		width = w;
+		needs_free_ = true;
+		data_.data = allocateMemory<T>(w);
+	}
+
+	inline Data &data() { return data_; }
+	inline const Data &data() const { return data_; }
+
+	/** Writes a 1 x width matrix to m: downloaded from the GPU when USE_GPU is defined, otherwise a header over this buffer. */
+	void toMat(cv::Mat &m) {
+		#ifdef USE_GPU
+		cv::cuda::GpuMat gm;
+		toGpuMat(gm);
+		gm.download(m);
+		#else
+		m = cv::Mat(1, width, cv::traits::Type<T>::value, data_.data);
+		#endif
+	}
+
+	/** Same as toMat(cv::Mat&) but returns the matrix. */
+	cv::Mat toMat() {
+		#ifdef USE_GPU
+		cv::cuda::GpuMat gm;
+		toGpuMat(gm);
+		cv::Mat m;
+		gm.download(m);
+		return m;
+		#else
+		return cv::Mat(1, width, cv::traits::Type<T>::value, data_.data);
+		#endif
+	}
+
+	/** Const overload: a 1 x width matrix, downloaded from the GPU when USE_GPU is defined, otherwise a header over this buffer. */
+	const cv::Mat toMat() const {
+		#ifdef USE_GPU
+		cv::cuda::GpuMat gm(1, width, cv::traits::Type<T>::value, (void*)data_.data);
+		cv::Mat m;
+		gm.download(m);
+		return m;
+		#else
+		return cv::Mat(1, width, cv::traits::Type<T>::value, data_.data);
+		#endif
+	}
+
+	/** Points m at this buffer as a 1 x width GpuMat when USE_GPU is defined; otherwise m is left unchanged. */
+	void toGpuMat(cv::cuda::GpuMat &m) {
+		#ifdef USE_GPU
+		m = cv::cuda::GpuMat(1, width, cv::traits::Type<T>::value, (void*)data_.data);
+		#else
+		// TODO
+		#endif
+	}
+
+	/** Returns a 1 x width GpuMat over this buffer when USE_GPU is defined; otherwise a newly constructed GpuMat not backed by this buffer. */
+	cv::cuda::GpuMat toGpuMat() {
+		#ifdef USE_GPU
+		return cv::cuda::GpuMat(1, width, cv::traits::Type<T>::value, (void*)data_.data);
+		#else
+		return cv::cuda::GpuMat(1, width, cv::traits::Type<T>::value);
+		#endif
+	}
+
+	int width;
+
+private:
+	Data data_;
+	bool needs_free_;
+};
+
+#endif
diff --git a/lib/libstereo/src/filters/focal_cluster.hpp b/lib/libstereo/src/filters/focal_cluster.hpp
index 6aec3128a2f8db6ef94158fd9bfbba9219d5ad06..26e6cda17a69e428c48366ea8aa508829a2ea186 100644
--- a/lib/libstereo/src/filters/focal_cluster.hpp
+++ b/lib/libstereo/src/filters/focal_cluster.hpp
@@ -1,21 +1,37 @@
 #ifndef _FTL_LIBSTEREO_FILTERS_CLUSTER_HPP_
 #define _FTL_LIBSTEREO_FILTERS_CLUSTER_HPP_
 
+#include "../util.hpp"
+#include "../array1d.hpp"
+#include "../bucket1d.hpp"
+#include "../bucket2d.hpp"
+
 struct FocalCluster {
 	short2 focal_pt;
-	Bucket2D<ushort, 64>::Data left;
+	Bucket1D<short2, 64>::Data left;  // per row: left(y) is the feature count, left(y,f) the f-th feature
 	Bucket2D<ushort, 64>::Data right;
-	Array2D<float>::Data histogram;
+	Array1D<int>::Data histogram;  // disparity votes; operator() only increments bins, it never clears them
+
+	int max_disparity = 1024;  // exclusive upper bound on the histogram bin that may be incremented
 
 	__device__ void operator()(ushort2 thread, ushort2 stride, ushort2 size) {
 		for (int y=thread.y; y<size.y; y+=stride.y) {
-			for (int f=thread.x; f<)
-			// For each feature or features near to focal point
-
-			
-			// - Calc distance to focal in X
-			// - For each feature in right image that matches
-			//   - Add focal dist to feature X and add to histogram
+			int count = left(y);  // number of features stored for row y
+			// Threads stride over this row's features (start thread.x, step stride.x)
+			for (int f=thread.x; f<count; f+=stride.x) {
+				// For each feature (all of them: no filtering by nearness to the focal point is done here)
+				short2 feature = left(y,f);  // .x feeds the disparity; .y selects the bucket looked up in right
+				int distx = feature.x - focal_pt.x;  // signed x offset of the feature from the focal point
+				
+				// - For each feature in right image that matches (same row y, bucket feature.y)
+				const ushort *ptr = right.ptr(y, feature.y);
+				int count2 = right(y,feature.y);  // number of entries read through ptr below
+				for (int i=0; i<count2; ++i) {
+					//   - Add focal dist to feature X and add to histogram
+					int disparity = max(0,focal_pt.x - int(ptr[i]) + distx);  // focal_pt.x cancels out: this is feature.x - ptr[i], clamped at 0
+					if (disparity < max_disparity && disparity > 0) atomicAdd(&histogram(disparity), 1);  // bin 0 (including clamped negatives) is never incremented
+				}
+			}
 		}
 	}
 };