From 887aa933094504e1e6012fb6b406dc9c555b3775 Mon Sep 17 00:00:00 2001
From: Sebastian Hahta <joseha@utu.fi>
Date: Tue, 21 Apr 2020 18:28:06 +0300
Subject: [PATCH] more useful middlebury testing

---
 lib/libstereo/include/stereo.hpp              |   6 +-
 lib/libstereo/middlebury/main.cpp             | 253 +++++++++++-------
 lib/libstereo/src/costs/mutual_information.cu |  12 +-
 .../src/costs/mutual_information.hpp          |  18 +-
 lib/libstereo/src/stereo_censussgm.cu         |   2 +-
 lib/libstereo/src/stereo_misgm.cu             |  11 +-
 lib/libstereo/src/stereo_misgm2.cu            |  37 ++-
 7 files changed, 206 insertions(+), 133 deletions(-)

diff --git a/lib/libstereo/include/stereo.hpp b/lib/libstereo/include/stereo.hpp
index d210d3688..2ccab32d6 100644
--- a/lib/libstereo/include/stereo.hpp
+++ b/lib/libstereo/include/stereo.hpp
@@ -155,8 +155,8 @@ public:
 	struct Parameters {
 		int d_min = 0;
 		int d_max = 0;
-		unsigned short P1 = 2;
-		unsigned short P2 = 8;
+		unsigned short P1 = 1;
+		unsigned short P2 = 16;
 		float uniqueness = std::numeric_limits<short>::max();
 		int subpixel = 1; // subpixel interpolation method
 		int paths = AggregationDirections::HORIZONTAL |
@@ -164,7 +164,7 @@ public:
 					AggregationDirections::DIAGONAL;
 
 		float alpha = 0.2; /** alpha: minimum weight for census cost */
-		float beta = 0.8; /** 1-beta: minimum weight for MI cost */
+		float beta = 0.7; /** 1-beta: minimum weight for MI cost */
 		bool debug = false;
 	};
 	Parameters params;
diff --git a/lib/libstereo/middlebury/main.cpp b/lib/libstereo/middlebury/main.cpp
index 1c9409cb5..60dfee546 100644
--- a/lib/libstereo/middlebury/main.cpp
+++ b/lib/libstereo/middlebury/main.cpp
@@ -1,6 +1,7 @@
 #include <opencv2/opencv.hpp>
 
 #include <vector>
+#include <map>
 #include <chrono>
 
 #include "middlebury.hpp"
@@ -32,130 +33,176 @@ void colorize(const cv::Mat &disp, cv::Mat &out, int ndisp=-1) {
 
 ////////////////////////////////////////////////////////////////////////////////
 
-using cv::Mat;
-using std::vector;
+struct MiddleburyData {
+	const std::string name;
+	const cv::Mat imL;
+	const cv::Mat imR;
+	const cv::Mat gtL;
+	const cv::Mat maskL;
+	const MiddEvalCalib calib;
+};
+
+struct Result {
+	std::vector<MiddEvalResult> results;
+	cv::Mat disp;
+};
+
+static void run_censussgm(MiddleburyData &data, cv::Mat &disparity) {
+	auto stereo = StereoCensusSgm();
+	stereo.params.P1 = 4;
+	stereo.params.P2 = 18;
+
+	stereo.params.d_min = data.calib.vmin;
+	stereo.params.d_max = data.calib.vmax;
+	stereo.params.subpixel = 1;
+	stereo.params.debug = false;
+	stereo.compute(data.imL, data.imR, disparity);
+}
 
-int main(int argc, char* argv[]) {
-	#if USE_GPU
-	std::cout << "GPU VERSION\n";
-	#else
-	std::cout << "CPU VERSION\n";
-	#endif
+static void run_misgm(MiddleburyData &data, cv::Mat &disparity) {
+	auto stereo = StereoMiSgm();
+	stereo.params.P1 = 4;
+	stereo.params.P2 = 18;
 
-	Mat imL;
-	Mat imR;
-	Mat gtL;
-	Mat maskL;
-	MiddEvalCalib calib;
+	stereo.params.d_min = data.calib.vmin;
+	stereo.params.d_max = data.calib.vmax;
+	stereo.params.subpixel = 1;
+	stereo.params.debug = false;
+	stereo.compute(data.imL, data.imR, disparity);
+	stereo.setPrior(disparity);
+	stereo.compute(data.imL, data.imR, disparity);
+}
 
-	if (argc < 2) {
-		std::cerr << "usage: middlebury [path]\n";
-		return 1;
-	}
+static const std::map<std::string, std::function<void(MiddleburyData&, cv::Mat&)>> algorithms = {
+	{ "censussgm", run_censussgm },
+	{ "misgm", run_misgm }
+};
 
-	imL = cv::imread(argv[1] + std::string("im0.png"));
-	imR = cv::imread(argv[1] + std::string("im1.png"));
-	gtL = read_pfm(argv[1] + std::string("disp0.pfm"));
+////////////////////////////////////////////////////////////////////////////////
+
+MiddleburyData load_input(const std::string &path) {
+	cv::Mat imL;
+	cv::Mat imR;
+	cv::Mat gtL;
+	cv::Mat maskL;
+
+	imL = cv::imread(path + std::string("im0.png"));
+	imR = cv::imread(path + std::string("im1.png"));
+	gtL = read_pfm(path + std::string("disp0.pfm"));
 	if (gtL.empty()) {
-		gtL = read_pfm(argv[1] + std::string("disp0GT.pfm"));
+		gtL = read_pfm(path + std::string("disp0GT.pfm"));
 	}
 	//maskL = cv::imread(argv[1] + std::string("mask0nocc.png"), cv::IMREAD_GRAYSCALE);
-	calib = read_calibration(argv[1] + std::string("calib.txt"));
+	auto calib = read_calibration(path + std::string("calib.txt"));
 
 	maskL.create(imL.size(), CV_8UC1);
 	maskL.setTo(cv::Scalar(255));
 
-	int ndisp = calib.vmax - calib.vmin;
+	if (imL.empty() || imR.empty() || gtL.empty() || maskL.empty()) {
+		throw std::exception();
+	}
 
-	auto stereo = StereoCensusAdaptive();
-	stereo.params.P1 = 8;
-	//stereo.params.P2 = 25;
+	std::string name;
+	name = path.substr(0, path.size()-1);
+	name = name.substr(name.find_last_of("/") + 1);
 
-	stereo.params.d_min = calib.vmin;
-	stereo.params.d_max = calib.vmax;
-	stereo.params.subpixel = 1;
-	stereo.params.debug = true;
-	//stereo.params.paths = AggregationDirections::ALL;
-	stereo.params.uniqueness = 80;
+	return {name, imL, imR, gtL, maskL, calib};
+}
+
+Result run_one(MiddleburyData &input, std::function<void(MiddleburyData&, cv::Mat&)> f) {
+	Result result;
+	f(input, result.disp);
+	for (const float t : {4.0f, 1.0f, 0.5f, 0.25f}) {
+		result.results.push_back(evaluate(result.disp, input.gtL, input.maskL, t));
+	}
+	return result;
+}
 
-	int i_max = 1;
-	float t = 4.0f;
+Result run_one(MiddleburyData &input, std::string name) {
+	return run_one(input, algorithms.at(name));
+}
 
-	if (imL.empty() || imR.empty() || gtL.empty()) {
-		std::cerr << "can't load image\n";
-		return 1;
+std::map<std::string, Result> run_all(MiddleburyData &input) {
+	std::map<std::string, Result> results;
+
+	for (auto const& [name, f] : algorithms) {
+		results[name] = run_one(input, f);
 	}
 
-	if (imL.size() != imR.size()) {
-		std::cerr << "images must be same size\n";
-		return 1;
+	return results;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+cv::Mat visualize_error(const cv::Mat &disp, const cv::Mat &gt, float t) {
+	cv::Mat err(gt.size(), CV_32FC1);
+	cv::Mat err_color(gt.size(), CV_8UC1);
+
+	cv::absdiff(gt, disp, err);
+	err.setTo(t, err > t);
+	err += 1.0f;
+	err.setTo(0, disp == 0.0);
+	err.convertTo(err, CV_8UC1, 255.0f/(t+1.0f));
+	cv::applyColorMap(err, err_color, cv::COLORMAP_PLASMA);
+
+	return err_color;
+}
+
+void show_result(std::string name, const MiddleburyData &data, const Result &result) {
+	cv::Mat err_color = visualize_error(result.disp, data.gtL, 4.0);
+	cv::Mat disp_color(data.gtL.size(), CV_8UC1);
+
+	colorize(result.disp, disp_color, data.calib.ndisp);
+	cv::imshow(data.name + " (disparity) - " + name, disp_color);
+	cv::imshow(data.name + " (error) - " + name, err_color);
+
+	printf("\n%s: %s\n", name.c_str(), data.name.c_str());
+	for (auto res : result.results) {
+		printf("%9.2f %%    correct (err < %.2f)\n", 100.0f * res.err_bad_nonoccl, res.threshold);
+		printf("%9.2f px   RMSE\n", res.rms_bad_nonoccl);
 	}
+}
 
-	Mat disp(imL.size(), CV_32FC1, cv::Scalar(0.0f));
-	std::cout << "resolution: " << imL.cols << "x" << imL.rows << ", " << ndisp << " disparities [" << calib.vmin << "," << calib.vmax << "]\n";
-
-	std::vector<cv::Mat> disp_color;
-	std::vector<cv::Mat> err_color;
-
-	for (int i = 0; i < i_max; i++) {
-		auto start = std::chrono::high_resolution_clock::now();
-
-		cv::cuda::GpuMat gpu_imL(imL);
-		cv::cuda::GpuMat gpu_imR(imR);
-		cv::cuda::GpuMat gpu_disp(disp);
-		stereo.compute(gpu_imL, gpu_imR, gpu_disp);
-		gpu_disp.download(disp);
-		stereo.setPrior(disp);
-
-		auto stop = std::chrono::high_resolution_clock::now();
-		std::cout	<< "disparity complete in "
-					<< std::chrono::duration_cast<std::chrono::milliseconds>(stop-start).count()
-					<< " ms\n";
-
-		MiddEvalResult res;
-
-		std::cout << "TYPE: " << gtL.type() << std::endl;
-
-		for (const float t : {4.0f, 1.0f, 0.5f, 0.25f}) {
-			res = evaluate(disp, gtL, maskL, t);
-			if (i == 0 || i == i_max-1) {
-				printf("%9.2f %%    correct (err < %.2f)\n", 100.0f * res.err_bad, res.threshold);
-				//printf("%9.2f %%    correct (non-occluded, err < %.2f)\n", 100.0f * res.err_bad_nonoccl, res.threshold);
-				printf("%9.2f px   RMSE (all)\n", res.rms_bad);
-				//printf("%9.2f px   RMSE (non-occluded)\n", res.rms_bad_nonoccl);
-			}
-		}
-
-		if (i == 0 || i == i_max-1) {
-			printf("%9.2f %%    total\n", 100.0f * res.err_total);
-			printf("%9.2f %%    non-occluded\n", 100.0f * res.err_nonoccl);
-		}
-
-		Mat &err_color_ = err_color.emplace_back();
-		Mat &disp_color_ = disp_color.emplace_back();
-
-		// visualize error: set missing values at 0 and scale error to [1.0, t+1.0]
-		// errors > t truncate to t
-		Mat err(gtL.size(), CV_32FC1);
-		cv::absdiff(gtL, disp, err);
-		err.setTo(t, err > t);
-		err += 1.0f;
-		err.setTo(0, disp == 0.0);
-		err.convertTo(err, CV_8UC1, 255.0f/(t+1.0f));
-		cv::applyColorMap(err, err_color_, cv::COLORMAP_PLASMA);
-
-		colorize(disp, disp_color_, calib.ndisp);
+void show_results( const MiddleburyData &data, const std::map<std::string, Result> &results) {
+	for (auto const& [name, result] : results) {
+		show_result(name, data, result);
 	}
+}
 
-	cv::imshow(std::to_string(0) + ": " + "error (err < " + std::to_string(t) + ")", err_color[0]);
-	cv::imshow(std::to_string(0) + ": " + "disparity", disp_color[0]);
-	cv::imshow(std::to_string(i_max-1) + ": " + "error (err < " + std::to_string(t) + ")", err_color[i_max-1]);
-	cv::imshow(std::to_string(i_max-1) + ": " + "disparity", disp_color[i_max-1]);
+////////////////////////////////////////////////////////////////////////////////
+
+using cv::Mat;
+using std::vector;
+
+int main(int argc, char* argv[]) {
+	#if USE_GPU
+	std::cout << "GPU VERSION\n";
+	#else
+	std::cout << "CPU VERSION\n";
+	#endif
+
+	if (argc < 2) {
+		std::cerr << "usage: path [algorithm]\n";
+		return 1;
+	}
+
+	if (argc == 2) {
+		auto input = load_input(argv[1]);
+		auto results = run_all(input);
+		show_results(input, results);
+		while(cv::waitKey() != 27);
+		return 0;
+	}
+	else if (argc == 3) {
+		auto input = load_input(argv[1]);
+		auto result = run_one(input, argv[2]);
+		show_result(argv[2], input, result);
+		while(cv::waitKey() != 27);
+		return 0;
+	}
 
-	Mat gt_color;
-	colorize(gtL, gt_color, calib.ndisp);
-	cv::imshow("gt", gt_color);
-	while(cv::waitKey() != 27);
+	// todo: save results
 
-	return 0;
+	std::cerr << "error";
+	return 1;
 }
diff --git a/lib/libstereo/src/costs/mutual_information.cu b/lib/libstereo/src/costs/mutual_information.cu
index af32f0c53..85771f207 100644
--- a/lib/libstereo/src/costs/mutual_information.cu
+++ b/lib/libstereo/src/costs/mutual_information.cu
@@ -74,7 +74,7 @@ void calculate_h12(const cv::Mat &P, cv::Mat &out, int ksize=7, double sigma=1.0
 
 	// possible numerical issues? should also be less than 1/(width*height) of
 	// original image.
-	static float e = 0.00000001;
+	const float e = 0.00000001;
 	cv::Mat tmp;
 
 	P.copyTo(out);
@@ -154,4 +154,14 @@ void MutualInformationMatchingCost::set(const cv::Mat &l, const cv::Mat &r, cons
 	h1.copyTo(h1_.toMat());
 	h2.copyTo(h2_.toMat());
 	#endif
+
+	data().normalize = 0;
+	double min, max;
+	cv::minMaxLoc(h12, &min, &max);
+	data().normalize += max;
+	cv::minMaxLoc(h1, &min, &max);
+	data().normalize -= min;
+	cv::minMaxLoc(h2, &min, &max);
+	data().normalize -= min;
+	data().normalize = 1.0f/data().normalize;
 }
diff --git a/lib/libstereo/src/costs/mutual_information.hpp b/lib/libstereo/src/costs/mutual_information.hpp
index 05f526f0a..3b86c6541 100644
--- a/lib/libstereo/src/costs/mutual_information.hpp
+++ b/lib/libstereo/src/costs/mutual_information.hpp
@@ -8,13 +8,14 @@
 #include <cuda_runtime.h>
 
 namespace impl {
-	struct MutualInformationMatchingCost : DSImplBase<unsigned short> {
+	template<typename T>
+	struct MutualInformationMatchingCost : DSImplBase<T> {
 		typedef unsigned short Type;
 
-		MutualInformationMatchingCost(ushort w, ushort h, ushort dmin, ushort dmax) : DSImplBase<unsigned short>({w,h,dmin,dmax}) {}
+		MutualInformationMatchingCost(ushort w, ushort h, ushort dmin, ushort dmax) : DSImplBase<T>({w,h,dmin,dmax}) {}
 
-		__host__ __device__ inline unsigned short operator()(const int y, const int x, const int d) const {
-			if ((x-d) < 0) { return 24; }
+		__host__ __device__ inline T operator()(const int y, const int x, const int d) const {
+			if ((x-d) < 0) { return 0; }
 			const int I1 = l(y,x);
 			const int I2 = r(y,x-d);
 			const float H1 = h1(0,I1);
@@ -23,19 +24,20 @@ namespace impl {
 			return -(H1+H2-H12);
 		}
 
-		static constexpr unsigned short COST_MAX = 255;
+		static constexpr T COST_MAX = 255;
 
 		Array2D<unsigned char>::Data l;
 		Array2D<unsigned char>::Data r;
 		Array2D<float>::Data h1;
 		Array2D<float>::Data h2;
 		Array2D<float>::Data h12;
+		float normalize = 1.0f;
 	};
 }
 
-class MutualInformationMatchingCost : public DSBase<impl::MutualInformationMatchingCost>{
+class MutualInformationMatchingCost : public DSBase<impl::MutualInformationMatchingCost<unsigned short>>{
 public:
-	typedef impl::MutualInformationMatchingCost DataType;
+	typedef impl::MutualInformationMatchingCost<unsigned short> DataType;
 	typedef unsigned short Type;
 
 	MutualInformationMatchingCost(int width, int height, int disp_min, int disp_max) :
@@ -64,4 +66,4 @@ protected:
 	cv::Mat r_warp_;
 };
 
-#endif
\ No newline at end of file
+#endif
diff --git a/lib/libstereo/src/stereo_censussgm.cu b/lib/libstereo/src/stereo_censussgm.cu
index d707e7846..c74782d3c 100644
--- a/lib/libstereo/src/stereo_censussgm.cu
+++ b/lib/libstereo/src/stereo_censussgm.cu
@@ -118,7 +118,7 @@ void StereoCensusSgm::compute(cv::InputArray l, cv::InputArray r, cv::OutputArra
 		auto uncertainty = impl_->uncertainty.toMat();
 		cv::subtract(impl_->wta.min_cost.toMat(), impl_->cost_min_paths.toMat(), uncertainty);
 		cv::compare(uncertainty, params.uniqueness, uncertainty, cv::CMP_GT);
-		impl_->wta.disparity.toGpuMat().setTo(0, uncertainty);
+		impl_->wta.disparity.toMat().setTo(0, uncertainty);
 	}
 
 	median_filter(impl_->wta.disparity, disparity);
diff --git a/lib/libstereo/src/stereo_misgm.cu b/lib/libstereo/src/stereo_misgm.cu
index ccc6005d9..18981318b 100644
--- a/lib/libstereo/src/stereo_misgm.cu
+++ b/lib/libstereo/src/stereo_misgm.cu
@@ -11,6 +11,7 @@
 #include "wta.hpp"
 #include "cost_aggregation.hpp"
 #include "aggregations/standard_sgm.hpp"
+#include "median_filter.hpp"
 
 #ifdef __GNUG__
 
@@ -131,14 +132,8 @@ void StereoMiSgm::compute(cv::InputArray l, cv::InputArray r, cv::OutputArray di
 	impl_->wta(out, 0);
 	cudaSafeCall(cudaDeviceSynchronize());
 	if (params.debug) { timer_print("WTA"); }
-	if (disparity.isGpuMat()) {
-		impl_->wta.disparity.toGpuMat(disparity.getGpuMatRef());
-	}
-	else {
-		cv::Mat &disparity_ = disparity.getMatRef();
-		impl_->wta.disparity.toMat(disparity_);
-		cv::medianBlur(disparity_, disparity_, 3);
-	}
+
+	median_filter(impl_->wta.disparity, disparity);
 	// confidence estimate
 
 	// Drory, A., Haubold, C., Avidan, S., & Hamprecht, F. A. (2014).
diff --git a/lib/libstereo/src/stereo_misgm2.cu b/lib/libstereo/src/stereo_misgm2.cu
index e23ab955b..f2fba6f81 100644
--- a/lib/libstereo/src/stereo_misgm2.cu
+++ b/lib/libstereo/src/stereo_misgm2.cu
@@ -13,6 +13,7 @@
 #include "wta.hpp"
 #include "cost_aggregation.hpp"
 #include "aggregations/standard_sgm.hpp"
+#include "aggregations/adaptive_penalty.hpp"
 #include "median_filter.hpp"
 
 #include "costs/dual.hpp"
@@ -49,7 +50,7 @@ static void timer_print(const std::string &msg, const bool reset=true) {}
 
 using cv::Mat;
 using cv::Size;
-using ftl::stereo::aggregations::StandardSGM;
+using ftl::stereo::aggregations::AdaptivePenaltySGM;
 
 static void variance_mask(cv::InputArray in, cv::OutputArray out, int wsize=3) {
 	if (in.isGpuMat()) {
@@ -68,8 +69,7 @@ static void variance_mask(cv::InputArray in, cv::OutputArray out, int wsize=3) {
 		else {
 			im = in.getGpuMat();
 		}
-		std::cout << im.type() << "\n";
-		std::cout << mean.type() << "\n";
+
 		cv::cuda::multiply(im, im, im2);
 		auto filter = cv::cuda::createBoxFilter(CV_32FC1, CV_32FC1, cv::Size(wsize,wsize));
 		filter->apply(im, mean);   // E[X]
@@ -90,6 +90,7 @@ struct StereoMiSgm2::Impl {
 	Array2D<float> variance_r;
 	DualCostsWeighted<CensusMatchingCost, MutualInformationMatchingCost> cost;
 
+	Array2D<unsigned char> penalty;
 	Array2D<unsigned short> cost_min;
 	Array2D<unsigned short> cost_min_paths;
 	Array2D<unsigned short> uncertainty;
@@ -100,7 +101,7 @@ struct StereoMiSgm2::Impl {
 
 	Mat prior; // used only to calculate MI
 
-	PathAggregator<StandardSGM<DualCostsWeighted<CensusMatchingCost, MutualInformationMatchingCost>::DataType>> aggr;
+	PathAggregator<AdaptivePenaltySGM<DualCostsWeighted<CensusMatchingCost, MutualInformationMatchingCost>::DataType>> aggr;
 	WinnerTakesAll<DSImage16U,float> wta;
 
 	Impl(int width, int height, int min_disp, int max_disp) :
@@ -109,6 +110,7 @@ struct StereoMiSgm2::Impl {
 		variance(width, height),
 		variance_r(width, height),
 		cost(width, height, min_disp, max_disp, census, mi, variance, variance_r),
+		penalty(width, height),
 		cost_min(width, height),
 		cost_min_paths(width, height),
 		uncertainty(width, height),
@@ -156,9 +158,9 @@ void StereoMiSgm2::compute(cv::InputArray l, cv::InputArray r, cv::OutputArray d
 	timer_set();
 
 	cv::cuda::GpuMat var_l = impl_->variance.toGpuMat();
-	variance_mask(impl_->l.toGpuMat(), var_l, 9);
+	variance_mask(impl_->l.toGpuMat(), var_l, 17);
 	cv::cuda::GpuMat var_r = impl_->variance_r.toGpuMat();
-	variance_mask(impl_->r.toGpuMat(), var_r, 9);
+	variance_mask(impl_->r.toGpuMat(), var_r, 17);
 
 	cv::cuda::normalize(var_l, var_l, params.alpha, params.beta, cv::NORM_MINMAX, -1);
 	cv::cuda::normalize(var_r, var_r, params.alpha, params.beta, cv::NORM_MINMAX, -1);
@@ -179,13 +181,30 @@ void StereoMiSgm2::compute(cv::InputArray l, cv::InputArray r, cv::OutputArray d
 
 	cudaSafeCall(cudaDeviceSynchronize());
 
-	StandardSGM<DualCostsWeighted<CensusMatchingCost, MutualInformationMatchingCost>::DataType> func = {impl_->cost.data(), impl_->cost_min_paths.data(), params.P1, params.P2};
-	auto &out = impl_->aggr(func, AggregationDirections::ALL);  // params.paths
+	auto penalty = impl_->penalty.toGpuMat();
+	penalty.setTo(params.P2);
+
+	/*cv::cuda::GpuMat mask(penalty.size(), CV_8UC1);
+	cv::cuda::compare(var_l, 0.25, mask, cv::CMP_LT);
+	penalty.setTo(params.P2*2, mask);
+	cv::cuda::compare(var_l, 0.75, mask, cv::CMP_GT);
+	penalty.setTo(params.P1, mask);*/
+
+	AdaptivePenaltySGM<DualCostsWeighted<CensusMatchingCost, MutualInformationMatchingCost>::DataType> func = {impl_->cost.data(), impl_->cost_min_paths.data(), params.P1};
+	impl_->aggr.getDirectionData(AggregationDirections::LEFTRIGHT).penalties = impl_->penalty;
+	impl_->aggr.getDirectionData(AggregationDirections::RIGHTLEFT).penalties = impl_->penalty;
+	impl_->aggr.getDirectionData(AggregationDirections::UPDOWN).penalties = impl_->penalty;
+	impl_->aggr.getDirectionData(AggregationDirections::DOWNUP).penalties = impl_->penalty;
+	impl_->aggr.getDirectionData(AggregationDirections::TOPLEFTBOTTOMRIGHT).penalties = impl_->penalty;
+	impl_->aggr.getDirectionData(AggregationDirections::BOTTOMRIGHTTOPLEFT).penalties = impl_->penalty;
+	impl_->aggr.getDirectionData(AggregationDirections::BOTTOMLEFTTOPRIGHT).penalties = impl_->penalty;
+	impl_->aggr.getDirectionData(AggregationDirections::TOPRIGHTBOTTOMLEFT).penalties = impl_->penalty;
+	auto &out = impl_->aggr(func, params.paths);
 
 	cudaSafeCall(cudaDeviceSynchronize());
 	if (params.debug) { timer_print("Aggregation"); }
 
-	impl_->wta(out, 0);
+	impl_->wta(out, params.subpixel);
 	cudaSafeCall(cudaDeviceSynchronize());
 	if (params.debug) { timer_print("WTA"); }
 
-- 
GitLab