Skip to content
Snippets Groups Projects
Commit 9b42ecb8 authored by Sebastian Hahta's avatar Sebastian Hahta
Browse files

Merge branch 'feature/clustering' into 'master'

DBSCAN

See merge request nicolas.pope/ftl!315
parents 0c1688c3 968c143f
No related branches found
No related tags found
1 merge request!315DBSCAN
Pipeline #27901 passed
...@@ -57,4 +57,6 @@ target_include_directories(ftloperators PUBLIC ...@@ -57,4 +57,6 @@ target_include_directories(ftloperators PUBLIC
target_link_libraries(ftloperators ftlrender ftlrgbd ftlcommon sgm libstereo Eigen3::Eigen Threads::Threads ${OpenCV_LIBS}) target_link_libraries(ftloperators ftlrender ftlrgbd ftlcommon sgm libstereo Eigen3::Eigen Threads::Threads ${OpenCV_LIBS})
#ADD_SUBDIRECTORY(test) if (BUILD_TESTS)
add_subdirectory(test)
endif()
#ifndef HPP_FTL_ALGORITHMS_DBSCAN_
#define HPP_FTL_ALGORITHMS_DBSCAN_
#include <algorithm>
#include <cstddef>
#include <deque>
#include <functional>
#include <vector>
#include <opencv2/core/core.hpp>
namespace ftl {

/**
 * DBSCAN clustering algorithm. Iterates over the points and assigns each one
 * a label based on its local neighborhood. Complexity O(n*O(RangeQuery)) for
 * n points.
 *
 * points      Input parameter: points to cluster. T must support
 *             `+=` with another T and division by an int (used to average
 *             the members of a cluster).
 * RangeQuery  Function vector<size_t>(points, i, radius) which returns the
 *             indices of the points (excluding the input point with index i)
 *             that are within the given radius. It is called exactly once
 *             for every point, so it is expected to return the same result
 *             for the same arguments.
 * min_points  DBSCAN parameter: a point is a core point when RangeQuery
 *             returns at least this many neighbors (the point itself is not
 *             counted, since RangeQuery excludes it).
 * radius      DBSCAN parameter: search radius, forwarded to RangeQuery.
 * labels      Output parameter: one cluster label per input point. Any
 *             previous content is overwritten. Negative labels are used for
 *             noise.
 * centroids   Output parameter: cluster centroids. Any previous content is
 *             discarded so that centroids[k] is the mean of all points with
 *             label k.
 *
 * NOTE: labels are stored as short; no check is made that the number of
 * clusters fits into a short.
 */
template<typename T>
void dbscan(const std::vector<T> &points,
		std::function<std::vector<size_t>(const std::vector<T>&, size_t, float)> RangeQuery,
		unsigned int min_points, float radius,
		std::vector<short> &labels, std::vector<T> &centroids) {

	const short NONE = -2;   // point has not been visited yet
	const short NOISE = -1;  // point was visited and is not a core point

	labels.assign(points.size(), NONE);
	// Cluster ids index into centroids, so stale entries from an earlier
	// call must not survive.
	centroids.clear();

	int cluster_count = 0;

	for (size_t i = 0; i < points.size(); i++) {
		if (labels[i] != NONE) {
			continue;
		}

		// get neighbours of points[i]
		std::vector<size_t> neighbors = RangeQuery(points, i, radius);
		if (neighbors.size() < min_points) {
			labels[i] = NOISE;
			continue;
		}

		// points[i] is a core point: start a new cluster with it
		const short cluster = static_cast<short>(cluster_count++);
		labels[i] = cluster;
		T centroid = points[i];
		int n_points = 1;

		// seed_set: candidate points that may still join this cluster.
		// Points that already belong to a cluster are filtered out here
		// instead of being queued and skipped later.
		std::deque<size_t> seed_set;
		auto enqueue = [&labels, &seed_set, NONE, NOISE](const std::vector<size_t> &candidates) {
			for (const auto n : candidates) {
				if (labels[n] == NONE || labels[n] == NOISE) {
					seed_set.push_back(n);
				}
			}
		};
		enqueue(neighbors);

		while (!seed_set.empty()) {
			const size_t i_n = seed_set.front();
			seed_set.pop_front();

			const bool was_noise = (labels[i_n] == NOISE);
			if (!was_noise && labels[i_n] != NONE) {
				// the same index can be queued more than once; it was
				// already assigned when it was popped the first time
				continue;
			}

			labels[i_n] = cluster;
			centroid += points[i_n];
			n_points++;

			if (was_noise) {
				// A NOISE label is only ever set after a range query found
				// fewer than min_points neighbours, so this is a border
				// point: it joins the cluster but cannot expand it.
				continue;
			}

			neighbors = RangeQuery(points, i_n, radius);
			if (neighbors.size() >= min_points) {
				// core point: its neighbourhood belongs to the cluster too
				enqueue(neighbors);
			}
		}

		centroids.push_back(centroid/n_points);
	}
}

}
#endif
### DBSCAN Unit ################################################################
# Unit test executable for the DBSCAN implementation: compiles dbscan_unit.cpp
# together with the object files of the CatchTest target.
add_executable(dbscan_unit
$<TARGET_OBJECTS:CatchTest>
./dbscan_unit.cpp
)
# Make the headers under ../include visible to the test sources.
target_include_directories(dbscan_unit PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../include")
target_link_libraries(dbscan_unit ftlcommon)
# Register the executable as a CTest test named DBSCANUnitTest.
add_test(DBSCANUnitTest dbscan_unit)
This diff is collapsed.
#include <catch.hpp>
#include <random>
#include <opencv2/core.hpp>
#include <ftl/algorithms/dbscan.hpp>
#include <ftl/profiler.hpp>
#include "data.hpp"
using std::vector;
/**
 * Brute-force range query for the DBSCAN tests: compares points[idx] against
 * every other point and collects the indices of those whose distance
 * (cv::norm of the difference) is strictly smaller than radius. The query
 * point itself is never part of the result.
 */
template<typename T>
static vector<size_t> linearSearch(const vector<T> &points, size_t idx, float radius) {
	vector<size_t> in_range;

	for (unsigned other = 0; other < points.size(); ++other) {
		// skip the query point first; the distance is only evaluated for
		// genuinely different indices
		const bool is_neighbor =
			(other != idx) && (cv::norm(points[idx] - points[other]) < radius);

		if (is_neighbor) {
			in_range.push_back(other);
		}
	}

	return in_range;
}
// Two tight groups of five points each, centered on (1,2,3) and (3,2,1).
// Every point has the four other members of its group within the search
// radius, so both groups must come out as one cluster each.
TEST_CASE("DBSCAN 3D clustering (linear search)") {
	vector<cv::Vec3f> points {
		{1.0,2.1,3.0},
		{1.0,1.9,3.0},
		{1.0,2.0,3.0},
		{1.0,1.9,3.0},
		{1.0,2.1,3.0},

		{3.0,2.1,1.0},
		{3.0,2.0,1.0},
		{3.0,2.0,1.0},
		{3.0,2.0,1.0},
		{3.0,1.9,1.0}
	};

	vector<cv::Vec3f> centroids;
	vector<short> labels;
	ftl::dbscan<cv::Vec3f>(points, linearSearch<cv::Vec3f>, 3, 1.0f, labels, centroids);

	REQUIRE(centroids.size() == 2);

	// The centroids are averages computed in float, so compare against the
	// expected position with a small tolerance instead of exact equality.
	const double tolerance = 1e-5;
	REQUIRE(cv::norm(centroids[0] - cv::Vec3f(1,2,3)) < tolerance);
	REQUIRE(cv::norm(centroids[1] - cv::Vec3f(3,2,1)) < tolerance);

	// First five points form cluster 0, the remaining five cluster 1.
	REQUIRE(labels.size() == points.size());
	for (size_t i = 0; i < labels.size(); i++) {
		const short expected = (i < 5) ? 0 : 1;
		REQUIRE(labels[i] == expected);
	}
}
// Draws n_points normally distributed samples around each known centroid and
// checks that DBSCAN recovers one cluster per centroid, close to the true
// position.
TEST_CASE("DBSCAN 3D clustering (random points)") {

	std::random_device rd;
	std::mt19937::result_type seed = rd();
	// The input differs on every run; report the seed on failure so that a
	// failing run can be reproduced.
	INFO("random seed: " << seed);

	vector<cv::Vec3f> true_centroids = {
		{ 1, 5, 3},
		{ 3, 5, 1},
		{ 0, 0, 0},
		{ 3, 3, 3},
		{-3,-3,-3},
		{ 7, 7, 7},
		{-7,-7,-7},
	};

	int n_points = 16;
	float sigma = 0.33f;
	float eps = sigma; // error threshold for test case

	vector<cv::Vec3f> points;
	std::mt19937 gen(seed);

	for (const auto &c : true_centroids) {
		std::normal_distribution<float> x{c[0], sigma};
		std::normal_distribution<float> y{c[1], sigma};
		std::normal_distribution<float> z{c[2], sigma};

		for (int i = 0; i < n_points; i++) {
			points.push_back({x(gen), y(gen), z(gen)});
		}
	}

	vector<cv::Vec3f> centroids;
	vector<short> labels;
	ftl::dbscan<cv::Vec3f>(points, linearSearch<cv::Vec3f>, 8, 1.0f, labels, centroids);

	REQUIRE(centroids.size() == true_centroids.size());
	for (unsigned i = 0; i < true_centroids.size(); i++) {
		// assumes same order as points were added (no shuffle)
		CAPTURE(i);
		REQUIRE(cv::norm(centroids[i] - true_centroids[i]) < eps);
	}
}
// Clusters the noisy_moons data set and compares centroids and per-point
// labels with the reference values that accompany the data.
TEST_CASE("DBSCAN 2D clustering (noisy moons)") {

	vector<short> labels;
	vector<cv::Vec2f> centroids;

	// Profiling hook, kept for reference (~10ms in a release build):
	//   ftl::Profiler __profile(__func__, "DBSCAN 1500 points linear search", 0);
	//   __profile.verbosity(1);
	ftl::dbscan<cv::Vec2f>(noisy_moons, linearSearch<cv::Vec2f>, 5, 0.2f, labels, centroids);

	// assumes clustering returns same labels each time
	REQUIRE(centroids.size() == 2);
	REQUIRE(cv::norm(centroids[0] - cv::Vec2f(1.0, 0.0)) < 0.15); // 0.13359162681252454
	REQUIRE(cv::norm(centroids[1] - cv::Vec2f(0.0, 0.5)) < 0.15); // 0.13651460122147505

	// Only clustered points are compared; negative labels mark NOISE.
	for (unsigned idx = 0; idx < labels.size(); ++idx) {
		if (labels[idx] >= 0) {
			REQUIRE(labels[idx] == noisy_moons_labels[idx]);
		}
	}
}
// Clusters the noisy_circles data set. Both clusters are expected to have
// their centroid close to the origin; per-point labels are compared with the
// reference labels that accompany the data.
TEST_CASE("DBSCAN 2D clustering (noisy circles)") {

	vector<cv::Vec2f> centroids;
	vector<short> labels;
	{
		//ftl::Profiler __profile(__func__, "DBSCAN 1500 points linear search", 0);
		//__profile.verbosity(1);

		// ~10ms (release)
		ftl::dbscan<cv::Vec2f>(noisy_circles, linearSearch<cv::Vec2f>, 5, 0.1f, labels, centroids);
	}

	// assumes clustering returns same labels each time
	REQUIRE(centroids.size() == 2);
	REQUIRE(cv::norm(centroids[0]) < 0.01); // 0.0008899436718976423
	// second cluster: check centroids[1] (the first one was checked twice before)
	REQUIRE(cv::norm(centroids[1]) < 0.01); // 0.0014477936451883612

	for (unsigned i = 0; i < labels.size(); i++) {
		if (labels[i] < 0) continue; // label: NOISE
		REQUIRE(labels[i] == noisy_circles_labels[i]);
	}
}
#define CATCH_CONFIG_MAIN
#include "catch.hpp"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment