Created
March 29, 2022 00:20
-
-
Save dno89/4a0efa22469738daa748dd9f56774ff6 to your computer and use it in GitHub Desktop.
Naive implementation of K-means for 2D points
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <vector> | |
#include <random> | |
#include <algorithm> | |
#include <numeric> | |
#include <cstdlib> | |
#include <Eigen/Dense> | |
#define T(X) cerr << #X": " << X << endl; | |
template<typename T> | |
std::ostream& operator<<(std::ostream& out, const std::vector<T>& v) { | |
for(const auto& x : v) { | |
out << x << ", "; | |
} | |
return out; | |
} | |
using namespace std; | |
// Typedefs | |
using Point2 = Eigen::Vector2d; | |
vector<Point2> GeneratePoints(const vector<Point2>& means, size_t points_per_mean) { | |
default_random_engine dre; | |
vector<Point2> result; | |
result.reserve(means.size()*points_per_mean); | |
for(const auto& mean : means) { | |
normal_distribution<> gaussian_noise(0.0, 1.0); | |
for(size_t ii = 0; ii < points_per_mean; ++ii) { | |
result.push_back( | |
mean + Point2{gaussian_noise(dre), gaussian_noise(dre)} | |
); | |
} | |
} | |
random_shuffle(result.begin(), result.end()); | |
return result; | |
} | |
bool CheckConvergence(const vector<Point2>& old_means, const vector<Point2>& new_means, double distance_threshold) { | |
double residual = 0.0; | |
for(size_t ii = 0; ii < old_means.size(); ++ii) { | |
residual += (old_means[ii]-new_means[ii]).squaredNorm(); | |
} | |
return residual < distance_threshold; | |
} | |
std::pair<vector<size_t>, vector<Point2>> KMeansClusters(const vector<Point2>& points, size_t k) { | |
vector<size_t> assigned_clusters(points.size()); // size N | |
generate(assigned_clusters.begin(), assigned_clusters.end(), [k](){ return rand()%k; }); | |
// T(assigned_clusters) | |
vector<Point2> clusters_mean; // size k | |
auto assign = [&](){ | |
for(size_t ii = 0; ii < points.size(); ++ii) { | |
const auto& p = points[ii]; | |
auto closest_cluster = min_element(clusters_mean.begin(), clusters_mean.end(), | |
[&p](const Point2& m1, const Point2& m2) { return (m1-p).squaredNorm() < (m2-p).squaredNorm();} ); | |
assigned_clusters[ii] = std::distance(clusters_mean.begin(), closest_cluster); | |
} | |
}; | |
auto update = [&](){ | |
vector<Point2> result(k, Point2{0.0, 0.0}); | |
// T(result) | |
vector<size_t> clusters_counter(k, 0); | |
for(size_t ii = 0; ii < points.size(); ++ii) { | |
result[assigned_clusters[ii]] += points[ii]; | |
++clusters_counter[assigned_clusters[ii]]; | |
} | |
// T(clusters_counter) | |
for(size_t ii = 0; ii < result.size(); ++ii) { | |
if(clusters_counter[ii] > 0) | |
result[ii] /= clusters_counter[ii]; | |
} | |
return result; | |
}; | |
clusters_mean = update(); | |
// T(clusters_mean) | |
while(true) { | |
assign(); | |
auto new_means = update(); | |
// T(new_means) | |
if(CheckConvergence(new_means, clusters_mean, 1e-3)) { | |
break; | |
} | |
clusters_mean = std::move(new_means); | |
}; | |
return {std::move(assigned_clusters), std::move(clusters_mean)}; | |
} | |
int main(int, char**) { | |
const auto points = GeneratePoints({{0.0, 0.0}, {10.0, 10.0}, {5.0, 5.0}}, 10); | |
const auto& [clusters, means] = KMeansClusters(points, 3); | |
// T(points) | |
T(clusters) | |
T(means) | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment