In this note, I'll show how to cluster the points into K clusters via MST and K-means.
Note that these two algorithms will generate different results.
#include <iostream>
#include <vector>
#include <string>
#include <cmath>
#include <fstream>
#include <iostream>
#include <sstream>
using namespace std;
struct Point {
string name;
double x, y;
Point(string name, double x, double y) {
this->name = name;
this->x = x;
this->y = y;
}
};
vector<Point> readtxt() {
vector<Point> points;
string line;
ifstream file("hw6_sample_input.txt");
while (getline(file, line)) {
stringstream lineStream(line);
string bit;
string name;
double x, y;
getline(lineStream, bit, ' ');
name = bit;
getline(lineStream, bit, ' ');
x = stof(bit);
getline(lineStream, bit, '\n');
y = stof(bit);
points.push_back(Point(name, x, y));
}
return points;
}
vector< vector<Point> > initializeCluster(vector<Point> points) {
vector< vector<Point> > clusters;
for (int i = 0; i < points.size(); i++) {
clusters.push_back(vector<Point> (1, points[i]));
}
return clusters;
}
vector< vector<float> > generateDistanceMap(vector<Point> points) {
int n = points.size();
vector< vector<float> > dist(n, vector<float> (n, 100));
for (int i = 0; i < n; i++) {
for (int j = i + 1; j < n; j++) {
dist[i][j] = pow((points[i].x - points[j].x), 2) + pow((points[i].y - points[j].y), 2);
}
}
return dist;
}
void selectNearestNeighbors(vector< vector<float> >& dist, int& x, int& y) {
int minDist = 100;
for (int i = 0; i < dist.size(); i++)
for (int j = i + 1; j < dist[i].size(); j++)
if (dist[i][j] < minDist) {
x = i;
y = j;
minDist = dist[i][j];
}
dist[x][y] = 100;
return ;
}
int mergerTwoClusters(vector< vector<Point> >& clusters, Point& pointX, Point& pointY) {
int a = -1, b = -1;
for (int i = 0; i < clusters.size(); i++) {
for (int j = 0; j < clusters[i].size(); j++) {
if (clusters[i][j].name == pointX.name) a = i;
if (clusters[i][j].name == pointY.name) b = i;
}
}
if (a == b) return clusters.size();
for (int i = 0; i < clusters[b].size(); i++)
clusters[a].push_back(clusters[b][i]);
clusters.erase(clusters.begin() + b);
return clusters.size();
}
void printCluster(vector< vector<Point> > clusters) {
for(int i = 0; i < clusters.size(); i++) {
cout << "Cluster " << i << ": ";
for (int j = 0; j < clusters[i].size(); j++) {
cout << clusters[i][j].name << ", ";
}
cout << endl;
}
}
void printDistanceMap(vector< vector<float> > dist) {
for(int i = 0; i < dist.size(); i++) {
for (int j = 0; j < dist[i].size(); j++) {
cout << dist[i][j] << ", ";
}
cout << endl;
}
}
int main(int argc, char const *argv[])
{
int K = 2;
vector<Point> points = readtxt();
vector< vector<Point> > clusters = initializeCluster(points);
vector< vector<float> > dist = generateDistanceMap(points);
int clusterNum = 100;
while (clusterNum != K) {
int x = 0, y = 0;
selectNearestNeighbors(dist, x, y);
Point pointX = points[x], pointY = points[y];
clusterNum = mergerTwoClusters(clusters, pointX, pointY);
}
printCluster(clusters);
return 0;
}
#include <ctime>
#include <fstream>
#include <iostream>
#include <sstream>
#include <vector>
#include <string>
using namespace std;
struct Point {
string name;
double x, y;
int cluster;
double minDist;
Point() :
name(""),
x(0.0),
y(0.0),
cluster(-1),
minDist(__DBL_MAX__) {}
Point(string name, double x, double y) :
name(name),
x(x),
y(y),
cluster(-1),
minDist(__DBL_MAX__) {}
double distance(Point p) {
return (p.x - x) * (p.x - x) + (p.y - y) * (p.y - y);
}
};
vector<Point> readtxt() {
vector<Point> points;
string line;
ifstream file("hw6_sample_input.txt");
while (getline(file, line)) {
stringstream lineStream(line);
string bit;
string name;
double x, y;
getline(lineStream, bit, ' ');
name = bit;
getline(lineStream, bit, ' ');
x = stof(bit);
getline(lineStream, bit, '\n');
y = stof(bit);
points.push_back(Point(name, x, y));
}
return points;
}
void kMeansClustering(vector<Point>* points, int n, int iterations, int k) {
vector<Point> centroids;
srand(time(0));
// for (int i = 0; i < k; ++i) {
// centroids.push_back(points->at(rand() % n));
// }
centroids.push_back(points->at(0));
centroids.push_back(points->at(14));
for(int a = 0; a < iterations; a++) {
for (vector<Point>::iterator c = begin(centroids); c != end(centroids); ++c) {
int clusterId = c - begin(centroids);
for (vector<Point>::iterator it = points->begin();
it != points->end(); ++it) {
Point p = *it;
double dist = c->distance(p);
if (dist < p.minDist) {
p.minDist = dist;
p.cluster = clusterId;
}
*it = p;
}
}
vector<int> nPoints;
vector<double> sumX, sumY;
for (int j = 0; j < k; ++j) {
nPoints.push_back(0);
sumX.push_back(0.0);
sumY.push_back(0.0);
}
for (vector<Point>::iterator it = points->begin();
it != points->end(); ++it) {
int clusterId = it->cluster;
nPoints[clusterId] += 1;
sumX[clusterId] += it->x;
sumY[clusterId] += it->y;
it->minDist = __DBL_MAX__;
}
for (vector<Point>::iterator c = begin(centroids);
c != end(centroids); ++c) {
int clusterId = c - begin(centroids);
c->x = sumX[clusterId] / nPoints[clusterId];
c->y = sumY[clusterId] / nPoints[clusterId];
}
}
ofstream myfile;
myfile.open("output.txt");
myfile << "x,y,c" << endl;
for (vector<Point>::iterator it = points->begin(); it != points->end(); ++it) {
myfile << it->name << it->x << "," << it->y << "," << it->cluster << endl;
}
myfile.close();
}
int main(int argc, char const *argv[])
{
vector<Point> points = readtxt();
for(int i = 0; i < 100; i ++) {kMeansClustering(&points, 15, 30, 2);}
return 0;
}