Skip to content

Instantly share code, notes, and snippets.

@paultsw
Last active September 14, 2015 16:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save paultsw/592482b68d5a76ec574a to your computer and use it in GitHub Desktop.
Save paultsw/592482b68d5a76ec574a to your computer and use it in GitHub Desktop.
function dist(p,q) {
/*
Euclidean distance function between two points.
*/
return Math.sqrt((p.x - q.x)^2, (p.y - q.y)^2);
}
function average(datapts) {
/*
Takes an array of datapoints, where each
datapoint is of the form {"x": Number, "y": Number},
and returns the average point in the middle.
*/
var x_mid = datapts.reduce(function(a,b) {
return a.x + b.x;
}, 0);
var y_mid = datapts.reduce(function(a,b) {
return a.y + b.y;
}, 0);
return {
"x": (x_mid / datapts.length),
"y": (y_mid / datapts.length)
};
}
function assign(centroids, assignments, datapts) {
/*
Takes:
a list of centroids of length k, each a JSON containing x and y coords.;
a list of assignments, each entry an integer from 0 to k-1,
of the same length as datapts;
a list of datapts, of length n, each a JSON containing x and y coords.
Returns:
an updated assignments list that performs one iteration of re-assignment.
*/
// list of centroid assignments for each datapoint:
var newAssignments = [];
for (var j = 0; j < datapts.length; j++) {
// compute the distance between the datapoint and centroids:
var currAssn = assignments[j];
var distances = centroids.map(function (centroid) {
return dist(datapts[j],centroid);
});
// find the index that minimizes distance
var minIndex = distances.indexOf(Math.min.apply(Math,distances));
// append to new assignments
newAssignments.push(minIndex);
}
// return new assignments list
return newAssignments;
}
function kmeans(k, data) {
/*
Actual k-means algorithm, implemented.
*/
// create k random datapoints to act as the centroids
var centroids = [];
for (var i = 0; i < k; i++) {
centroids.push({
"x": Math.random() * 100,
"y": Math.random() * 100
});
}
// generate initial list of centroid assignments for each datapoint:
var nList = new Array(data.length + 1).join('0').split('').map(parseFloat); // empty array of zeroes
var assignments = assign(centroids, nList, data);
// repeat re-centering process until it stabilizes
var stable = false;
while (!stable) {
var currAssignments = assignments;
// assign each datapoint to its closest centroid:
assignments = assign(centroids, assignments, data);
// move centroids to average position of all their assigned datapts:
centroids.map(function (centroid, index) {
// get list of all datapoints that a given centroid owns:
var assignedDataPts = [];
for (var i = 0; i < data.length; i++) {
if (assignments[i] == index) {
assignedDataPts.push(data[i]);
}
}
// return computed averages:
return average(assignedDataPts);
});
// check if stable:
if (currAssignments == assignments)
stable = true;
}
// return results
return {
"centroids": centroids,
"assignments": assignments,
"datapts": data
};
}
(function () {
/*
Run demonstration and print to console.
*/
// example dataset; all numbers bounded between 0 and 100.
// data points chosen to be surround two clusters.
var data = [
{
"x": 23,
"y": 32
},
{
"x": 66,
"y": 11
},
{
"x": 25,
"y": 35
},
{
"x": 20,
"y": 29
},
{
"x": 28,
"y": 28
},
{
"x": 70,
"y": 10
},
{
"x": 59,
"y": 13
},
{
"x": 25,
"y": 33
}
];
console.log(kmeans(2,data));
})();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment