Skip to content

Instantly share code, notes, and snippets.

@MonteShaffer
Last active October 22, 2017 17:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save MonteShaffer/b4b86cf39789281b7e5f52fc0ed85d94 to your computer and use it in GitHub Desktop.
Save MonteShaffer/b4b86cf39789281b7e5f52fc0ed85d94 to your computer and use it in GitHub Desktop.
# I wrote this in 2009 as part of my M.S. Stats - Statistical Computing Course
# Not all the bells and whistles, but should be understandable
# © Monte J. Shaffer
# monte.shaffer@gmail.com
# MIT license
myKmeans = function(data,clusters,maxIter=25)
{
data = as.matrix(data);
n = nrow(data);
p = ncol(data); # p-dim space
## step 0: initialize k-points as centers
if(length(clusters)==1)
{
k = clusters;
init = data[sample(1:n, k),];
}
else
{
# cluster centers are passed
k = nrow(cluster);
init = clusters;
}
myDist = function(x,y)
{
# simple Euclidean
tDist = 0;
for(d in 1:length(x))
{
tDist = tDist + (x[d]-y[d])^2;
}
tDist; #sqrt not necessary as WSS is not calculated, just to find MIN
}
kOneStep = function(data,init,n,k,myReturn="centroid")
{
# init is centroids
## step 1: classify n points to k-centers (init)
## grab distances [could use mahalanobis(, but just dist(
## which defaults to Euclidean
newData = matrix(0, nrow=n,ncol=k);
# loop and calculate every pairwise distance, could put into vector,
# but this will work [although processor expensive]
for(i in 1:n)
{
for(j in 1:k)
{
newData[i,j] = myDist(data[i,],init[j,]);
}
}
## classify to classes
newClass = matrix(0, nrow=1,ncol=k);
## loop through distances and classify based on min
for(i in 1:n)
{
myMin = min(newData[i,]);
for(j in 1:k)
{
if(newData[i,j]==myMin)
{
myIndex = j;
break;
}
}
newClass[i]=myIndex;
}
if(myReturn != "centroid")
{
newClass;
}
else
{
## step 2: compute new center
myData = cbind(newClass,data);
#print(myData);
centroid = matrix(0, nrow=k,ncol=p);
for(j in 1:k)
{
tempData = subset(myData,newClass==j)[,-1];
#print(tempData);
for(m in 1:p)
{
centroid[j,m] = mean(tempData[,m]);
}
}
centroid;
}
}
for(iter in 1:maxIter)
{
init = kOneStep(data,init,n,k);
}
# one more time to get classifications
newClass = kOneStep(data,init,n,k,"class");
out = list(cluster = newClass, centers = init);
class(out) = "myKmeans";
out;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment