Skip to content

Instantly share code, notes, and snippets.

@sjs7007
Last active November 21, 2015 23:14
Show Gist options
  • Save sjs7007/9e6148cffd398678a5c5 to your computer and use it in GitHub Desktop.
Save sjs7007/9e6148cffd398678a5c5 to your computer and use it in GitHub Desktop.
# Reference for silhouette(): https://stat.ethz.ch/R-manual/R-devel/library/cluster/html/silhouette.html
# install.packages("cluster") # for silhouette
# install.packages("Rmisc") # for multiplot
library(ggplot2)
library(cluster)
library(Rmisc)
#library(cluster)
library(fpc)
library(rgl)
# Choose the number of k-means clusters for the first three columns of
# dataset1.csv. For every candidate k the script records the total
# within-cluster sum of squares (SSE) and the average silhouette width, plots
# both diagnostics, and finally draws a 3D scatter plot coloured by the
# clustering whose average silhouette width is highest.
points <- read.csv("dataset1.csv")
points <- points[1:3]

# Candidate cluster counts. To scan a window around the sqrt(n / 2) rule of
# thumb instead of a single value, use for example:
# kValues <- seq(ceiling(sqrt(nrow(points) / 2)) - 10,
#                ceiling(sqrt(nrow(points) / 2)) + 10, by = 1)
kValues <- 23

# silhouette() is only defined for 2 <= k <= n - 1; outside that range it
# returns NA and the summary() call below would fail with an obscure error.
stopifnot(
  length(kValues) > 0,
  all(kValues >= 2),
  all(kValues <= nrow(points) - 1)
)

# The pairwise distances do not depend on k, so compute them once.
pointDist <- dist(points)

SSEValues <- numeric(length(kValues))
silWidth <- numeric(length(kValues))
# Keep every fitted model: kmeans() starts from random centres, so refitting
# the winning k later would not reproduce the clustering that was scored.
models <- vector("list", length(kValues))

for (idx in seq_along(kValues)) {
  model <- kmeans(points, kValues[idx])
  models[[idx]] <- model
  SSEValues[idx] <- model$tot.withinss
  silWidth[idx] <- summary(silhouette(model$cluster, pointDist))$avg.width
}

# qplot() has no `type` argument (that belongs to base plot()), so build the
# connected point-and-line charts with explicit geoms instead.
diagnostics <- data.frame(
  clusters = kValues,
  sse = SSEValues,
  silhouetteWidth = silWidth
)
p1 <- ggplot(diagnostics, aes(x = clusters, y = sse)) +
  geom_line() +
  geom_point() +
  labs(x = "Number of clusters", y = "SSE")
p2 <- ggplot(diagnostics, aes(x = clusters, y = silhouetteWidth)) +
  geom_line() +
  geom_point() +
  labs(x = "Number of Clusters", y = "Average Silhouette Coefficient")
multiplot(p1, p2)

# Position in kValues of the highest average silhouette width, and the
# corresponding number of clusters.
kPos <- which.max(silWidth)
k <- kValues[kPos]

# Plot the clustering that actually achieved the best silhouette width rather
# than whichever model happened to be fitted last.
model <- models[[kPos]]

# Give each cluster its own colour: plain integer ids would wrap around the
# 8-colour default palette and merge clusters visually when k > 8.
clusterColours <- rainbow(k)[model$cluster]
plot3d(points, col = clusterColours)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment