Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
ks.default <- function(rows) seq(2, max(3, rows %/% 4))
many_kmeans <- function(x, ks = ks.default(nrow(x)), ...) {
ldply(seq_along(ks), function(i) {
cl <- kmeans(x, centers = ks[i], ...)
data.frame(obs = seq_len(nrow(x)), i = i, k = ks[i], cluster = cl$cluster)
})
}
all_hclust <- function(x, ks = ks.default(nrow(x)), point.dist = "euclidean", cluster.dist = "ward") {
d <- dist(x, method = point.dist)
cl <- hclust(d, method = cluster.dist)
ldply(seq_along(ks), function(i) {
data.frame(
obs = seq_len(nrow(x)), i = i, k = ks[i],
cluster = cutree(cl, ks[i])
)
})
}
center <- function(x) x - mean(range(x))
#' @param clusters data frame giving cluster assignments as produced by
#' many_kmeans or all_hclust
#' @param y value to plot on the y-axis. Should be length
#' \code{max(clusters$obs)}
clustergram <- function(clusters, y, line.width = NULL) {
clusters$y <- y[clusters$obs]
clusters$center <- ave(clusters$y, clusters$i, clusters$cluster)
if (is.null(line.width)) {
line.width <- 0.5 * diff(range(clusters$center, na.rm = TRUE)) /
length(unique(clusters$obs))
}
clusters$line.width <- line.width
# Adjust center positions so that they don't overlap
clusters <- clusters[with(clusters, order(i, center, y, obs)), ]
clusters <- ddply(clusters, c("i", "cluster"), transform,
adj = center + (line.width * center(seq_along(y)))
)
structure(clusters,
class = c("clustergram", class(clusters)),
line.width = line.width)
}
plot.clustergram <- function(x) {
i_pos <- !duplicated(x$i)
means <- ddply(x, c("cluster", "i"), summarise,
min = min(adj), max = max(adj))
ggplot(x, aes(i)) +
geom_ribbon(aes(y = adj, group = obs, fill = y, ymin = adj - line.width/2, ymax = adj + line.width/2, colour = y)) +
geom_errorbar(aes(ymin = min, ymax = max), data = means, width = 0.1) +
scale_x_continuous("cluster", breaks = x$i[i_pos], labels = x$k[i_pos]) +
labs(y = "Cluster average", colour = "Obs\nvalue", fill = "Obs\nvalue")
}
iris_s <- scale(iris[,-5])
k_def <- many_kmeans(iris_s)
k_10 <- many_kmeans(iris_s, 2:10)
k_rep <- many_kmeans(iris_s, rep(4, 5))
h_def <- all_hclust(iris_s)
h_10 <- all_hclust(iris_s, 2:10)
h_5 <- all_hclust(iris_s, seq(2, 20, by = 4))
pr <- princomp(iris_s)
pr1 <- predict(pr)[, 1]
pr2 <- predict(pr)[, 2]
plot(clustergram(k_def, pr1))
plot(clustergram(k_rep, pr1))
plot(clustergram(k_rep, pr2))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.
You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session.