Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@hadley
Created July 3, 2010 15:57
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hadley/462654 to your computer and use it in GitHub Desktop.
Save hadley/462654 to your computer and use it in GitHub Desktop.
data <- iris
d <- dist(scale(iris[, 1:4]))
h <- hclust(d, "ward")
data$ORDER <- order(h$order)
data$HEIGHT <- 0
data$LEVEL <- 0
data$POINTS <- 1
combine <- function(x, weights) {
if (is.factor(x)) {
if (length(unique(x)) == 1) x[1] else NA
} else {
weighted.mean(x, weights, na.rm = TRUE)
}
}
combine_rows <- function(df) {
combined <- as.data.frame(llply(df, combine, weights = df$POINTS))
combined$POINTS <- sum(df$POINTS)
combined
}
for (i in 1:nrow(h$merge)) {
newr <- combine_rows(data[as.character(-h$merge[i,]),])
newr$HEIGHT <- h$height[i]
newr$LEVEL <- i
rownames(newr) <- as.character(-i)
data <- rbind(data, newr)
}
data$node <- (as.numeric(rownames(data)) < 0) + 0
leaves <- subset(data, node == 0)
nodes <- subset(data, node == 1)
# < 0 = observation, > 0 = cluster
merge <- h$merge
merge[merge > 0] <- merge[merge > 0] + nrow(leaves)
merge[merge < 0] <- abs(merge[merge < 0])
lines <- data.frame(rbind(
cbind(nodes[, ], data[merge[, 1], ]),
cbind(nodes[, ], data[merge[, 2], ])
))
ggplot(, aes(ORDER, POINTS)) +
geom_point(data = leaves) +
geom_point(data = nodes, size = 0.5) +
geom_segment(aes(xend=ORDER.1, yend=POINTS.1, group=1), data=lines) +
xlab(NULL) +
scale_y_continuous("Number of points in cluster")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment