Last active
March 18, 2023 16:53
-
-
Save clairemcwhite/12965e7d221281b32ac7832dbbc8ff80 to your computer and use it in GitHub Desktop.
Demo of going from an igraph dendrogram to a simplified view as a ggraph circlepack as in https://twitter.com/clairemcwhite/status/1079895521446293505
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidygraph) | |
library(tidyverse) | |
library(ggraph) | |
library(dendextend) | |
library(igraph) | |
# Colour-blind-friendly Okabe-Ito palette (with grey as the last entry),
# used for the Species fill scale in the final plot.
palette_OkabeIto <- c(
  "#E69F00", # orange
  "#56B4E9", # sky blue
  "#009E73", # bluish green
  "#F0E442", # yellow
  "#0072B2", # blue
  "#D55E00", # vermillion
  "#CC79A7", # reddish purple
  "#999999"  # grey
)
# Cut a dendrogram at one absolute height and return the cluster membership.
#
# dendrogram: dendrogram object to cut (passed to cutree()).
# height:     absolute height at which to cut (passed as `h`).
# c:          value used only to label the membership column; it is rounded
#             to 2 decimals, giving a column name such as "cut_0.6".
#
# Returns a two-column tibble: the cluster number of each leaf in a column
# named "cut_<c>", and the leaf label in "ID".
cut_df <- function(dendrogram, height, c) {
  membership_col <- paste0("cut_", as.character(round(c, 2)))

  # cutree() yields one cluster number per leaf; the leaf labels end up as
  # the row names of the data frame.
  membership <- as.data.frame(cutree(dendrogram, h = height))
  membership$ID <- row.names(membership)
  membership <- as_tibble(membership)

  # The parameter `c` is numeric, so this call still resolves to base c():
  # R skips non-function bindings when looking up a function being called.
  names(membership) <- c(membership_col, "ID")
  membership
}
# Cut a dendrogram at several relative heights and collect the results.
#
# dendrogram: dendrogram object to cut.
# cuts:       numeric vector of fractions of the tallest node height
#             (each is multiplied by that maximum to get an absolute height).
#
# Returns a data frame with an "ID" column plus one membership column per
# cut, as produced by cut_df(), merged on their shared columns.
cut_dend <- function(dendrogram, cuts) {
  max_height <- max(get_nodes_attr(dendrogram, "height"))

  # Start from an empty frame holding only ID, then fold in one cut at a
  # time; all = TRUE keeps every leaf (a full outer join).
  empty_start <- data.frame(ID = as.character())
  Reduce(
    function(merged, fraction) {
      merge(
        merged,
        cut_df(dendrogram, fraction * max_height, fraction),
        all = TRUE
      )
    },
    cuts,
    empty_start
  )
}
# The point of this code is to threshold a dendrogram at various points, and
# plot clusters as subsets of their parent cluster.

# Cluster on the numeric measurements only. Species is a factor, which dist()
# cannot use: it is coerced to NA with a warning.
d_iris <- as.dendrogram(hclust(dist(iris[, names(iris) != "Species"])))

# leaflab = "none" hides the leaf labels (vertex.label is an igraph plotting
# argument, not one that plot() understands for dendrograms).
plot(d_iris, leaflab = "none")

# Choose points up the dendrogram at which to cut into clusters,
# 1.0 is stem and 0.0 is tips.
# In this iris dendrogram, 0.6 cuts into two clusters, 0.4 into 4 clusters,
# and 0.2 into 12 clusters.
# The cuts must be listed from coarsest (highest) to finest (lowest), because
# that order defines the parent -> child nesting below.
cut_fractions <- c(0.6, 0.4, 0.2)
cut_clusters <- cut_dend(d_iris, cut_fractions)

# Membership columns ("cut_0.6", ...), in the order the cuts were given.
level_cols <- setdiff(names(cut_clusters), "ID")

# Each cluster needs a unique ID, not a plain number.
# Borrow a unique identifier for each column from its column header,
# e.g. cluster 1 of column cut_0.6 becomes "1cut_0.6".
# All nodes also need to be connected back to a single origin point.
# This can be called anything, just not the same thing as the other
# cluster ids.
clusters_uniqued <- cut_clusters %>%
  mutate(across(all_of(level_cols), ~ paste0(.x, cur_column()))) %>%
  mutate(link = "origin")

# Create edges in "from", "to" format:
# https://www.r-graph-gallery.com/334-basic-dendrogram-with-ggraph/
# The hierarchy runs origin -> coarsest cut -> ... -> finest cut -> leaf.
# The final level is the individual tips of the dendrogram, drawn as the
# smallest dots in the circlepack.
hierarchy <- c("link", level_cols, "ID")
edge_list <- map2(
  head(hierarchy, -1),
  tail(hierarchy, -1),
  function(parent, child) {
    clusters_uniqued %>%
      select(from = all_of(parent), to = all_of(child)) %>%
      distinct()
  }
) %>%
  bind_rows()

mygraph <- as_tbl_graph(edge_list)

# Get the row names from the iris data set into a normal data column
iris_plain <- iris %>%
  mutate(name = rownames(iris))

# Add on species labels to the nodes. Only leaf nodes match a row of iris;
# cluster nodes get NA and are filled white below.
mygraph <- mygraph %>%
  activate("nodes") %>%
  left_join(iris_plain, by = "name")

# If you want to hide the first large circle see:
# https://www.r-graph-gallery.com/315-hide-first-level-in-circle-packing/
ggraph(mygraph, layout = "circlepack") +
  geom_node_circle(aes(fill = Species)) +
  theme_void() +
  scale_fill_manual(
    values = palette_OkabeIto,
    na.value = "white",
    limits = c("setosa", "versicolor", "virginica")
  ) +
  # Trailing NULL adds nothing to the plot; it lets any layer above be
  # commented out without leaving a dangling "+".
  NULL
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Now with species labels
Code updated to use tidygraph, which makes adding annotations to nodes easier
Instead of using the igraph function "graph_from_data_frame(edge_list)"
use the tidygraph function "as_tbl_graph(edge_list)"
And also a very lazy MS paint schematic of how the dendrogram is simplified.