Created
January 21, 2022 17:13
-
-
Save jdidion/a36e808c2ec92734dff5061cf11deba1 to your computer and use it in GitHub Desktop.
Creates a simple UpSet plot in R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Create an UpSet plot given the counts for each intersection. | |
# | |
# The input is a data frame with N + 1 columns, where N is the | |
# number of sets, and M rows, where M is the number of intersections | |
# between the sets. The values in columns 1:N are binary values | |
# indicating which sets are involved in each intersection. The | |
# N+1 column must have the name "count" and holds the count values | |
# for the intersections. You can generate a template counts data | |
# frame using the create_counts_df() function. | |
# | |
# Example: | |
# > counts = create_counts_df(c("A", "B", "C")) | |
# > counts$count = sample(100, nrow(counts)) | |
# > upset_simple(counts) | |
create_counts_df = function(sets) { | |
n = length(sets) | |
m = matrix(0, nrow=n+1, ncol=n) | |
colnames(m) = sets | |
diag(m) = 1 | |
m[n+1,] = 1 | |
for (i in 2:(n-1)) { | |
combo = combn(n, i) | |
mm = matrix(0, nrow=ncol(combo), ncol=n) | |
for (j in 1:ncol(combo)) { | |
mm[j, combo[,j]] = 1 | |
} | |
m = rbind(m, mm) | |
} | |
data.frame(m, count=0) | |
} | |
upset_simple = function(df, xlog10=F, ylog10=F) { | |
n = ncol(df) - 1 | |
df = df[order(apply(df[,1:n], 1, sum), -df$count),] | |
sums = data.frame( | |
x=1:n, | |
y=sapply(1:n, function(i) sum(df[which(df[,i] == 1), "count"])) | |
) | |
if (xlog10) { | |
sums$y = log10(sums$y) | |
ylabel = "Set Size (log 10)" | |
} else { | |
ylabel = "Set Size" | |
} | |
if (ylog10) { | |
df$count = log10(df$count) | |
xlabel = "Intersection Size (log10)" | |
} else { | |
xlabel = "Intersection Size" | |
} | |
p1 = ggplot(sums, aes(x=x, y=y)) + | |
geom_bar(stat="identity", position = position_stack(reverse = TRUE), width=0.5) + | |
coord_flip() + | |
theme_minimal() + | |
theme( | |
axis.title.y=element_blank(), | |
panel.grid.major.y=element_blank(), | |
panel.grid.minor.y=element_blank(), | |
plot.margin = unit(c(0, 0, 0, 0), "null") | |
) + | |
ylab(ylabel) + | |
scale_x_continuous(position="top", breaks=1:n, labels=colnames(df)[1:n], limits=c(0.5, n+0.5)) | |
p2 = ggplot(df, aes(x=1:nrow(df), y=count)) + | |
geom_bar(stat="identity", width=0.5) + | |
theme_minimal() + | |
theme( | |
axis.text.x = element_blank(), | |
axis.ticks.x = element_blank(), | |
axis.title.x = element_blank(), | |
plot.margin = unit(c(0, 0, 0, 0), "null") | |
) + | |
labs(title=NULL, x=NULL, y=NULL) + | |
ylab(xlabel) + | |
scale_y_continuous(expand=c(0, 0)) | |
cowplot::plot_grid( | |
NULL, p2, p1, make_matrix_plot(df), align="hv", axis="tblr", nrow=2, ncol=2, rel_widths = c(2, 3), | |
rel_heights = c(3, 2) | |
) + | |
theme(panel.spacing=unit(0, "lines")) | |
} | |
make_mat_data = function(df) { | |
n = ncol(df)-1 | |
df = df[,1:n] | |
df2 = NULL | |
for (i in 1:nrow(df)) { | |
w = which(df[i,] == 1) | |
name = paste(colnames(df)[w], collapse="&") | |
for (j in w) { | |
df2 = rbind(df2, data.frame(x=i, y=j, group=name)) | |
} | |
} | |
df2 | |
} | |
make_shading_data = function(nrows, ncols) { | |
data.frame( | |
xmin=0.5, | |
xmax=ncols+0.5, | |
ymin=0.5:(nrows-0.5), | |
ymax=1.5:(nrows+0.5), | |
color=c("gray", "white")[(0:(nrows-1) %% 2) + 1], | |
alpha=0.5 | |
) | |
} | |
make_matrix_plot <- function(df, point_size=3, line_size=0.75, text_scale=1, shading_data=NULL) { | |
nsets = ncol(df) - 1 | |
nbars = nrow(df) | |
Mat_data = make_mat_data(df) | |
if (is.null(shading_data)) { | |
shading_data = make_shading_data(nsets, nbars) | |
} | |
all_points = expand.grid(y=1:nsets, x=1:nbars) | |
ggplot_gtable(ggplot_build( | |
ggplot() | |
+ theme_minimal() | |
+ theme( | |
panel.grid.major=element_blank(), | |
panel.grid.minor=element_blank(), | |
axis.text.x=element_blank(), | |
axis.text.y=element_blank(), | |
plot.margin = unit(c(0, 0, 0, 0), "null") | |
) | |
+ xlab("") | |
+ ylab("") | |
+ scale_y_continuous( | |
labels=colnames(df)[1:nsets], | |
breaks=c(1:nsets), | |
limits=c(0.5,(nsets+0.5)), | |
expand=c(0, 0) | |
) | |
+ scale_x_continuous(limits=c(0, nbars+1), expand=c(0, 0)) | |
+ geom_rect( | |
data=shading_data, | |
aes_string( | |
xmin="xmin", xmax="xmax", ymin="ymin", ymax="ymax" | |
), fill=shading_data$color, alpha=0.5 | |
) | |
+ geom_point( | |
data=all_points, aes_string(x="x", y="y"), | |
color="darkgrey", size=point_size, shape=16, alpha=0.5 | |
) | |
+ geom_point( | |
data=Mat_data, aes_string(x="x", y="y"), | |
size=point_size, shape=16 | |
) | |
+ geom_line( | |
data=Mat_data, | |
aes_string(group="group", x="x", y="y"), | |
size=line_size | |
) | |
+ scale_color_identity() | |
)) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment