Skip to content

Instantly share code, notes, and snippets.

@jdidion
Last active March 22, 2019 20:59
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jdidion/23b2643680dedf7abdc64b239cc92372 to your computer and use it in GitHub Desktop.
Save jdidion/23b2643680dedf7abdc64b239cc92372 to your computer and use it in GitHub Desktop.
Create an UpSet plot given the counts for each intersection
# Create an UpSet plot given the counts for each intersection.
#
# The input is a data frame with N + 1 columns, where N is the
# number of sets, and M rows, where M is the number of intersections
# between the sets. The values in columns 1:N are binary values
# indicating which sets are involved in each intersection. The
# N+1 column must have the name "count" and holds the count values
# for the intersections. You can generate a template counts data
# frame using the create_counts_df() function.
#
# Example:
# > counts = create_counts_df(c("A", "B", "C"))
# > counts$count = sample(100, nrow(counts))
# > upset_simple(counts)
create_counts_df = function(sets) {
n = length(sets)
m = matrix(0, nrow=n+1, ncol=n)
colnames(m) = sets
diag(m) = 1
m[n+1,] = 1
for (i in 2:(n-1)) {
combo = combn(n, i)
mm = matrix(0, nrow=ncol(combo), ncol=n)
for (j in 1:ncol(combo)) {
mm[j, combo[,j]] = 1
}
m = rbind(m, mm)
}
data.frame(m, count=0)
}
upset_simple = function(df, xlog10=F, ylog10=F) {
n = ncol(df) - 1
df = df[order(apply(df[,1:n], 1, sum), -df$count),]
sums = data.frame(
x=1:n,
y=sapply(1:n, function(i) sum(df[which(df[,i] == 1), "count"]))
)
if (xlog10) {
sums$y = log10(sums$y)
ylabel = "Set Size (log 10)"
} else {
ylabel = "Set Size"
}
if (ylog10) {
df$count = log10(df$count)
xlabel = "Intersection Size (log10)"
} else {
xlabel = "Intersection Size"
}
p1 = ggplot(sums, aes(x=x, y=y)) +
geom_bar(stat="identity", position = position_stack(reverse = TRUE), width=0.5) +
coord_flip() +
theme_minimal() +
theme(
axis.title.y=element_blank(),
panel.grid.major.y=element_blank(),
panel.grid.minor.y=element_blank(),
plot.margin = unit(c(0, 0, 0, 0), "null")
) +
ylab(ylabel) +
scale_x_continuous(position="top", breaks=1:n, labels=colnames(df)[1:n], limits=c(0.5, n+0.5))
p2 = ggplot(df, aes(x=1:nrow(df), y=count)) +
geom_bar(stat="identity", width=0.5) +
theme_minimal() +
theme(
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.title.x = element_blank(),
plot.margin = unit(c(0, 0, 0, 0), "null")
) +
labs(title=NULL, x=NULL, y=NULL) +
ylab(xlabel) +
scale_y_continuous(expand=c(0, 0))
cowplot::plot_grid(
NULL, p2, p1, make_matrix_plot(df), align="hv", axis="tblr", nrow=2, ncol=2, rel_widths = c(2, 3),
rel_heights = c(3, 2)
) +
theme(panel.spacing=unit(0, "lines"))
}
make_mat_data = function(df) {
n = ncol(df)-1
df = df[,1:n]
df2 = NULL
for (i in 1:nrow(df)) {
w = which(df[i,] == 1)
name = paste(colnames(df)[w], collapse="&")
for (j in w) {
df2 = rbind(df2, data.frame(x=i, y=j, group=name))
}
}
df2
}
make_shading_data = function(nrows, ncols) {
data.frame(
xmin=0.5,
xmax=ncols+0.5,
ymin=0.5:(nrows-0.5),
ymax=1.5:(nrows+0.5),
color=c("gray", "white")[(0:(nrows-1) %% 2) + 1],
alpha=0.5
)
}
make_matrix_plot <- function(df, point_size=3, line_size=0.75, text_scale=1, shading_data=NULL) {
nsets = ncol(df) - 1
nbars = nrow(df)
Mat_data = make_mat_data(df)
if (is.null(shading_data)) {
shading_data = make_shading_data(nsets, nbars)
}
all_points = expand.grid(y=1:nsets, x=1:nbars)
ggplot_gtable(ggplot_build(
ggplot()
+ theme_minimal()
+ theme(
panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
axis.text.x=element_blank(),
axis.text.y=element_blank(),
plot.margin = unit(c(0, 0, 0, 0), "null")
)
+ xlab("")
+ ylab("")
+ scale_y_continuous(
labels=colnames(df)[1:nsets],
breaks=c(1:nsets),
limits=c(0.5,(nsets+0.5)),
expand=c(0, 0)
)
+ scale_x_continuous(limits=c(0, nbars+1), expand=c(0, 0))
+ geom_rect(
data=shading_data,
aes_string(
xmin="xmin", xmax="xmax", ymin="ymin", ymax="ymax"
), fill=shading_data$color, alpha=0.5
)
+ geom_point(
data=all_points, aes_string(x="x", y="y"),
color="darkgrey", size=point_size, shape=16, alpha=0.5
)
+ geom_point(
data=Mat_data, aes_string(x="x", y="y"),
size=point_size, shape=16
)
+ geom_line(
data=Mat_data,
aes_string(group="group", x="x", y="y"),
size=line_size
)
+ scale_color_identity()
))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment