Skip to content

Instantly share code, notes, and snippets.

@petrbouchal
Created February 9, 2025 22:34
Show Gist options
  • Save petrbouchal/ea4e968d7efa9894c3b95026d33480e3 to your computer and use it in GitHub Desktop.
Save petrbouchal/ea4e968d7efa9894c3b95026d33480e3 to your computer and use it in GitHub Desktop.
# Load necessary libraries
library(dplyr)
library(tidyr)
library(ggupset)
library(nanoparquet)
library(ggVennDiagram)
# Define the function
create_upset_plot <- function(df_list, id_column) {
# Combine all data frames into one
combined_df <- df_list %>%
purrr::reduce(full_join, by = id_column)
# Create a long format data frame indicating presence of IDs in each data frame
long_df <- combined_df %>%
pivot_longer(cols = -one_of(id_column), names_to = "data_frame", values_to = "value") %>%
mutate(present = !is.na(value)) %>%
select(-value) %>%
pivot_wider(names_from = data_frame, values_from = present, values_fill = list(FALSE))
# Create a list column for ggupset, excluding the ID column
long_df <- long_df %>%
rowwise() %>%
mutate(data_frames = list(names(long_df)[which(c_across(-one_of(id_column)) & names(long_df) != id_column)])) %>%
ungroup()
# Create the upset plot
ggplot(long_df, aes(x = data_frames)) +
geom_bar() +
scale_x_upset() +
theme_minimal() +
labs(title = "UpSet Plot of ID Presence in Data Frames",
x = "Data Frame Combinations",
y = "Count of IDs")
}
# Example usage
# Assuming df1, df2, df3, df4, and df5 are your data frames and "ID" is the common column
df_list <- list(
a = tibble(id = c(1, 2, 3, 5, 7), a = 1),
b = tibble(id = c(1, 2, 5, 5), b = 0),
b = tibble(id = c(1, 2, 7, 9), c = 2)
)
create_upset_plot(df_list, "id")
create_venn_plot <- function(df_list = NULL,
id_column = "id",
n_intersections = NULL) {
df_list <- df_list
df_names <- names(df_list)
# browser()
# Ensure df_names are not NULL
if (is.null(df_names)) {
df_names <- paste0("df", seq_along(df_list))
}
# Create a list of sets for Venn diagram
sets <- lapply(df_list, function(df) df[[id_column]])
names(sets) <- df_names
sets <- Venn(sets)
# Create the Venn diagram
venn_plot <- plot_upset(sets,
sets.bar.show.numbers = TRUE,
nintersects = n_intersections)
venn_plot
}
mmm <- read_parquet("~/cloudfiles/code/data/mo_merge_kombinace.parquet") |>
janitor::clean_names()
lll <- purrr::map(names(mmm)[2:length(mmm)],
\(x) tibble(kod_fo = mmm[mmm[x] == TRUE,"kod_fo"]))
names(lll) = names(mmm)[2:length(mmm)]
vvv <- create_venn_plot(id_column = "kod_fo",
df_list = list(
a = lll[[1]], b = lll[[2]], c = lll[[3]],
d = lll[[4]], e = lll[[5]], f = lll[[6]]
),
n_intersections = 15)
vvv
vvv <- create_venn_plot(id_column = "kod_fo",
df_list = lll,
n_intersections = 20)
vvv
vvv <- create_venn_plot(id_column = "kod_fo",
df_list = unname(lll),
n_intersections = 20)
vvv
names(vvv)
vvv$plotlist[[3]]$layers[[2]]$geom_params$color <- "red"
vvv$plotlist[[3]]$layers[[2]]$geom_params$color
vvv
create_venn_plot <- function(df_list = NULL, id_column, =
...) {
# Check if df_list is provided
if (missing(...)) {
# Use the provided list and its names
df_list <- df_list
df_names <- names(df_list)
} else {
# Capture the data frames and their names from ...
df_list <- lst(...)
df_names <- sapply(substitute(list(...))[-1], deparse)
}
# Ensure df_names are not NULL
if (is.null(df_names)) {
df_names <- paste0("df", seq_along(df_list))
}
# browser()
# Create a list of sets for Venn diagram
sets <- lapply(df_list, function(df) df[[id_column]])
names(sets) <- df_names
# Create the Venn diagram
ggVennDiagram(sets, label = "count", force_upset = )
}
names(pcr_all)[1] <- "ISP číslo"
pcr_all$Dat.Výst. <- as.character(pcr_all$Dat.Výst.)
pcr_all$Ošatné <- as.double(pcr_all$Ošatné)
pcr_all$Odlučné <- as.double(pcr_all$Odlučné)
pcr_all$Odchodné <- as.double(pcr_all$Odchodné)
create_venn_plot(id_column = "ISP číslo", df_list =
lst(hzs_all, osys, pcr = bind_rows(pcr_all, pcr_bezcosi)))
create_upset_plot(id_column = "ISP číslo", df_list = lst(hzs_all, osys, pcr_all, pcr_bezcosi))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment