Created
February 9, 2025 22:34
-
-
Save petrbouchal/ea4e968d7efa9894c3b95026d33480e3 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load necessary libraries | |
library(dplyr) | |
library(tidyr) | |
library(ggupset) | |
library(nanoparquet) | |
library(ggVennDiagram) | |
# Define the function | |
create_upset_plot <- function(df_list, id_column) { | |
# Combine all data frames into one | |
combined_df <- df_list %>% | |
purrr::reduce(full_join, by = id_column) | |
# Create a long format data frame indicating presence of IDs in each data frame | |
long_df <- combined_df %>% | |
pivot_longer(cols = -one_of(id_column), names_to = "data_frame", values_to = "value") %>% | |
mutate(present = !is.na(value)) %>% | |
select(-value) %>% | |
pivot_wider(names_from = data_frame, values_from = present, values_fill = list(FALSE)) | |
# Create a list column for ggupset, excluding the ID column | |
long_df <- long_df %>% | |
rowwise() %>% | |
mutate(data_frames = list(names(long_df)[which(c_across(-one_of(id_column)) & names(long_df) != id_column)])) %>% | |
ungroup() | |
# Create the upset plot | |
ggplot(long_df, aes(x = data_frames)) + | |
geom_bar() + | |
scale_x_upset() + | |
theme_minimal() + | |
labs(title = "UpSet Plot of ID Presence in Data Frames", | |
x = "Data Frame Combinations", | |
y = "Count of IDs") | |
} | |
# Example usage | |
# Assuming df1, df2, df3, df4, and df5 are your data frames and "ID" is the common column | |
df_list <- list( | |
a = tibble(id = c(1, 2, 3, 5, 7), a = 1), | |
b = tibble(id = c(1, 2, 5, 5), b = 0), | |
b = tibble(id = c(1, 2, 7, 9), c = 2) | |
) | |
create_upset_plot(df_list, "id") | |
create_venn_plot <- function(df_list = NULL, | |
id_column = "id", | |
n_intersections = NULL) { | |
df_list <- df_list | |
df_names <- names(df_list) | |
# browser() | |
# Ensure df_names are not NULL | |
if (is.null(df_names)) { | |
df_names <- paste0("df", seq_along(df_list)) | |
} | |
# Create a list of sets for Venn diagram | |
sets <- lapply(df_list, function(df) df[[id_column]]) | |
names(sets) <- df_names | |
sets <- Venn(sets) | |
# Create the Venn diagram | |
venn_plot <- plot_upset(sets, | |
sets.bar.show.numbers = TRUE, | |
nintersects = n_intersections) | |
venn_plot | |
} | |
mmm <- read_parquet("~/cloudfiles/code/data/mo_merge_kombinace.parquet") |> | |
janitor::clean_names() | |
lll <- purrr::map(names(mmm)[2:length(mmm)], | |
\(x) tibble(kod_fo = mmm[mmm[x] == TRUE,"kod_fo"])) | |
names(lll) = names(mmm)[2:length(mmm)] | |
vvv <- create_venn_plot(id_column = "kod_fo", | |
df_list = list( | |
a = lll[[1]], b = lll[[2]], c = lll[[3]], | |
d = lll[[4]], e = lll[[5]], f = lll[[6]] | |
), | |
n_intersections = 15) | |
vvv | |
vvv <- create_venn_plot(id_column = "kod_fo", | |
df_list = lll, | |
n_intersections = 20) | |
vvv | |
vvv <- create_venn_plot(id_column = "kod_fo", | |
df_list = unname(lll), | |
n_intersections = 20) | |
vvv | |
names(vvv) | |
vvv$plotlist[[3]]$layers[[2]]$geom_params$color <- "red" | |
vvv$plotlist[[3]]$layers[[2]]$geom_params$color | |
vvv | |
create_venn_plot <- function(df_list = NULL, id_column, = | |
...) { | |
# Check if df_list is provided | |
if (missing(...)) { | |
# Use the provided list and its names | |
df_list <- df_list | |
df_names <- names(df_list) | |
} else { | |
# Capture the data frames and their names from ... | |
df_list <- lst(...) | |
df_names <- sapply(substitute(list(...))[-1], deparse) | |
} | |
# Ensure df_names are not NULL | |
if (is.null(df_names)) { | |
df_names <- paste0("df", seq_along(df_list)) | |
} | |
# browser() | |
# Create a list of sets for Venn diagram | |
sets <- lapply(df_list, function(df) df[[id_column]]) | |
names(sets) <- df_names | |
# Create the Venn diagram | |
ggVennDiagram(sets, label = "count", force_upset = ) | |
} | |
names(pcr_all)[1] <- "ISP číslo" | |
pcr_all$Dat.Výst. <- as.character(pcr_all$Dat.Výst.) | |
pcr_all$Ošatné <- as.double(pcr_all$Ošatné) | |
pcr_all$Odlučné <- as.double(pcr_all$Odlučné) | |
pcr_all$Odchodné <- as.double(pcr_all$Odchodné) | |
create_venn_plot(id_column = "ISP číslo", df_list = | |
lst(hzs_all, osys, pcr = bind_rows(pcr_all, pcr_bezcosi))) | |
create_upset_plot(id_column = "ISP číslo", df_list = lst(hzs_all, osys, pcr_all, pcr_bezcosi)) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment