Last active
February 20, 2024 17:52
-
-
Save jfy133/ce42d1b5d8b5feae2ed81b0ed1324fbf to your computer and use it in GitHub Desktop.
Throw-away example R/tidyverse script to generate a wikipedia 'band-member timeline' style figure for a git repo
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env Rscript
## Generates a Wikipedia 'band-member timeline' style figure for the
## contributors of a git repository, from a two-column dump of `git log`.
## (Shebang uses Rscript: plain `R` does not execute a script file.)

library(tidyverse)

## Commit-count threshold used further down to drop minor contributors;
## that filtering step is skipped entirely when this is 0 or less.
min_commit_count <- 5

## Get raw data (author name and ISO author date, tab separated):
##   git --no-pager log --pretty=format:"%h%x09%an%x09%ad%x09%s" --date=iso | cut -f 2-3 > pipeline_contribs.tsv
data_raw <- read_tsv(
  "pipeline_contribs.tsv",
  col_names = c("Committer", "Date")
)

## Write the distinct committer names, sorted, to a file that can then be
## corrected by hand (write_tsv() returns its input, so the table is kept).
data_committers <- data_raw |>
  distinct(Committer) |>
  arrange(Committer) |>
  write_tsv("pipeline_committers.tsv")
## Do manual correction of duplicate names and add inferred institution etc.
## New table: | Committer | Name | Institution |   (Institution is optional)
data_committers_corrected <- read_tsv("pipeline_committers_CORRECTED.tsv")

# > head(data_committers_corrected)
# # A tibble: 6 × 3
#   Committer            Name                 Institution
#   <chr>                <chr>                <chr>
# 1 @alxndrdiaz          Alexander Ramos Díaz Solena
# 2 Adam Talbot          Adam Talbot          Seqera
# 3 Alex Huebner         Alex Hübner          MPI-EVA
# 4 Alex Hübner          Alex Hübner          MPI-EVA
# 5 Alexander Peltzer    Alexander Peltzer    BoehringerIngelheim
# 6 Alexander Ramos Díaz Alexander Ramos Díaz Solena

## Attach the corrected name (and the institution, when supplied) to every
## commit. The join key is spelled out so the match does not depend on
## whichever column names the two tables happen to share.
if (ncol(data_committers_corrected) == 3) {
  data_raw_cleaned <- data_raw |>
    left_join(data_committers_corrected, by = "Committer") |>
    select(Name, Institution, Date)
} else if (ncol(data_committers_corrected) == 2) {
  data_raw_cleaned <- data_raw |>
    left_join(data_committers_corrected, by = "Committer") |>
    select(Name, Date)
} else {
  ## stop() actually signals the error; errorCondition() only constructs a
  ## condition object, so the script used to carry on regardless.
  stop(
    "Unknown number of columns, should be either Committer/Name, or Committer/Name/Institution",
    call. = FALSE
  )
}
## Committer cleanup: drop commits made by CI / bot accounts
data_prepped <- data_raw_cleaned |>
  filter(!Name %in% c("runner", "Travis CI User", "nf-core-bot", "Nf-core-bot"))

## Optional, cleanup based on minimum number of commits
if (min_commit_count > 0) {
  ## One row per contributor with their commit count. `>=` so that
  ## min_commit_count really is a minimum: someone with exactly that many
  ## commits is kept.
  data_mincommit <- data_prepped |>
    count(Name, name = "N_Commits") |>
    filter(N_Commits >= min_commit_count)

  data_prepped <- data_prepped |>
    filter(Name %in% data_mincommit$Name)
}
## Extract start and end dates, and fix order of oldest to newest contributors.
## The range is taken with min()/max() rather than last()/first(), so it no
## longer depends on the rows still being in `git log` order (and author
## dates are not guaranteed to be monotonic in that order anyway).
## `.groups = "drop"` returns a plain, ungrouped tibble in both cases.
if (ncol(data_committers_corrected) == 3) {
  data_startend <- data_prepped |>
    group_by(Name, Institution) |>
    summarise(
      oldest = min(as_date(Date), na.rm = TRUE),
      youngest = max(as_date(Date), na.rm = TRUE),
      .groups = "drop"
    ) |>
    arrange(desc(oldest))
} else if (ncol(data_committers_corrected) == 2) {
  data_startend <- data_prepped |>
    group_by(Name) |>
    summarise(
      oldest = min(as_date(Date), na.rm = TRUE),
      youngest = max(as_date(Date), na.rm = TRUE),
      .groups = "drop"
    ) |>
    arrange(desc(oldest))
}
## Freeze the current row order of data_startend (sorted by start date) into
## factor levels, so the plot keeps that order on the axis and in the legend.
if (ncol(data_committers_corrected) == 3) {
  institutional_order <- unique(data_startend$Institution)
}

name_order <- unique(data_startend$Name)

## ungroup() first: summarising over two grouping columns leaves the result
## grouped by Name, and dplyr will not modify a grouping column in mutate().
if (ncol(data_committers_corrected) == 3) {
  data_startend <- data_startend |>
    ungroup() |>
    mutate(
      Institution = factor(Institution, levels = institutional_order),
      Name = factor(Name, levels = name_order)
    )
} else if (ncol(data_committers_corrected) == 2) {
  data_startend <- data_startend |>
    ungroup() |>
    mutate(Name = factor(Name, levels = name_order))
}
## Prepare labels for unlabelled minor marks: one entry per day between the
## earliest start and the latest end date.
## NOTE(review): `breaks` is not referenced again in this script.
breaks <- seq(
  min(data_startend$oldest, na.rm = TRUE),
  max(data_startend$youngest, na.rm = TRUE),
  by = 1
)

## Plot: TODO: add unlabelled minor ticks (hard in ggplot2 apparently)
nr_committers <- data_prepped$Name |> unique() |> length()

## The palette has to cover whatever is mapped to colour in the plot:
## institutions when the corrected table has three columns, otherwise the
## contributors themselves.
nr_colours <- if (ncol(data_committers_corrected) == 3) {
  data_prepped$Institution |> unique() |> length()
} else {
  nr_committers
}

if (nr_colours <= 9) {
  selected_palette <- "Set1"
} else if (nr_colours <= 12) {
  selected_palette <- "Paired"
} else {
  ## stop() signals the error; errorCondition() merely built an object and
  ## left `selected_palette` undefined for the plotting code.
  stop(
    "Too many contributors for default ggplot2 palettes, modify script to find a larger palette",
    call. = FALSE
  )
}
## Build the timeline: one horizontal segment per contributor, running from
## their first to their last commit date. Only the colour mapping differs
## between the two table layouts, so the shared layers are added once.
if (ncol(data_committers_corrected) == 3) {
  timeline_base <- ggplot(
    data_startend,
    aes(x = oldest, y = Name, color = Institution)
  )
} else if (ncol(data_committers_corrected) == 2) {
  timeline_base <- ggplot(
    data_startend,
    aes(x = oldest, y = Name, color = Name)
  )
}

## Changes relative to the previous version of this call chain:
##  * `labels = label_at(1)` was dropped from scale_x_date(): `label_at` is
##    not defined in this script, and the axis labels are produced from
##    `date_labels` when that argument is supplied.
##  * guides(linewidth = "none") replaces the deprecated `FALSE`.
## NOTE(review): `linewidth = 10` is kept inside aes() to leave the rendered
## segment thickness as it was; confirm whether a fixed width outside aes()
## was intended.
final_plot <- timeline_base +
  geom_segment(aes(xend = youngest, yend = Name, linewidth = 10)) +
  theme_classic() +
  scale_color_brewer(palette = selected_palette, direction = -1) +
  scale_x_date(
    date_breaks = "1 year",
    date_minor_breaks = "4 months",
    date_labels = "%Y"
  ) +
  guides(colour = guide_legend(reverse = TRUE), linewidth = "none") +
  theme(legend.position = "bottom", text = element_text(family = "FreeSans")) +
  xlab("Timeline") +
  ylab("Contributor")
## Show the figure, then write it to disk as a PNG.
final_plot

ggsave(
  filename = "pipeline_contributors_timeline.png",
  plot = final_plot,
  device = "png",
  scale = 1
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Example output for nf-core/mag