Skip to content

Instantly share code, notes, and snippets.

@jfy133
Last active February 20, 2024 17:52
Show Gist options
  • Save jfy133/ce42d1b5d8b5feae2ed81b0ed1324fbf to your computer and use it in GitHub Desktop.
Save jfy133/ce42d1b5d8b5feae2ed81b0ed1324fbf to your computer and use it in GitHub Desktop.
Throw-away example R/tidyverse script to generate a Wikipedia 'band-member timeline' style figure for a git repo
#!/usr/bin/env Rscript
# Reads a two-column (committer, date) export of `git log`, and writes the
# distinct committer names to a TSV so they can be curated by hand.
# The shebang uses Rscript: plain `R` does not execute a file passed as an
# argument, so the script could not be run directly with `R` in the shebang.
library(tidyverse)

# Commit-count threshold used by the optional low-activity filter further
# down; values <= 0 disable that filter.
min_commit_count <- 5

## Get raw data:
##   git --no-pager log --pretty=format:"%h%x09%an%x09%ad%x09%s" --date=iso |
##     cut -f 2-3 > pipeline_contribs.tsv
data_raw <- read_tsv(
  "pipeline_contribs.tsv",
  col_names = c("Committer", "Date")
)

## One row per distinct committer string, sorted, written out for manual
## curation. write_tsv() returns its input, so `data_committers` holds the
## same table that was written.
data_committers <- data_raw |>
  distinct(Committer) |>
  arrange(Committer) |>
  write_tsv("pipeline_committers.tsv")
## Do manual correction of duplicate names and add inferred institution etc.
## New table: | Committer | Name | Institution   (Institution column optional)
data_committers_corrected <- read_tsv("pipeline_committers_CORRECTED.tsv")
# > head(data_committers_corrected)
# # A tibble: 6 × 3
# Committer Name Institution
# <chr> <chr> <chr>
# 1 @alxndrdiaz Alexander Ramos Díaz Solena
# 2 Adam Talbot Adam Talbot Seqera
# 3 Alex Huebner Alex Hübner MPI-EVA
# 4 Alex Hübner Alex Hübner MPI-EVA
# 5 Alexander Peltzer Alexander Peltzer BoehringerIngelheim
# 6 Alexander Ramos Díaz Alexander Ramos Díaz Solena

## Attach the curated Name (and Institution, when present) to every commit.
## The join key is spelled out so the join does not depend on whichever
## columns happen to share a name.
n_corrected_cols <- ncol(data_committers_corrected)

if (n_corrected_cols == 3) {
  data_raw_cleaned <- data_raw |>
    left_join(data_committers_corrected, by = "Committer") |>
    select(Name, Institution, Date)
} else if (n_corrected_cols == 2) {
  data_raw_cleaned <- data_raw |>
    left_join(data_committers_corrected, by = "Committer") |>
    select(Name, Date)
} else {
  # stop() signals the error and halts the script. errorCondition() only
  # constructs a condition object, so execution used to continue past here.
  stop(
    "Unknown number of columns, should be either Committer/Name, or Committer/Name/Institution",
    call. = FALSE
  )
}
## Committer cleanup: drop CI / bot accounts
bot_names <- c("runner", "Travis CI User", "nf-core-bot", "Nf-core-bot")
data_prepped <- data_raw_cleaned |>
  filter(!Name %in% bot_names)

## Optional: keep only contributors with at least `min_commit_count` commits.
## The comparison is inclusive (>=) so that a contributor with exactly the
## minimum number of commits is retained.
if (min_commit_count > 0) {
  # One row per Name with its commit count, restricted to those that qualify.
  data_mincommit <- data_prepped |>
    count(Name, name = "N_Commits") |>
    filter(N_Commits >= min_commit_count)

  data_prepped <- data_prepped |>
    filter(Name %in% data_mincommit$Name)
}
## Extract start and end dates, and fix order of oldest to newest contributors.
## Grouping includes Institution only when the corrected table supplied it.
startend_groups <- if (ncol(data_committers_corrected) == 3) {
  c("Name", "Institution")
} else {
  "Name"
}

# min()/max() are used instead of last()/first() so the result does not
# depend on the rows being in newest-first order.
# `.groups = "drop"` returns an ungrouped table in both layouts.
data_startend <- data_prepped |>
  group_by(across(all_of(startend_groups))) |>
  summarise(
    oldest = min(as_date(Date)),
    youngest = max(as_date(Date)),
    .groups = "drop"
  ) |>
  arrange(desc(oldest))

## Freeze the current row order into factor levels so the plot axis and
## legend follow it rather than alphabetical order.
name_order <- unique(data_startend$Name)
data_startend <- data_startend |>
  mutate(Name = factor(Name, levels = name_order))

if ("Institution" %in% startend_groups) {
  institutional_order <- unique(data_startend$Institution)
  data_startend <- data_startend |>
    mutate(Institution = factor(Institution, levels = institutional_order))
}
## Prepare labels for unlabelled minor marks: one value per day spanning the
## earliest start date to the latest end date.
breaks <- seq(
  min(data_startend$oldest, na.rm = TRUE),
  max(data_startend$youngest, na.rm = TRUE),
  by = "day"
)

## Plot: TODO: add unlabelled minor ticks (hard in ggplot2 apparently)
nr_committers <- length(unique(data_prepped$Name))

## The palette must cover the variable that is mapped to colour in the plot:
## Institution when the corrected table has three columns, Name otherwise.
colour_values <- if (ncol(data_committers_corrected) == 3) {
  data_prepped$Institution
} else {
  data_prepped$Name
}
nr_colours <- length(unique(colour_values))

if (nr_colours <= 9) {
  selected_palette <- "Set1"
} else if (nr_colours <= 12) {
  selected_palette <- "Paired"
} else {
  # stop() halts here with a clear message. errorCondition() only built a
  # condition object, leaving `selected_palette` undefined for the plot.
  stop(
    "Too many colour categories (contributors or institutions) for default ggplot2 palettes, modify script to find a larger palette",
    call. = FALSE
  )
}
## Build the timeline: one horizontal segment per contributor, running from
## their first to their last commit date. Colour encodes Institution when the
## corrected table has it, otherwise the contributor's Name. This is the only
## difference between the two layouts, so only the mapping is branched.
timeline_mapping <- if (ncol(data_committers_corrected) == 3) {
  aes(x = oldest, y = Name, color = Institution)
} else {
  aes(x = oldest, y = Name, color = Name)
}

final_plot <- ggplot(data_startend, timeline_mapping) +
  # NOTE(review): linewidth is mapped inside aes(), so the constant goes
  # through the linewidth scale rather than being used literally. Moving it
  # outside aes() would change the drawn width — confirm which is intended.
  geom_segment(aes(xend = youngest, yend = Name, linewidth = 10)) +
  theme_classic() +
  scale_color_brewer(palette = selected_palette, direction = -1) +
  # `labels = label_at(1)` was dropped: label_at() is not defined in this
  # script, and the tick labels are already specified via `date_labels`.
  scale_x_date(
    date_breaks = "1 year",
    date_minor_breaks = "4 months",
    date_labels = "%Y"
  ) +
  # "none" replaces the deprecated `FALSE` for hiding a legend.
  guides(colour = guide_legend(reverse = TRUE), linewidth = "none") +
  theme(
    legend.position = "bottom",
    text = element_text(family = "FreeSans")
  ) +
  xlab("Timeline") +
  ylab("Contributor")

final_plot

ggsave(
  filename = "pipeline_contributors_timeline.png",
  plot = final_plot,
  device = "png",
  scale = 1
)
@jfy133
Copy link
Author

jfy133 commented Feb 19, 2024

Example output for nf-core/mag

mag_contributors_plot

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment