Created
April 9, 2024 20:15
-
-
Save tjmahr/201ae8bc37deb2cecce3a14654a7f796 to your computer and use it in GitHub Desktop.
Some R code I made to dredge up older git versions of a file into a tibble.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' Retrieve the committed versions of a file as a tibble
#'
#' Runs `git log --follow` on `path` to list the commits that touched the file
#' and the path the file had at each of them, then looks up each commit's date
#' and the file contents at that commit. The git commands are run from the
#' current working directory.
#'
#' @param path current path of the file. It must exist on disk.
#' @param date_format date format to request from git. This format is given to
#'   git to set the format of the dates it returns and then given to readr to
#'   tell it how to parse the dates into date-times.
#' @param f_split optional function to help reduce `system()` calls. By default
#'   (`NULL`), one `system()` call is made for each version of the file.
#'   Alternatively, we can ask git to give the contents of all the files at
#'   once with one `system()` call. Then `f_split()` creates a list of file
#'   contents.
#' @return a tibble with one row per commit and the columns `path_current`,
#'   `commit`, `commit_date`, `commit_msg`, `path_at_commit` (historical file
#'   path), and `contents` (historical file contents).
get_file_history <- function(path, date_format = "%Y%m%d%H%M", f_split = NULL) {
  if (!file.exists(path)) cli::cli_abort("{.path {path}} does not exist")

  # Gameplan: Get lines with format:
  #
  #   {commit1} {commit_msg1}
  #   {path_at_commit1}
  #   {commit2} {commit_msg2}
  #   {path_at_commit2}
  #   ...
  #
  # Parse this output to create further commands that retrieve file contents.
  #
  # shQuote() keeps a path with spaces or shell metacharacters as one argument.
  log_cmd <- paste("git log --oneline --name-only --follow --", shQuote(path))
  results <- system(log_cmd, intern = TRUE)

  # A file can exist on disk without having any history to report. Say so here
  # instead of failing later while indexing an empty vector.
  if (length(results) == 0) {
    cli::cli_abort("No git history found for {.path {path}}")
  }
  # The parsing below relies on strictly alternating header/path lines.
  if (length(results) %% 2 != 0) {
    cli::cli_abort(c(
      "Unexpected {.code git log} output for {.path {path}}",
      "i" = "Expected alternating commit and path lines"
    ))
  }

  # Split each "{commit} {commit_msg}" header once, at the first space only, so
  # that spaces inside the message are preserved.
  header_parts <- results |>
    vec_odds() |>
    stringr::str_split_fixed(" ", 2)

  tibble::tibble(
    path_current = path,
    commit = header_parts[, 1],
    commit_date = get_commit_date(commit, date_format),
    commit_msg = header_parts[, 2],
    path_at_commit = vec_evens(results),
    contents = get_historical_file_content(commit, path_at_commit, f_split)
  )
}
# Look up the committer date (`%cd`) of each commit with one `git show` call.
#
# `commit` is a character vector of commit identifiers. `date_format` is the
# date format handed to git via `--date=format:` and then to
# `readr::parse_datetime()` to parse git's output. Returns the parsed
# date-times, one per line of git output.
get_commit_date <- function(commit, date_format = "%Y%m%d%H%M") {
  # With no revision on the command line `git show` falls back to HEAD, which
  # would report one date for zero commits. Answer the empty case directly.
  if (length(commit) == 0) {
    return(.POSIXct(numeric(0), tz = "UTC"))
  }

  # Quote the pieces that come from the caller so that a format containing
  # spaces (e.g. "%Y-%m-%d %H:%M") reaches git as a single argument.
  date_arg <- shQuote(paste0("--date=format:", date_format))
  revisions <- paste(shQuote(commit), collapse = " ")

  paste("git show -s", date_arg, "--format=%cd", revisions) |>
    system(intern = TRUE) |>
    readr::parse_datetime(format = date_format)
}
# Retrieve the contents of a file at each of several commits.
#
# `commit` and `path_at_commit` are parallel character vectors; element i is
# fetched with `git show {commit}:{path_at_commit}`. By default one `system()`
# call is made per version. Avoid multiple system() calls by using a split
# function: when `f_split` is supplied, a single `git show` call requests every
# version at once and `f_split()` must turn the combined output lines into one
# element per version.
#
# Returns a list with one element per version, named "{commit}:{path}".
get_historical_file_content <- function(commit,
                                        path_at_commit,
                                        f_split = NULL) {
  # Nothing to fetch. Checked on `commit` because paste0() below would turn
  # zero-length inputs into the single string ":".
  if (length(commit) == 0) {
    return(stats::setNames(list(), character(0)))
  }

  # Keep the specs as a vector: they are the per-element names of the result.
  # They are only collapsed when building the single-call command line.
  specs <- paste0(commit, ":", path_at_commit)
  # Quote each spec so historical paths containing spaces stay one argument.
  quoted_specs <- shQuote(specs)

  if (is.null(f_split)) {
    contents <- paste("git show", quoted_specs) |>
      lapply(system, intern = TRUE)
  } else {
    contents <- paste("git show", paste(quoted_specs, collapse = " ")) |>
      system(intern = TRUE) |>
      f_split()
    if (length(contents) != length(specs)) {
      stop(
        "`f_split()` returned ", length(contents), " element(s) for ",
        length(specs), " file version(s)",
        call. = FALSE
      )
    }
  }

  stats::setNames(contents, specs)
}
# Elements at odd positions (1st, 3rd, ...). A logical mask is used instead of
# seq(1, length(xs), by = 2) because that call errors on an empty vector.
vec_odds <- function(xs) xs[seq_along(xs) %% 2L == 1L]
# Elements at even positions (2nd, 4th, ...). A logical mask is used instead of
# seq(2, length(xs), by = 2) because that call errors when length(xs) < 2.
vec_evens <- function(xs) xs[seq_along(xs) %% 2L == 0L]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment