Skip to content

Instantly share code, notes, and snippets.

@tjmahr
Created April 9, 2024 20:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tjmahr/201ae8bc37deb2cecce3a14654a7f796 to your computer and use it in GitHub Desktop.
Save tjmahr/201ae8bc37deb2cecce3a14654a7f796 to your computer and use it in GitHub Desktop.
Some R code I made to dredge up older git versions of a file into a tibble.
#' Retrieve the git history of a file as a tibble
#'
#' Runs `git log --follow` on `path`, then looks up the commit date and the
#' historical contents of the file for every commit that git reports.
#'
#' @param path current path of the file. The file must exist; git is invoked
#'   through `system()`, so the path is resolved from the working directory.
#' @param date_format date format to request from git. This format is given to
#'   git to set the format of the dates it returns and then given to readr to
#'   tell it how to parse the dates into date-times.
#' @param f_split optional function to help reduce `system()` calls. By default
#'   (`NULL`), one `system()` call is made for each version of the file.
#'   Alternatively, we can ask git to give the contents of all the files at
#'   once with one `system()` call. Then `f_split()` creates a list of file
#'   contents.
#' @return a tibble with one row per commit with the current path, commit,
#'   commit date, commit message, historical file path, and historical file
#'   contents for the desired file. A file without any commits yields a
#'   zero-row tibble with the same columns.
get_file_history <- function(path, date_format = "%Y%m%d%H%M", f_split = NULL) {
  if (!file.exists(path)) cli::cli_abort("{.path {path}} does not exist")

  # Gameplan: Get lines with format:
  #
  # {commit1} {commit_msg1}
  # {path_at_commit1}
  # {commit2} {commit_msg2}
  # {path_at_commit2}
  # ...
  #
  # Parse this output to create further commands that retrieve file contents.
  #
  # shQuote() keeps paths with spaces or shell metacharacters in one piece.
  results <- system(
    paste("git log --oneline --name-only --follow --", shQuote(path)),
    intern = TRUE
  )

  # With intern = TRUE, a non-zero exit code is reported in the "status"
  # attribute of the result.
  status <- attr(results, "status")
  if (!is.null(status) && status != 0) {
    cli::cli_abort(
      "{.code git log} failed for {.path {path}} with exit status {status}"
    )
  }

  # The file exists but git knows no commits for it (e.g., it is untracked).
  if (length(results) == 0) {
    return(tibble::tibble(
      path_current = character(),
      commit = character(),
      commit_date = .POSIXct(double(), tz = "UTC"),
      commit_msg = character(),
      path_at_commit = character(),
      contents = list()
    ))
  }

  # The parsing below relies on strict {commit line, path line} pairs.
  if (length(results) %% 2 != 0) {
    cli::cli_abort(c(
      "Unexpected {.code git log} output for {.path {path}}",
      "x" = "Expected pairs of lines but got {length(results)} line{?s}"
    ))
  }

  # Column 1 is the abbreviated hash; column 2 is the rest of the line (the
  # commit subject, which may itself contain spaces).
  commit_parts <- results |>
    vec_odds() |>
    stringr::str_split_fixed(" ", 2)

  tibble::tibble(
    path_current = path,
    commit = commit_parts[, 1],
    commit_date = get_commit_date(commit, date_format),
    commit_msg = commit_parts[, 2],
    path_at_commit = vec_evens(results),
    contents = get_historical_file_content(commit, path_at_commit, f_split)
  )
}
# Look up the commit dates of `commit` with a single `git show` call.
#
# `commit` is a character vector of commit identifiers. `date_format` is passed
# to git as `--date=format:<date_format>` and then to
# `readr::parse_datetime()` to parse the lines git prints.
#
# Returns the parsed date-times, one per line of git output.
get_commit_date <- function(commit, date_format = "%Y%m%d%H%M") {
  # Without a revision argument `git show` describes HEAD, which would return
  # one date for zero commits. Short-circuit with an empty date-time vector.
  if (length(commit) == 0) {
    return(.POSIXct(double(), tz = "UTC"))
  }

  # Quote each argument so that formats containing spaces or shell
  # metacharacters reach git intact.
  cmd <- paste(
    "git show -s",
    shQuote(paste0("--date=format:", date_format)),
    "--format=%cd",
    paste(shQuote(commit), collapse = " ")
  )

  system(cmd, intern = TRUE) |>
    readr::parse_datetime(format = date_format)
}
# Retrieve the contents of a file at each commit via `git show <commit>:<path>`.
#
# `commit` and `path_at_commit` are parallel character vectors. By default one
# system() call is made per version. Avoid multiple system() calls by using a
# split function: when `f_split` is supplied, all versions are requested in a
# single `git show` call and `f_split()` must cut the combined output into a
# list with one element per version.
#
# Returns a list of file contents named by `<commit>:<path>`.
get_historical_file_content <- function(commit,
                                        path_at_commit,
                                        f_split = NULL) {
  # paste0() turns zero-length inputs into the single string ":", so handle
  # the empty case before building any object names.
  if (length(commit) == 0) {
    return(stats::setNames(list(), character()))
  }

  # One object name per version. These also serve as the element names of the
  # result, so they must stay a vector rather than one collapsed string.
  specs <- paste0(commit, ":", path_at_commit)
  # Historical paths may contain spaces or shell metacharacters.
  quoted_specs <- shQuote(specs)

  if (is.null(f_split)) {
    l <- lapply(paste("git show", quoted_specs), system, intern = TRUE)
  } else {
    l <- paste("git show", paste(quoted_specs, collapse = " ")) |>
      system(intern = TRUE) |>
      f_split()
    if (length(l) != length(specs)) {
      cli::cli_abort(c(
        "{.arg f_split} must return one element per file version",
        "x" = "Expected {length(specs)} but got {length(l)}"
      ))
    }
  }

  stats::setNames(l, specs)
}
# Elements at odd positions (1st, 3rd, ...). Uses a logical mask rather than
# `seq(1, length(xs), by = 2)`, which errors on zero-length input.
vec_odds <- function(xs) xs[seq_along(xs) %% 2 == 1]
# Elements at even positions (2nd, 4th, ...). Uses a logical mask rather than
# `seq(2, length(xs), by = 2)`, which errors when `xs` has fewer than two
# elements.
vec_evens <- function(xs) xs[seq_along(xs) %% 2 == 0]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment