Skip to content

Instantly share code, notes, and snippets.

@noamross
Last active November 3, 2023 18:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save noamross/e05d3884b8ec8b4dce8380e24163851d to your computer and use it in GitHub Desktop.
Save noamross/e05d3884b8ec8b4dce8380e24163851d to your computer and use it in GitHub Desktop.
Read previous versions of targets from git repository
# Requires a development branch of git2r for now. Do `remotes::install_github('ropensci/git2r@raw-blob-content')` or
# `renv::install('ropensci/git2r@raw-blob-content')`. This installs from source.
# TODO:
# - Add a function to extract an arbitrary file/folder, e.g., copy_git_file(path, ref, repo)
# - Have tar_*_version check if the target is a local file and in that case extract it and return a temporary path (turn on/off with arguments)
# - Guardrails and informative error messages cases such as: Not a git repository, target not present at the given reference, remote version not available. What happens with shallow clones?
# - Maybe only extract stuff in the `objects/` directory when needed
library(git2r)
library(fs)
library(targets)
#' Get a target from a given git reference
#'
#' @param name The name of the target
#' @param ref The git reference: tag, branch, SHA, or revision like HEAD~1 (see https://git-scm.com/docs/git-rev-parse.html#_specifying_revisions)
#' @param ... Additional arguments passed to \code{\link{tar_load}} or \code{\link{tar_read}}
#' @export
tar_load_version <- function(name, ref = "HEAD", envir = parent.frame(), ..., repo = ".") {
name <- targets::tar_deparse_language(substitute(name))
tmp_store <- make_temporary_store(ref, repo)
tar_load(name, store = tmp_store, envir = envir, ...)
}
#' @export
#' @rdname tar_load_version
tar_read_version <- function(name, ref = "HEAD", repo = ".", ...) {
name <- targets::tar_deparse_language(substitute(name))
tmp_store <- make_temporary_store(ref, repo)
tar_read_raw(name, store = tmp_store, ...)
}
make_temporary_store <- function(ref = "HEAD", repo = ".", store = targets::tar_path_store()) {
current_store <- store
store_git_obj <- get_git_obj_at_reference(path = fs::path(current_store), ref = ref, repo = repo)
tmp_store_path <- fs::path(tar_tempdir(), sha(store_git_obj))
# Don't bother copying if the store is already in the tempdir
if(fs::dir_exists(tmp_store_path)) {
return(tmp_store_path)
} else {
tmp_store <- copy_git_tree(store_git_obj, dirname = sha(store_git_obj), path = tar_tempdir())
}
tmp_store
}
#' @returns A git2r object representing the file at the given reference, either a blob or, for a directory, a tree
get_git_obj_at_reference <- function(path, ref, repo = ".") {
# Parse the reference name to a commit object
if(is.character(ref)) ref <- revparse_single(repo, ref)
path <- fs::path(path)
path_parts <- fs::path_split(path)[[1]]
repo <- git2r::repository(repo)
repo_tree <- git2r::tree(ref)
# Find the file in the tree
obj <- repo_tree
splits <- path_parts
while (length(splits) >= 1) {
obj <- obj[splits[1]]
splits <- splits[-1]
}
return(obj)
}
#' Copy the contents of a git_blob object to disk
#' @return The path to the copied file, which is named for the SHA of the blob
copy_git_obj <- function(obj, filename = sha(obj), path = ".") {
if(inherits(obj, "git_tree")) return(copy_git_tree(obj, dirname = filename, path))
stopifnot(inherits(obj, "git_blob"))
# stopifnot(!is_binary(obj))
# Find the path of the git object in the .git/objects directory given its SHA
out_path <- fs::path(path, filename)
contents <- content(obj, split = FALSE, raw = TRUE)
if(is.raw(contents)) {
writeBin(contents, out_path)
} else {
cat(content(obj, split = FALSE), file = out_path)
}
out_path
}
#' Copy the contents of a git_tree object to a directory on disk recursively
#' @return The path to the directory, which is named for the SHA of the tree
copy_git_tree <- function(tree, dirname = sha(tree), path = ".") {
out_path <- fs::path(path, dirname)
fs::dir_create(out_path, recurse = TRUE)
# Recurse down tree and copy all blobs
for (i in seq_along(tree$name)) {
obj <- tree[i]
if (inherits(obj, "git_blob")) {
# Can't read binary files yet, see https://github.com/ropensci/git2r/issues/460
#if(is_binary(obj)) message("Skipping binary file")
copy_git_obj(obj, filename = tree$name[i], path = out_path)
} else if (inherits(obj, "git_tree")) {
copy_git_tree(obj, dirname = tree$name[i], path = out_path)
} else {
warning("Unknown git object type")
}
}
return(out_path)
}
#' Get a system-level temporary directory
tar_tempdir <- function() {
p <- fs::path(fs::path_dir(tempdir()), "tar_versions")
if(!fs::dir_exists(p)) fs::dir_create(p)
p
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment