Skip to content

Instantly share code, notes, and snippets.

@goldingn
Last active May 27, 2016 08:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save goldingn/cd2fadb60ec11b9a846c4030f5e290b2 to your computer and use it in GitHub Desktop.
Save goldingn/cd2fadb60ec11b9a846c4030f5e290b2 to your computer and use it in GitHub Desktop.
functions to create a version/sha lookup table for modules
# functions to compile a package/version-sha lookup table
library(git2r)
ModuleVersion <- function (modulePath, version_string = '^Version: ') {
  # Given a path to a module file, return the module's version, or NA if it
  # doesn't declare one.
  #
  # modulePath:     path to the module's R source file
  # version_string: regular expression identifying the version entry among
  #                 the roxygen 'section' elements; the matched text is
  #                 stripped from the returned value
  #
  # Only the first documentation block in the file is inspected, and if
  # several sections match, the first match is returned so that the result
  # always has length one.

  # copy the comment lines (the roxygen header) to a temporary file, in case
  # the R code itself is unparseable; the trailing NULL gives the parser an
  # expression to attach the header to. The file is removed on exit, even if
  # parsing fails, so repeated calls don't litter the session's tempdir.
  f <- tempfile()
  on.exit(unlink(f), add = TRUE)
  lines <- readLines(modulePath, warn = FALSE)
  lines <- c(lines[grepl('^#', lines)], 'NULL')
  writeLines(lines, f)

  # NOTE(review): parse_file is reached through ':::' (a non-exported
  # roxygen2 function), so its interface may differ between roxygen2
  # versions - confirm against the installed version
  parsed <- roxygen2:::parse_file(f, environment())
  if (length(parsed) == 0) {
    # no documentation block at all, so there is no version to report
    return (NA)
  }
  doc_elements <- parsed[[1]]

  # pull out the 'section' components and look for a version among them
  sections <- unlist(doc_elements[names(doc_elements) == 'section'])
  version_idx <- grep(version_string, sections)
  if (length(version_idx) == 0) {
    return (NA)
  }

  gsub(version_string, '', sections[version_idx[1]])
}
ModuleLookup <- function (repo_path = '.') {
  # Build a lookup table linking every module version found in the history
  # of a git repository to a commit sha.
  #
  # repo_path: path to the root of a git repository (the directory that
  #            holds '.git') whose modules live in its 'R' subdirectory
  #
  # Returns a data.frame with one row per module/version pair and columns:
  #   module     - file name in 'R' with a trailing '.R' removed
  #   version    - version reported by ModuleVersion, or 'unversioned' when
  #                that is NA or '0'
  #   sha        - sha of the last commit in the commit listing that
  #                contains the pair
  #   when       - the 'when' entry of that commit
  #   deprecated - FALSE if the pair is present at the first listed commit
  #                (treated as the most recent one), TRUE otherwise
  # Rows are sorted by module, then by 'when'.

  # work on a temporary copy so that checkouts never touch the caller's
  # repository; on.exit removes the copy even if something below fails
  gitdir <- tempfile()
  dir.create(gitdir, showWarnings = FALSE)
  on.exit(unlink(gitdir, recursive = TRUE), add = TRUE)

  # copy the *contents* of the repository rather than the directory itself:
  # file.copy(dir, to, recursive = TRUE) nests the copy as to/basename(dir),
  # which would hide '.git' for any repo_path other than '.'
  repo_contents <- list.files(repo_path, all.files = TRUE,
                              full.names = TRUE, no.. = TRUE)
  file.copy(repo_contents, gitdir, recursive = TRUE)

  # link to the repo, get dataframe of commits and pull out the shas
  git <- repository(file.path(gitdir, '.git'))
  commits <- as(git, 'data.frame')
  shas <- as.character(commits$sha)
  n_commit <- length(shas)

  # named character vector (module name -> version) for the module files
  # visible in the working tree at the moment it is called
  list_versions <- function () {
    files <- list.files(file.path(gitdir, 'R'), full.names = TRUE)
    # coerce to a length-one character so the result type doesn't depend on
    # what the individual modules happen to return
    versions <- vapply(files,
                       function (path) as.character(ModuleVersion(path))[1],
                       character(1),
                       USE.NAMES = FALSE)
    # the dot is escaped so that only a literal '.R' extension is stripped
    names(versions) <- sub('\\.R$', '', basename(files))
    # give missing or zero versions a more useful name
    versions[is.na(versions) | versions == '0'] <- 'unversioned'
    versions
  }

  # check out each commit in turn and record the modules visible there
  available <- vector('list', n_commit)
  names(available) <- shas
  for (i in seq_len(n_commit)) {
    checkout(object = lookup(git, shas[i]))
    available[[i]] <- list_versions()
  }

  # flatten to one record per (commit, module), in commit listing order
  n_per_commit <- vapply(available, length, integer(1))
  commit_idx <- rep(seq_len(n_commit), n_per_commit)
  module <- as.character(unlist(lapply(available, names)))
  version <- as.character(unlist(available, use.names = FALSE))

  # many commits carry the same module/version pair; keep only the last one
  # listed for each pair (entries from later commits in the listing
  # overwrite those from earlier ones)
  keep <- !duplicated(data.frame(module, version), fromLast = TRUE)
  kept_idx <- commit_idx[keep]

  # assemble the dataframe, with columns: module, version, sha
  # (stringsAsFactors is set so the column types are the same on any R)
  lookup_df <- data.frame(module = module[keep],
                          version = version[keep],
                          sha = shas[kept_idx],
                          stringsAsFactors = FALSE)

  # add the date
  lookup_df$when <- commits$when[kept_idx]

  # reorder (by module, then date) & remove row names
  lookup_df <- lookup_df[order(lookup_df$module, lookup_df$when), ]
  row.names(lookup_df) <- NULL

  # the first listed commit is the most recent one; a module/version pair
  # is current (not deprecated) only if it is present there. Its modules
  # were already recorded in the loop above, so no second checkout is needed
  current <- if (n_commit > 0) available[[1]] else character(0)
  current_version <- unname(current[lookup_df$module])
  lookup_df$deprecated <- is.na(current_version) |
    current_version != lookup_df$version

  lookup_df
}
# build the lookup table for the repository in the working directory, keep
# it as `lookup`, and preview its first rows
lookup <- ModuleLookup()
head(lookup)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment