Skip to content

Instantly share code, notes, and snippets.

@daroczig
Created October 7, 2019 06:12
Show Gist options
  • Save daroczig/ef858d11b159f390b35fbbf8300b378d to your computer and use it in GitHub Desktop.
Save daroczig/ef858d11b159f390b35fbbf8300b378d to your computer and use it in GitHub Desktop.
Create or update a local MRAN snapshot for a list of packages
#!/usr/bin/env r
library(miniCRAN)
library(data.table)
library(pander)
library(logger)
library(magrittr)
library(docopt)
## #############################################################################
## config

## Command-line interface: docopt derives the accepted options from the help
## text below. An option's description (and with it the [default: ...] value)
## is only recognised when at least two spaces separate it from the option
## itself; the blank lines keep the usage pattern apart from the option list.
opts <- docopt("Make local MRAN mirror for a given date in the current working directory's {date} folder

Usage: create-local-MRAN-snapshot.R [options] <packages>...

Options:
  --date=d  YYYY-MM-DD pointer to the MRAN snapshot date [default: 2019-01-01]
  --force   re-download everything even if the {date} folder already exists
")
## #############################################################################
## prep folder

## The snapshot date doubles as the name of the local repo folder, and that
## folder is deleted recursively under --force, so refuse anything that is not
## a real YYYY-MM-DD date (e.g. '..', '~' or an absolute path).
SNAPSHOT_DATE <- opts$date
date_is_valid <- is.character(SNAPSHOT_DATE) &&
  length(SNAPSHOT_DATE) == 1L &&
  grepl('^[0-9]{4}-[0-9]{2}-[0-9]{2}$', SNAPSHOT_DATE) &&
  !is.na(as.Date(SNAPSHOT_DATE, format = '%Y-%m-%d'))
if (!date_is_valid) {
  log_error('Invalid --date, expected a valid YYYY-MM-DD value')
  quit(save = 'no', status = 1L)
}

## NOTE(review): Microsoft announced the retirement of MRAN in 2023 -- confirm
## this host still serves snapshots before relying on it.
SNAPSHOT_URL <- file.path('https://mran.microsoft.com/snapshot', SNAPSHOT_DATE)
log_info('MRAN mirror: ', SNAPSHOT_URL)

## --force: wipe the local snapshot so that everything is downloaded again
if (isTRUE(opts$force)) {
  log_info(
    'Killing local snapshot {SNAPSHOT_DATE} ',
    'with {length(list.files(SNAPSHOT_DATE, recursive = TRUE))} files')
  unlink(SNAPSHOT_DATE, recursive = TRUE)
}
## #############################################################################
## check on prior runs

## make sure the snapshot folder exists (silently reused when already there)
dir.create(SNAPSHOT_DATE, showWarnings = FALSE)

## Package names are the part of a source tarball's file name before the first
## underscore ({name}_{version}.tar.gz). Only such tarballs are considered, so
## the PACKAGES* index files and any stray file are ignored, and a package
## present in several versions is counted once.
local_files <- basename(list.files(SNAPSHOT_DATE, recursive = TRUE))
tarball_pattern <- '^([^_]+)_.*\\.tar\\.gz$'
tarballs <- grep(tarball_pattern, local_files, value = TRUE)
already_downloaded_packages <- unique(sub(tarball_pattern, '\\1', tarballs))

log_info('{length(already_downloaded_packages)} already downloaded R packages')
pandoc.list(already_downloaded_packages)
## #############################################################################
## list new packages

## get the list of explicitly required packages
log_info('{length(opts$packages)} R packages listed explicitly:')
pandoc.list(opts$packages)

## add all required R packages as per dependency graph
packages <- pkgDep(opts$packages, repos = SNAPSHOT_URL, type = 'source', suggests = FALSE)
log_info('{length(packages)} R packages identified after looking up dependencies:')

## Report versions from the *source* index, i.e. the same index the
## dependencies were resolved against. Without an explicit type,
## available.packages falls back to getOption('pkgType'), which is a binary
## type on some platforms.
pander(
  data.table(available.packages(repos = SNAPSHOT_URL, type = 'source'))[
    Package %in% packages, .(Package, Version)],
  style = 'simple', justify = 'right')

## only fetch what is not in the local repo yet
packages <- setdiff(packages, already_downloaded_packages)
log_info('{length(packages)} R packages to be downloaded and added to the local repo:')
pandoc.list(packages)

if (length(packages) == 0) {
  log_info('Nothing to download now, exiting')
  quit(save = 'no', status = 0L)
}
## #############################################################################
## download new packages

## Nothing downloaded before means a brand new repo has to be built; otherwise
## the missing packages are appended to the existing one (dependencies were
## already resolved when the package list was compiled, hence deps = FALSE).
if (length(already_downloaded_packages) == 0) {
  makeRepo(packages, path = SNAPSHOT_DATE, repos = SNAPSHOT_URL, type = 'source')
} else {
  addPackage(packages, path = SNAPSHOT_DATE, repos = SNAPSHOT_URL, deps = FALSE, type = 'source')
}

## summarise every file that now lives in the local snapshot folder
repo_paths <- list.files(SNAPSHOT_DATE, recursive = TRUE, full.names = TRUE)
files <- data.table(
  path = repo_paths,
  file = basename(repo_paths),
  file.info(repo_paths))
log_info('Overall {nrow(files)} files downloaded so far:')
pander(files[, .(file, size)], style = 'simple', justify = 'right')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment