See https://github.com/AustralianAntarcticDivision/bowerbird
This script uses bowerbird to configure a new repository and populate it with all the R-devel tarball archives (you'll have to create the ~/bower directory first).
The configuration here unpacks each tarball at the end of synchronization checks.
The `bb_sync(cf)` step can then be run on some routine schedule to keep the local repo up to date, allowing you
to forget about whether you have the latest emails and just try whatever idea it is you had.
- `id` is relatively new and not in the readme examples.
- The `source_url` examples have ".../*" at the end, but this one needed to not have that.
- I needed `pp_gunzip` here.
library(bowerbird)
# Start from an empty bowerbird configuration rooted at ~/bower.
cf <- bb_config(local_file_root = "~/bower")

# Describe the R-devel mailing list archive as a bowerbird data source.
r_devel_source <- bb_source(
  name = "The R-devel archives",
  id = "R-devel-mailing-archive",
  description = "R-devel--R development and technical/programmer topics",
  reference = "https://stat.ethz.ch/mailman/listinfo/r-devel",
  citation = "https://cran.r-project.org/",
  source_url = "https://stat.ethz.ch/pipermail/r-devel/",
  license = "GPL-3",
  method = quote(bb_wget),
  # wget flags: recurse one level below source_url, accept only *.gz files.
  method_flags = "--recursive --level=1 --accept=\"*.gz\"",
  # Post-processing hook; presumably decompresses each downloaded .gz file
  # (per the accompanying notes) -- confirm against the bowerbird docs.
  postprocess = quote(pp_gunzip)
)

# Register the source with the configuration, then run the synchronization.
cf <- add(cf, r_devel_source)
bb_sync(cf)
# Summarise the synchronized archive: total size in megabytes (10^6 bytes)
# and number of files, over every path under ~/bower that matches
# "r-devel.*\.txt$".
# `n()` is namespace-qualified because the visible script never attaches
# dplyr; a bare `n()` is not resolvable by current dplyr releases when the
# package is only used through `dplyr::`.
pathological::list_files("~/bower", recursive = TRUE) %>%
  dplyr::filter(grepl("r-devel.*\\.txt$", filename)) %>%
  dplyr::mutate(size = file.info(filename)$size) %>%
  dplyr::summarize(Mb = sum(size) / 1e6, n = dplyr::n())
# A tibble: 1 x 2
# Mb n
# <dbl> <int>
#1 187.1054 244