Skip to content

Instantly share code, notes, and snippets.

@gaborcsardi
Last active July 18, 2020 19:21
Show Gist options
  • Star 7 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gaborcsardi/0c3db5601f8f1c3e7f4d to your computer and use it in GitHub Desktop.
Save gaborcsardi/0c3db5601f8f1c3e7f4d to your computer and use it in GitHub Desktop.
Size of the CRAN R package repository over time
library(jsonlite)
## Download the event list from the crandb web service.
## NOTE(review): assumes the result is a data frame with at least the
## `date`, `name` and `event` columns -- confirm against the service.
pkgs <- fromJSON("http://crandb.r-pkg.org/-/events")

## Filter: discard every package that has at least one event with a
## missing date, and keep only the three columns used further down.
na_pkgs <- unique(pkgs$name[is.na(pkgs$date)])
events <- pkgs[is.na(match(pkgs$name, na_pkgs)), c("date", "name", "event")]

## One logical flag per remaining package, all initially FALSE and
## addressable by package name.
allpkgs <- unique(events$name)
present <- setNames(logical(length(allpkgs)), allpkgs)
## Find new packages
## Walk the events in row order, tracking a per-package flag in `present`.
## A "released" event for a package whose flag is FALSE is relabelled
## "newpackage" and sets the flag; a "released" event for a package that
## is already flagged is left as is. Any other event clears the flag.
## NOTE(review): the result is only meaningful if the rows of `events`
## are in chronological order -- confirm the ordering of the input.
## seq_len() is used instead of 1:nrow() so that an empty `events` table
## skips the loop instead of iterating over c(1, 0) and failing on NA.
for (i in seq_len(nrow(events))) {
  ## Progress indicator: one dot per processed event
  cat(".")
  pk <- events$name[i]
  ev <- events$event[i]
  if (ev == "released") {
    if (!present[pk]) {
      present[pk] <- TRUE
      events$event[i] <- "newpackage"
    }
  } else {
    present[pk] <- FALSE
  }
}
## Keep only the events that change the package count: first releases
## (relabelled "newpackage") and archivals.
add_del_events <- events[
  is.element(events$event, c("newpackage", "archived")),
]

## Map each kept event to its effect on the count: +1 or -1.
pm <- setNames(c(1, -1), c("newpackage", "archived"))[add_del_events$event]

## Sanity check, must be current number of active packages
sum(pm)

## Append the running total as a `total` column and export as CSV.
cran_size <- data.frame(
  add_del_events,
  total = cumsum(pm),
  check.names = FALSE
)
write.csv(cran_size, "cran_size.csv")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment