Last active
July 14, 2020 16:07
-
-
Save rcastelo/276a2d3fd26d5511aff286ec8cf1e046 to your computer and use it in GitHub Desktop.
Bioconductor downloads and dependencies
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(igraph) | |
library(BiocPkgTools) | |
## fetch the download statistics and the package listing provided by
## BiocPkgTools
d <- biocDownloadStats()
pkgs <- biocPkgList()

## first criterion for "core" packages: the Maintainer field matches either
## the literal string "maintainer" or one of the name fragments below
## (presumably the Bioconductor core team at the time -- confirm)
whmaintainerpkgs <- grep("maintainer", pkgs$Maintainer)
coreteam <- c("interdonato", "michafla", "jmacdon",
              "morgan", "andrzej", "pages",
              "ramos", "shepherd", "grimbough",
              "turaga", "VanTwisk", "jwang96")
whcoreteampkgs <- grep(paste(coreteam, collapse="|"), pkgs$Maintainer)
corepkgs1 <- pkgs$Package[unique(c(whmaintainerpkgs, whcoreteampkgs))]

## second criterion: the biocViews terms of the package match
## "DataRepresentation"; the terms of each package are collapsed into a
## single string so that grep() sees exactly one element per package
biocviews <- vapply(pkgs$biocViews, paste, character(1), collapse="_")
whcorepkgs2 <- grep("DataRepresentation", biocviews)
## translate row indices into package names; 'corepkgs2' was previously
## used below without ever being defined
corepkgs2 <- pkgs$Package[whcorepkgs2]

## union of both criteria
corepkgs <- unique(c(corepkgs1, corepkgs2))
## build the dependency graph from the "Depends" and "Imports" fields of
## the packages in the "BioCsoft" repository
dep_df <- buildPkgDependencyDataFrame(dependencies=c("Depends", "Imports"),
                                      repo="BioCsoft")
g <- buildPkgDependencyIgraph(dep_df)

## Bioconductor packages that are not yet a vertex of the graph are added
## as additional vertices
vnames <- names(V(g))
missingpkgs <- pkgs$Package[!(pkgs$Package %in% vnames)]
g <- g + vertices(missingpkgs)

## vertices that are not listed in 'pkgs' are dropped from the graph
vnames <- names(V(g))
excludedpkgs <- vnames[!(vnames %in% pkgs$Package)]
g <- induced_subgraph(g, setdiff(vnames, excludedpkgs))

## every listed package that is not a core package is a non-core package
noncorepkgs <- setdiff(pkgs$Package, corepkgs)
## calculate for non-core Bioconductor packages their median monthly
## number of downloads (distinct IPs) in the last 12 months, the total
## number of dependences and the number of dependences on
## "core infrastructure packages"
res <- data.frame(Downloads=integer(length(noncorepkgs)),
                  Ndeps=integer(length(noncorepkgs)),
                  Ncoredeps=integer(length(noncorepkgs)),
                  row.names=noncorepkgs, check.names=FALSE)

## the time window is the same for every package, so compute it once:
## subtracting the day of the month from today gives the last day of the
## previous month
today <- Sys.Date()
lastfullmonth <- today - as.POSIXlt(today)$mday

## filter the download table once instead of once per package: keep
## monthly (not "all") rows of the "Software" repo inside the window with
## a positive count; which() drops rows where the condition is NA
inwindow <- which(d$Month != "all" & d$repo == "Software" &
                  d$Date >= (lastfullmonth-365) & d$Date <= lastfullmonth &
                  d$Nb_of_distinct_IPs > 0)
dlbypkg <- split(d$Nb_of_distinct_IPs[inwindow], d$Package[inwindow])

for (p in noncorepkgs) {
  ## fetch dependences: everything reachable from 'p' along outgoing
  ## edges, excluding 'p' itself
  deps <- setdiff(names(subcomponent(g, p, mode="out")), p)
  ## fetch number of dependences to "core infrastructure packages"
  ncoredeps <- sum(deps %in% corepkgs)
  ## fetch median number of downloads through the last 12 months;
  ## NULL (length 0) when the package has no rows in the window
  d.pkg <- dlbypkg[[p]]
  if (length(d.pkg) == 12) {
    res[p, ] <- c(median(d.pkg), length(deps), ncoredeps)
  } else {
    ## without 12 monthly values in the window set downloads to NA
    res[p, ] <- c(NA, length(deps), ncoredeps)
  }
}

## discard packages without complete download data for the last 12 months
res <- res[!is.na(res$Downloads), ]
## store the per-package table on disk
saveRDS(res, file="downloadsbydeps.rds")

## split packages into two groups by number of core dependences:
## [0, 1) i.e. none, and [1, max] i.e. at least one
corebreaks <- c(0, 1, max(res$Ncoredeps))
ncd <- cut(res$Ncoredeps, breaks=corebreaks,
           right=FALSE, include.lowest=TRUE)

## plot log10 downloads by group and mark each group mean with a filled
## diamond slightly to the right of its group position
logdownloads <- log10(res$Downloads)
plot(logdownloads ~ ncd, xlab="Number of core dependences",
     ylab="log10 Monthly downloads", las=1)
groupmeans <- tapply(logdownloads, ncd, mean)
points(seq_along(groupmeans)+0.1, groupmeans, pch=23, bg="black")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment