Last active
February 22, 2020 06:38
Star
You must be signed in to star a gist
Functions to help identifying scarcely used R package dependencies representing a large installation burden
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## this file contains 4 functions using several R and Bioconductor | |
## packages to help identifying scarcely used R package dependencies | |
## representing a large installation burden. it has been created | |
## upon the following discussion thread on the bioc-devel mailing list: | |
## https://stat.ethz.ch/pipermail/bioc-devel/2020-February/016146.html | |
## | |
## to use these functions you need to build a database of package | |
## information using the following lines: | |
## | |
## repos <- BiocManager::repositories()[c("BioCsoft", "CRAN")] | |
## db <- utils::available.packages(repos=repos) | |
## | |
## a typical usage is: | |
## | |
## 1. identify the burden of dependencies of a package, | |
## e.g., "GenomicScores" | |
## | |
## pkgDepMetrics("GenomicScores", db) | |
## | |
## 2. let's say we want to investigate what function calls | |
## are responsible for the dependency on "BSgenome" | |
## | |
## funCalls2Dep("GenomicScores", "BSgenome", db) | |
## | |
## 3. we want now to see what lines in our code contain | |
## those function calls (assuming we're in the source | |
## path of the package "GenomicScores" | |
## | |
## funCalls2Dep("GenomicScores", "BSgenome", db, ".", "R") | |
## build a directed graph of all package dependencies | |
pkgDepGraph <- function(pkg, db) { | |
require(tools) | |
require(graph) | |
deps <- tools::package_dependencies(pkg, db, recursive=TRUE)[[1]] | |
deps <- tools::package_dependencies(c(pkg, deps), db) | |
deps <- lapply(deps, function(x, nonbasepkgs) x[x %in% nonbasepkgs], rownames(db)) | |
deps <- deps[names(deps) %in% rownames(db)] | |
g <- graph::graphNEL(nodes=names(deps), edgeL=deps, edgemode="directed") | |
g | |
} | |
## calculate some metric for first-degre package dependencies | |
pkgDepMetrics<- function(pkg, db) { | |
require(graph) | |
require(itdepends) | |
g <- pkgDepGraph(pkg, db) | |
dep1pkgs <- graph::adj(g, pkg)[[1]] | |
du <- suppressMessages(itdepends::dep_usage_pkg(pkg)) | |
du <- sapply(split(du$fun, du$pkg), unique) | |
du <- lengths(du) | |
ifun <- efun <- integer(length(dep1pkgs)) | |
names(ifun) <- names(efun) <- dep1pkgs | |
ifun[dep1pkgs] <- du[dep1pkgs] | |
depov <- numeric(length(dep1pkgs)) | |
names(depov) <- dep1pkgs | |
gvtx <- graph::nodes(g) | |
for (p in dep1pkgs) { | |
gdep <- pkgDepGraph(p, db) | |
gdepvtx <- graph::nodes(gdep) | |
depov[p] <- length(intersect(gvtx, gdepvtx)) / length(union(gvtx, gdepvtx)) | |
expfun <- getNamespaceExports(p) | |
nonstd <- expfun[grep("^\\.", expfun)] | |
efun[p] <- length(setdiff(expfun, nonstd)) | |
} | |
res <- data.frame(ImportedBy=ifun, | |
Exported=efun, | |
Usage=100*ifun/efun, | |
DepOverlap=depov, | |
row.names=dep1pkgs, | |
stringsAsFactors=FALSE) | |
res[order(res$Usage), ] | |
} | |
## finds first-degree package dependencies that | |
## lead to a target dependency | |
pkgFirstDep2Target <- function(sourcePkg, targetPkg, db) { | |
require(graph) | |
g <- pkgDepGraph(sourcePkg, db) | |
grev <- as(as(t(as(as(g, "graphAM"), "matrix")), "graphAM"), "graphNEL") | |
revdeps <- c(targetPkg, names(graph::acc(grev, targetPkg)[[1]])) | |
dep1pkgs <- intersect(graph::adj(g, sourcePkg)[[1]], revdeps) | |
dep1pkgs | |
} | |
## find what function calls in a package lead to a target dependency | |
funCalls2Dep <- function(sourcePkg, targetPkg, db, pkgSource=NULL, subDirs=NULL) { | |
require(dplyr) | |
if (!is.null(pkgSource)) | |
if (!dir.exists(pkgSource)) | |
stop(sprintf("Directory %s does not exist.")) | |
du <- suppressMessages(itdepends::dep_usage_pkg(sourcePkg) %>% | |
group_by(pkg) %>% | |
count(fun) %>% | |
top_n(1) %>% | |
arrange(desc(n))) | |
dep1pkgs <- pkgFirstDep2Target(sourcePkg, targetPkg, db) | |
res <- du[du$pkg %in% dep1pkgs, ] | |
if (!is.null(pkgSource)) { | |
dl <- c() | |
for (pkg in unique(res$pkg)) | |
dl <- c(dl, suppressMessages(itdepends::dep_locate(pkg, pkgSource))) | |
funcalls <- paste(res$pkg, res$fun, sep="::") | |
mt <- match(sapply(dl, "[[", "message"), funcalls) | |
res <- dl[!is.na(mt)] | |
## 'dep_locate()' searches among files in a "R project" | |
## even if this is a directory upstream of the one given | |
## as 'pkgSource'. to restrict the search to the desired | |
## sub-directory we use the argument 'subDirs' | |
if (!is.null(subDirs)) { | |
wh <- grep(paste(sprintf("^%s", subDirs), collapse="|"), | |
sapply(res, "[[", "filename")) | |
res <- res[wh] | |
} | |
} | |
res | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment