Skip to content

Instantly share code, notes, and snippets.

@rcastelo
Last active February 22, 2020 06:38
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save rcastelo/7429d05178ddb57a38bd42093c2ddfe2 to your computer and use it in GitHub Desktop.
Functions to help identifying scarcely used R package dependencies representing a large installation burden
## this file contains 4 functions using several R and Bioconductor
## packages to help identifying scarcely used R package dependencies
## representing a large installation burden. it has been created
## upon the following discussion thread on the bioc-devel mailing list:
## https://stat.ethz.ch/pipermail/bioc-devel/2020-February/016146.html
##
## to use these functions you need to build a database of package
## information using the following lines:
##
## repos <- BiocManager::repositories()[c("BioCsoft", "CRAN")]
## db <- utils::available.packages(repos=repos)
##
## a typical usage is:
##
## 1. identify the burden of dependencies of a package,
## e.g., "GenomicScores"
##
## pkgDepMetrics("GenomicScores", db)
##
## 2. let's say we want to investigate what function calls
## are responsible for the dependency on "BSgenome"
##
## funCalls2Dep("GenomicScores", "BSgenome", db)
##
## 3. we want now to see what lines in our code contain
## those function calls (assuming we're in the source
## path of the package "GenomicScores"
##
## funCalls2Dep("GenomicScores", "BSgenome", db, ".", "R")
## build a directed graph of all package dependencies
pkgDepGraph <- function(pkg, db) {
require(tools)
require(graph)
deps <- tools::package_dependencies(pkg, db, recursive=TRUE)[[1]]
deps <- tools::package_dependencies(c(pkg, deps), db)
deps <- lapply(deps, function(x, nonbasepkgs) x[x %in% nonbasepkgs], rownames(db))
deps <- deps[names(deps) %in% rownames(db)]
g <- graph::graphNEL(nodes=names(deps), edgeL=deps, edgemode="directed")
g
}
## calculate some metric for first-degre package dependencies
pkgDepMetrics<- function(pkg, db) {
require(graph)
require(itdepends)
g <- pkgDepGraph(pkg, db)
dep1pkgs <- graph::adj(g, pkg)[[1]]
du <- suppressMessages(itdepends::dep_usage_pkg(pkg))
du <- sapply(split(du$fun, du$pkg), unique)
du <- lengths(du)
ifun <- efun <- integer(length(dep1pkgs))
names(ifun) <- names(efun) <- dep1pkgs
ifun[dep1pkgs] <- du[dep1pkgs]
depov <- numeric(length(dep1pkgs))
names(depov) <- dep1pkgs
gvtx <- graph::nodes(g)
for (p in dep1pkgs) {
gdep <- pkgDepGraph(p, db)
gdepvtx <- graph::nodes(gdep)
depov[p] <- length(intersect(gvtx, gdepvtx)) / length(union(gvtx, gdepvtx))
expfun <- getNamespaceExports(p)
nonstd <- expfun[grep("^\\.", expfun)]
efun[p] <- length(setdiff(expfun, nonstd))
}
res <- data.frame(ImportedBy=ifun,
Exported=efun,
Usage=100*ifun/efun,
DepOverlap=depov,
row.names=dep1pkgs,
stringsAsFactors=FALSE)
res[order(res$Usage), ]
}
## finds first-degree package dependencies that
## lead to a target dependency
pkgFirstDep2Target <- function(sourcePkg, targetPkg, db) {
require(graph)
g <- pkgDepGraph(sourcePkg, db)
grev <- as(as(t(as(as(g, "graphAM"), "matrix")), "graphAM"), "graphNEL")
revdeps <- c(targetPkg, names(graph::acc(grev, targetPkg)[[1]]))
dep1pkgs <- intersect(graph::adj(g, sourcePkg)[[1]], revdeps)
dep1pkgs
}
## find what function calls in a package lead to a target dependency
funCalls2Dep <- function(sourcePkg, targetPkg, db, pkgSource=NULL, subDirs=NULL) {
require(dplyr)
if (!is.null(pkgSource))
if (!dir.exists(pkgSource))
stop(sprintf("Directory %s does not exist."))
du <- suppressMessages(itdepends::dep_usage_pkg(sourcePkg) %>%
group_by(pkg) %>%
count(fun) %>%
top_n(1) %>%
arrange(desc(n)))
dep1pkgs <- pkgFirstDep2Target(sourcePkg, targetPkg, db)
res <- du[du$pkg %in% dep1pkgs, ]
if (!is.null(pkgSource)) {
dl <- c()
for (pkg in unique(res$pkg))
dl <- c(dl, suppressMessages(itdepends::dep_locate(pkg, pkgSource)))
funcalls <- paste(res$pkg, res$fun, sep="::")
mt <- match(sapply(dl, "[[", "message"), funcalls)
res <- dl[!is.na(mt)]
## 'dep_locate()' searches among files in a "R project"
## even if this is a directory upstream of the one given
## as 'pkgSource'. to restrict the search to the desired
## sub-directory we use the argument 'subDirs'
if (!is.null(subDirs)) {
wh <- grep(paste(sprintf("^%s", subDirs), collapse="|"),
sapply(res, "[[", "filename"))
res <- res[wh]
}
}
res
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment