Skip to content

Instantly share code, notes, and snippets.

@bbolker
Created June 29, 2017 16:43
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bbolker/9b4b4dac83cd8bb16b855d237390e258 to your computer and use it in GitHub Desktop.
Save bbolker/9b4b4dac83cd8bb16b855d237390e258 to your computer and use it in GitHub Desktop.
a "factorial apply" function
#' apply a function to a factorial combination of elements of lists
#' returns (if \code{FLATTEN=TRUE}) a flat list (with length equal to the product of the
#' lengths of the input lists) of results, along with a \code{grid} attribute containing
#' a data frame giving the values used for each element in the list
#' @param FUN function to apply
#' @param ... list of vectors to apply to
#' @param FLATTEN
#' @param MoreArgs additional arguments to pass to \code{FUN}
#' @param GlobalVars list of names of variables in global environment needed for parallel runs
#' @param .progress progress bar?
#' @param checkpoint ?stub
#' @param verbose
#' @param sanitize.call clean up call element of computed objects?
#' @examples
#' L1 <- list(data.frame(x=1:10,y=1:10),
#' data.frame(x=runif(10),y=runif(10)),
#' data.frame(x=rnorm(10),y=rnorm(10)))
#' L2 <- list(y~1,y~x,y~poly(x,2))
#' z <- xapply(lm,L2,L1)
#' z <- xapply(lm,L2,L1,verbose=TRUE)
#' z <- xapply(lm,L2,L1,parallel=TRUE)
#' sapply(z,coef)
#' if (require(tcltk)) {
#' z <- xapply(lm,L2,L1,.progress="tk")
#' }
## FIXME/TODO:
## - wrap in try()? warn on inherits("error")?
## - Summarize() method?
## - progress printing doesn't work right in parallel eval
xapply <- function(FUN,...,
FLATTEN=TRUE,
MoreArgs=NULL,
GlobalVars=NULL,
Packages=NULL,
parallel=FALSE,
ncores=2,
.progress="none",
verbose=FALSE,
checkpoint=FALSE,
sanitize.call=FALSE) {
if (parallel) {
require(parallel)
## print/cat to stdout on Linux ... doesn't work on Windows?
## https://stackoverflow.com/questions/16717461/how-can-i-print-or-cat-when-using-parallel
cl <- makeCluster(getOption("cl.cores", ncores),outfile="")
on.exit(stopCluster(cl))
}
L <- list(...)
if (isTRUE(checkpoint) || is.character(checkpoint))
stop("checkpointing is a stub")
namefun <- function(x,maxchar=20) {
if (!is.null(names(x))) return(names(x))
if (is.atomic(x) && !any(duplicated(n <- as.character(x)))) return(n)
nn <- sapply(x,deparse)
if (all(nchar(nn)<maxchar)) return(nn)
return(paste0("V",seq_along(x)))
}
namelist <- lapply(L,namefun)
grid <- do.call(expand.grid,c(namelist,list(stringsAsFactors=FALSE)))
grid <- data.frame(ind=1:nrow(grid),grid,stringsAsFactors=FALSE)
inds <- do.call(expand.grid,lapply(L,seq_along)) ## Marek's suggestion
maxind <- nrow(inds)
pbStyle <- 1
do.progress <- (.progress!="none")
if (do.progress || verbose) { ## wrap/decorate primary function
if (do.progress) {
## minor hack to allow plyr-like progress bars
if (.progress=="text") {
.progress <- "txt"
pbStyle <- 3
}
pbfun <- get(paste(.progress,"ProgressBar",sep=""))
setpbfun <- get(paste("set",.simpleCap(.progress),"ProgressBar",sep=""))
pbArgs <- list()
if (.progress=="txt") pbArgs$style <- pbStyle
pb <- do.call(pbfun,pbArgs)
on.exit(close(pb))
}
FUN0 <- FUN
## don't have an explicit loop, so need to increment counter in environment
i <- 1
FUN <- function(...) {
if (do.progress) setpbfun(pb,value=i/maxind)
if (verbose) cat(paste(grid[i,],collapse=","),"\n")
ret <- FUN0(...)
i <<- i+1
return(ret)
}
}
## FIXME: apply names(L) to argsList ?
if (!FLATTEN) stop("only FLATTEN=TRUE is implemented so far")
## construct list of arg lists (+ additional arguments)
argsList <- lapply(1:nrow(inds), function(i) c(Map(function(x,j) x[[j]],L,as.list(inds[i,])),
MoreArgs))
if (parallel) {
clusterExport(cl,varlist=c("FUN0","FUN","fit_verbose"),env=environment(FUN))
if (!is.null(Packages)) {
for (pkg in Packages) {
clusterCall(cl,library,pkg,character.only=TRUE)
}
}
if (!is.null(GlobalVars)) clusterExport(cl,varlist=GlobalVars)
retlist <- parLapply(cl=cl, X=argsList, fun=function(x) do.call(FUN,x))
} else {
retlist <- lapply(argsList, do.call, what=FUN)
}
attr(retlist,"grid") <- grid
return(retlist)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment