Skip to content

Instantly share code, notes, and snippets.

@lcolladotor
Last active May 31, 2020 15:14
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save lcolladotor/7462753 to your computer and use it in GitHub Desktop.
Save lcolladotor/7462753 to your computer and use it in GitHub Desktop.
Testing differing approaches for reducing the memory load when using mclapply in R.
#!/bin/bash
#$ -cwd
#$ -l jabba,mem_free=10G,h_vmem=2G,h_fsize=1G
#$ -N approach1b
#$ -pe local 20
#$ -m e
echo "**** Job starts ****"
date
## For organization
mkdir -p logs1b
mkdir -p trash
## Run approach test
Rscript testApproach.R -a "approach1b" -m 20 -d FALSE
## Organize log files
mv *.p* trash/
mv approach1b.* logs1b/
### Done
echo "**** Job ends ****"
date
#!/bin/bash
#$ -cwd
#$ -l jabba,mem_free=10G,h_vmem=2G,h_fsize=1G
#$ -N approach2
#$ -pe local 20
#$ -m e
echo "**** Job starts ****"
date
## For organization
mkdir -p logs2
mkdir -p trash
## Run approach test
Rscript testApproach.R -a "approach2" -m 20 -d FALSE
## Organize log files
mv *.p* trash/
mv approach2.* logs2/
### Done
echo "**** Job ends ****"
date
#!/bin/bash
#$ -cwd
#$ -l jabba,mem_free=10G,h_vmem=2G,h_fsize=1G
#$ -N approach3
#$ -pe local 20
#$ -m e
echo "**** Job starts ****"
date
## For organization
mkdir -p logs3
mkdir -p trash
## Run approach test
Rscript testApproach.R -a "approach3" -m 20 -d FALSE
## Organize log files
mv *.p* trash/
mv approach3.* logs3/
### Done
echo "**** Job ends ****"
date
#!/bin/bash
#$ -cwd
#$ -l jabba,mem_free=10G,h_vmem=2G,h_fsize=1G
#$ -N approach4
#$ -pe local 20
#$ -m e
echo "**** Job starts ****"
date
## For organization
mkdir -p logs4
mkdir -p trash
## Run approach test
Rscript testApproach.R -a "approach4" -m 20 -d TRUE
## Organize log files
mv *.p* trash/
mv approach4.* logs4/
### Done
echo "**** Job ends ****"
date
## Load libraries
library("getopt")
library("parallel")
## Specify parameters
spec <- matrix(c(
'approach', 'a', 1, "character", "Either 'approach1b', 'approach2', 'approach3' or 'approach4'",
'mcores', 'm', 1, "integer", "Number of cores",
'dataAbsent', 'd', 2, "logical", "Is the data absent?",
'help' , 'h', 0, "logical", "Display help"
), byrow=TRUE, ncol=5)
opt <- getopt(spec)
## if help was asked for print a friendly message
## and exit with a non-zero error code
if (!is.null(opt$help)) {
cat(getopt(spec, usage=TRUE))
q(status=1)
}
if(opt$dataAbsent) {
set.seed(20131113)
data <- data.frame(matrix(rnorm(1e7), ncol=10))
if(opt$approach != "approach4") {
dataSplit1b <- split(data, rep(seq_len(opt$mcores), each= nrow(data) / opt$mcores))
rm(data)
save(dataSplit1b, file="dataSplit1b.Rdata")
}
}
if(opt$approach == "approach1b") {
if(!opt$dataAbsent) load("dataSplit1b.Rdata")
#tracemem(dataSplit1b)
## Approach 1b
res1b <- mclapply(dataSplit1b, rowMeans, mc.cores=opt$mcores)
} else if (opt$approach == "approach2") {
if(!opt$dataAbsent) load("dataSplit1b.Rdata")
## Save the split data in an environment
my.env <- new.env()
for(i in seq_len(opt$mcores)) {
eval(parse(text=paste0("my.env$data", i, " <- dataSplit1b[[", i, "]]")))
}
rm(dataSplit1b)
## Function that takes indexes, then extracts the data from the environment
applyMyFun <- function(idx, env) {
eval(parse(text=paste0("result <- env$", ls(env)[idx])))
rowMeans(result)
}
## Approach 2
index <- seq_len(opt$mcores)
names(index) <- index
res2 <- mclapply(index, applyMyFun, env=my.env, mc.cores=opt$mcores)
} else if(opt$approach == "approach3") {
if(opt$dataAbsent) {
## Save the chunks
for(i in names(dataSplit1b)) {
chunk <- dataSplit1b[[i]]
output <- paste0("chunk", i, ".Rdata")
save(chunk, file=output)
}
rm(dataSplit1b)
}
## Function that loads the chunk
applyMyFun2 <- function(idx) {
load(paste0("chunk", idx, ".Rdata"))
rowMeans(chunk)
}
## Approach 3
index <- seq_len(opt$mcores)
names(index) <- index
res3 <- mclapply(index, applyMyFun2, mc.cores=opt$mcores)
} else if(opt$approach == "approach4") {
#tracemem(data)
## Approach 4: credit to Ryan from https://stat.ethz.ch/pipermail/bioc-devel/2013-November/004930.html
res4 <- mclapply(splitIndices(nrow(data), opt$mcores), function(i) rowMeans(data[i,]), mc.cores=opt$mcores)
} else {
stop("Invalid 'approach' option")
}
## Reproducibility
proc.time()
sessionInfo()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment