Skip to content

Instantly share code, notes, and snippets.

@achetverikov
Created June 23, 2021 12:57
Show Gist options
  • Save achetverikov/f04ab8ea7209b9e20f39f86a51ef9e7d to your computer and use it in GitHub Desktop.
Save achetverikov/f04ab8ea7209b9e20f39f86a51ef9e7d to your computer and use it in GitHub Desktop.
examples with future and batchtools for PBS Torque
#!/bin/bash
## Job script template for batchtools on PBS Torque.
## The brew tags below are evaluated as R code when the template is rendered;
## they read the job name, the log file, the job collection uri and the
## `resources` list passed at submission time.
#PBS -N <%= job.name %>
#PBS -o <%= log.file %>
## Memory: a bare number is taken as megabytes (an M suffix is appended); a
## string that already carries a unit, such as 2Gb, is passed through unchanged
## so that the unit is not doubled.
#PBS -l walltime=<%= resources$walltime %>,mem=<%= if (is.numeric(resources$memory)) paste0(resources$memory, "M") else resources$memory %>
#PBS -q batch
#PBS -j oe
## setup modules
## create our own temp dir (and clean it up later)
#mkdir /data/${USER}/${USER}-${PBS_JOBID}
#export TMPDIR=/data/${USER}/${USER}-${PBS_JOBID}
## thread limits taken from resources$omp.threads and resources$blas.threads
<%= sprintf("export OMP_NUM_THREADS=%i", resources$omp.threads) -%>
<%= sprintf("export OPENBLAS_NUM_THREADS=%i", resources$blas.threads) -%>
<%= sprintf("export MKL_NUM_THREADS=%i", resources$blas.threads) -%>
## export value of DEBUGME environment var to the worker
#export DEBUGME=<%= Sys.getenv("DEBUGME") %>
## run R
Rscript -e 'batchtools::doJobCollection("<%= uri %>")'
## Cleanup
#rm -rf /data/${USER}/${USER}-${PBS_JOBID}
library(magic)
library(future)
library(batchtools)
library(future.batchtools)
# Example 1: submit jobs to cluster via future
# Use the PBS Torque cluster (via batchtools) as the backend for futures and
# state the resources requested for each job.
plan(
  batchtools_torque,
  resources = list(walltime = "00:05:00", memory = "2Gb")
)
#' Sum of the main diagonal of a magic square
#'
#' @param i Order of the magic square; coerced with `as.integer()` before it
#'   is handed to `magic()`.
#' @return The sum of the diagonal entries of `magic(as.integer(i))`.
get_magig_diagsum <- function(i) {
  square_order <- as.integer(i)
  sum(diag(magic(square_order)))
}
# Submit a single job: %<-% creates a future and binds its value lazily to
# `single_result`.
single_result %<-% get_magig_diagsum(5)
# Poll the underlying future object rather than the variable: evaluating
# `single_result` itself blocks until the value is available, so the sleep
# loop would never actually poll.
while (!resolved(futureOf(single_result))) {
  Sys.sleep(5)
}
single_result
# Example 2: submit many jobs in parallel via batchtools
# Create a registry folder that holds all the necessary bookkeeping info.
reg <- makeRegistry(file.dir = ".future_registry", seed = 1)
# Packages required by the submitted function.
reg$packages <- c("magic")
# Submit through PBS Torque using the template file named below.
reg$cluster.functions <- makeClusterFunctionsTORQUE(
  template = "batchtools.torque.tmpl"
)
# The call syntax mirrors the standard R lapply() function.
res <- btlapply(
  5:9,
  fun = get_magig_diagsum,
  reg = reg,
  resources = list(walltime = "00:05:00", memory = "2Gb")
)
res
# Clean up.
clearRegistry()
removeRegistry()
# Example 3: parallelize brms execution
library(brms)
# Download the example data set, then recode the numeric program code as a
# labelled factor and turn the id column into a factor.
p <- read.csv("https://stats.idre.ucla.edu/stat/data/poisson_sim.csv")
p$prog <- factor(
  p$prog,
  levels = 1:3,
  labels = c("General", "Academic", "Vocational")
)
p$id <- factor(p$id)
# Two-level parallelization setup: the first level submits the job that
# compiles the model on the cluster, the second level runs the individual
# chains in parallel. Each level carries its own resource request.
plan(
  list(
    tweak(
      batchtools_torque,
      resources = list(
        walltime = "00:20:00", memory = "6Gb", packages = c("brms")
      )
    ),
    tweak(
      batchtools_torque,
      resources = list(
        walltime = "00:05:00", memory = "6Gb", packages = c("brms")
      )
    )
  ),
  .cleanup = TRUE # spelled out: `T` is an ordinary variable and can be reassigned
)
# Submit the model fit as a future: the job is sent to the cluster to compile,
# and with `future = TRUE` the individual chains are in turn started in
# parallel. Only `p` is exported as a global and brms is attached on the
# worker.
brm_job <- future(
  {
    brm(num_awards ~ math + prog, data = p, family = "poisson", future = TRUE)
  },
  packages = c("brms"),
  globals = c("p")
)
# Wait until the job is finished.
while (!resolved(brm_job)) {
  Sys.sleep(5)
}
# Load the results.
brm_val <- value(brm_job)
brm_val
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment