Last active February 7, 2024 18:03
Minimal example of using `foreach` parallelism on a cluster managed by SLURM

Job script:


#!/bin/bash
# SLURM batch script: runs r_foreach_example.R as one task with several CPUs.
# sbatch requires the script to begin with a shebang line, and it only reads
# #SBATCH directives that appear before the first executable command.

#SBATCH --job-name=rfee
# --chdir sets the job's working directory (older SLURM releases spelled
# this option --workdir, which current releases no longer accept).
#SBATCH --chdir=/home/
#SBATCH --output=r_foreach_example_console_output.txt
#SBATCH --mem-per-cpu=100 # specify RAM per CPU here in MB
#SBATCH --time=0:02:00
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=4 # specify number of CPUs to use here

# make the cluster's R installation available to this job
module load r-3.5.1-gcc-4.8.5-esnjoca

# run the R script; its console output goes to the --output file above
R --file=./r_foreach_example.R

R Script (r_foreach_example.R):

# specify the path from which to load R packages


# load the packages:

library('doMC') # note: loading 'doMC' also loads 'foreach'

# a simple function to execute first in serial then again in parallel

f1 <- function(x){

# Import the number of CPUs allocated to this job.
# This is the number we supplied at the line
#   #SBATCH --cpus-per-task=4
# in our .sh file; SLURM exposes it to the job as the environment
# variable SLURM_CPUS_PER_TASK.
#
# Sys.getenv() returns a character string, so convert it to a number.
# If the variable is unset (e.g. the script is run outside SLURM, or
# --cpus-per-task was not given) the result is "" and the conversion
# yields NA, which is not a usable core count for the parallel backend.
# In that case fall back to a single core instead of failing later
# inside %dopar%.

n.cpus <- suppressWarnings(as.numeric(Sys.getenv("SLURM_CPUS_PER_TASK")))

if (is.na(n.cpus) || n.cpus < 1) {
  warning(
    "SLURM_CPUS_PER_TASK is unset or invalid; falling back to 1 core",
    call. = FALSE
  )
  n.cpus <- 1
}

# register a parallel backend using the validated number of CPUs

registerDoMC(cores = n.cpus)

# run a serial foreach loop

  s1 <- foreach(i = 1:4, .combine = c) %do%

# run a parallel foreach loop

  s2 <- foreach(i = 1:4, .combine = c) %dopar%

# the parallel foreach loop should be faster
