The purpose of this post is to clarify how to set the random seed for
parallel computation with `foreach`, `doParallel`, and `doRNG` on
Windows 10 when the loop body refers to variables that are not defined
inside the right-hand side of `%dopar%`.
In short: use `%dorng%` in place of `%dopar%`. If you already understand
this, you don't need to read what follows below.
# On Windows, use Sys.setenv(LANG = "en") instead of Sys.setlocale()
# to show messages in English.
Sys.setenv(LANG = "en")
sessionInfo()
## R version 3.4.1 (2017-06-30)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 14393)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=Japanese_Japan.932 LC_CTYPE=Japanese_Japan.932
## [3] LC_MONETARY=Japanese_Japan.932 LC_NUMERIC=C
## [5] LC_TIME=Japanese_Japan.932
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## loaded via a namespace (and not attached):
## [1] compiler_3.4.1 backports_1.1.0 magrittr_1.5 rprojroot_1.2
## [5] tools_3.4.1 htmltools_0.3.6 yaml_2.1.14 Rcpp_0.12.11
## [9] stringi_1.1.5 rmarkdown_1.6 knitr_1.16 stringr_1.2.0
## [13] digest_0.6.12 evaluate_0.10.1
# install.packages(c("foreach", "doParallel", "doRNG"))
suppressPackageStartupMessages(library(foreach))
suppressPackageStartupMessages(library(doParallel))
suppressPackageStartupMessages(library(doRNG))
# Helper: call `f` twice with the same arguments and report whether the two
# calls produced exactly the same value.
#
# f    A function to call.
# ...  Arguments passed unchanged to both calls of `f`.
#
# Returns TRUE when both results are identical(), FALSE otherwise.
is_same <- function(f, ...) {
  call_args <- list(...)
  first_run <- do.call(f, call_args)
  second_run <- do.call(f, call_args)
  identical(first_run, second_run)
}
As expected, it returns `FALSE`
because no random seed is set.
# Sequential baseline without a seed: run `n` iterations with %do% and draw
# `k` uniform random numbers in each one.
#
# k  Number of uniform draws per iteration.
# n  Number of iterations.
#
# Returns the foreach result (one element per iteration).
f0 <- function(k, n) {
  iterations <- foreach(i = seq_len(n))
  iterations %do% runif(k)
}
print(f0(2, 3))
## [[1]]
## [1] 0.3632454 0.1755758
##
## [[2]]
## [1] 0.4010448 0.6290903
##
## [[3]]
## [1] 0.6007226 0.8193204
is_same(f0, 2, 3)
## [1] FALSE
You can imagine the result before running this code: with the seed set inside the function, both calls return the same values.
# Sequential version with an explicit seed: seed the RNG first, then draw
# `k` uniform random numbers in each of `n` %do% iterations.
#
# k     Number of uniform draws per iteration.
# n     Number of iterations.
# seed  Value passed to set.seed() before the loop starts.
#
# Returns the foreach result (one element per iteration).
f0a <- function(k, n, seed) {
  set.seed(seed)
  draws <- foreach(i = seq_len(n)) %do% runif(k)
  draws
}
is_same(f0a, 2, 3, seed = 1)
## [1] TRUE
# Same as the seeded sequential version, but the loop uses %dopar%.
# This function registers no parallel backend itself; in the recorded run
# foreach warns and executes the loop sequentially.
#
# k     Number of uniform draws per iteration.
# n     Number of iterations.
# seed  Value passed to set.seed() before the loop starts.
#
# Returns the foreach result (one element per iteration).
f1 <- function(k, n, seed) {
  set.seed(seed)
  loop <- foreach(i = seq_len(n))
  loop %dopar% runif(k)
}
is_same(f1, 2, 3, seed = 1)
## Warning: executing %dopar% sequentially: no parallel backend registered
## [1] TRUE
# Parallel version that only calls set.seed() in the calling process before
# a %dopar% loop on a real cluster. The recorded run shows that two calls
# with the same seed do NOT return identical results.
#
# k     Number of uniform draws per iteration.
# n     Number of iterations.
# seed  Value passed to set.seed() in the calling process.
#
# Returns the foreach result (one element per iteration).
f2 <- function(k, n, seed) {
  # set cluster
  # detectCores() may return NA; na.rm = TRUE then falls back to one worker.
  cores <- max(1, detectCores() - 1, na.rm = TRUE)
  cl <- makeCluster(cores)
  # Register cleanup as soon as the cluster exists, so the workers are
  # stopped even if a later step fails. Also reset the foreach backend so
  # the stopped cluster is not left registered for later %dopar% calls.
  on.exit(
    {
      stopCluster(cl)
      registerDoSEQ()
    },
    add = TRUE
  )
  registerDoParallel(cl)
  set.seed(seed)
  foreach(i = seq_len(n)) %dopar% {
    runif(k)
  }
}
is_same(f2, 2, 3, seed = 1)
## [1] FALSE
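The result of f2() shows that set.seed() does not work for parallel execution: it seeds only the calling R process, while the random numbers are drawn in separate worker processes.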
# Demonstration of a combination that fails: registerDoRNG(seed) followed by
# a plain %dopar% loop inside a function. In the recorded run this call
# stops with: task 1 failed - "object 'k' not found".
# The code is intentionally left as is, because the failure is the point
# of the example.
#
# k     Number of uniform draws per iteration.
# n     Number of iterations.
# seed  Seed passed to registerDoRNG().
f3 <- function(k, n, seed) {
# set cluster
cores <- max(1, detectCores() - 1)
cl <- makeCluster(cores)
registerDoParallel(cl)
on.exit(stopCluster(cl))
# Register doRNG with the given seed after the doParallel backend.
registerDoRNG(seed)
foreach(i = seq_len(n)) %dopar% {
# `k` is an argument of the enclosing function, not defined in this body;
# this is the variable reported as missing in the recorded error.
runif(k)
}
}
is_same(f3, 2, 3, seed = 1)
## Error in {: task 1 failed - "object 'k' not found"
An error occurs because the loop body cannot capture `k`, the argument of the enclosing function.
# Second failing demonstration: same as the registerDoRNG(seed) + %dopar%
# example, but with `k` listed explicitly in `.export`. In the recorded run
# this call stops with: unable to find variable "k".
# The code is intentionally left as is, because the failure is the point
# of the example.
#
# k     Number of uniform draws per iteration.
# n     Number of iterations.
# seed  Seed passed to registerDoRNG().
f4 <- function(k, n, seed) {
# Start one worker fewer than the detected core count (at least one).
cores <- max(1, detectCores() - 1)
cl <- makeCluster(cores)
registerDoParallel(cl)
on.exit(stopCluster(cl))
# Register doRNG with the given seed after the doParallel backend.
registerDoRNG(seed)
# Explicitly ask foreach to export `k`; the recorded error shows that this
# does not help.
foreach(i = seq_len(n), .export = "k") %dopar% {
runif(k)
}
}
is_same(f4, 2, 3, seed = 1)
## Error in e$fun(obj, substitute(ex), parent.frame(), e$data): unable to find variable "k"
Passing `.export = "k"` to `foreach()`
does not help either.
?registerDoRNG
# Working version: register doRNG with the seed and run the loop with
# %dorng% instead of %dopar%. The recorded run shows that two calls with
# the same seed return identical results.
#
# k     Number of uniform draws per iteration.
# n     Number of iterations.
# seed  Seed passed to registerDoRNG().
#
# Returns the %dorng% loop result (one element per iteration).
f5 <- function(k, n, seed) {
  # detectCores() may return NA; na.rm = TRUE then falls back to one worker.
  cores <- max(1, detectCores() - 1, na.rm = TRUE)
  cl <- makeCluster(cores)
  # Register cleanup as soon as the cluster exists, so the workers are
  # stopped even if a later step fails. Also reset the foreach backend so
  # the stopped cluster is not left registered for later %dopar% calls.
  on.exit(
    {
      stopCluster(cl)
      registerDoSEQ()
    },
    add = TRUE
  )
  registerDoParallel(cl)
  registerDoRNG(seed)
  foreach(i = seq_len(n)) %dorng% {
    runif(k)
  }
}
is_same(f5, 2, 3, seed = 1)
## [1] TRUE
Yes! It works!
# Working version using set.seed() in the calling process together with
# %dorng%. The recorded run shows that two calls with the same seed return
# identical results.
#
# k     Number of uniform draws per iteration.
# n     Number of iterations.
# seed  Value passed to set.seed() before the %dorng% loop.
#
# Returns the %dorng% loop result (one element per iteration).
f6 <- function(k, n, seed) {
  # detectCores() may return NA; na.rm = TRUE then falls back to one worker.
  cores <- max(1, detectCores() - 1, na.rm = TRUE)
  cl <- makeCluster(cores)
  # Register cleanup as soon as the cluster exists, so the workers are
  # stopped even if a later step fails. Also reset the foreach backend so
  # the stopped cluster is not left registered for later %dopar% calls.
  on.exit(
    {
      stopCluster(cl)
      registerDoSEQ()
    },
    add = TRUE
  )
  registerDoParallel(cl)
  set.seed(seed)
  foreach(i = seq_len(n)) %dorng% {
    runif(k)
  }
}
is_same(f6, 2, 3, seed = 1)
## [1] TRUE
# Working version that passes the seed directly to the loop through the
# `.options.RNG` argument of foreach() and runs it with %dorng%. The
# recorded run shows that two calls with the same seed return identical
# results.
#
# k     Number of uniform draws per iteration.
# n     Number of iterations.
# seed  Seed passed as `.options.RNG`.
#
# Returns the %dorng% loop result (one element per iteration).
f7 <- function(k, n, seed) {
  # detectCores() may return NA; na.rm = TRUE then falls back to one worker.
  cores <- max(1, detectCores() - 1, na.rm = TRUE)
  cl <- makeCluster(cores)
  # Register cleanup as soon as the cluster exists, so the workers are
  # stopped even if a later step fails. Also reset the foreach backend so
  # the stopped cluster is not left registered for later %dopar% calls.
  on.exit(
    {
      stopCluster(cl)
      registerDoSEQ()
    },
    add = TRUE
  )
  registerDoParallel(cl)
  foreach(i = seq_len(n), .options.RNG = seed) %dorng% {
    runif(k)
  }
}
is_same(f7, 2, 3, seed = 1)
## [1] TRUE
Conclusion: use `%dorng%` in place of `%dopar%`.