Skip to content

Instantly share code, notes, and snippets.

@tobigithub
Created September 24, 2015 00:42
Show Gist options
  • Save tobigithub/c0129fc153f282cbcf69 to your computer and use it in GitHub Desktop.
Save tobigithub/c0129fc153f282cbcf69 to your computer and use it in GitHub Desktop.
# Run R code on 32 bit and 64 bit and get different results
# You need to run the code on 32 Bit R version and then the 64 bit R (not just the code below)
# http://stackoverflow.com/questions/17881609/parallel-randomforest-with-different-results-using-dosnow
#
library(foreach)
library(doSNOW)
library(parallel)
# Demo 1: every task re-seeds the RNG with the same value (666) inside the
# loop body before drawing one normal deviate, so all tasks return the same
# number (see the recorded output further down).
# The set.seed() here seeds the master session; the workers are seeded inside
# the %dopar% body.
set.seed(666)
ncores <- 4
cl <- makeCluster(ncores)
registerDoSNOW(cl)
# `finally` shuts the worker processes down even when a task fails, so a
# failed run does not leave orphaned R workers behind. The result list is the
# value of tryCatch() and is still auto-printed at top level.
tryCatch(
  foreach(i = seq_len(ncores)) %dopar% {
    set.seed(666)
    rnorm(1)
  },
  finally = stopCluster(cl)
)
#---- now seed each cluster worker individually
library(foreach)
library(doSNOW)
library(parallel)
# Demo 2: same setup as the static-seed demo, but each task seeds the RNG
# with its own loop index `i` before drawing one normal deviate.
# The set.seed() here seeds the master session; the workers are seeded inside
# the %dopar% body.
set.seed(666)
ncores <- 4
cl <- makeCluster(ncores)
registerDoSNOW(cl)
# `finally` shuts the worker processes down even when a task fails, so a
# failed run does not leave orphaned R workers behind. The result list is the
# value of tryCatch() and is still auto-printed at top level.
tryCatch(
  foreach(i = seq_len(ncores)) %dopar% {
    set.seed(i)
    rnorm(1)
  },
  finally = stopCluster(cl)
)
#-----
# 64 bit R 32 bit R
# [[1]] [[1]]
# [1] 0.753311 [1] 0.8191309
#
# [[2]] [[2]]
# [1] 0.753311 [1] 0.8191309
#
# [[3]] [[3]]
# [1] 0.753311 [1] 0.8191309
#
# [[4]] [[4]]
# [1] 0.753311 [1] 0.8191309
#----------------------------------
#----------------------------------
#----- RF static seed ------
library(foreach)
library(doSNOW)
library(parallel)
library(randomForest)
# Random-forest demo, static seed: each of the `ncores` tasks re-seeds with
# the same value, grows a 100-tree forest, and the per-task forests are merged
# with combine().
set.seed(123) # seeds the master session, i.e. the runif() data below
ncores <- 2
cl <- makeCluster(ncores)
registerDoSNOW(cl)

# Simulated data: a matrix of uniform noise with `nr` rows and a 4-level
# factor response laid out as four consecutive runs of nr / 4 observations.
nr <- 1000
x <- matrix(runif(100000), nr)
y <- gl(4, nr / 4)

# Rows 1-800 train the forest; the remaining rows are held out.
trainX <- x[1:800, ]
trainY <- y[1:800]
testX <- x[801:nrow(x), ]
testY <- y[801:length(y)] # held-out labels; not used further in this section

# `finally` stops the cluster whether or not the parallel fit succeeds, so a
# failing task cannot leave worker processes running.
rf <- tryCatch(
  foreach(
    i = seq_len(ncores),
    ntree = rep(100, ncores),
    .packages = "randomForest",
    .combine = combine
  ) %dopar% {
    # seed for each node is same
    set.seed(123)
    randomForest(trainX, trainY, ntree = ntree)
  },
  finally = stopCluster(cl)
)

# Spell out `newdata`: the original `new =` only worked through partial
# argument matching.
pred <- predict(rf, newdata = testX)
pred
table(pred)
#----- RF individual seed ----------
library(foreach)
library(doSNOW)
library(parallel)
library(randomForest)
# Random-forest demo, individual seeds: each of the `ncores` tasks seeds the
# RNG with its own loop index `i`, grows a 100-tree forest, and the per-task
# forests are merged with combine().
set.seed(123) # seeds the master session, i.e. the runif() data below
ncores <- 2
cl <- makeCluster(ncores)
registerDoSNOW(cl)

# Simulated data: a matrix of uniform noise with `nr` rows and a 4-level
# factor response laid out as four consecutive runs of nr / 4 observations.
nr <- 1000
x <- matrix(runif(100000), nr)
y <- gl(4, nr / 4)

# Rows 1-800 train the forest; the remaining rows are held out.
trainX <- x[1:800, ]
trainY <- y[1:800]
testX <- x[801:nrow(x), ]
testY <- y[801:length(y)] # held-out labels; not used further in this section

# `finally` stops the cluster whether or not the parallel fit succeeds, so a
# failing task cannot leave worker processes running.
rf <- tryCatch(
  foreach(
    i = seq_len(ncores),
    ntree = rep(100, ncores),
    .packages = "randomForest",
    .combine = combine
  ) %dopar% {
    # seed for each node is different
    set.seed(i)
    randomForest(trainX, trainY, ntree = ntree)
  },
  finally = stopCluster(cl)
)

# Spell out `newdata`: the original `new =` only worked through partial
# argument matching.
pred <- predict(rf, newdata = testX)
pred
table(pred)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment