Skip to content

Instantly share code, notes, and snippets.

@tobigithub
Last active September 24, 2015 01:54
Show Gist options
  • Save tobigithub/fcafabbed822ee0513f4 to your computer and use it in GitHub Desktop.
Save tobigithub/fcafabbed822ee0513f4 to your computer and use it in GitHub Desktop.
# Random forests are random ... indeed
# http://stats.stackexchange.com/questions/35609/why-do-i-need-bag-composition-to-calculate-oob-error-of-combined-random-forest-m
# https://github.com/mlist/IB2014/blob/master/helper_methods.R
# Random Forest combine: http://www.inside-r.org/packages/cran/randomForest/docs/combine
#
# This has implications for parallel random forests built with snow, doSNOW, doParallel, etc.
#
# err.rate : NULL
# err.rate : NULL
# OOB : NULL
# Tobias Kind (2015)
#' Build a confusion matrix with per-class error rates for a fitted model
#'
#' Cross-tabulates the observed classes stored in `rf$y` against the classes
#' returned by `predict(rf)` and appends a `class.error` column. The layout
#' follows the `confusion` component of a `randomForest` object: one row per
#' observed class, one column per predicted class, and `class.error` holding
#' the fraction of each observed class that was predicted as something else.
#'
#' @param rf A fitted classification model with a factor component `y` and a
#'   `predict()` method that can be called without new data (for a
#'   `randomForest` object that call returns the out-of-bag predictions).
#' @return A numeric matrix: the confusion counts plus a trailing
#'   `class.error` column. A class with no tabulated observations gets `NaN`.
getConfusionMatrix <- function(rf) {
  # Observed classes go on the rows so that the row totals are per-class
  # counts of the true labels. table() drops NA predictions by default.
  tbl <- table(rf$y, predict(rf))
  per.class.total <- rowSums(tbl)
  # Vectorised over all classes; also well-defined when the table is empty,
  # unlike a `1:nrow(tbl)` loop.
  class.error <- (per.class.total - diag(tbl)) / per.class.total
  cbind(tbl, class.error)
}
# Seed the RNG once so the whole sequence of four forests is reproducible.
set.seed(123)
library(randomForest)

# Four forests grown from the same call on the same data. Each one consumes
# further random numbers, so the fitted forests are not identical.
# norm.votes = FALSE is the setting the randomForest documentation recommends
# when forests are going to be combined.
rf1 <- randomForest(Species ~ ., data = iris, ntree = 50, norm.votes = FALSE)
rf2 <- randomForest(Species ~ ., data = iris, ntree = 50, norm.votes = FALSE)
rf3 <- randomForest(Species ~ ., data = iris, ntree = 50, norm.votes = FALSE)
rf4 <- randomForest(Species ~ ., data = iris, ntree = 50, norm.votes = FALSE)

# Show the summary of every individual forest.
rf1
rf2
rf3
rf4

# Merge the four 50-tree forests into a single ensemble and show it.
rf.all <- combine(rf1, rf2, rf3, rf4)
rf.all
# ---
# Overwrite each forest's confusion component with the matrix recomputed by
# getConfusionMatrix(), displaying it after every assignment.
rf1$confusion <- getConfusionMatrix(rf1)
rf1$confusion

rf2$confusion <- getConfusionMatrix(rf2)
rf2$confusion

rf3$confusion <- getConfusionMatrix(rf3)
rf3$confusion

rf4$confusion <- getConfusionMatrix(rf4)
rf4$confusion

# Reading the component back into a variable shows the same matrix.
conf <- rf4$confusion
conf

# The combined forest has no confusion component, so this is NULL.
conf <- rf.all$confusion
conf

# Recomputing it from the combined forest does work.
rf.all$confusion <- getConfusionMatrix(rf.all)
rf.all$confusion
# Class probabilities from the combined forest.
predict(rf.all, type = "prob")

# Lay out a 4 x 2 grid of panels, then draw the error curves of the four
# individual forests followed by their variable-importance plots.
par(mfrow = c(4, 2))

plot(rf1)
plot(rf2)
plot(rf3)
plot(rf4)

varImpPlot(rf1)
varImpPlot(rf2)
varImpPlot(rf3)
varImpPlot(rf4)

# str(rf1)    -> OOB and err.rate exist         // List of 19
# str(rf.all) -> OOB and err.rate do not exist  // List of 18
# The error rate of rf.all cannot be plotted unless the error rates of the
# individual forests are combined as well:
# plot(rf.all)
# END
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment