Skip to content

Instantly share code, notes, and snippets.

@stephenturner
Created February 15, 2011 22:53
Show Gist options
  • Save stephenturner/828445 to your computer and use it in GitHub Desktop.
Save stephenturner/828445 to your computer and use it in GitHub Desktop.
randomforestdemo.r
# Load the randomForest package, which provides randomForest(), importance(),
# MDSplot(), and varImpPlot() used throughout this demo.
# The workspace is deliberately NOT cleared here: wiping the global
# environment would destroy the caller's objects when this script is sourced.
# Run the script in a fresh R session if a clean slate is needed.
library(randomForest)
############### Classification ################
# Fit random forests to the iris data, inspect variable importance, and
# visualise the forest's proximity matrix with classical MDS.
data(iris)
head(iris)

# Seed the RNG so the forests in this section are reproducible, matching the
# other sections of this script, which each set their own seed.
set.seed(71)

# TRUE is spelled out: T is an ordinary variable that can be reassigned.
# Model on all four predictors.
iris.rf <- randomForest(
  Species ~ .,
  data = iris, importance = TRUE, proximity = TRUE
)
# Same model with Petal.Width removed by selecting columns of the data frame.
iris.rf.subset <- randomForest(
  Species ~ .,
  data = iris[c(1:3, 5)], importance = TRUE, proximity = TRUE
)
# Both petal measurements removed by subtracting terms in the formula.
iris.rf.subset2 <- randomForest(
  Species ~ . - Petal.Length - Petal.Width,
  data = iris, importance = TRUE, proximity = TRUE
)
print(iris.rf)

## Look at variable importance:
round(importance(iris.rf), 2)

## Do MDS on 1 - proximity:
# eig = TRUE makes cmdscale() return a list (points, GOF, ...) rather than
# just the coordinate matrix.
iris.mds <- cmdscale(1 - iris.rf$proximity, eig = TRUE)

# Use a square plotting region for the scatterplot matrix; `op` holds the
# previous graphics settings so they can be restored afterwards.
op <- par(pty = "s")
head(cbind(iris[, 1:4], iris.mds$points))
pairs(
  cbind(iris[, 1:4], iris.mds$points),
  cex = 0.6, gap = 0,
  col = c("red", "green", "blue")[as.numeric(iris$Species)],
  main = "Iris Data: Predictors and MDS of Proximity Based on RandomForest"
)
par(op)

# Goodness-of-fit of the MDS solution.
print(iris.mds$GOF)

# randomForest's own MDS plot of the proximity matrix, coloured by species.
MDSplot(iris.rf, iris$Species)
############### Regression ################
# Regress Ozone on the remaining airquality columns with a random forest and
# report variable importance.
data(airquality)
set.seed(131)

# na.action = na.omit drops rows containing missing values before fitting.
# The permutation-count argument is spelled `nPerm`; argument matching in R is
# case-sensitive, so a lower-case `nperm` would be absorbed by `...` and
# silently ignored. 1 is also the documented default.
ozone.rf <- randomForest(
  Ozone ~ .,
  data = airquality, mtry = 3, importance = TRUE,
  na.action = na.omit, nPerm = 1
)
print(ozone.rf)

## Show "importance" of variables: higher values mean more important:
round(importance(ozone.rf), 2)
varImpPlot(ozone.rf, pch = 16)
############################################
## The predictors may be supplied as a matrix rather than a data frame.
set.seed(17)
# 500 uniform draws arranged as a 100-row matrix (5 columns), paired with a
# two-level factor response of 50 observations per level.
x <- matrix(runif(500), nrow = 100)
y <- gl(2, 50)

# Fit, then auto-print the fitted forest and its predictions on the
# training matrix.
myrf <- randomForest(x, y)
myrf
predict(myrf, x)

## A more involved formula: transformed response, one predictor removed, and
## an indicator term built inline with I().
swiss.rf <- randomForest(
  sqrt(Fertility) ~ . - Catholic + I(Catholic < 50),
  data = swiss
)
swiss.rf
predict(swiss.rf, swiss)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment