Skip to content

Instantly share code, notes, and snippets.

@spark2010
Created June 2, 2018 10:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save spark2010/d067588eae3caad20b22c38f3eebb8a2 to your computer and use it in GitHub Desktop.
Save spark2010/d067588eae3caad20b22c38f3eebb8a2 to your computer and use it in GitHub Desktop.
Building DRF models with ntrees=50, 500, 5000
# Code reference: http://docs.h2o.ai/h2o/latest-stable/h2o-docs/data-science/stacked-ensembles.html
#
# Builds three H2O Distributed Random Forest (DRF) models that differ only in
# `ntrees` (50, 500, 5000). For each model the script:
#   * logs the start time, finish time and elapsed time to
#     <working dir>/logs/DRF.log (the log file is overwritten on every run);
#   * saves the model under <working dir>/DRF_models/ and renames the saved
#     file to DRF_ntrees-<ntrees>.
library(h2o)
h2o.init()

# Import a sample binary outcome train set into H2O
train <- h2o.importFile("https://s3.amazonaws.com/erin-data/higgs/higgs_train_10k.csv")

# Identify predictors and response
y <- "response"
x <- setdiff(names(train), y)

# For binary classification, response should be a factor
train[, y] <- as.factor(train[, y])

# Output locations. Create them up front: file() cannot open a log file inside
# a directory that does not exist.
log_dir <- file.path(getwd(), "logs")
model_dir <- file.path(getwd(), "DRF_models")
dir.create(log_dir, showWarnings = FALSE, recursive = TRUE)
dir.create(model_dir, showWarnings = FALSE, recursive = TRUE)

# Build DRF models
log_loc <- file.path(log_dir, "DRF.log")
con <- file(log_loc, "w")

# tryCatch(finally = ...) guarantees the log connection is closed even when a
# model build or save fails part-way through the loop.
tryCatch(
  {
    for (ntrees in c(50, 500, 5000)) {
      start_time <- Sys.time()
      # format() gives a stable "YYYY-MM-DD HH:MM:SS" timestamp instead of
      # relying on the implicit character conversion done by paste().
      cat(
        paste0(
          "Starting building a DRF model (ntrees: ", ntrees, ") at ",
          format(start_time, "%Y-%m-%d %H:%M:%S"), "\n"
        ),
        file = con
      )
      # Flush so progress is visible in the log while a long build is running.
      flush(con)

      rf <- h2o.randomForest(
        x = x,
        y = y,
        training_frame = train,
        ntrees = ntrees,
        max_depth = 20,
        seed = 1
      )

      end_time <- Sys.time()
      elapsed_time <- format(round(end_time - start_time, 2))
      cat(
        paste0(
          "Finished building the DRF model (ntrees: ", ntrees, ") at ",
          format(end_time, "%Y-%m-%d %H:%M:%S"),
          " (elapsed time: ", elapsed_time, ")", "\n"
        ),
        file = con
      )

      # h2o.saveModel() returns the path of the file it wrote; give that file
      # a readable name that encodes the number of trees.
      model_path <- h2o.saveModel(object = rf, path = model_dir, force = TRUE)
      new_model_path <- file.path(model_dir, paste0("DRF_ntrees-", ntrees))

      # Remove a leftover from a previous run: renaming onto an existing
      # target is not guaranteed to succeed on every platform.
      if (file.exists(new_model_path)) {
        unlink(new_model_path, recursive = TRUE)
      }
      if (!file.rename(model_path, new_model_path)) {
        warning(
          "Could not rename ", model_path, " to ", new_model_path,
          "; the model stays at its original path.",
          call. = FALSE
        )
        # Log the location where the model really is.
        new_model_path <- model_path
      }

      cat(paste0("Saved the model at ", new_model_path), "\n\n", file = con)
      flush(con)
    }
  },
  finally = close(con)
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment