Skip to content

Instantly share code, notes, and snippets.

@ledell
Last active June 3, 2017 04:56
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ledell/c0501c3a7449291d3d11bcec5d0c89d7 to your computer and use it in GitHub Desktop.
Save ledell/c0501c3a7449291d3d11bcec5d0c89d7 to your computer and use it in GitHub Desktop.
Demo of how to use the SigOpt API with H2O in R
# Provide the SigOpt API token through the environment
# (replace the "HERE" placeholder with a real token)
Sys.setenv(SIGOPT_API_TOKEN = "HERE")

# Bring up a local H2O cluster on which the models will be trained
library(h2o)
h2o.init(nthreads = -1)

# Use the built-in iris data: the response is the "Species" column and the
# predictors are columns 1 to 4; the data frame is converted to an H2O frame
data(iris)
y <- "Species"
x <- seq_len(4)
train <- as.h2o(iris)
# Describe the Random Forest search space, then register it with SigOpt
# as a new experiment
library(SigOptR)

rf_parameters <- list(
  list(
    name = "mtries",
    type = "int",
    bounds = list(min = 1, max = ncol(iris) - 1)
  ),
  list(
    name = "ntrees",
    type = "int",
    bounds = list(min = 1, max = 100)
  ),
  list(
    name = "sample_rate",
    type = "double",
    bounds = list(min = 0.25, max = 1.0)
  ),
  list(
    name = "min_rows",
    type = "int",
    bounds = list(min = 1, max = 10)
  )
)

experiment <- create_experiment(
  list(name = "Random Forest (h2o)", parameters = rf_parameters)
)

# Show where the newly created experiment can be viewed
experiment_url <- paste(
  "Created experiment: https://sigopt.com/experiment",
  experiment$id,
  sep = "/"
)
print(experiment_url)
# Train a 5-fold cross-validated H2O Random Forest for one set of
# assignments and return the score to be maximised.
#
# Args:
#   assignments:    list with elements ntrees, mtries, sample_rate and
#                   min_rows, each forwarded to h2o.randomForest().
#   training_frame: frame passed to h2o.randomForest() as training data.
#   x:              predictor columns, forwarded to h2o.randomForest().
#   y:              response column, forwarded to h2o.randomForest().
#
# Returns:
#   1 minus the cross-validated mean per-class error of the fitted model.
evaluate_model <- function(assignments, training_frame, x, y) {
  # Train on the frame supplied by the caller rather than reaching for a
  # global variable, so the function works with any frame it is given
  rf_fit <- h2o.randomForest(
    x = x,
    y = y,
    training_frame = training_frame,
    nfolds = 5,
    ntrees = assignments$ntrees,
    mtries = assignments$mtries,
    sample_rate = assignments$sample_rate,
    min_rows = assignments$min_rows
  )

  # xval = TRUE selects the cross-validation metric
  1 - h2o.mean_per_class_error(rf_fit, xval = TRUE)
}
# Optimization loop: 80 rounds of suggest -> evaluate -> report
for (i in seq_len(80)) {
  # Ask SigOpt which assignments to try next
  suggestion <- create_suggestion(experiment$id)

  # Score those assignments locally
  res <- evaluate_model(suggestion$assignments, train, x, y)

  # Send the resulting value back to SigOpt, tied to the suggestion it
  # was computed for
  observation <- list(suggestion = suggestion$id, value = res)
  create_observation(experiment$id, observation)
}
# Fetch the experiment again so that it carries the best observation
# recorded so far, and pull out the assignments that produced it
experiment <- fetch_experiment(experiment$id)
best_assignments <- experiment$progress$best_observation$assignments

# Wrap up: fit a Random Forest on the training frame using the best
# assignments found by the experiment
rf <- h2o.randomForest(
  x = x,
  y = y,
  training_frame = train,
  nfolds = 5,
  ntrees = best_assignments$ntrees,
  mtries = best_assignments$mtries,
  sample_rate = best_assignments$sample_rate,
  min_rows = best_assignments$min_rows
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment