Skip to content

Instantly share code, notes, and snippets.

@ivopbernardo
Created November 1, 2022 19:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ivopbernardo/a2f444e19aab930c482946441a56f62d to your computer and use it in GitHub Desktop.
Save ivopbernardo/a2f444e19aab930c482946441a56f62d to your computer and use it in GitHub Desktop.
h2o R Example
# Load h2o
library(h2o)
library(ggplot2)
# Load Dataset - London Bike (CSV is read relative to the working directory)
london_bike <- read.csv("./london_merged.csv")

# Store weather code and season as factors instead of plain numbers
for (categorical_col in c("weather_code", "season")) {
  london_bike[[categorical_col]] <- as.factor(london_bike[[categorical_col]])
}

# The h2o connection has to be up before any data can be uploaded
h2o.init()

# Upload the data.frame as an h2o frame
london_bike_h2o <- as.h2o(london_bike)

# 80/20 split; the fixed seed keeps the partition stable between runs
split_frames <- h2o.splitFrame(
  data = london_bike_h2o,
  ratios = 0.8,
  seed = 1234
)
training_data <- split_frames[[1]]
test_data <- split_frames[[2]]
# Training Linear regression
# Column names used as model inputs and as the target
predictors <- c(
  "t1", "t2", "hum", "wind_speed", "weather_code", "is_holiday",
  "is_weekend", "season"
)
response <- "cnt"

# Scatter plot of actual counts (x) against model predictions (y).
# `predictions_vs_actual` must be a data.frame with the columns `cnt` and
# `predict`. Returns the ggplot object, so calling it at top level draws it.
plot_predictions_vs_actual <- function(predictions_vs_actual) {
  ggplot(
    data = predictions_vs_actual,
    aes(x = cnt, y = predict)
  ) +
    geom_point(color = "darkgreen") +
    xlab("Actual Label") +
    ylab("Predictions")
}

# Model 1 - plain linear regression.
# Per the H2O GLM documentation, h2o.glm picks its own regularization
# settings when none are given; lambda = 0 switches the penalty off
# explicitly so this really is the unregularized baseline the section
# describes.
london_bike_model <- h2o.glm(
  x = predictors,
  y = response,
  training_frame = training_data,
  lambda = 0
)
test_predict <- h2o.predict(
  object = london_bike_model,
  newdata = test_data
)
# cbind() of two data.frames is already a data.frame: columns cnt, predict
predictions_x_real <- cbind(
  as.data.frame(test_data$cnt),
  as.data.frame(test_predict)
)
plot_predictions_vs_actual(predictions_x_real)

# Training Linear Regression using Regularization
# alpha = 1 is passed explicitly; lambda (the penalty strength) is left to
# h2o's default.
london_bike_model_regularized <- h2o.glm(
  x = predictors,
  y = response,
  training_frame = training_data,
  alpha = 1
)
test_predict_regularized <- h2o.predict(
  object = london_bike_model_regularized,
  newdata = test_data
)
predictions_x_real_regularized <- cbind(
  as.data.frame(test_data$cnt),
  as.data.frame(test_predict_regularized)
)
plot_predictions_vs_actual(predictions_x_real_regularized)

# Evaluating models - Using validation_frame
# Refit the plain linear regression (same settings as Model 1) and hand h2o
# the test frame, so metrics are stored for both frames on the model.
london_bike_model <- h2o.glm(
  x = predictors,
  y = response,
  training_frame = training_data,
  validation_frame = test_data,
  lambda = 0
)
# RMSE on the training frame and on the validation frame
h2o.rmse(london_bike_model, train = TRUE, valid = TRUE)
# Random Forest Example
# 25 trees of depth at most 5, scored on the test frame as validation data.
# A seed is set like for every other stochastic step in this script (split,
# deep learning, grid, AutoML); without it the metrics below change from
# run to run.
london_bike_rf <- h2o.randomForest(
  x = predictors,
  y = response,
  ntrees = 25,
  max_depth = 5,
  seed = 1234,
  training_frame = training_data,
  validation_frame = test_data
)
# Retrieving metrics for randomforest (training and validation frames)
h2o.rmse(london_bike_rf, train = TRUE, valid = TRUE)
h2o.r2(london_bike_rf, train = TRUE, valid = TRUE)
# Training Neural Network
# Four hidden layers with 6, 6, 4 and 7 units respectively
hidden_layers <- c(6, 6, 4, 7)

nn_model <- h2o.deeplearning(
  x = predictors,
  y = response,
  training_frame = training_data,
  validation_frame = test_data,
  hidden = hidden_layers,
  activation = "Rectifier",
  epochs = 1000,
  train_samples_per_iteration = -1,
  # reproducible mode plus a fixed seed, so reruns are meant to match
  reproducible = TRUE,
  seed = 23123
)

# Neural Network Evaluation - RMSE and R2 on training and validation frames
h2o.rmse(nn_model, train = TRUE, valid = TRUE)
h2o.r2(nn_model, train = TRUE, valid = TRUE)
# Grid Search
# Identifier under which the grid is stored and later looked up
rf_grid_id <- "rf_grid"

# Candidate values for each random forest hyperparameter
rf_hyper_params <- list(
  ntrees = c(2, 5, 10, 15),
  max_depth = c(3, 5, 9),
  min_rows = c(5, 10, 100)
)

# Train and validate a grid of randomForests over the candidate values
rf_grid <- h2o.grid(
  "randomForest",
  grid_id = rf_grid_id,
  x = predictors,
  y = response,
  training_frame = training_data,
  validation_frame = test_data,
  hyper_params = rf_hyper_params,
  seed = 1
)

# List the grid's models ordered by R2, highest first
h2o.getGrid(
  grid_id = rf_grid_id,
  sort_by = "r2",
  decreasing = TRUE
)
# Auto ML Routine
# Train up to 15 candidate models and rank them on the held-out test frame.
# The test frame is passed as `leaderboard_frame`: per the H2O AutoML
# documentation, `validation_frame` is ignored unless nfolds == 0, so with
# the default cross-validation the test data would never be used at all.
aml <- h2o.automl(
  x = predictors,
  y = response,
  training_frame = training_data,
  leaderboard_frame = test_data,
  max_models = 15,
  seed = 1
)
# Candidate models ranked by their score on the leaderboard frame
aml@leaderboard
# Explainability
# Refit the random forest that the plots below explain. The seed pins the
# forest down, so the importance and SHAP plots do not change between runs.
london_bike_rf <- h2o.randomForest(
  x = predictors,
  y = response,
  ntrees = 25,
  max_depth = 5,
  seed = 1234,
  training_frame = training_data,
  validation_frame = test_data
)
# Variable importance plot
h2o.varimp_plot(london_bike_rf)
# Shap Summary (computed on the test frame)
h2o.shap_summary_plot(london_bike_rf, test_data)
# Shap Explain Row - contributions for a single test-frame row
h2o.shap_explain_row_plot(london_bike_rf, test_data, row_index = 4)
# Shap Explain Summer Row
# NOTE(review): which observation sits at row 830 depends on the train/test
# split above - confirm it is still a summer row if the split changes.
h2o.shap_explain_row_plot(london_bike_rf, test_data, row_index = 830)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment