This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Remove any previously installed H2O package for R: detach it if it is
# currently attached to the search path, then uninstall it from the library.
if ("package:h2o" %in% search()) {
  detach("package:h2o", unload = TRUE)
}
if ("h2o" %in% rownames(installed.packages())) {
  remove.packages("h2o")
}

# Next, download the packages that H2O depends on, skipping any that are
# already installed. The installed set is re-read for each package so that
# anything pulled in by an earlier install in this loop is taken into account.
invisible(lapply(
  c("methods", "statmod", "stats", "graphics", "RCurl"),
  function(pkg) {
    if (!(pkg %in% rownames(installed.packages()))) {
      install.packages(pkg)
    }
  }
))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# NOTE: This is now part of h2oEnsemble R package and should be used from there instead | |
# Given a list of H2O models, ensemble the base learners using a metalearner (Stacking / Super Learning) | |
# Currently requires: | |
#source("cvpreds.R") | |
h2o.stack <- function(models, #list of H2OModels | |
metalearner = "h2o.glm.wrapper", |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Extract cross-validated predicted values (in order of original rows) | |
h2o.cvpreds <- function(object, single_col = TRUE) { | |
# TO DO: Check that object is an H2OModel | |
# TO DO: Check that keep_cross_validation_predictions = TRUE in the model | |
# TO DO: Need to add support for returning a multiclass prediction and binary (full frame: predict, p0, p1) | |
# TO DO: Remove family variable and just check class(object) directly | |
# Need to extract family from model object | |
if (class(object) == "H2OBinomialModel") family <- "binomial" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Train 4 models and ensemble them together with the new h2o.stack function.
# Requirements:
#   - Models must be the same type of model (H2OBinomial, etc.)
#   - Models must have the same outcome
#   - Models must have used `fold_assignment = "Modulo"` and the same number
#     for `nfolds`, or an identical `fold_column` must be used, to guarantee
#     the same folds between base models
# Requires: cvpreds.R and stack.R. Both are sourced straight from their gist
# raw URLs below (each URL embeds a specific revision hash).
source("https://gist.githubusercontent.com/ledell/f3a87bd136ce06e0a5ff/raw/2a82535892ff66694a1a401de46b8b5a92820849/cvpreds.R")
source("https://gist.githubusercontent.com/ledell/f389ac1e9c6e7000b299/raw/6bc1d2c9cfe1a51ffcdcf79cf184e80a40d4828f/stack.R")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import h2o | |
from h2o.estimators.glm import H2OGeneralizedLinearEstimator | |
def make_Z(models): | |
''' | |
Takes a list of models and creates level-one data | |
''' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This example is outdated because we have the H2O Stacked Ensemble function now (so it's better to use that): | |
# http://docs.h2o.ai/h2o/latest-stable/h2o-docs/data-science/stacked-ensembles.html | |
import h2o | |
from h2o.estimators.gbm import H2OGradientBoostingEstimator | |
from h2o.estimators.deeplearning import H2ODeepLearningEstimator | |
from h2o.estimators.glm import H2OGeneralizedLinearEstimator | |
from h2o.estimators.random_forest import H2ORandomForestEstimator | |
from sklearn import metrics #will be replaced with ensemble_performance later |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# A `from __future__` import must come before every other statement in the
# module; placing it after the h2o imports is a SyntaxError.
from __future__ import print_function

import h2o
from h2o.estimators.random_forest import H2ORandomForestEstimator
from h2o.estimators.gbm import H2OGradientBoostingEstimator
from h2o.estimators.stackedensemble import H2OStackedEnsembleEstimator
from h2o.grid.grid_search import H2OGridSearch

# Connect to (or start) an H2O cluster. nthreads=-1 asks H2O to use all
# available cores.
h2o.init(nthreads=-1)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(h2o)

# Connect to (or start) an H2O cluster; nthreads = -1 asks H2O to use all
# available cores.
h2o.init(nthreads = -1)

# Import a sample binary outcome train/test set into H2O
train <- h2o.importFile("https://s3.amazonaws.com/erin-data/higgs/higgs_train_10k.csv")
test <- h2o.importFile("https://s3.amazonaws.com/erin-data/higgs/higgs_test_5k.csv")

# Identify predictors and response: every training column other than the
# response column is used as a predictor.
y <- "response"
x <- setdiff(names(train), y)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Set API Key
# NOTE(review): "HERE" looks like a placeholder for a real SigOpt API token;
# replace it before running.
Sys.setenv(SIGOPT_API_TOKEN = "HERE")

# Start a local H2O cluster for training models
library(h2o)
h2o.init(nthreads = -1)

# Load a dataset (R's built-in iris data) and name the response column
data(iris)
y <- "Species"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(h2o)

# Connect to (or start) an H2O cluster with default settings.
h2o.init()

# Load the HIGGS dataset
train <- h2o.importFile("https://s3.amazonaws.com/erin-data/higgs/higgs_train_10k.csv")
test <- h2o.importFile("https://s3.amazonaws.com/erin-data/higgs/higgs_test_5k.csv")

# Response column, predictor columns (everything except the response), and
# the model family to use for the binary outcome.
y <- "response"
x <- setdiff(names(train), y)
family <- "binomial"