This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Train 4 models and ensemble them together with the new h2o.stack function.
# Requirements: Models must all be the same type of model (H2OBinomialModel, etc.)
# Must have the same outcome
# Must have used `fold_assignment = "Modulo"` and the same number for `nfolds`,
# or an identical `fold_column` must be used to guarantee the same folds between base models
# Requires: cvpreds.R and stack.R
# Load the two helper scripts this example requires (cvpreds.R and stack.R).
# Each URL is pinned to a specific gist revision.
source("https://gist.githubusercontent.com/ledell/f3a87bd136ce06e0a5ff/raw/2a82535892ff66694a1a401de46b8b5a92820849/cvpreds.R")
source("https://gist.githubusercontent.com/ledell/f389ac1e9c6e7000b299/raw/6bc1d2c9cfe1a51ffcdcf79cf184e80a40d4828f/stack.R")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Extract cross-validated predicted values (in order of original rows) | |
h2o.cvpreds <- function(object, single_col = TRUE) { | |
# TO DO: Check that object is an H2OModel | |
# TO DO: Check that keep_cross_validation_predictions = TRUE in the model | |
# TO DO: Need to add support for returning a multiclass prediction and binary (full frame: predict, p0, p1) | |
# TO DO: Remove family variable and just check class(object) directly | |
# Need to extract family from model object | |
if (class(object) == "H2OBinomialModel") family <- "binomial" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# NOTE: This is now part of the h2oEnsemble R package and should be used from there instead
# Given a list of H2O models, ensemble the base learners using a metalearner (Stacking / Super Learning)
# Currently requires:
#source("cvpreds.R")
h2o.stack <- function(models, #list of H2OModels | |
metalearner = "h2o.glm.wrapper", |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# The following two commands remove any previously installed H2O packages for R.
if ("package:h2o" %in% search()) {
  detach("package:h2o", unload = TRUE)
}
if ("h2o" %in% rownames(installed.packages())) {
  remove.packages("h2o")
}

# Next, we download packages that H2O depends on, skipping any that are
# already installed. installed.packages() is slow, so it is queried once for
# the whole dependency list instead of once per package.
h2o_deps <- c("methods", "statmod", "stats", "graphics", "RCurl")
missing_deps <- setdiff(h2o_deps, rownames(installed.packages()))
if (length(missing_deps) > 0) {
  install.packages(missing_deps)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# install.packages("h2o", type="source", repos=(c("http://h2o-release.s3.amazonaws.com/h2o/rel-slater/5/R")))
library(h2o)

# Connect to the H2O cluster; "XX.XX.XX.XX" is a placeholder IP address.
localH2O <- h2o.init(ip = "XX.XX.XX.XX", port = 54321)

# Import the HIGGS dataset (the commented-out line reads a local copy instead).
#higgs <- h2o.importFile("/home/0xdiag/datasets/higgs/HIGGS.csv", destination_frame = "higgs") #Local copy
higgs <- h2o.importFile("http://archive.ics.uci.edu/ml/machine-learning-databases/00280/HIGGS.csv.gz", destination_frame = "higgs")
dim(higgs)  # 11M x 29
higgs$C1 <- as.factor(higgs$C1)  # Encode response as categorical
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### Lending Club example using cleaned up dataset & h2o.ensemble ###
library(h2o)
h2o.init(nthreads = -1, max_mem_size = "8G")

# Import the loan data and encode the response column as categorical.
loan_csv <- "https://raw.githubusercontent.com/h2oai/app-consumer-loan/master/data/loan.csv"
data <- h2o.importFile(loan_csv)  # 163994 x 15
data$bad_loan <- as.factor(data$bad_loan)

# Seeded uniform random column over the rows of `data`.
rand <- h2o.runif(data, seed = 1)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(h2o)
h2o.init(nthreads = -1)  # This means nthreads = num available cores

# Import the MNIST training and test sets from the public H2O test-data bucket.
train_file <- "https://h2o-public-test-data.s3.amazonaws.com/bigdata/laptop/mnist/train.csv.gz"
test_file <- "https://h2o-public-test-data.s3.amazonaws.com/bigdata/laptop/mnist/test.csv.gz"
train <- h2o.importFile(train_file)
test <- h2o.importFile(test_file)
# To see a brief summary of the data, run the following command |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Example of how to train an H2O model with folds that are
# stratified both by outcome and a cluster id
library(cvAUC)
data("adherence")  # load a dataset with an ID column
df <- adherence

# Load a utility function for creating stratified folds
source("https://gist.githubusercontent.com/ledell/bd4e227d4e5ff426c41d/raw/708eb429fa1954a140d65a6a42ce93847affd67c/CVFolds2.R")  # utility function
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Update for SuperLearner::CVFolds function that enables stratification by outcome and cluster ID | |
CVFolds2 <- function (N, id, Y, cvControl) { | |
if (!is.null(cvControl$validRows)) { | |
return(cvControl$validRows) | |
} | |
stratifyCV <- cvControl$stratifyCV | |
shuffle <- cvControl$shuffle | |
V <- cvControl$V | |
if (!stratifyCV) { ### Not Stratified |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Install the h2oEnsemble package from the h2o-3 GitHub repository.
library(devtools)
install_github("h2oai/h2o-3/h2o-r/ensemble/h2oEnsemble-package")