This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(miniCRAN)
library(gender)
library(stringr)

# Get package description data.
# Building `desc` from scratch took about an hour to run, so the two
# commented-out lines below are kept for reference only and the data is
# loaded directly from a saved CSV instead.
# pkgs <- available.packages("http://cran.rstudio.com/src/contrib")
# desc <- getCranDescription(pkgs, repos = c(CRAN="http://cran.rstudio.com"))
desc <- read.csv("http://www.stat.berkeley.edu/~ledell/data/RStudioCRAN_pkgDesc_20141216.csv")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Install the h2oEnsemble package from its subdirectory of the
# h2oai/h2o-3 GitHub repository.
library(devtools)
install_github("h2oai/h2o-3/h2o-r/ensemble/h2oEnsemble-package")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Update for SuperLearner::CVFolds function that enables stratification by outcome and cluster ID | |
CVFolds2 <- function (N, id, Y, cvControl) { | |
if (!is.null(cvControl$validRows)) { | |
return(cvControl$validRows) | |
} | |
stratifyCV <- cvControl$stratifyCV | |
shuffle <- cvControl$shuffle | |
V <- cvControl$V | |
if (!stratifyCV) { ### Not Stratified |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Example of how to train an H2O model with folds that are
# stratified both by outcome and a cluster id
library(cvAUC)

data("adherence")  # load a dataset with an ID column
df <- adherence

# Load a utility function for creating stratified folds.
# The raw gist URL includes a revision hash, so a fixed version is sourced.
source("https://gist.githubusercontent.com/ledell/bd4e227d4e5ff426c41d/raw/708eb429fa1954a140d65a6a42ce93847affd67c/CVFolds2.R")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(h2o)
h2o.init(nthreads = -1)  # This means nthreads = num available cores

# Locations of the MNIST train and test CSVs.
train_file <- "https://h2o-public-test-data.s3.amazonaws.com/bigdata/laptop/mnist/train.csv.gz"
test_file <- "https://h2o-public-test-data.s3.amazonaws.com/bigdata/laptop/mnist/test.csv.gz"

# Import both files into the running H2O cluster.
train <- h2o.importFile(train_file)
test <- h2o.importFile(test_file)

# To see a brief summary of the data, run the following command
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Remove any previously installed H2O package for R, then install the
# packages that H2O depends on when they are not already present.
# Wrapped in local() so the helper variables do not leak into the
# caller's workspace.
local({
  # installed.packages() can be slow, so query it once and reuse the result.
  # None of the dependency names below is "h2o", so removing h2o afterwards
  # does not invalidate this snapshot for the dependency checks.
  installed <- rownames(installed.packages())

  # The following two commands remove any previously installed H2O packages
  # for R.
  if ("package:h2o" %in% search()) {
    detach("package:h2o", unload = TRUE)
  }
  if ("h2o" %in% installed) {
    remove.packages("h2o")
  }

  # Next, we download packages that H2O depends on.
  deps <- c("methods", "statmod", "stats", "graphics", "RCurl")
  for (pkg in setdiff(deps, installed)) {
    install.packages(pkg)
  }
})
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(h2o)
localH2O <- h2o.init(nthreads = -1)  # Start up H2O cluster using nthreads = ncores

# Get training data:
data <- h2o.importFile(
  "http://www.stat.berkeley.edu/~ledell/data/wisc-diag-breast-cancer-shuffled.csv",
  destination_frame = "breast_cancer"
)

y <- "diagnosis"  # Response column
x <- setdiff(names(data), c(y, "id"))  # Predictors: remove 'id' and response col
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Extract cross-validated predicted values (in order of original rows) | |
h2o.cvpreds <- function(object, single_col = TRUE) { | |
# TO DO: Check that object is an H2OModel | |
# TO DO: Check that keep_cross_validation_predictions = TRUE in the model | |
# TO DO: Need to add support for returning a multiclass prediction and binary (full frame: predict, p0, p1) | |
# TO DO: Remove family variable and just check class(object) directly | |
# Need to extract family from model object | |
if (class(object) == "H2OBinomialModel") family <- "binomial" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# NOTE: This is now part of h2oEnsemble R package and should be used from there instead | |
# Given a list of H2O models, ensemble the base learners using a metalearner (Stacking / Super Learning) | |
# Currently requires: | |
#source("cvpreds.R") | |
h2o.stack <- function(models, #list of H2OModels | |
metalearner = "h2o.glm.wrapper", |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import h2o | |
from h2o.estimators.glm import H2OGeneralizedLinearEstimator | |
def make_Z(models): | |
''' | |
Takes a list of models and creates level-one data | |
''' |
OlderNewer