Last active
August 31, 2015 19:01
-
-
Save ledell/e1abae4410330e44b36d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Example of how to train an H2O model with folds that are
# stratified both by outcome and by a cluster id.
library(cvAUC)
data("adherence")  # load a dataset with an ID column
df <- adherence

# Load a utility function (CVFolds2) for creating stratified folds.
# The URL points at a fixed revision of the gist.
source("https://gist.githubusercontent.com/ledell/bd4e227d4e5ff426c41d/raw/708eb429fa1954a140d65a6a42ce93847affd67c/CVFolds2.R")
cvControl <- list(V = 10, stratifyCV = TRUE, shuffle = TRUE)

# Create stratified folds for 10-fold CV: `folds` is a list of length 10,
# of fold idxs.
folds <- CVFolds2(N = nrow(df), id = df$id, Y = df$Y, cvControl = cvControl)
#' Convert a list of fold index vectors into a per-position fold id vector
#'
#' @param folds A list in which element `i` holds the indices assigned to
#'   fold `i`.
#' @return An integer vector with one slot per index found in `folds`
#'   (`length(unlist(folds))` slots); slot `j` holds the number of the fold
#'   whose index vector contains `j`. Slots not named by any fold stay `NA`.
#'   An empty `folds` list yields `integer(0)`.
convert_foldlist_to_vec <- function(folds) {
  # Total number of indices across all folds.
  N <- length(unlist(folds))
  # Start from integer NAs so the result type does not depend on whether
  # any assignment happens below.
  fold_column <- rep(NA_integer_, N)
  # seq_along() gives an empty loop for an empty list, whereas 1:0 would
  # iterate over c(1, 0) and fail on folds[[1]].
  for (i in seq_along(folds)) {
    fold_column[folds[[i]]] <- i
  }
  fold_column
}
# Attach the per-row fold assignment to the data frame.
fold_column <- convert_foldlist_to_vec(folds)
df$fold_id <- fold_column
df$Y <- as.factor(df$Y)  # convert to factor for binary classification

# Now use this fold designation with H2O
library(h2o)
h2o.init(nthreads = -1)
# If data is too big, write folds to disk and upload the file using
# h2o.importFile instead.
train <- as.h2o(df)

# Pick predictors by name rather than by position so that the response,
# the cluster id, and the fold assignment column are never passed as
# features (the positional range 2:8 overlapped the response index 7).
response <- "Y"
predictors <- setdiff(names(df), c("id", response, "fold_id"))
fit <- h2o.deeplearning(
  x = predictors,
  y = response,
  training_frame = train,
  fold_column = "fold_id"
)

# Get CV metrics
fit@model$cross_validation_metrics
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment