# Example of ML preprocessing in h2o (RECS dataset)
# Start (or connect to) the H2O cluster that receives and processes the dataset.
# No modeling is done in the R session itself - R only keeps an address key
# that references the cluster-side version of the data.
# NOTE(review): with no ip/port given, h2o.init() targets its default instance;
# confirm whether a remote cluster is actually intended here.
h2o.init(nthreads = -1) # -1 = let H2O use all available CPU cores

# Prepare h2o inputs for modeling
recs.reduced2.h2o <- as.h2o(recs.reduced2) # Coerce DF to an h2o object

# Split h2o data into training, validation, and test frames
# (ratios 0.7 / 0.2, with the remainder going to the third frame).
# The split is carried out by H2O, so R's set.seed() does not control it; the
# seed has to be passed to h2o.splitFrame() for the split to be reproducible.
set.seed(0) # Kept so the R-side RNG state is unchanged for any later code
data.split <- h2o.splitFrame(recs.reduced2.h2o, ratios = c(0.7, 0.2), seed = 0)
train <- data.split[[1]] # For training
valid <- data.split[[2]] # For validating trained models and comparing different hyperparameter vectors
test <- data.split[[3]] # For final evaluation of model performance

# Response column and predictor columns (every column except the response)
y <- "KWH"
x <- setdiff(x = colnames(train), y = y)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment