Skip to content

Instantly share code, notes, and snippets.

@mndrake
Created August 11, 2015 19:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mndrake/f861af34032f8f9cb9eb to your computer and use it in GitHub Desktop.
Save mndrake/f861af34032f8f9cb9eb to your computer and use it in GitHub Desktop.
h2o domino
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Install h2o
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Start from a clean slate: unload h2o if it is currently attached, then
# uninstall any copy already present in the library.
if ("package:h2o" %in% search()) {
  detach("package:h2o", unload = TRUE)
}
if ("h2o" %in% rownames(installed.packages())) {
  remove.packages("h2o")
}

# Install whichever of the packages H2O depends on are missing. The list of
# installed packages is re-read before every check, one package at a time.
invisible(lapply(
  c("methods", "statmod", "stats", "graphics",
    "RCurl", "rjson", "tools", "utils"),
  function(dep) {
    if (!(dep %in% rownames(installed.packages()))) {
      install.packages(dep)
    }
  }
))

# Finally, build and install the H2O package itself from the release
# repository pinned below.
install.packages(
  "h2o",
  type = "source",
  repos = "http://h2o-release.s3.amazonaws.com/h2o/rel-simons/4/R"
)
# Run install.R from the working directory, then attach the h2o package.
# NOTE(review): install.R is presumably the install script shown earlier in
# this gist, in which case h2o is reinstalled on every run -- confirm.
source("install.R")
library(h2o)
# Initialise H2O and keep the object returned by h2o.init().
h2oCluster <- h2o.init()
# Load the saved model from disk; model.predict() below reads it from the
# global `fit`.
fit <- h2o.loadModel("/home/public/DLModel1", h2oCluster)
# Score the given inputs with the model held in the global `fit`.
#
# The six arguments are placed in a data frame under the column names
# Pclass (converted to a factor), Sex, Age, SibSp, Parch and Fare, the frame
# is uploaded with as.h2o(), and h2o.predict() is run on it.
#
# Returns a logical vector: TRUE where the prediction's `p1` column is at
# least 0.5, FALSE otherwise.
model.predict <- function(pclass, sex, age, sibsp, parch, fare) {
  h2o_input <- as.h2o(data.frame(
    Pclass = as.factor(pclass),
    Sex = sex,
    Age = age,
    SibSp = sibsp,
    Parch = parch,
    Fare = fare
  ))
  scores <- as.data.frame(h2o.predict(fit, h2o_input))
  scores$p1 >= 0.5
}
#model.predict(1,"female", 38, 0, 0, 71.2833) # returns true
#model.predict(1,"male", 38, 0, 0, 71.2833) # returns false
# Training script: fits a binomial GLM on data/train.csv, evaluates it on a
# held-out split, and saves the model as "DLModel1" in the working directory.
source("install.R")
library(h2o)

# initialize connection to h2o cluster
h2oCluster <- h2o.init()

# upload data to cluster
df <- h2o.uploadFile("data/train.csv")
# treat Pclass as a categorical column rather than a number
df$Pclass <- as.factor(df$Pclass)
# impute the Age column using the "mean" method
df <- h2o.impute(df, "Age", method = "mean")

# split data into train/test (ratios = 0.75 goes to the first frame)
df.split <- h2o.splitFrame(data = df, ratios = 0.75)
df.train <- df.split[[1]]
df.test <- df.split[[2]]

# fit model
fit.glm <- h2o.glm(y = "Survived",
                   x = c("Pclass", "Sex", "Age", "SibSp", "Parch", "Fare"),
                   training_frame = df.train,
                   family = "binomial", link = "logit",
                   solver = "AUTO", lambda_search = FALSE)

# score the held-out frame and plot the model's performance on it
pred <- h2o.predict(fit.glm, df.test)
perf <- h2o.performance(fit.glm, df.test, measure = "precision")
plot(perf, type = "roc", col = "blue")

# save the fitted model; spell out TRUE because `T` is an ordinary variable
# that can be reassigned, whereas TRUE is a reserved word
h2o.saveModel(fit.glm, dir = getwd(), name = "DLModel1", force = TRUE)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment