Run DRF models in one AWS instance
# Benchmark per-row prediction latency for three DRF models loaded from disk.
library(h2o)
library(Hmisc)

# Give H2O roughly 90% of the free memory reported by the OS (MemFree is in kB; convert to MB).
mem_size <- round(as.numeric(system("awk '/MemFree/ {print $2}' /proc/meminfo", intern=TRUE)) / 1024 * 0.9, digits=0)
h2o.init(min_mem_size=paste0(mem_size, "M"))

# Load the scoring data and make the target a factor (binary classification).
test <- h2o.importFile("higgs_test_5k.csv")
y <- "response"
test[, y] <- as.factor(test[, y])

num_rows <- 500  # use h2o.nrow(test) to score the full test set

model_paths <- c(
  'DRF_ntrees-50',
  'DRF_ntrees-500',
  'DRF_ntrees-5000'
)

for (model_path in model_paths) {
  rf <- h2o.loadModel(model_path)
  processing_times <- vector(mode='numeric', length=num_rows)

  # Time single-row predictions to measure per-request scoring latency.
  for (index in 1:num_rows) {
    start_time <- Sys.time()
    pred <- h2o.predict(rf, newdata=test[index, ])
    end_time <- Sys.time()
    # Force seconds so all recorded times are on a consistent scale.
    processing_times[index] <- as.numeric(difftime(end_time, start_time, units="secs"))
  }

  print('=============================')
  print(model_path)
  print(summary(processing_times))
  print(describe(processing_times))  # Hmisc::describe for a fuller distribution summary
  print('=============================')
}
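
For context, the three model paths above would typically point to DRF models trained and saved beforehand with h2o.randomForest and h2o.saveModel. The following is a minimal sketch of how such artifacts might be produced; the training file name higgs_train_10k.csv and the predictor selection are assumptions, not part of the original gist.

# Sketch only: assumes a HIGGS training file; adjust the file name and columns to your data.
library(h2o)
h2o.init()

train <- h2o.importFile("higgs_train_10k.csv")   # assumed file name
y <- "response"
x <- setdiff(names(train), y)
train[, y] <- as.factor(train[, y])

for (ntrees in c(50, 500, 5000)) {
  rf <- h2o.randomForest(x = x, y = y, training_frame = train,
                         ntrees = ntrees,
                         model_id = paste0("DRF_ntrees-", ntrees))
  # h2o.saveModel writes the model to <path>/<model_id>, which matches the
  # paths loaded by the benchmarking loop above.
  h2o.saveModel(rf, path = ".", force = TRUE)
}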