Run DRF models in one AWS instance
# Benchmark per-row prediction latency for three DRF models loaded from disk.
library(h2o)
library(Hmisc)

# Give H2O roughly 90% of the free memory reported by the OS (MemFree is in kB; convert to MB).
mem_size <- round(as.numeric(system("awk '/MemFree/ {print $2}' /proc/meminfo", intern=TRUE)) / 1024 * 0.9, digits=0)
h2o.init(min_mem_size=paste0(mem_size, "M"))

# Load the scoring data and make the target a factor (binary classification).
test <- h2o.importFile("higgs_test_5k.csv")
y <- "response"
test[, y] <- as.factor(test[, y])

num_rows <- 500  # use h2o.nrow(test) to score the full test set

model_paths <- c(
  'DRF_ntrees-50',
  'DRF_ntrees-500',
  'DRF_ntrees-5000'
)

for (model_path in model_paths) {
  rf <- h2o.loadModel(model_path)
  processing_times <- vector(mode='numeric', length=num_rows)

  # Time single-row predictions to measure per-request scoring latency.
  for (index in 1:num_rows) {
    start_time <- Sys.time()
    pred <- h2o.predict(rf, newdata=test[index, ])
    end_time <- Sys.time()
    # Force seconds so all recorded times are on a consistent scale.
    processing_times[index] <- as.numeric(difftime(end_time, start_time, units="secs"))
  }

  print('=============================')
  print(model_path)
  print(summary(processing_times))
  print(describe(processing_times))  # Hmisc::describe for a fuller distribution summary
  print('=============================')
}
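
For context, the three model paths above would typically point to DRF models trained and saved beforehand with h2o.randomForest and h2o.saveModel. The following is a minimal sketch of how such artifacts might be produced; the training file name higgs_train_10k.csv and the predictor selection are assumptions, not part of the original gist.

# Sketch only: assumes a HIGGS training file; adjust the file name and columns to your data.
library(h2o)
h2o.init()

train <- h2o.importFile("higgs_train_10k.csv")   # assumed file name
y <- "response"
x <- setdiff(names(train), y)
train[, y] <- as.factor(train[, y])

for (ntrees in c(50, 500, 5000)) {
  rf <- h2o.randomForest(x = x, y = y, training_frame = train,
                         ntrees = ntrees,
                         model_id = paste0("DRF_ntrees-", ntrees))
  # h2o.saveModel writes the model to <path>/<model_id>, which matches the
  # paths loaded by the benchmarking loop above.
  h2o.saveModel(rf, path = ".", force = TRUE)
}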