# ledell/h2o_deeplearning_gridsearch_mnist_example.R

## h2o_deeplearning_gridsearch_mnist_example.R
# Load the h2o package and initialise H2O.
library(h2o)
h2o.init(nthreads = -1)  # nthreads = -1 means "use all available cores"

# Name of the response column; every other column is used as a predictor.
y <- "C785"

# Locations of the gzipped MNIST train and test CSV files.
train_file <- "https://h2o-public-test-data.s3.amazonaws.com/bigdata/laptop/mnist/train.csv.gz"
test_file <- "https://h2o-public-test-data.s3.amazonaws.com/bigdata/laptop/mnist/test.csv.gz"

# Import both files through H2O.
train <- h2o.importFile(path = train_file)
test <- h2o.importFile(path = test_file)

# Brief summary of each data set.
summary(train)
summary(test)

# Predictor columns: all training columns except the response.
x <- setdiff(names(train), y)

# Encode the response as a factor in both frames so the problem is treated
# as multinomial classification.
train[, y] <- as.factor(train[, y])
test[, y] <- as.factor(test[, y])

# Fit one deep learning model on `train`, using `test` as the validation
# frame.
model <- h2o.deeplearning(
  x = x,
  y = y,
  training_frame = train,
  validation_frame = test,
  distribution = "multinomial",
  hidden = rep(200, 3),  # same value as c(200, 200, 200)
  activation = "RectifierWithDropout",
  input_dropout_ratio = 0.2,
  l1 = 1e-5,
  epochs = 10
)


# Hyper-parameter candidates for the grid: three `hidden` options and two
# `l1` options.
hidden_opt <- list(c(200, 200), c(100, 300, 100), rep(500, 3))
l1_opt <- c(1e-5, 1e-7)
hyper_params <- list(hidden = hidden_opt, l1 = l1_opt)

# Run a deep learning grid search over `hyper_params`, with the same
# predictors, response, training frame and validation frame as the single
# model.
model_grid <- h2o.grid(
  algorithm = "deeplearning",
  x = x,
  y = y,
  training_frame = train,
  validation_frame = test,
  distribution = "multinomial",
  hyper_params = hyper_params
)


# Print the grid summary. print() is explicit so the output also appears when
# the script is run through source(), where bare top-level expressions are
# not auto-printed by default.
print(model_grid)

# Print the test-set MSE (validation metrics, valid = TRUE) for every model
# in the grid. The loop uses `grid_model` rather than `model` so that the
# standalone network stored in `model` earlier in the script is not
# overwritten by the last grid member.
for (model_id in model_grid@model_ids) {
  grid_model <- h2o.getModel(model_id)
  mse <- h2o.mse(grid_model, valid = TRUE)
  print(sprintf("Test set MSE: %f", mse))
}
	# NOTE(review): from here on the file repeats the script above line for
	# line (tab-indented). Running the file end-to-end therefore imports the
	# data and trains every model twice -- confirm the repeat is intentional.

	# Load the h2o package and initialise H2O.
	library(h2o)
	h2o.init(nthreads = -1)  # nthreads = -1 means "use all available cores"

	# Name of the response column; every other column is used as a predictor.
	y <- "C785"

	# Locations of the gzipped MNIST train and test CSV files.
	train_file <- "https://h2o-public-test-data.s3.amazonaws.com/bigdata/laptop/mnist/train.csv.gz"
	test_file <- "https://h2o-public-test-data.s3.amazonaws.com/bigdata/laptop/mnist/test.csv.gz"

	# Import both files through H2O.
	train <- h2o.importFile(path = train_file)
	test <- h2o.importFile(path = test_file)

	# Brief summary of each data set.
	summary(train)
	summary(test)

	# Predictor columns: all training columns except the response.
	x <- setdiff(names(train), y)

	# Encode the response as a factor in both frames so the problem is
	# treated as multinomial classification.
	train[, y] <- as.factor(train[, y])
	test[, y] <- as.factor(test[, y])

	# Fit one deep learning model on `train`, using `test` as the validation
	# frame.
	model <- h2o.deeplearning(
	  x = x,
	  y = y,
	  training_frame = train,
	  validation_frame = test,
	  distribution = "multinomial",
	  hidden = rep(200, 3),  # same value as c(200, 200, 200)
	  activation = "RectifierWithDropout",
	  input_dropout_ratio = 0.2,
	  l1 = 1e-5,
	  epochs = 10
	)


	# Hyper-parameter candidates for the grid: three `hidden` options and two
	# `l1` options.
	hidden_opt <- list(c(200, 200), c(100, 300, 100), rep(500, 3))
	l1_opt <- c(1e-5, 1e-7)
	hyper_params <- list(hidden = hidden_opt, l1 = l1_opt)

	# Run a deep learning grid search over `hyper_params`, with the same
	# predictors, response, training frame and validation frame as the
	# single model.
	model_grid <- h2o.grid(
	  algorithm = "deeplearning",
	  x = x,
	  y = y,
	  training_frame = train,
	  validation_frame = test,
	  distribution = "multinomial",
	  hyper_params = hyper_params
	)


	# Print the grid summary. print() is explicit so the output also appears
	# when the script is run through source(), where bare top-level
	# expressions are not auto-printed by default.
	print(model_grid)

	# Print the test-set MSE (validation metrics, valid = TRUE) for every
	# model in the grid. The loop uses `grid_model` rather than `model` so
	# that the standalone network stored in `model` earlier in the script is
	# not overwritten by the last grid member.
	for (model_id in model_grid@model_ids) {
	  grid_model <- h2o.getModel(model_id)
	  mse <- h2o.mse(grid_model, valid = TRUE)
	  print(sprintf("Test set MSE: %f", mse))
	}