alsrgv/hyperas_keras_example.py

## hyperas_keras_example.py
from __future__ import print_function
from hyperopt import Trials, STATUS_OK, tpe
from hyperas import optim
from hyperas.distributions import choice, uniform, conditional
import keras
import tensorflow as tf
import horovod.keras as hvd
import keras.backend as K
import math

def data():
    '''
    Data providing function:

    Make sure to have every relevant import statement included here and return data as
    used in model function below. This function is separated from model() so that hyperopt
    won't reload data for each evaluation run.

    Loads MNIST through keras.datasets.mnist and hands back the tuple
    (X_train, Y_train, X_test, Y_test):
        - X_train / X_test: images reshaped to 784 values per sample, cast to
          float32 and divided by 255.
        - Y_train / Y_test: integer labels converted with np_utils.to_categorical
          using 10 classes.

    NOTE(review): hyperas presumably works on the source text of this function
    rather than just calling it, so the variable names and the single-line
    result statement at the end should be left as they are -- confirm against
    the hyperas version in use.
    '''
    from keras.datasets import mnist
    from keras.utils import np_utils
    (X_train, y_train), (X_test, y_test) = mnist.load_data()
    # One row of 784 values per sample; the sample counts are hard-coded.
    X_train = X_train.reshape(60000, 784)
    X_test = X_test.reshape(10000, 784)
    # Switch to float32 first so the in-place division below is a float division.
    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')
    # Presumably maps 8-bit pixel values into [0, 1] -- TODO confirm input range.
    X_train /= 255
    X_test /= 255
    nb_classes = 10
    # Upper-case Y_* hold the converted label matrices, lower-case y_* the raw labels.
    Y_train = np_utils.to_categorical(y_train, nb_classes)
    Y_test = np_utils.to_categorical(y_test, nb_classes)
    return X_train, Y_train, X_test, Y_test


def model(X_train, Y_train, X_test, Y_test):
    '''
    Model providing function:

    Create Keras model with double curly brackets dropped-in as needed.
    Return value has to be a valid python dictionary with two customary keys:
        - loss: Specify a numeric evaluation metric to be minimized
        - status: Just use STATUS_OK and see hyperopt documentation if not feasible
    The last one is optional, though recommended, namely:
        - model: specify the model just created so that we can later use it again.

    Arguments:
        - X_train, Y_train: inputs and targets handed to model.fit
        - X_test, Y_test: inputs and targets used both as validation data during
          fit and for the final model.evaluate call

    The searched hyperparameters are the two dropout rates, the width and
    activation of the second dense layer, the optional extra block, the
    optimizer name and the batch size. The reported loss is the negated test
    accuracy after it has gone through hvd.allreduce.
    '''
    from keras.models import Sequential
    from keras.layers.core import Dense, Dropout, Activation

    # Fixed first block: 784 inputs -> 512 units -> relu, then a searched dropout rate.
    model = Sequential()
    model.add(Dense(512, input_shape=(784,)))
    model.add(Activation('relu'))
    model.add(Dropout({{uniform(0, 1)}}))
    # Second block: width, activation and dropout rate are all searched.
    model.add(Dense({{choice([256, 512, 1024])}}))
    model.add(Activation({{choice(['relu', 'sigmoid'])}}))
    model.add(Dropout({{uniform(0, 1)}}))

    # If we choose 'four', add an additional fourth layer
    if conditional({{choice(['three', 'four'])}}) == 'four':
        model.add(Dense(100))
        # We can also choose between complete sets of layers
        model.add({{choice([Dropout(0.5), Activation('linear')])}})
        model.add(Activation('relu'))

    # Output block: 10 units with softmax, matching the categorical cross-entropy loss below.
    model.add(Dense(10))
    model.add(Activation('softmax'))

    # The optimizer is searched by name and instantiated with its default settings.
    optimizer_name={{choice(['rmsprop', 'adam', 'sgd'])}}
    opt = keras.optimizers.get(optimizer_name)

    # Horovod: adjust learning rate based on number of GPUs.
    # The optimizer's lr variable is overwritten in the backend session with lr * hvd.size().
    K.get_session().run(tf.assign(opt.lr, opt.lr * hvd.size()))

    # Horovod: add Horovod Distributed Optimizer.
    opt = hvd.DistributedOptimizer(opt)

    model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

    callbacks = [
        # Horovod: broadcast initial variable states from rank 0 to all other processes.
        # This is necessary to ensure consistent initialization of all workers when
        # training is started with random weights or restored from a checkpoint.
        hvd.callbacks.BroadcastGlobalVariablesCallback(0),
    ]

    # Horovod: adjust number of epochs based on number of GPUs.
    # A budget of 12 epochs is divided by the number of workers and rounded up.
    epochs = int(math.ceil(12.0 / hvd.size()))

    # horovod: verbose output only on the first worker.
    verbose = 1 if hvd.rank() == 0 else 0

    # The test split doubles as validation data here and as the final score below.
    model.fit(X_train, Y_train,
              batch_size={{choice([64, 128])}},
              epochs=epochs,
              verbose=verbose,
              validation_data=(X_test, Y_test),
              callbacks=callbacks)
    # With metrics=['accuracy'] evaluate yields two values; only the accuracy is used.
    score, acc = model.evaluate(X_test, Y_test, verbose=0)

    # Horovod: get average test accuracy in case of slight divergence between workers.
    acc = hvd.allreduce(acc)
    print('Test accuracy:', acc)
    # The 'loss' entry is minimized, so the accuracy is negated.
    return {'loss': -acc, 'status': STATUS_OK, 'model': model}

if __name__ == '__main__':
    # Script entry point: set up Horovod and the TensorFlow session, run the
    # hyperas search, then report the best model found.

    # Horovod: initialize Horovod.
    hvd.init()

    # Horovod: pin GPU to be used to process local rank (one GPU per process)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.visible_device_list = str(hvd.local_rank())
    K.set_session(tf.Session(config=config))

    # horovod: verbose output only on the first worker.
    verbose = hvd.rank() == 0

    # Run 5 evaluations of the model() template using the TPE algorithm.
    best_run, best_model = optim.minimize(model=model,
                                          data=data,
                                          algo=tpe.suggest,
                                          max_evals=5,
                                          trials=Trials(),
                                          verbose=verbose)

    # horovod: report the final result only on the first worker, in line with
    # the verbosity convention above; otherwise every process would load the
    # data again and print its own copy of the same report.
    if verbose:
        # optim.minimize does not hand the data back, so it is loaded once more here.
        X_train, Y_train, X_test, Y_test = data()
        print("Evaluation of best performing model:")
        print(best_model.evaluate(X_test, Y_test))
	from __future__ import print_function
	from hyperopt import Trials, STATUS_OK, tpe
	from hyperas import optim
	from hyperas.distributions import choice, uniform, conditional
	import keras
	import tensorflow as tf
	import horovod.keras as hvd
	import keras.backend as K
	import math

	def data():
	    '''
	    Data providing function:

	    Make sure to have every relevant import statement included here and return data as
	    used in model function below. This function is separated from model() so that hyperopt
	    won't reload data for each evaluation run.

	    Loads MNIST through keras.datasets.mnist and hands back the tuple
	    (X_train, Y_train, X_test, Y_test):
	        - X_train / X_test: images reshaped to 784 values per sample, cast to
	          float32 and divided by 255.
	        - Y_train / Y_test: integer labels converted with np_utils.to_categorical
	          using 10 classes.

	    NOTE(review): hyperas presumably works on the source text of this function
	    rather than just calling it, so the variable names and the single-line
	    result statement at the end should be left as they are -- confirm against
	    the hyperas version in use.
	    '''
	    from keras.datasets import mnist
	    from keras.utils import np_utils
	    (X_train, y_train), (X_test, y_test) = mnist.load_data()
	    # One row of 784 values per sample; the sample counts are hard-coded.
	    X_train = X_train.reshape(60000, 784)
	    X_test = X_test.reshape(10000, 784)
	    # Switch to float32 first so the in-place division below is a float division.
	    X_train = X_train.astype('float32')
	    X_test = X_test.astype('float32')
	    # Presumably maps 8-bit pixel values into [0, 1] -- TODO confirm input range.
	    X_train /= 255
	    X_test /= 255
	    nb_classes = 10
	    # Upper-case Y_* hold the converted label matrices, lower-case y_* the raw labels.
	    Y_train = np_utils.to_categorical(y_train, nb_classes)
	    Y_test = np_utils.to_categorical(y_test, nb_classes)
	    return X_train, Y_train, X_test, Y_test


	def model(X_train, Y_train, X_test, Y_test):
	    '''
	    Model providing function:

	    Create Keras model with double curly brackets dropped-in as needed.
	    Return value has to be a valid python dictionary with two customary keys:
	        - loss: Specify a numeric evaluation metric to be minimized
	        - status: Just use STATUS_OK and see hyperopt documentation if not feasible
	    The last one is optional, though recommended, namely:
	        - model: specify the model just created so that we can later use it again.

	    Arguments:
	        - X_train, Y_train: inputs and targets handed to model.fit
	        - X_test, Y_test: inputs and targets used both as validation data during
	          fit and for the final model.evaluate call

	    The searched hyperparameters are the two dropout rates, the width and
	    activation of the second dense layer, the optional extra block, the
	    optimizer name and the batch size. The reported loss is the negated test
	    accuracy after it has gone through hvd.allreduce.
	    '''
	    from keras.models import Sequential
	    from keras.layers.core import Dense, Dropout, Activation

	    # Fixed first block: 784 inputs -> 512 units -> relu, then a searched dropout rate.
	    model = Sequential()
	    model.add(Dense(512, input_shape=(784,)))
	    model.add(Activation('relu'))
	    model.add(Dropout({{uniform(0, 1)}}))
	    # Second block: width, activation and dropout rate are all searched.
	    model.add(Dense({{choice([256, 512, 1024])}}))
	    model.add(Activation({{choice(['relu', 'sigmoid'])}}))
	    model.add(Dropout({{uniform(0, 1)}}))

	    # If we choose 'four', add an additional fourth layer
	    if conditional({{choice(['three', 'four'])}}) == 'four':
	        model.add(Dense(100))
	        # We can also choose between complete sets of layers
	        model.add({{choice([Dropout(0.5), Activation('linear')])}})
	        model.add(Activation('relu'))

	    # Output block: 10 units with softmax, matching the categorical cross-entropy loss below.
	    model.add(Dense(10))
	    model.add(Activation('softmax'))

	    # The optimizer is searched by name and instantiated with its default settings.
	    optimizer_name={{choice(['rmsprop', 'adam', 'sgd'])}}
	    opt = keras.optimizers.get(optimizer_name)

	    # Horovod: adjust learning rate based on number of GPUs.
	    # The optimizer's lr variable is overwritten in the backend session with lr * hvd.size().
	    K.get_session().run(tf.assign(opt.lr, opt.lr * hvd.size()))

	    # Horovod: add Horovod Distributed Optimizer.
	    opt = hvd.DistributedOptimizer(opt)

	    model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

	    callbacks = [
	        # Horovod: broadcast initial variable states from rank 0 to all other processes.
	        # This is necessary to ensure consistent initialization of all workers when
	        # training is started with random weights or restored from a checkpoint.
	        hvd.callbacks.BroadcastGlobalVariablesCallback(0),
	    ]

	    # Horovod: adjust number of epochs based on number of GPUs.
	    # A budget of 12 epochs is divided by the number of workers and rounded up.
	    epochs = int(math.ceil(12.0 / hvd.size()))

	    # horovod: verbose output only on the first worker.
	    verbose = 1 if hvd.rank() == 0 else 0

	    # The test split doubles as validation data here and as the final score below.
	    model.fit(X_train, Y_train,
	              batch_size={{choice([64, 128])}},
	              epochs=epochs,
	              verbose=verbose,
	              validation_data=(X_test, Y_test),
	              callbacks=callbacks)
	    # With metrics=['accuracy'] evaluate yields two values; only the accuracy is used.
	    score, acc = model.evaluate(X_test, Y_test, verbose=0)

	    # Horovod: get average test accuracy in case of slight divergence between workers.
	    acc = hvd.allreduce(acc)
	    print('Test accuracy:', acc)
	    # The 'loss' entry is minimized, so the accuracy is negated.
	    return {'loss': -acc, 'status': STATUS_OK, 'model': model}

	if __name__ == '__main__':
	    # Script entry point: set up Horovod and the TensorFlow session, run the
	    # hyperas search, then report the best model found.

	    # Horovod: initialize Horovod.
	    hvd.init()

	    # Horovod: pin GPU to be used to process local rank (one GPU per process)
	    config = tf.ConfigProto()
	    config.gpu_options.allow_growth = True
	    config.gpu_options.visible_device_list = str(hvd.local_rank())
	    K.set_session(tf.Session(config=config))

	    # horovod: verbose output only on the first worker.
	    verbose = hvd.rank() == 0

	    # Run 5 evaluations of the model() template using the TPE algorithm.
	    best_run, best_model = optim.minimize(model=model,
	                                          data=data,
	                                          algo=tpe.suggest,
	                                          max_evals=5,
	                                          trials=Trials(),
	                                          verbose=verbose)

	    # horovod: report the final result only on the first worker, in line with
	    # the verbosity convention above; otherwise every process would load the
	    # data again and print its own copy of the same report.
	    if verbose:
	        # optim.minimize does not hand the data back, so it is loaded once more here.
	        X_train, Y_train, X_test, Y_test = data()
	        print("Evaluation of best performing model:")
	        print(best_model.evaluate(X_test, Y_test))