# mehdidc/nolearn_adjust_learning_rate.py

## nolearn_adjust_learning_rate.py
from nolearn.lasagne import NeuralNet, BatchIterator
from lasagne import layers, nonlinearities, updates, init, objectives
import numpy as np
import theano

class EarlyStopping(object):
    """Epoch handler that halts training once a criterion stops improving.

    An instance is called as ``handler(nn, train_history)``. It reads the
    monitored value and the epoch number from the last entry of
    ``train_history``; whenever the value beats the best one seen so far it
    stores a snapshot obtained from ``nn.get_all_params_values()``. When
    more than ``patience`` epochs have elapsed since the best epoch, the
    snapshot is handed to ``nn.load_weights_from`` and ``StopIteration`` is
    raised.

    Args:
        patience: Number of epochs without improvement that are tolerated
            before stopping.
        criterion: Key of the monitored value in each ``train_history``
            entry.
        criterion_smaller_is_better: Truthy if lower values of the
            criterion are better (e.g. a loss), falsy if higher values are
            better (e.g. an accuracy).
    """

    def __init__(self, patience=100, criterion='valid_loss',
                 criterion_smaller_is_better=True):
        self.patience = patience
        self.criterion = criterion
        self.criterion_smaller_is_better = criterion_smaller_is_better
        # Use plain truthiness here, exactly as __call__ does. An identity
        # test against True would give a truthy non-bool flag (e.g. 1) the
        # wrong starting value, so that no epoch could ever count as an
        # improvement.
        if criterion_smaller_is_better:
            self.best_valid = np.inf
        else:
            self.best_valid = -np.inf
        self.best_valid_epoch = 0
        # Stays None until the criterion improves for the first time.
        self.best_weights = None

    def __call__(self, nn, train_history):
        """Record improvements and stop training when patience runs out.

        Args:
            nn: The network being trained. Must provide ``verbose``,
                ``get_all_params_values()`` and ``load_weights_from()``.
            train_history: Sequence of per-epoch dicts; the last one must
                contain ``'epoch'`` and the monitored criterion.

        Raises:
            StopIteration: When more than ``patience`` epochs have passed
                since the best epoch.
        """
        latest = train_history[-1]
        current_valid = latest[self.criterion]
        current_epoch = latest['epoch']

        if self.criterion_smaller_is_better:
            improved = current_valid < self.best_valid
        else:
            improved = current_valid > self.best_valid

        if improved:
            self.best_valid = current_valid
            self.best_valid_epoch = current_epoch
            self.best_weights = nn.get_all_params_values()
        elif self.best_valid_epoch + self.patience < current_epoch:
            if nn.verbose:
                print("Early stopping.")
                print("Best {:s} was {:.6f} at epoch {}.".format(
                    self.criterion, self.best_valid, self.best_valid_epoch))
            # No snapshot exists if the criterion never improved (for
            # instance when it is NaN from the first epoch on); in that
            # case keep the current parameters instead of loading None.
            if self.best_weights is not None:
                nn.load_weights_from(self.best_weights)
                if nn.verbose:
                    print("Weights set.")
            raise StopIteration()

    def load_best_weights(self, nn, train_history):
        """Restore the best recorded parameters into ``nn``, if any.

        ``train_history`` is accepted so the method has the same
        ``(nn, train_history)`` signature as ``__call__``; it is not used.
        Does nothing when no snapshot has been recorded yet.
        """
        if self.best_weights is not None:
            nn.load_weights_from(self.best_weights)

class AdjustVariable(object):
    """Epoch handler that decays a shared variable stored on the network.

    On every call the attribute ``name`` of the network is set to
    ``initial / (1 + decay * epoch)``, where ``epoch`` is read from the last
    entry of the training history.

    Args:
        name: Attribute name on the network holding the variable to update
            (the object must provide ``dtype`` and ``set_value``).
        initial: Numerator of the schedule, i.e. the value at epoch 0.
        decay: Decay coefficient multiplied by the epoch number.
    """

    def __init__(self, name, initial=0.1, decay=1e-6):
        self.name = name
        self.initial = initial
        self.decay = decay

    def __call__(self, nn, train_history):
        """Write the scheduled value for the latest epoch into the variable.

        Args:
            nn: The network; ``getattr(nn, self.name)`` must be the
                variable to update.
            train_history: Sequence of per-epoch dicts; the last one must
                contain ``'epoch'``.
        """
        epoch = train_history[-1]['epoch']
        variable = getattr(nn, self.name)
        new_val = self.initial / (1. + self.decay * epoch)
        # Pass the value in the variable's own dtype. A plain Python float
        # is a float64 value, and a float32 shared variable may refuse to
        # store it (NOTE(review): Theano rejects such implicit downcasts
        # unless floatX is float32 -- confirm against the Theano version in
        # use).
        variable.set_value(np.array(new_val, dtype=variable.dtype))

def float32(k):
    """Return ``k`` converted to a new NumPy array of dtype float32.

    Scalars become 0-d arrays. ``np.array`` is used instead of
    ``np.cast['float32']`` because ``np.cast`` was removed in NumPy 2.0;
    like the former, it always returns a fresh array.
    """
    return np.array(k, dtype=np.float32)

net = NeuralNet(
    # --- Topology -------------------------------------------------------
    # input -> dense/dropout -> dense/dropout -> dense -> dense output.
    layers=[
        ('input', layers.InputLayer),
        ('hidden1', layers.DenseLayer), ('dropout1', layers.DropoutLayer),
        ('hidden2', layers.DenseLayer), ('dropout2', layers.DropoutLayer),
        ('hidden3', layers.DenseLayer),
        ('output', layers.DenseLayer),
    ],

    # --- Per-layer settings (keyword prefix = layer name) -----------------
    # 10 values per sample; the leading dimension is left unspecified.
    input_shape=(None, 10),

    # First hidden layer: 1500 rectifier units.
    hidden1_num_units=1500,
    hidden1_nonlinearity=nonlinearities.rectify,
    hidden1_W=init.GlorotUniform(gain='relu'),

    dropout1_p=0.5,

    # Second hidden layer: 1500 rectifier units.
    hidden2_num_units=1500,
    hidden2_nonlinearity=nonlinearities.rectify,
    hidden2_W=init.GlorotUniform(gain='relu'),

    dropout2_p=0.5,

    # Third hidden layer: 100 rectifier units.
    hidden3_num_units=100,
    hidden3_nonlinearity=nonlinearities.rectify,
    hidden3_W=init.GlorotUniform(gain='relu'),

    # Softmax output with 3 units.
    # NOTE(review): an earlier comment here said "18 classes"; the value
    # is 3 -- confirm which one matches the data.
    output_num_units=3,
    output_W=init.GlorotUniform(),
    output_nonlinearity=nonlinearities.softmax,

    # --- Optimisation ---------------------------------------------------
    update=updates.adadelta,
    # Kept in a shared variable so the AdjustVariable handler below can
    # overwrite it after every epoch.
    # NOTE(review): the handler's schedule starts from initial=0.1, so the
    # 0.01 given here only applies until the first epoch has finished --
    # confirm this is intended.
    update_learning_rate=theano.shared(float32(0.01)),

    # Mini-batches of 100 samples during training.
    batch_iterator_train=BatchIterator(batch_size=100),


    # --- Training loop --------------------------------------------------
    use_label_encoder=True,  # presumably maps arbitrary labels to integers
    max_epochs=100,          # upper bound on the number of epochs
    verbose=1,               # report progress while training

    # Handlers invoked as handler(nn, train_history) after each epoch.
    on_epoch_finished=[
        EarlyStopping(
            patience=20,
            criterion='valid_accuracy',
            criterion_smaller_is_better=False,
        ),
        AdjustVariable('update_learning_rate', initial=0.1, decay=1e-6),
    ],
)
	from nolearn.lasagne import NeuralNet, BatchIterator
	from lasagne import layers, nonlinearities, updates, init, objectives
	import numpy as np
	import theano

	class EarlyStopping(object):
	    """Epoch handler that halts training once a criterion stops improving.

	    An instance is called as ``handler(nn, train_history)``. It reads the
	    monitored value and the epoch number from the last entry of
	    ``train_history``; whenever the value beats the best one seen so far
	    it stores a snapshot obtained from ``nn.get_all_params_values()``.
	    When more than ``patience`` epochs have elapsed since the best epoch,
	    the snapshot is handed to ``nn.load_weights_from`` and
	    ``StopIteration`` is raised.

	    Args:
	        patience: Number of epochs without improvement that are
	            tolerated before stopping.
	        criterion: Key of the monitored value in each ``train_history``
	            entry.
	        criterion_smaller_is_better: Truthy if lower values of the
	            criterion are better, falsy if higher values are better.
	    """

	    def __init__(self, patience=100, criterion='valid_loss',
	                 criterion_smaller_is_better=True):
	        self.patience = patience
	        self.criterion = criterion
	        self.criterion_smaller_is_better = criterion_smaller_is_better
	        # Use plain truthiness here, exactly as __call__ does. An
	        # identity test against True would give a truthy non-bool flag
	        # (e.g. 1) the wrong starting value.
	        if criterion_smaller_is_better:
	            self.best_valid = np.inf
	        else:
	            self.best_valid = -np.inf
	        self.best_valid_epoch = 0
	        # Stays None until the criterion improves for the first time.
	        self.best_weights = None

	    def __call__(self, nn, train_history):
	        """Record improvements and stop training when patience runs out.

	        Args:
	            nn: The network being trained. Must provide ``verbose``,
	                ``get_all_params_values()`` and ``load_weights_from()``.
	            train_history: Sequence of per-epoch dicts; the last one
	                must contain ``'epoch'`` and the monitored criterion.

	        Raises:
	            StopIteration: When more than ``patience`` epochs have
	                passed since the best epoch.
	        """
	        latest = train_history[-1]
	        current_valid = latest[self.criterion]
	        current_epoch = latest['epoch']

	        if self.criterion_smaller_is_better:
	            improved = current_valid < self.best_valid
	        else:
	            improved = current_valid > self.best_valid

	        if improved:
	            self.best_valid = current_valid
	            self.best_valid_epoch = current_epoch
	            self.best_weights = nn.get_all_params_values()
	        elif self.best_valid_epoch + self.patience < current_epoch:
	            if nn.verbose:
	                print("Early stopping.")
	                print("Best {:s} was {:.6f} at epoch {}.".format(
	                    self.criterion, self.best_valid,
	                    self.best_valid_epoch))
	            # No snapshot exists if the criterion never improved; keep
	            # the current parameters instead of loading None.
	            if self.best_weights is not None:
	                nn.load_weights_from(self.best_weights)
	                if nn.verbose:
	                    print("Weights set.")
	            raise StopIteration()

	    def load_best_weights(self, nn, train_history):
	        """Restore the best recorded parameters into ``nn``, if any.

	        ``train_history`` is accepted so the method has the same
	        ``(nn, train_history)`` signature as ``__call__``; it is not
	        used. Does nothing when no snapshot has been recorded yet.
	        """
	        if self.best_weights is not None:
	            nn.load_weights_from(self.best_weights)

	class AdjustVariable(object):
	    """Epoch handler that decays a shared variable stored on the network.

	    On every call the attribute ``name`` of the network is set to
	    ``initial / (1 + decay * epoch)``, where ``epoch`` is read from the
	    last entry of the training history.

	    Args:
	        name: Attribute name on the network holding the variable to
	            update (the object must provide ``dtype`` and ``set_value``).
	        initial: Numerator of the schedule, i.e. the value at epoch 0.
	        decay: Decay coefficient multiplied by the epoch number.
	    """

	    def __init__(self, name, initial=0.1, decay=1e-6):
	        self.name = name
	        self.initial = initial
	        self.decay = decay

	    def __call__(self, nn, train_history):
	        """Write the scheduled value for the latest epoch into the variable.

	        Args:
	            nn: The network; ``getattr(nn, self.name)`` must be the
	                variable to update.
	            train_history: Sequence of per-epoch dicts; the last one
	                must contain ``'epoch'``.
	        """
	        epoch = train_history[-1]['epoch']
	        variable = getattr(nn, self.name)
	        new_val = self.initial / (1. + self.decay * epoch)
	        # Pass the value in the variable's own dtype. A plain Python
	        # float is a float64 value, and a float32 shared variable may
	        # refuse to store it (NOTE(review): Theano rejects such implicit
	        # downcasts unless floatX is float32 -- confirm).
	        variable.set_value(np.array(new_val, dtype=variable.dtype))

	def float32(k):
	    """Return ``k`` converted to a new NumPy array of dtype float32.

	    Scalars become 0-d arrays. ``np.array`` is used instead of
	    ``np.cast['float32']`` because ``np.cast`` was removed in NumPy 2.0;
	    like the former, it always returns a fresh array.
	    """
	    return np.array(k, dtype=np.float32)

	net = NeuralNet(
	    # --- Topology ---------------------------------------------------
	    # input -> dense/dropout -> dense/dropout -> dense -> dense output.
	    layers=[
	        ('input', layers.InputLayer),
	        ('hidden1', layers.DenseLayer), ('dropout1', layers.DropoutLayer),
	        ('hidden2', layers.DenseLayer), ('dropout2', layers.DropoutLayer),
	        ('hidden3', layers.DenseLayer),
	        ('output', layers.DenseLayer),
	    ],

	    # --- Per-layer settings (keyword prefix = layer name) -------------
	    # 10 values per sample; the leading dimension is left unspecified.
	    input_shape=(None, 10),

	    # First hidden layer: 1500 rectifier units.
	    hidden1_num_units=1500,
	    hidden1_nonlinearity=nonlinearities.rectify,
	    hidden1_W=init.GlorotUniform(gain='relu'),

	    dropout1_p=0.5,

	    # Second hidden layer: 1500 rectifier units.
	    hidden2_num_units=1500,
	    hidden2_nonlinearity=nonlinearities.rectify,
	    hidden2_W=init.GlorotUniform(gain='relu'),

	    dropout2_p=0.5,

	    # Third hidden layer: 100 rectifier units.
	    hidden3_num_units=100,
	    hidden3_nonlinearity=nonlinearities.rectify,
	    hidden3_W=init.GlorotUniform(gain='relu'),

	    # Softmax output with 3 units.
	    # NOTE(review): an earlier comment here said "18 classes"; the
	    # value is 3 -- confirm which one matches the data.
	    output_num_units=3,
	    output_W=init.GlorotUniform(),
	    output_nonlinearity=nonlinearities.softmax,

	    # --- Optimisation -----------------------------------------------
	    update=updates.adadelta,
	    # Kept in a shared variable so the AdjustVariable handler below can
	    # overwrite it after every epoch.
	    # NOTE(review): the handler's schedule starts from initial=0.1, so
	    # the 0.01 given here only applies until the first epoch has
	    # finished -- confirm this is intended.
	    update_learning_rate=theano.shared(float32(0.01)),

	    # Mini-batches of 100 samples during training.
	    batch_iterator_train=BatchIterator(batch_size=100),


	    # --- Training loop ----------------------------------------------
	    use_label_encoder=True,  # presumably maps arbitrary labels to integers
	    max_epochs=100,          # upper bound on the number of epochs
	    verbose=1,               # report progress while training

	    # Handlers invoked as handler(nn, train_history) after each epoch.
	    on_epoch_finished=[
	        EarlyStopping(
	            patience=20,
	            criterion='valid_accuracy',
	            criterion_smaller_is_better=False,
	        ),
	        AdjustVariable('update_learning_rate', initial=0.1, decay=1e-6),
	    ],
	)