# experiment.py -- forked from skaae/experiment.py
import logging
import time

import numpy as np
import scipy.io
import theano
import theano.tensor as T

import lasagne

# Log everything to both the console and experiment.log
logger = logging.getLogger('')
logger.setLevel(logging.DEBUG)
fh = logging.FileHandler('experiment.log')
fh.setLevel(logging.DEBUG)
ch = logging.StreamHandler()
ch.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(message)s')
ch.setFormatter(formatter)
fh.setFormatter(formatter)
logger.addHandler(ch)
logger.addHandler(fh)

TRAIN_NC = '../data/train_1_speaker.nc'
VAL_NC = '../data/val_1_speaker.nc'
BATCH_SIZE = 50

def one_hot(labels, n_classes):
    '''
    Converts an array of label integers to a one-hot matrix encoding

    :parameters:
        - labels : np.ndarray, dtype=int
            Array of integer labels, in {0, ..., n_classes - 1}
        - n_classes : int
            Total number of classes

    :returns:
        - one_hot : np.ndarray, dtype=bool, shape=(labels.shape[0], n_classes)
            One-hot matrix of the input
    '''
    one_hot = np.zeros((labels.shape[0], n_classes), dtype=bool)
    one_hot[np.arange(labels.shape[0]), labels] = True
    return one_hot
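
# Illustration (hypothetical input): one_hot(np.array([1, 0]), 3) gives
#     array([[False,  True, False],
#            [ True, False, False]])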

def load_netcdf(filename):
    '''
    Loads in data from a netcdf file in rnnlib format

    :parameters:
        - filename : str
            Path to a netcdf file

    :returns:
        - X : list of np.ndarray
            List of time series matrices
        - y : list of np.ndarray
            List of label arrays in one-hot form (see one_hot)
    '''
    # Open in binary mode; netcdf is a binary format
    with open(filename, 'rb') as f:
        netcdf_data = scipy.io.netcdf_file(f).variables
        X = []
        y = []
        n = 0
        # Sequences are stored concatenated along time; seqLengths splits them
        for length in netcdf_data['seqLengths'].data:
            X_n = netcdf_data['inputs'].data[n:n + length]
            X.append(X_n.astype(theano.config.floatX))
            y_n = one_hot(netcdf_data['targetClasses'].data[n:n + length],
                          netcdf_data['numTargetClasses'].data)
            y.append(y_n.astype(theano.config.floatX))
            n += length
    return X, y
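
# Layout assumed by the loader (rnnlib netcdf format): 'inputs' holds all
# sequences concatenated along time, 'seqLengths' gives each sequence's
# length, 'targetClasses' the per-step integer labels, and
# 'numTargetClasses' the total number of classes.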

def make_batches(X, length, batch_size=BATCH_SIZE):
    '''
    Convert a list of matrices into batches of uniform length

    :parameters:
        - X : list of np.ndarray
            List of matrices
        - length : int
            Desired sequence length.  Smaller sequences will be padded with 0s,
            longer will be truncated.
        - batch_size : int
            Mini-batch size

    :returns:
        - X_batch : np.ndarray
            Tensor of time series matrix batches,
            shape=(n_batches, batch_size, length, n_features)
        - X_mask : np.ndarray
            Mask denoting whether to include each time step of each time series
            matrix
    '''
    n_batches = len(X)//batch_size
    X_batch = np.zeros((n_batches, batch_size, length, X[0].shape[1]),
                       dtype=theano.config.floatX)
    X_mask = np.zeros(X_batch.shape, dtype=bool)
    for b in range(n_batches):
        for n in range(batch_size):
            X_m = X[b*batch_size + n]
            # Truncate to `length`; short sequences stay zero-padded
            X_batch[b, n, :X_m.shape[0]] = X_m[:length]
            X_mask[b, n, :X_m.shape[0]] = 1
    return X_batch, X_mask
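
# Illustration (hypothetical sizes): 100 sequences with 3 features each, with
# length=20 and batch_size=50, give X_batch and X_mask of shape (2, 50, 20, 3);
# any leftover sequences beyond n_batches*batch_size are dropped.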

logger.info('Loading data...')
X_train, y_train = load_netcdf(TRAIN_NC)
X_val, y_val = load_netcdf(VAL_NC)
logger.info('First training sequence shape: {}'.format(X_train[0].shape))
# Find the longest sequence
length = max(max([X.shape[0] for X in X_train]),
             max([X.shape[0] for X in X_val]))
# Convert to batches of time series of uniform length
X_train, _ = make_batches(X_train, length)
y_train, train_mask = make_batches(y_train, length)
X_val, _ = make_batches(X_val, length)
y_val, val_mask = make_batches(y_val, length)
n_epochs = 500
learning_rate = 10
momentum = .9
precompute = False
l_in = lasagne.layers.InputLayer(shape=(BATCH_SIZE, length, X_val.shape[-1]))
l_noise = lasagne.layers.GaussianNoiseLayer(l_in, sigma=0.6)
# Three stacked bidirectional levels: each level runs a forward and a
# backward LSTM over the sequence and sums their outputs elementwise
l_forward_1 = lasagne.layers.LSTMLayer(
    l_noise, num_units=156, backwards=False, precompute_input=precompute)
l_backward_1 = lasagne.layers.LSTMLayer(
    l_noise, num_units=156, backwards=True, precompute_input=precompute)
l_recurrent_1 = lasagne.layers.ElemwiseSumLayer(
    [l_forward_1, l_backward_1])
l_forward_2 = lasagne.layers.LSTMLayer(
    l_recurrent_1, num_units=300, backwards=False, precompute_input=precompute)
l_backward_2 = lasagne.layers.LSTMLayer(
    l_recurrent_1, num_units=300, backwards=True, precompute_input=precompute)
l_recurrent_2 = lasagne.layers.ElemwiseSumLayer(
    [l_forward_2, l_backward_2])
l_forward_3 = lasagne.layers.LSTMLayer(
    l_recurrent_2, num_units=102, backwards=False, precompute_input=precompute)
l_backward_3 = lasagne.layers.LSTMLayer(
    l_recurrent_2, num_units=102, backwards=True, precompute_input=precompute)
l_recurrent_3 = lasagne.layers.ElemwiseSumLayer(
    [l_forward_3, l_backward_3])
# Collapse (batch, time) so the softmax output layer is applied per time step
l_reshape = lasagne.layers.ReshapeLayer(l_recurrent_3, (-1, 102))
nonlinearity = lasagne.nonlinearities.softmax
l_rec_out = lasagne.layers.DenseLayer(l_reshape, num_units=y_val.shape[-1],
                                      nonlinearity=nonlinearity)
l_out = lasagne.layers.ReshapeLayer(l_rec_out,
                                    (BATCH_SIZE, length, y_val.shape[-1]))
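
# Shape flow for one mini-batch: (BATCH_SIZE, length, n_features) in, each
# bidirectional level yields (BATCH_SIZE, length, num_units), the reshape
# gives (BATCH_SIZE*length, 102) for the dense softmax, and the final
# reshape restores (BATCH_SIZE, length, y_val.shape[-1]) class probabilities.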

# Cost function is the masked categorical cross-entropy
input = T.tensor3('input')
target_output = T.tensor3('target_output')
mask = T.tensor3('mask')


def cost(output):
    return -T.sum(mask*target_output*T.log(output))/T.sum(mask)
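
# Written out: cost = -sum(mask * target * log(output)) / sum(mask), so
# zero-padded time steps contribute nothing and the total is normalized by
# the number of unmasked entries.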

cost_train = cost(lasagne.layers.get_output(l_out, input, deterministic=False))
cost_eval = cost(lasagne.layers.get_output(l_out, input, deterministic=True))

# Use SGD with momentum for training
all_params = lasagne.layers.get_all_params(l_out, trainable=True)
logger.info('Computing updates...')
updates = lasagne.updates.momentum(cost_train, all_params,
                                   learning_rate, momentum)
logger.info('Compiling functions...')
# Theano functions for training, getting output, and computing cost
train = theano.function([input, target_output, mask], cost_train,
                        updates=updates)
y_pred = theano.function(
    [input], lasagne.layers.get_output(l_out, input, deterministic=True))
compute_cost = theano.function([input, target_output, mask], cost_eval)

logger.info('Training...')
# Train the net
for epoch in range(n_epochs):
    epoch_start_time = time.time()
    # Visit the mini-batches in a new random order each epoch
    batch_shuffle = np.random.choice(X_train.shape[0], X_train.shape[0], False)
    for sequences, labels, sequence_mask in zip(X_train[batch_shuffle],
                                                y_train[batch_shuffle],
                                                train_mask[batch_shuffle]):
        start_time = time.time()
        # Also shuffle the sequences within each mini-batch
        sequence_shuffle = np.random.choice(sequences.shape[0],
                                            sequences.shape[0], False)
        train(sequences[sequence_shuffle], labels[sequence_shuffle],
              sequence_mask[sequence_shuffle])
        end_time = time.time()
        logger.debug('Batch took {:.3f}s'.format(end_time - start_time))
    # Compute cost and framewise error rate on the validation set
    cost_val = sum([compute_cost(X_val_n, y_val_n, mask_n)
                    for X_val_n, y_val_n, mask_n
                    in zip(X_val, y_val, val_mask)])
    y_val_pred = np.array([y_pred(X_val_n) for X_val_n in X_val])
    y_val_labels = np.argmax(y_val*val_mask, axis=-1).flatten()
    y_val_pred_labels = np.argmax(y_val_pred*val_mask, axis=-1).flatten()
    n_time_steps = np.sum(val_mask)/val_mask.shape[-1]
    error = np.sum(y_val_labels != y_val_pred_labels)/float(n_time_steps)
    logger.info("Epoch {} took {}, cost = {}, error = {}".format(
        epoch, time.time() - epoch_start_time, cost_val, error))