Training models on the GPU and loading onto the CPU with Lasagne.
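The trick: Theano shared variables created on the GPU pickle in a device-specific form (CudaNdarray) that cannot be loaded on a CPU-only machine. Lasagne's get_all_param_values() instead returns the parameters as plain numpy arrays, which pickle portably, and set_all_param_values() pushes them back into a freshly built network on whatever device Theano is using. A minimal sketch of the round trip (assuming output_layer is an already-built Lasagne network):

import pickle
from lasagne.layers import get_all_param_values, set_all_param_values

# on the GPU machine: pull the parameters out as numpy arrays and pickle them
with open('network.params.pkl', 'wb') as f:
    pickle.dump(get_all_param_values(output_layer), f, -1)

# on the CPU machine: rebuild the same architecture, then load the values
with open('network.params.pkl', 'rb') as f:
    set_all_param_values(output_layer, pickle.load(f))

The full test script: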
#!/usr/bin/env python
from __future__ import print_function

import argparse
import gzip
import itertools
import pickle
import os
import sys
import time

import numpy as np
import lasagne
from lasagne.layers import get_all_param_values, set_all_param_values
from lasagne.layers import get_all_layers
import theano
import theano.tensor as T

PY2 = sys.version_info[0] == 2

if PY2:
    from urllib import urlretrieve

    def pickle_load(f, encoding):
        return pickle.load(f)
else:
    from urllib.request import urlretrieve

    def pickle_load(f, encoding):
        return pickle.load(f, encoding=encoding)

DATA_URL = 'http://deeplearning.net/data/mnist/mnist.pkl.gz'
DATA_FILENAME = 'mnist.pkl.gz'
NUM_EPOCHS = 5
BATCH_SIZE = 600
LEARNING_RATE = 0.01
MOMENTUM = 0.9

def get_input_dim(data, use_conv):
    n = data[0][0].shape[1]
    if use_conv:
        # assume the data is square
        side = int(np.sqrt(n))
        return np.array([side, side])
    else:
        return n


def get_output_dim(data):
    return len(np.unique(data[0][1]))


def load_data_raw(url=DATA_URL, filename=DATA_FILENAME):
    if not os.path.exists(filename):
        print('Downloading MNIST dataset')
        urlretrieve(url, filename)
    with gzip.open(filename, 'rb') as f:
        return pickle_load(f, encoding='latin-1')


def load_data(data, conv=False):
    X_train, y_train = data[0]
    X_valid, y_valid = data[1]
    X_test, y_test = data[2]
    input_dim = get_input_dim(data, conv)
    output_dim = get_output_dim(data)
    if conv:
        X_train = X_train.reshape((X_train.shape[0], 1, input_dim[0], input_dim[1]))
        X_valid = X_valid.reshape((X_valid.shape[0], 1, input_dim[0], input_dim[1]))
        X_test = X_test.reshape((X_test.shape[0], 1, input_dim[0], input_dim[1]))
    return dict(
        X_train=theano.shared(lasagne.utils.floatX(X_train)),
        y_train=T.cast(theano.shared(y_train), 'int32'),
        X_valid=theano.shared(lasagne.utils.floatX(X_valid)),
        y_valid=T.cast(theano.shared(y_valid), 'int32'),
        X_test=theano.shared(lasagne.utils.floatX(X_test)),
        y_test=T.cast(theano.shared(y_test), 'int32'),
        num_examples_train=X_train.shape[0],
        num_examples_valid=X_valid.shape[0],
        num_examples_test=X_test.shape[0],
        input_dim=input_dim,
        output_dim=output_dim,
    )

def build_model(input_dim, output_dim,
                batch_size=BATCH_SIZE,
                use_conv=False,
                use_dnn=False):
    if use_dnn:
        from lasagne.layers import dnn
    if use_conv:
        # choose the cuDNN-backed layers only when requested; both variants
        # have the same parameter shapes, so saved values load into either
        ConvLayer = dnn.Conv2DDNNLayer if use_dnn else lasagne.layers.Conv2DLayer
        PoolLayer = dnn.MaxPool2DDNNLayer if use_dnn else lasagne.layers.MaxPool2DLayer
        l_in = lasagne.layers.InputLayer(
            shape=(batch_size, 1, input_dim[0], input_dim[1]),
        )
        l_conv1 = ConvLayer(
            l_in,
            num_filters=32,
            filter_size=(5, 5),
            nonlinearity=lasagne.nonlinearities.rectify,
        )
        l_pool1 = PoolLayer(l_conv1, ds=(2, 2))
        l_conv2 = ConvLayer(
            l_pool1,
            num_filters=32,
            filter_size=(5, 5),
            nonlinearity=lasagne.nonlinearities.rectify,
        )
        l_pool2 = PoolLayer(l_conv2, ds=(2, 2))
        l_hidden1 = lasagne.layers.DenseLayer(
            l_pool2,
            num_units=256,
            nonlinearity=lasagne.nonlinearities.rectify,
        )
        l_hidden1_dropout = lasagne.layers.DropoutLayer(l_hidden1, p=0.5)
        l_out = lasagne.layers.DenseLayer(
            l_hidden1_dropout,
            num_units=output_dim,
            nonlinearity=lasagne.nonlinearities.softmax,
        )
        return l_out
    else:
        l_in = lasagne.layers.InputLayer(
            shape=(batch_size, input_dim),
        )
        l_hidden1 = lasagne.layers.DenseLayer(
            l_in,
            num_units=512,
            nonlinearity=lasagne.nonlinearities.rectify,
        )
        l_hidden1_dropout = lasagne.layers.DropoutLayer(l_hidden1, p=0.5)
        l_hidden2 = lasagne.layers.DenseLayer(
            l_hidden1_dropout,
            num_units=512,
            nonlinearity=lasagne.nonlinearities.rectify,
        )
        l_hidden2_dropout = lasagne.layers.DropoutLayer(l_hidden2, p=0.5)
        l_out = lasagne.layers.DenseLayer(
            l_hidden2_dropout,
            num_units=output_dim,
            nonlinearity=lasagne.nonlinearities.softmax,
        )
        return l_out

def create_iter_functions(dataset, output_layer,
                          X_tensor_type=T.matrix,
                          batch_size=BATCH_SIZE,
                          learning_rate=LEARNING_RATE,
                          momentum=MOMENTUM):
    batch_index = T.iscalar('batch_index')
    X_batch = X_tensor_type('x')
    y_batch = T.ivector('y')
    batch_slice = slice(batch_index * batch_size,
                        (batch_index + 1) * batch_size)

    objective = lasagne.objectives.Objective(
        output_layer,
        loss_function=lasagne.objectives.categorical_crossentropy)
    loss_train = objective.get_loss(X_batch, target=y_batch)
    loss_eval = objective.get_loss(X_batch, target=y_batch,
                                   deterministic=True)

    pred = T.argmax(
        output_layer.get_output(X_batch, deterministic=True), axis=1)
    accuracy = T.mean(T.eq(pred, y_batch), dtype=theano.config.floatX)

    all_params = lasagne.layers.get_all_params(output_layer)
    updates = lasagne.updates.nesterov_momentum(
        loss_train, all_params, learning_rate, momentum)

    iter_train = theano.function(
        [batch_index], loss_train,
        updates=updates,
        givens={
            X_batch: dataset['X_train'][batch_slice],
            y_batch: dataset['y_train'][batch_slice],
        },
    )
    iter_valid = theano.function(
        [batch_index], [loss_eval, accuracy],
        givens={
            X_batch: dataset['X_valid'][batch_slice],
            y_batch: dataset['y_valid'][batch_slice],
        },
    )
    iter_test = theano.function(
        [batch_index], [loss_eval, accuracy],
        givens={
            X_batch: dataset['X_test'][batch_slice],
            y_batch: dataset['y_test'][batch_slice],
        },
    )
    return dict(
        train=iter_train,
        valid=iter_valid,
        test=iter_test,
    )


def train(iter_funcs, dataset, batch_size=BATCH_SIZE):
    num_batches_train = dataset['num_examples_train'] // batch_size
    num_batches_valid = dataset['num_examples_valid'] // batch_size

    for epoch in itertools.count(1):
        batch_train_losses = []
        for b in range(num_batches_train):
            batch_train_loss = iter_funcs['train'](b)
            batch_train_losses.append(batch_train_loss)
        avg_train_loss = np.mean(batch_train_losses)

        batch_valid_losses = []
        batch_valid_accuracies = []
        for b in range(num_batches_valid):
            batch_valid_loss, batch_valid_accuracy = iter_funcs['valid'](b)
            batch_valid_losses.append(batch_valid_loss)
            batch_valid_accuracies.append(batch_valid_accuracy)
        avg_valid_loss = np.mean(batch_valid_losses)
        avg_valid_accuracy = np.mean(batch_valid_accuracies)

        yield {
            'number': epoch,
            'train_loss': avg_train_loss,
            'valid_loss': avg_valid_loss,
            'valid_accuracy': avg_valid_accuracy,
        }


def print_classifications(classifications, normalize=False):
    if normalize:
        classifications /= np.max(classifications)
    width = 50
    for i, x in enumerate(classifications):
        count = int(x * width)
        print('{0}: {1:.3f} [{2}{3}]'.format(i, x, '|' * count, ' ' * (width - count)))

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Test GPU/CPU model saving/loading.')
    parser.add_argument('--use_conv', action='store_true')
    parser.add_argument('--use_dnn', action='store_true')
    parser.add_argument('--train', action='store_true')
    parser.add_argument('--run', action='store_true')
    args = parser.parse_args()

    params_file = 'network.params.pkl'

    print('Loading data...')
    raw = load_data_raw()
    input_dim = get_input_dim(raw, args.use_conv)
    output_dim = get_output_dim(raw)
    print('Input: {} Output: {}'.format(input_dim, output_dim))

    print('Building model...')
    output_layer = build_model(
        input_dim=input_dim,
        output_dim=output_dim,
        batch_size=None,
        use_conv=args.use_conv,
        use_dnn=args.use_dnn,
    )

    if args.train:
        print('Shaping data...')
        dataset = load_data(raw, conv=args.use_conv)
        print('Creating iter functions...')
        iter_funcs = create_iter_functions(
            dataset,
            output_layer,
            X_tensor_type=T.tensor4 if args.use_conv else T.matrix,
        )
        print('Starting training...')
        now = time.time()
        try:
            for epoch in train(iter_funcs, dataset):
                print("Epoch {} of {} took {:.3f}s".format(
                    epoch['number'], NUM_EPOCHS, time.time() - now))
                now = time.time()
                print("  training loss:\t\t{:.6f}".format(epoch['train_loss']))
                print("  validation loss:\t\t{:.6f}".format(epoch['valid_loss']))
                print("  validation accuracy:\t\t{:.2f} %".format(
                    epoch['valid_accuracy'] * 100))
                if epoch['number'] >= NUM_EPOCHS:
                    break
        except KeyboardInterrupt:
            pass
        print('Dumping to ' + params_file)
        # pickled parameters are binary data, so write in 'wb' mode
        with open(params_file, 'wb') as f:
            pickle.dump(get_all_param_values(output_layer), f, -1)

    if args.run:
        with open(params_file, 'rb') as f:
            params = pickle.load(f)
        layers = get_all_layers(output_layer)
        set_all_param_values(layers, params)
        X_train, y_train = raw[0]
        single_image = X_train[0]
        if args.use_conv:
            single_image = np.array(single_image).reshape(1, 1, input_dim[0], input_dim[1])
        else:
            single_image = np.array(single_image).reshape(1, input_dim)
        # note: dropout is still active here; pass deterministic=True to
        # get_output for repeatable predictions
        classification = output_layer.get_output(input=single_image).eval()
        print_classifications(classification[0])
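
For reference, the mnist.pkl.gz archive that load_data_raw() downloads unpickles into three (inputs, labels) pairs: 50,000 training, 10,000 validation, and 10,000 test examples, each input a flattened 28x28 image scaled to [0, 1]. A quick check:

train_set, valid_set, test_set = load_data_raw()
X_train, y_train = train_set
print(X_train.shape, y_train.shape)  # (50000, 784) (50000,)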
Output from the test runs (the exact commands appear at the end):
Using gpu device 0: GeForce GT 750M
Loading data...
Input: 784 Output: 10
Building model...
Shaping data...
Creating iter functions...
Starting training...
Epoch 1 of 5 took 1.304s
  training loss:          1.356000
  validation loss:        0.464074
  validation accuracy:    87.42 %
Epoch 2 of 5 took 0.951s
  training loss:          0.589747
  validation loss:        0.330294
  validation accuracy:    90.67 %
Epoch 3 of 5 took 0.936s
  training loss:          0.463531
  validation loss:        0.280340
  validation accuracy:    91.73 %
Epoch 4 of 5 took 0.938s
  training loss:          0.402245
  validation loss:        0.248970
  validation accuracy:    92.58 %
Epoch 5 of 5 took 0.942s
  training loss:          0.362150
  validation loss:        0.225799
  validation accuracy:    93.28 %
Dumping to network.params.pkl
0: 0.004 [                                                  ]
1: 0.000 [                                                  ]
2: 0.001 [                                                  ]
3: 0.301 [|||||||||||||||                                   ]
4: 0.000 [                                                  ]
5: 0.690 [||||||||||||||||||||||||||||||||||                ]
6: 0.000 [                                                  ]
7: 0.002 [                                                  ]
8: 0.001 [                                                  ]
9: 0.001 [                                                  ]

Using gpu device 0: GeForce GT 750M
Loading data...
Input: 784 Output: 10
Building model...
0: 0.001 [                                                  ]
1: 0.000 [                                                  ]
2: 0.000 [                                                  ]
3: 0.831 [|||||||||||||||||||||||||||||||||||||||||         ]
4: 0.000 [                                                  ]
5: 0.166 [||||||||                                          ]
6: 0.000 [                                                  ]
7: 0.001 [                                                  ]
8: 0.000 [                                                  ]
9: 0.000 [                                                  ]

Loading data...
Input: 784 Output: 10
Building model...
0: 0.001 [                                                  ]
1: 0.000 [                                                  ]
2: 0.000 [                                                  ]
3: 0.831 [|||||||||||||||||||||||||||||||||||||||||         ]
4: 0.000 [                                                  ]
5: 0.166 [||||||||                                          ]
6: 0.000 [                                                  ]
7: 0.001 [                                                  ]
8: 0.000 [                                                  ]
9: 0.000 [                                                  ]

Using gpu device 0: GeForce GT 750M
Loading data...
Input: [28 28] Output: 10
Building model...
Shaping data...
Creating iter functions...
Starting training...
Epoch 1 of 5 took 11.106s
  training loss:          1.251222
  validation loss:        0.263176
  validation accuracy:    92.53 %
Epoch 2 of 5 took 10.815s
  training loss:          0.314461
  validation loss:        0.160674
  validation accuracy:    95.44 %
Epoch 3 of 5 took 10.847s
  training loss:          0.221051
  validation loss:        0.122939
  validation accuracy:    96.29 %
Epoch 4 of 5 took 11.053s
  training loss:          0.178016
  validation loss:        0.103507
  validation accuracy:    96.91 %
Epoch 5 of 5 took 10.925s
  training loss:          0.151570
  validation loss:        0.089130
  validation accuracy:    97.32 %
Dumping to network.params.pkl
0: 0.000 [                                                  ]
1: 0.000 [                                                  ]
2: 0.000 [                                                  ]
3: 0.957 [|||||||||||||||||||||||||||||||||||||||||||||||   ]
4: 0.000 [                                                  ]
5: 0.043 [||                                                ]
6: 0.000 [                                                  ]
7: 0.000 [                                                  ]
8: 0.000 [                                                  ]
9: 0.000 [                                                  ]

Using gpu device 0: GeForce GT 750M
Loading data...
Input: [28 28] Output: 10
Building model...
0: 0.000 [                                                  ]
1: 0.000 [                                                  ]
2: 0.001 [                                                  ]
3: 0.605 [||||||||||||||||||||||||||||||                    ]
4: 0.000 [                                                  ]
5: 0.394 [|||||||||||||||||||                               ]
6: 0.000 [                                                  ]
7: 0.000 [                                                  ]
8: 0.000 [                                                  ]
9: 0.000 [                                                  ]

Loading data...
Input: [28 28] Output: 10
Building model...
0: 0.000 [                                                  ]
1: 0.000 [                                                  ]
2: 0.001 [                                                  ]
3: 0.605 [||||||||||||||||||||||||||||||                    ]
4: 0.000 [                                                  ]
5: 0.394 [|||||||||||||||||||                               ]
6: 0.000 [                                                  ]
7: 0.000 [                                                  ]
8: 0.000 [                                                  ]
9: 0.000 [                                                  ]

Using gpu device 0: GeForce GT 750M
Loading data...
Input: [28 28] Output: 10
Building model...
Starting training...
Epoch 1 of 5 took 8.643s
  training loss:          1.237200
  validation loss:        0.277472
  validation accuracy:    91.83 %
Epoch 2 of 5 took 8.218s
  training loss:          0.318573
  validation loss:        0.158532
  validation accuracy:    95.33 %
Epoch 3 of 5 took 8.218s
  training loss:          0.218397
  validation loss:        0.117524
  validation accuracy:    96.50 %
Epoch 4 of 5 took 8.220s
  training loss:          0.173628
  validation loss:        0.097823
  validation accuracy:    97.04 %
Epoch 5 of 5 took 8.213s
  training loss:          0.146098
  validation loss:        0.086276
  validation accuracy:    97.36 %
Dumping to network.params.pkl
0: 0.000 [                                                  ]
1: 0.000 [                                                  ]
2: 0.000 [                                                  ]
3: 0.224 [|||||||||||                                       ]
4: 0.000 [                                                  ]
5: 0.776 [||||||||||||||||||||||||||||||||||||||            ]
6: 0.000 [                                                  ]
7: 0.000 [                                                  ]
8: 0.000 [                                                  ]
9: 0.000 [                                                  ]

Using gpu device 0: GeForce GT 750M
Loading data...
Input: [28 28] Output: 10
Building model...
0: 0.000 [                                                  ]
1: 0.000 [                                                  ]
2: 0.003 [                                                  ]
3: 0.637 [|||||||||||||||||||||||||||||||                   ]
4: 0.000 [                                                  ]
5: 0.359 [|||||||||||||||||                                 ]
6: 0.000 [                                                  ]
7: 0.000 [                                                  ]
8: 0.001 [                                                  ]
9: 0.000 [                                                  ]

Loading data...
Input: [28 28] Output: 10
Building model...
0: 0.002 [                                                  ]
1: 0.000 [                                                  ]
2: 0.002 [                                                  ]
3: 0.018 [                                                  ]
4: 0.001 [                                                  ]
5: 0.944 [|||||||||||||||||||||||||||||||||||||||||||||||   ]
6: 0.007 [                                                  ]
7: 0.003 [                                                  ]
8: 0.014 [                                                  ]
9: 0.010 [                                                  ]
The commands used to produce the output above:
# train the dense model on the GPU, then reload it on the GPU and on the CPU
export THEANO_FLAGS=device=gpu
python mnist_reload.py --train --run
python mnist_reload.py --run
export THEANO_FLAGS=device=cpu
python mnist_reload.py --run

# repeat with the convolutional model
export THEANO_FLAGS=device=gpu
python mnist_reload.py --train --run --use_conv
python mnist_reload.py --run --use_conv
export THEANO_FLAGS=device=cpu
python mnist_reload.py --run --use_conv

# train with cuDNN layers, then reload on the CPU into plain conv layers
# (no --use_dnn, since cuDNN is unavailable on the CPU)
export THEANO_FLAGS=device=gpu
python mnist_reload.py --train --run --use_conv --use_dnn
python mnist_reload.py --run --use_conv --use_dnn
export THEANO_FLAGS=device=cpu
python mnist_reload.py --run --use_conv
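
The device flag can also be set per invocation instead of exported, e.g.:

THEANO_FLAGS=device=cpu python mnist_reload.py --run --use_conv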