CIFAR-10 on Lasagne
import matplotlib
import numpy as np
np.random.seed(123)
import matplotlib.pyplot as plt
import theano
from theano.sandbox.cuda import dnn
# Confirm we are running on the GPU and that cuDNN is available.
print theano.config.device.startswith("gpu")
theano.config.floatX = 'float32'
from theano.sandbox.cuda.dnn import dnn_available
print dnn_available()
import theano.tensor as T
import lasagne
NUM_EPOCHS = 500
BATCH_SIZE = 256
LEARNING_RATE = 0.001
DIM = 32
NUM_CLASSES = 10
# Retrieve CIFAR-10. load_data() below reads the standard python batches
# from cifar-10-batches-py/, so fetch and extract the official tarball.
get_ipython().system(u'wget -N https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz')
get_ipython().system(u'tar -xzf cifar-10-python.tar.gz')
def unpickle(file):
    import cPickle
    fo = open(file, 'rb')
    d = cPickle.load(fo)
    fo.close()
    return d
def load_data():
    xs = []
    ys = []
    for j in range(5):
        d = unpickle('cifar-10-batches-py/data_batch_' + str(j + 1))
        x = d['data']
        y = d['labels']
        xs.append(x)
        ys.append(y)
    d = unpickle('cifar-10-batches-py/test_batch')
    xs.append(d['data'])
    ys.append(d['labels'])
    x = np.concatenate(xs) / np.float32(255)
    y = np.concatenate(ys)
    # Each row holds 3072 values: 1024 red, then 1024 green, then 1024 blue,
    # so a plain reshape yields the (channels, height, width) layout.
    x = x.reshape((x.shape[0], 3, 32, 32))
    return dict(
        X_train=lasagne.utils.floatX(x[0:40000]),
        Y_train=y[0:40000].astype('int32'),
        X_validation=lasagne.utils.floatX(x[40000:50000]),
        Y_validation=y[40000:50000].astype('int32'),
        X_test=lasagne.utils.floatX(x[50000:60000]),
        Y_test=y[50000:60000].astype('int32'))
data = load_data()
print data['X_test'].shape
plt.figure(figsize=(8, 8))
# Transpose from (channels, height, width) to (height, width, channels).
plt.imshow(np.transpose(data['X_test'][9998], (1, 2, 0)), interpolation='none')
plt.title('Label ' + str(data['Y_test'][9998]))
plt.axis('off')
plt.show()
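# Optional sketch (not in the original): eyeball a small grid of training
# images to verify the channel layout and labels look sensible.
plt.figure(figsize=(6, 6))
for i in range(16):
    plt.subplot(4, 4, i + 1)
    plt.imshow(np.transpose(data['X_train'][i], (1, 2, 0)),
               interpolation='none')
    plt.title(str(data['Y_train'][i]), fontsize=8)
    plt.axis('off')
plt.show()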
conv = lasagne.layers.Conv2DLayer
pool = lasagne.layers.Pool2DLayer
from lasagne.layers import InputLayer, DropoutLayer, FlattenLayer
def build_model(input_width, input_height, output_dim, input_var=None):
    # Network-in-Network style stack: 5x5 convolutions each followed by
    # 1x1 "cccp" convolutions, ending in global average pooling.
    ini = lasagne.init.HeUniform()
    l_in = InputLayer(shape=(None, 3, input_width, input_height),
                      input_var=input_var)
    conv1 = conv(l_in, num_filters=192, filter_size=(5, 5), W=ini, pad=2)
    cccp1 = conv(conv1, num_filters=160, filter_size=(1, 1), W=ini)
    cccp2 = conv(cccp1, num_filters=96, filter_size=(1, 1), W=ini)
    pool1 = pool(cccp2, pool_size=3, stride=2, mode='max',
                 ignore_border=False)
    drop3 = DropoutLayer(pool1, p=0.5)
    conv2 = conv(drop3, num_filters=192, filter_size=(5, 5), W=ini, pad=2)
    cccp3 = conv(conv2, num_filters=192, filter_size=(1, 1), W=ini)
    cccp4 = conv(cccp3, num_filters=96, filter_size=(1, 1), W=ini)
    pool2 = pool(cccp4, pool_size=3, stride=2, mode='average_exc_pad',
                 ignore_border=False)
    drop6 = DropoutLayer(pool2, p=0.5)
    conv3 = conv(drop6, num_filters=192, filter_size=(5, 5), W=ini, pad=2)
    cccp5 = conv(conv3, num_filters=192, filter_size=(1, 1), W=ini)
    cccp6 = conv(cccp5, num_filters=output_dim, filter_size=(1, 1), W=ini)
    # Global average pooling over the remaining 8x8 feature map.
    pool3 = pool(cccp6, pool_size=8, stride=2, mode='average_exc_pad',
                 ignore_border=False)
    # Softmax so categorical_crossentropy receives class probabilities.
    l_out = lasagne.layers.NonlinearityLayer(
        FlattenLayer(pool3), nonlinearity=lasagne.nonlinearities.softmax)
    return l_out
# Define the symbolic inputs before building the model so the InputLayer
# can be bound to them.
input_var = T.tensor4('inputs')
target_var = T.ivector('targets')
model = build_model(DIM, DIM, NUM_CLASSES, input_var=input_var)
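# Optional sketch (not in the original): print every layer's inferred output
# shape to sanity-check the stack (spatial size should go 32 -> 16 -> 8 -> 1).
for layer in lasagne.layers.get_all_layers(model):
    print layer.__class__.__name__, layer.output_shape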
# Create a loss expression for training, i.e., a scalar objective we want
# to minimize (for our multi-class problem, it is the cross-entropy loss):
prediction = lasagne.layers.get_output(model, input_var, deterministic=False)
loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
loss = loss.mean()
# We could add some weight decay as well here, see lasagne.regularization.
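# For instance, a minimal weight-decay sketch (the 1e-4 coefficient is an
# assumed value, not from the original); uncomment the last line to enable:
l2_penalty = lasagne.regularization.regularize_network_params(
    model, lasagne.regularization.l2)
# loss = loss + 1e-4 * l2_penalty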
# Create update expressions for training, i.e., how to modify the
# parameters at each training step. Here, we'll use Stochastic Gradient
# Descent (SGD) with Nesterov momentum, but Lasagne offers plenty more.
# Keep the learning rate in a shared variable so it can be decayed later.
params = lasagne.layers.get_all_params(model, trainable=True)
sh_lr = theano.shared(lasagne.utils.floatX(LEARNING_RATE))
updates = lasagne.updates.nesterov_momentum(
    loss, params, learning_rate=sh_lr, momentum=0.9)
# Create a loss expression for validation/testing. The crucial difference
# here is that we do a deterministic forward pass through the network,
# disabling dropout layers.
test_prediction = lasagne.layers.get_output(model, input_var,
                                            deterministic=True)
test_loss = lasagne.objectives.categorical_crossentropy(test_prediction,
                                                        target_var)
test_loss = test_loss.mean()
# As a bonus, also create an expression for the classification accuracy:
test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                  dtype=theano.config.floatX)
# Compile a function performing a training step on a mini-batch (by giving
# the updates dictionary) and returning the training loss and predictions:
train_fn = theano.function([input_var, target_var], [loss, prediction],
                           updates=updates)
# Compile a second function computing the validation loss and accuracy:
val_fn = theano.function([input_var, target_var], [test_loss, test_acc])
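# Optional sketch (not in the original): a prediction-only function for
# inference on unlabeled images, reusing the deterministic forward pass.
predict_fn = theano.function([input_var], T.argmax(test_prediction, axis=1))
# Example: predicted_labels = predict_fn(data['X_test'][:BATCH_SIZE])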
def train_epoch(X, y):
    num_samples = X.shape[0]
    num_batches = int(np.ceil(num_samples / float(BATCH_SIZE)))
    costs = []
    correct = 0
    for i in range(num_batches):
        idx = slice(i * BATCH_SIZE, min((i + 1) * BATCH_SIZE, num_samples))
        X_batch = X[idx]
        y_batch = y[idx]
        # train_fn returns both the batch loss and the batch predictions.
        cost_batch, output_train = train_fn(X_batch, y_batch)
        costs.append(cost_batch)
        preds = np.argmax(output_train, axis=-1)
        correct += np.sum(y_batch == preds)
    return np.mean(costs), correct / float(num_samples)
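# Optional sketch (not in the original): evaluate in mini-batches instead of
# feeding a whole 10000-image split through val_fn at once, which can exhaust
# GPU memory. Per-batch results are averaged (the last, smaller batch is
# weighted equally, a minor approximation).
def eval_epoch(X, y):
    num_samples = X.shape[0]
    num_batches = int(np.ceil(num_samples / float(BATCH_SIZE)))
    losses, accs = [], []
    for i in range(num_batches):
        sl = slice(i * BATCH_SIZE, min((i + 1) * BATCH_SIZE, num_samples))
        batch_loss, batch_acc = val_fn(X[sl], y[sl])
        losses.append(batch_loss)
        accs.append(batch_acc)
    return np.mean(losses), np.mean(accs)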
valid_accs, train_accs, test_accs = [], [], []
try:
    for n in range(NUM_EPOCHS):
        print "Epoch " + str(n)
        train_cost, train_acc = train_epoch(data['X_train'], data['Y_train'])
        # val_fn returns (loss, accuracy), in that order.
        valid_loss, valid_acc = val_fn(data['X_validation'],
                                       data['Y_validation'])
        test_loss_val, test_acc = val_fn(data['X_test'], data['Y_test'])
        valid_accs += [valid_acc]
        test_accs += [test_acc]
        train_accs += [train_acc]
        # Decay the learning rate by a factor of 0.7 every 20 epochs.
        if (n + 1) % 20 == 0:
            new_lr = sh_lr.get_value() * 0.7
            print "New LR:", new_lr
            sh_lr.set_value(lasagne.utils.floatX(new_lr))
        print "Epoch {0}: Train cost {1}, Train acc {2}, val acc {3}, test acc {4}".format(
            n, train_cost, train_acc, valid_acc, test_acc)
except KeyboardInterrupt:
    pass
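# Sketch (not in the original): plot the recorded accuracy curves and save
# the trained parameters; the .npz filename is an assumption.
plt.figure()
plt.plot(train_accs, label='train')
plt.plot(valid_accs, label='validation')
plt.plot(test_accs, label='test')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend(loc='lower right')
plt.show()
np.savez('nin_cifar10_params.npz',
         *lasagne.layers.get_all_param_values(model))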