CIFAR-10 on Lasagne
import matplotlib
import numpy as np
np.random.seed(123)
import matplotlib.pyplot as plt
import theano
import theano.tensor as T
theano.config.floatX = 'float32'
# Sanity-check that Theano is running on the GPU with cuDNN available.
assert theano.config.device.startswith("gpu")
from theano.sandbox.cuda.dnn import dnn_available
print dnn_available()
import lasagne
NUM_EPOCHS = 500
BATCH_SIZE = 256
LEARNING_RATE = 0.001
DIM = 32
NUM_CLASSES = 10
# Retrieve CIFAR-10: fetch and extract the pickled Python batches that
# load_data() below reads from cifar-10-batches-py/.
get_ipython().system(u'wget -N https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz')
get_ipython().system(u'tar -xzf cifar-10-python.tar.gz')
def unpickle(file):
    import cPickle
    fo = open(file, 'rb')
    d = cPickle.load(fo)
    fo.close()
    return d
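# For reference (an added line, not in the original gist): the extracted
# dataset ships a metadata pickle with the ten human-readable class names,
# handy for labeling the plots below.
label_names = unpickle('cifar-10-batches-py/batches.meta')['label_names']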
def load_data():
    xs = []
    ys = []
    for j in range(5):
        d = unpickle('cifar-10-batches-py/data_batch_' + str(j + 1))
        x = d['data']
        y = d['labels']
        xs.append(x)
        ys.append(y)
    d = unpickle('cifar-10-batches-py/test_batch')
    xs.append(d['data'])
    ys.append(d['labels'])
    x = np.concatenate(xs) / np.float32(255)
    y = np.concatenate(ys)
    # Re-order the flat per-channel pixel data into (N, 3, 32, 32) images.
    x = np.dstack((x[:, :1024], x[:, 1024:2048], x[:, 2048:]))
    x = x.reshape((x.shape[0], 32, 32, 3)).transpose(0, 3, 1, 2)
    # Split into 40k train / 10k validation / 10k test.
    return dict(
        X_train=lasagne.utils.floatX(x[:40000]),
        Y_train=y[:40000].astype('int32'),
        X_validation=lasagne.utils.floatX(x[40000:50000]),
        Y_validation=y[40000:50000].astype('int32'),
        X_test=lasagne.utils.floatX(x[50000:60000]),
        Y_test=y[50000:60000].astype('int32'))
data = load_data()
print data['X_test'].shape
plt.figure(figsize=(8,8))
# Move channels last for imshow: (3, 32, 32) -> (32, 32, 3).
plt.imshow(np.transpose(data['X_test'][9998], (1, 2, 0)), interpolation='none')
plt.title('Label '+str(data['Y_test'][9998]))
plt.axis('off')
plt.show()
conv = lasagne.layers.Conv2DLayer
pool = lasagne.layers.Pool2DLayer
from lasagne.layers import InputLayer, DropoutLayer, FlattenLayer
def build_model(input_width, input_height, output_dim,
                input_var=None, batch_size=BATCH_SIZE):
    # Network-in-Network style model: each 5x5 conv block is followed by
    # two 1x1 "cccp" convs, and the net ends in global average pooling.
    ini = lasagne.init.HeUniform()
    l_in = InputLayer(shape=(None, 3, input_width, input_height),
                      input_var=input_var)
    conv1 = conv(l_in, num_filters=192, filter_size=(5, 5), W=ini, pad=2)
    cccp1 = conv(conv1, num_filters=160, filter_size=(1, 1), W=ini)
    cccp2 = conv(cccp1, num_filters=96, filter_size=(1, 1), W=ini)
    pool1 = pool(cccp2, pool_size=3, stride=2, mode='max',
                 ignore_border=False)
    drop3 = DropoutLayer(pool1, p=0.5)
    conv2 = conv(drop3, num_filters=192, filter_size=(5, 5), W=ini, pad=2)
    cccp3 = conv(conv2, num_filters=192, filter_size=(1, 1), W=ini)
    cccp4 = conv(cccp3, num_filters=96, filter_size=(1, 1), W=ini)
    pool2 = pool(cccp4, pool_size=3, stride=2, mode='average_exc_pad',
                 ignore_border=False)
    drop6 = DropoutLayer(pool2, p=0.5)
    conv3 = conv(drop6, num_filters=192, filter_size=(5, 5), W=ini, pad=2)
    cccp5 = conv(conv3, num_filters=192, filter_size=(1, 1), W=ini)
    cccp6 = conv(cccp5, num_filters=output_dim, filter_size=(1, 1), W=ini)
    # Global average pooling over the remaining 8x8 feature map.
    pool3 = pool(cccp6, pool_size=8, stride=2, mode='average_exc_pad',
                 ignore_border=False)
    l_out = FlattenLayer(pool3)
    return l_out
# Define the symbolic input first so build_model can wire it into the
# InputLayer; target_var holds the integer class labels.
input_var = T.tensor4('inputs')
target_var = T.ivector('targets')
model = build_model(DIM, DIM, NUM_CLASSES, input_var=input_var)
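# Quick architecture sanity check (an added sketch, not in the original):
# walk the layer stack and print each output shape to confirm the 32x32
# input is reduced to a (None, 10) output by the final flatten.
for layer in lasagne.layers.get_all_layers(model):
    print layer.__class__.__name__, layer.output_shape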
# Create a loss expression for training, i.e., a scalar objective we want
# to minimize (for our multi-class problem, it is the cross-entropy loss):
prediction = lasagne.layers.get_output(model, deterministic=False)
loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
loss = loss.mean()
# We could add some weight decay as well here, see lasagne.regularization.
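# An optional sketch of that weight decay (an addition, not in the original
# gist); WEIGHT_DECAY is an assumed hyperparameter, disabled by default so
# the script's behavior is unchanged:
from lasagne.regularization import regularize_network_params, l2
WEIGHT_DECAY = 0.0  # e.g. 1e-4 to enable an L2 penalty on all weights
if WEIGHT_DECAY > 0:
    loss = loss + WEIGHT_DECAY * regularize_network_params(model, l2)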
# Create update expressions for training, i.e., how to modify the
# parameters at each training step. Here, we'll use Stochastic Gradient
# Descent (SGD) with Nesterov momentum, but Lasagne offers plenty more.
params = lasagne.layers.get_all_params(model, trainable=True)
# Keep the learning rate in a shared variable so it can be annealed during
# training (see the schedule in the loop below).
sh_lr = theano.shared(lasagne.utils.floatX(LEARNING_RATE))
updates = lasagne.updates.nesterov_momentum(
    loss, params, learning_rate=sh_lr, momentum=0.9)
# Create a loss expression for validation/testing. The crucial difference
# here is that we do a deterministic forward pass through the network,
# disabling dropout layers.
test_prediction = lasagne.layers.get_output(model, input_var, deterministic=True)
test_loss = lasagne.objectives.categorical_crossentropy(test_prediction,
target_var)
test_loss = test_loss.mean()
# As a bonus, also create an expression for the classification accuracy:
test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
dtype=theano.config.floatX)
# Compile a function performing a training step on a mini-batch (by giving
# the updates dictionary) and returning the corresponding training loss:
train_fn = theano.function([input_var, target_var], [loss, prediction], updates=updates)
# Compile a second function computing the validation loss and accuracy:
val_fn = theano.function([input_var, target_var], [test_loss, test_acc])
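# Calling val_fn on all 10000 held-out images at once can exhaust GPU
# memory; this batched wrapper (an added sketch, not in the original) can
# stand in for the direct val_fn calls in the loop below:
def eval_in_batches(X, y, batch_size=BATCH_SIZE):
    losses, accs = [], []
    for i in range(0, X.shape[0], batch_size):
        batch_loss, batch_acc = val_fn(X[i:i + batch_size], y[i:i + batch_size])
        losses.append(batch_loss)
        accs.append(batch_acc)
    # Note: a plain mean slightly over-weights a smaller final batch.
    return np.mean(losses), np.mean(accs)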
def train_epoch(X, y):
    num_samples = X.shape[0]
    num_batches = int(np.ceil(num_samples / float(BATCH_SIZE)))
    costs = []
    correct = 0
    for i in range(num_batches):
        idx = range(i * BATCH_SIZE, np.minimum((i + 1) * BATCH_SIZE, num_samples))
        X_batch = X[idx]
        y_batch = y[idx]
        # train_fn returns both the loss and the network output for the batch.
        cost_batch, output_train = train_fn(X_batch, y_batch)
        costs += [cost_batch]
        preds = np.argmax(output_train, axis=-1)
        correct += np.sum(y_batch == preds)
    return np.mean(costs), correct / float(num_samples)
valid_accs, train_accs, test_accs = [], [], []
try:
    for n in range(NUM_EPOCHS):
        print "Epoch " + str(n)
        train_cost, train_acc = train_epoch(data['X_train'], data['Y_train'])
        valid_loss, valid_acc = val_fn(data['X_validation'], data['Y_validation'])
        test_loss_value, test_acc = val_fn(data['X_test'], data['Y_test'])
        valid_accs += [valid_acc]
        test_accs += [test_acc]
        train_accs += [train_acc]
        # Anneal the learning rate by 30% every 20 epochs.
        if (n + 1) % 20 == 0:
            new_lr = sh_lr.get_value() * 0.7
            print "New LR:", new_lr
            sh_lr.set_value(lasagne.utils.floatX(new_lr))
        print "Epoch {0}: Train cost {1}, Train acc {2}, val acc {3}, test acc {4}".format(
            n, train_cost, train_acc, valid_acc, test_acc)
except KeyboardInterrupt:
    pass
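# The accuracy histories are collected but never visualized above; a small
# follow-up sketch (an addition, not in the original) that plots them with
# the already-imported matplotlib:
plt.figure(figsize=(9, 6))
plt.plot(train_accs, label='train')
plt.plot(valid_accs, label='validation')
plt.plot(test_accs, label='test')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(loc='lower right')
plt.show()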