""" | |
Simple MLP demo using [autograd](https://github.com/HIPS/autograd) | |
With l1 and l2 regularization. | |
Depends on autograd and scikit-learn (the latter for the mini digits dataset) | |
pip install autograd scikit-learn | |
""" | |
from autograd import numpy as np
from autograd import grad
from autograd import elementwise_grad as egrad
import sklearn.datasets as datasets
import time
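# Small helper functions used throughout the script: an ELU activation,
# a row-wise softmax (shifted by the max for numerical stability), argmax
# label extraction, classification accuracy, and feature normalization.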
elu = lambda x: x * (x >= 0.) + (np.exp(x) - 1) * (x < 0.)
grad_elu = grad(elu)  # note: autograd.grad expects a scalar output, so this only works for scalar x; it is not used below
softmax = lambda x: np.exp(x - np.max(x)) / np.sum(np.exp(x - np.max(x)), axis=1)[:, np.newaxis]
get_label = lambda x: 1. * np.argmax(x, axis=1)
accuracy = lambda tgt, pred: np.sum(get_label(pred) == get_label(tgt)) / tgt.shape[0]
normalize = lambda x: (x - np.mean(x)) / np.std(x)
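# Sanity check (illustrative): softmax(np.array([[0., 0.]])) -> [[0.5, 0.5]],
# i.e. each row of the softmax output sums to 1, and accuracy() compares the
# argmax class of the prediction against the argmax of the one-hot target.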
def labels_to_one_hot(tgts):
    number_classes = np.max(tgts) + 1
    number_samples = tgts.shape[0]
    one_hot = np.zeros((number_samples, number_classes))
    for ii in range(number_samples):
        one_hot[ii, tgts[ii]] = 1
    return one_hot
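# e.g. labels_to_one_hot(np.array([0, 2])) -> [[1., 0., 0.], [0., 0., 1.]]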
def mlp_forward(x, weights, activations):
    for ii in range(len(weights)):
        x = np.matmul(x, weights[ii])
        x = activations[ii](x)
    return x
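# mlp_forward applies a bias-free linear layer followed by its activation for
# each entry in `weights`; the last activation below is softmax, so the output
# rows are class probabilities.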
ce_loss = lambda y_tgts, y_pred: -np.sum(y_tgts * np.log(y_pred))

def get_loss(weights, activations, batch):
    y_pred = mlp_forward(batch[0], weights, activations)
    my_loss = ce_loss(batch[1], y_pred)
    # L2 penalty on the weights
    my_loss += 1e-1 * np.sum([np.sum(layer**2) for layer in weights])
    # L1 penalty on the weights
    my_loss += 1e-2 * np.sum([np.sum(np.abs(layer)) for layer in weights])
    return my_loss
if __name__ == "__main__":

    print("loading the digits dataset")
    [xx, tgts] = datasets.load_digits(return_X_y=True)
    xx = normalize(xx)

    print("converting labels to one-hot encoding")
    one_hot = labels_to_one_hot(tgts)

    # shuffle inputs and targets identically by re-seeding before each shuffle,
    # then split into validation, test, and training sets (10% / 10% / 80%)
    num_val = int(0.1 * xx.shape[0])
    np.random.seed(1337)
    np.random.shuffle(xx)
    np.random.seed(1337)
    np.random.shuffle(one_hot)

    x_val = xx[:num_val, ...]
    x_test = xx[num_val:num_val*2, ...]
    x_train = xx[2*num_val:, ...]

    y_val = one_hot[:num_val, ...]
    y_test = one_hot[num_val:num_val*2, ...]
    y_train = one_hot[2*num_val:, ...]
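    # load_digits provides 1797 samples of 8x8 digit images (64 features,
    # 10 classes), so the split above is roughly 179 / 179 / 1439 samples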
    # some parameters
    init_scale = 1e-2
    lr = 1e-3
    max_epochs = 300
    disp_every = 10
    batch_size = 128

    print("initializing mlp weights")
    dim_x, dim_y, dim_h = x_train.shape[1], y_train.shape[1], 128
    wx2h = init_scale * np.random.randn(dim_x, dim_h)
    wh2h = init_scale * np.random.randn(dim_h, dim_h)
    wh2y = init_scale * np.random.randn(dim_h, dim_y)
    weights = [wx2h, wh2h, wh2y]
    activations = [elu, elu, softmax]
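    # network: 64 input features -> 128 ELU -> 128 ELU -> 10-way softmax;
    # note that no bias terms are used in this demo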
    # gradient of the regularized loss with respect to the first argument (the weights)
    grad_loss = egrad(get_loss)

    # exponentially smoothed training loss and accuracy, for display only
    smooth_loss = 300.
    smooth_acc = 0.0
    loss_decay = 0.1

    t0 = time.time()
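    # plain mini-batch gradient descent: for each batch, update the smoothed
    # metrics, compute the gradient of the regularized loss, and take a step
    # of size `lr` on every weight matrix in place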
    for epoch in range(max_epochs):
        t1 = time.time()
        for batch_start in range(0, x_train.shape[0] - batch_size, batch_size):
            my_batch = [x_train[batch_start:batch_start + batch_size],
                        y_train[batch_start:batch_start + batch_size]]

            smooth_loss = (1 - loss_decay) * smooth_loss \
                    + loss_decay * get_loss(weights, activations, my_batch)
            y_pred = mlp_forward(my_batch[0], weights, activations)
            smooth_acc = (1 - loss_decay) * smooth_acc \
                    + loss_decay * accuracy(my_batch[1], y_pred)

            my_grad = grad_loss(weights, activations, my_batch)
            for params, grads in zip(weights, my_grad):
                params -= lr * grads
        if epoch % disp_every == 0:
            my_batch = [x_val, y_val]
            y_pred = mlp_forward(x_val, weights, activations)
            val_loss = ce_loss(y_val, y_pred)
            val_acc = accuracy(y_val, y_pred)
            t2 = time.time()
            print("epoch {}, training loss {:.2e}, train acc: {:.2e}, val loss {:.2e}, val accuracy {:.2e}"
                  .format(epoch, smooth_loss, smooth_acc, val_loss, val_acc))
            print("total time: {:.2f}, epoch time {:.2f}".format(t2 - t0, t2 - t1))