""" | |
Simple MLP demo using [autograd](https://github.com/HIPS/autograd) | |
With l1 and l2 regularization. | |
Depends on autograd and scikit-learn (the latter for the mini digits dataset) | |
pip install autograd scikit-learn | |
""" | |
from autograd import numpy as np
from autograd import grad
from autograd import elementwise_grad as egrad
import sklearn.datasets as datasets
import time
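# Small helper functions used throughout the script: an ELU activation,
# a row-wise softmax (shifted by the max for numerical stability), argmax
# label extraction, classification accuracy, and feature normalization.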
elu = lambda x: x * (x >= 0.) + (np.exp(x) - 1) * (x < 0.)
grad_elu = grad(elu)  # note: autograd.grad expects a scalar output, so this only works for scalar x; it is not used below
softmax = lambda x: np.exp(x - np.max(x)) / np.sum(np.exp(x - np.max(x)), axis=1)[:, np.newaxis]
get_label = lambda x: 1. * np.argmax(x, axis=1)
accuracy = lambda tgt, pred: np.sum(get_label(pred) == get_label(tgt)) / tgt.shape[0]
normalize = lambda x: (x - np.mean(x)) / np.std(x)
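# Sanity check (illustrative): softmax(np.array([[0., 0.]])) -> [[0.5, 0.5]],
# i.e. each row of the softmax output sums to 1, and accuracy() compares the
# argmax class of the prediction against the argmax of the one-hot target.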
def labels_to_one_hot(tgts):
    number_classes = np.max(tgts) + 1
    number_samples = tgts.shape[0]
    one_hot = np.zeros((number_samples, number_classes))
    for ii in range(number_samples):
        one_hot[ii, tgts[ii]] = 1
    return one_hot
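# e.g. labels_to_one_hot(np.array([0, 2])) -> [[1., 0., 0.], [0., 0., 1.]]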
def mlp_forward(x, weights, activations):
    for ii in range(len(weights)):
        x = np.matmul(x, weights[ii])
        x = activations[ii](x)
    return x
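# mlp_forward applies a bias-free linear layer followed by its activation for
# each entry in `weights`; the last activation below is softmax, so the output
# rows are class probabilities.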
ce_loss = lambda y_tgts, y_pred: -np.sum(y_tgts * np.log(y_pred))

def get_loss(weights, activations, batch):
    y_pred = mlp_forward(batch[0], weights, activations)
    my_loss = ce_loss(batch[1], y_pred)
    # L2 penalty on the weights
    my_loss += 1e-1 * np.sum([np.sum(layer**2) for layer in weights])
    # L1 penalty on the weights
    my_loss += 1e-2 * np.sum([np.sum(np.abs(layer)) for layer in weights])
    return my_loss
if __name__ == "__main__":

    print("loading the digits dataset")
    [xx, tgts] = datasets.load_digits(return_X_y=True)
    xx = normalize(xx)

    print("converting labels to one-hot encoding")
    one_hot = labels_to_one_hot(tgts)

    # shuffle inputs and targets identically by re-seeding before each shuffle,
    # then split into validation, test, and training sets (10% / 10% / 80%)
    num_val = int(0.1 * xx.shape[0])
    np.random.seed(1337)
    np.random.shuffle(xx)
    np.random.seed(1337)
    np.random.shuffle(one_hot)

    x_val = xx[:num_val, ...]
    x_test = xx[num_val:num_val*2, ...]
    x_train = xx[2*num_val:, ...]

    y_val = one_hot[:num_val, ...]
    y_test = one_hot[num_val:num_val*2, ...]
    y_train = one_hot[2*num_val:, ...]
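    # load_digits provides 1797 samples of 8x8 digit images (64 features,
    # 10 classes), so the split above is roughly 179 / 179 / 1439 samples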
    # some parameters
    init_scale = 1e-2
    lr = 1e-3
    max_epochs = 300
    disp_every = 10
    batch_size = 128

    print("initializing mlp weights")
    dim_x, dim_y, dim_h = x_train.shape[1], y_train.shape[1], 128
    wx2h = init_scale * np.random.randn(dim_x, dim_h)
    wh2h = init_scale * np.random.randn(dim_h, dim_h)
    wh2y = init_scale * np.random.randn(dim_h, dim_y)
    weights = [wx2h, wh2h, wh2y]
    activations = [elu, elu, softmax]
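    # network: 64 input features -> 128 ELU -> 128 ELU -> 10-way softmax;
    # note that no bias terms are used in this demo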
    # gradient of the regularized loss with respect to the first argument (the weights)
    grad_loss = egrad(get_loss)

    # exponentially smoothed training loss and accuracy, for display only
    smooth_loss = 300.
    smooth_acc = 0.0
    loss_decay = 0.1

    t0 = time.time()
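    # plain mini-batch gradient descent: for each batch, update the smoothed
    # metrics, compute the gradient of the regularized loss, and take a step
    # of size `lr` on every weight matrix in place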
    for epoch in range(max_epochs):
        t1 = time.time()
        for batch_start in range(0, x_train.shape[0] - batch_size, batch_size):
            my_batch = [x_train[batch_start:batch_start + batch_size],
                        y_train[batch_start:batch_start + batch_size]]

            smooth_loss = (1 - loss_decay) * smooth_loss \
                    + loss_decay * get_loss(weights, activations, my_batch)
            y_pred = mlp_forward(my_batch[0], weights, activations)
            smooth_acc = (1 - loss_decay) * smooth_acc \
                    + loss_decay * accuracy(my_batch[1], y_pred)

            my_grad = grad_loss(weights, activations, my_batch)
            for params, grads in zip(weights, my_grad):
                params -= lr * grads
        if epoch % disp_every == 0:
            my_batch = [x_val, y_val]
            y_pred = mlp_forward(x_val, weights, activations)
            val_loss = ce_loss(y_val, y_pred)
            val_acc = accuracy(y_val, y_pred)
            t2 = time.time()
            print("epoch {}, training loss {:.2e}, train acc: {:.2e}, val loss {:.2e}, val accuracy {:.2e}"
                  .format(epoch, smooth_loss, smooth_acc, val_loss, val_acc))
            print("total time: {:.2f}, epoch time {:.2f}".format(t2 - t0, t2 - t1))