Back-propagating neural network with ReLU activation using only NumPy. Example uses MNIST dataset.
import numpy as np
import gzip
import pickle
from tqdm import tqdm_notebook as tqdm
from abc import ABC, abstractmethod
# load example dataset from http://deeplearning.net/data/mnist/mnist.pkl.gz #
with gzip.open('mnist.pkl.gz', 'rb') as f:
    train_set, valid_set, test_set = pickle.load(f, encoding='bytes')
# establish training data #
train_imgs, train_labels = train_set
# outline layers #
class Layer(ABC):
    def __init__(self):
        pass

    @abstractmethod
    def forward(self, h_in):
        raise NotImplementedError

    @abstractmethod
    def backward_input(self, h_in, h_out, d_hout):
        raise NotImplementedError


class ParamLayer(Layer):
    def __init__(self):
        self.params = dict()

    @abstractmethod
    def backward_param(self, h_in, h_out, d_hout):
        raise NotImplementedError
# initialize weights (glorot_uniform) #
def weight_init(out_size, in_size):
    limit = np.sqrt(6.0 / (in_size + out_size))
    return np.random.uniform(-limit, limit, size=[out_size, in_size]).astype(np.float32)
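# optional sanity check on a hypothetical 1024 x 784 weight matrix (sizes chosen only for illustration):
# every entry should sit inside the glorot limit sqrt(6 / (fan_in + fan_out)) #
_w = weight_init(1024, 784)
assert _w.shape == (1024, 784) and _w.dtype == np.float32
assert np.abs(_w).max() <= np.sqrt(6.0 / (784 + 1024)) + 1e-6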
# helper to one-hot encode labels: 1 at the true class, 0 elsewhere #
def get_onehot(labels):
    one_hot = np.zeros((len(labels), 10), dtype=np.float32)
    one_hot[range(len(labels)), labels] = 1.0
    return one_hot
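# quick example: label 2 maps to a length-10 row with a 1 in position 2 #
assert np.array_equal(get_onehot(np.array([2]))[0], np.eye(10, dtype=np.float32)[2])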
# Mean Square Error layer #
class MeanSquareError(Layer):
    def forward(self, labels, predict):
        return 2 * ((.5 * (predict - get_onehot(labels)) ** 2).sum(axis=1)).mean(axis=0)

    def backward_input(self, labels, predict):
        return predict - get_onehot(labels)
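# forward reduces to the mean over the batch of sum_k (predict_k - onehot_k)^2;
# backward_input drops the constant 2 / batch_size factor, which is effectively absorbed into the learning rate #
# worked example on a single made-up all-zero prediction with true class 1:
_mse = MeanSquareError()
assert np.isclose(_mse.forward(np.array([1]), np.zeros((1, 10), dtype=np.float32)), 1.0)  # only the true class contributes (0 - 1)^2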
# Fully Connected Layer #
class FCLayer(ParamLayer):
    def __init__(self, out_size, in_size):
        super(FCLayer, self).__init__()
        self.out_size = out_size
        self.in_size = in_size
        self.params["weight"] = weight_init(out_size, in_size)
        self.params["bias"] = np.zeros(out_size, dtype=np.float32)  # 1 x out_size

    def forward(self, x):
        return np.dot(x, self.params["weight"].T) + self.params["bias"]

    def backward_input(self, x, u, de_du):
        return np.dot(de_du, self.params["weight"])

    def backward_param(self, x, u, de_du):
        return {"weight": np.einsum('ij,ik->jk', de_du, x), "bias": np.sum(de_du, axis=0)}
# Sigmoid activation #
class SigmoidLayer(Layer):
    def forward(self, u):
        return 1 / (1 + np.exp(-u))

    def backward_input(self, u, O, de_dO):
        # multiply the upstream gradient element-wise by d(sigmoid)/du
        return de_dO * (np.exp(-u) / ((1 + np.exp(-u)) ** 2))
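# equivalent form of the derivative: d(sigmoid)/du = sigmoid(u) * (1 - sigmoid(u));
# quick numeric check on a small made-up input range #
_u = np.linspace(-3, 3, 7)
_s = 1 / (1 + np.exp(-_u))
assert np.allclose(np.exp(-_u) / ((1 + np.exp(-_u)) ** 2), _s * (1 - _s))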
# ReLU activation #
class ReLULayer(Layer):
    def forward(self, h_in):
        return np.maximum(0, h_in)

    def backward_input(self, h_in, h_out, d_hout):
        d_hout[h_in <= 0] = 0
        return d_hout
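# quick check on made-up values: the gradient passes through where h_in > 0 and is zeroed where h_in <= 0 #
_relu = ReLULayer()
_h_in = np.array([-1.0, 2.0], dtype=np.float32)
_grad = _relu.backward_input(_h_in, _relu.forward(_h_in), np.array([5.0, 5.0], dtype=np.float32))
assert np.array_equal(_grad, np.array([0.0, 5.0], dtype=np.float32))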
# to boost NN accuracy, use the ReLU activation function; SigmoidLayer can be substituted here #
ACT_FUNC = ReLULayer
# Multi-layer perceptron #
class MLP(object):
    def __init__(self, in_dim, hidden_dims, out_dim):
        self.act_layer = ACT_FUNC()
        dims = [in_dim] + hidden_dims + [out_dim]
        self.fc_layers = []
        for i in range(len(dims) - 1):
            fc_layer = FCLayer(out_size=dims[i + 1], in_size=dims[i])
            self.fc_layers.append(fc_layer)
        self.loss_func = MeanSquareError()

    def forward(self, img_input, img_label):
        x = img_input
        self.hiddens = [x]
        for i in range(len(self.fc_layers)):
            x = self.fc_layers[i].forward(x)
            self.hiddens.append(x)
            if i + 1 < len(self.fc_layers):
                x = self.act_layer.forward(x)
                self.hiddens.append(x)
        logits = x
        loss = self.loss_func.forward(img_label, logits)
        predict = np.argmax(logits, axis=1)
        accuracy = np.mean(predict == img_label)
        return loss, accuracy

    def backward(self, img_label):
        grad = self.loss_func.backward_input(img_label, self.hiddens[-1])
        idx = len(self.hiddens) - 1
        self.layer_grads = [None] * len(self.fc_layers)
        for i in range(len(self.fc_layers) - 1, -1, -1):
            assert idx >= 1
            g_param = self.fc_layers[i].backward_param(self.hiddens[idx - 1], self.hiddens[idx], grad)
            self.layer_grads[i] = g_param
            grad = self.fc_layers[i].backward_input(self.hiddens[idx - 1], self.hiddens[idx], grad)
            idx -= 1
            if i > 0:
                grad = self.act_layer.backward_input(self.hiddens[idx - 1], self.hiddens[idx], grad)
                idx -= 1
        assert idx == 0

    def update(self, learning_rate):
        for i in range(len(self.fc_layers)):
            grad_params = self.layer_grads[i]
            params = self.fc_layers[i].params
            params['weight'] -= learning_rate * grad_params['weight']
            params['bias'] -= learning_rate * grad_params['bias']
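# optional smoke test on made-up data (hypothetical sizes: batch of 8, one hidden layer of 32) to confirm
# that a forward/backward/update step runs and that gradient shapes match the parameters #
_net = MLP(784, [32], 10)
_xb = np.random.rand(8, 784).astype(np.float32)
_yb = np.random.randint(0, 10, size=8)
_loss, _acc = _net.forward(_xb, _yb)
_net.backward(_yb)
_net.update(1e-3)
assert all(g["weight"].shape == l.params["weight"].shape for g, l in zip(_net.layer_grads, _net.fc_layers))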
# initialize MLP #
net = MLP(784, [1024, 1024], 10)
def loop_over_dataset(net, imgs, labels, is_training, batch_size=100):
    loss_list = []
    acc_list = []
    pbar = range(0, imgs.shape[0], batch_size)
    if is_training:
        pbar = tqdm(pbar)
    for i in pbar:
        x = imgs[i: i + batch_size, :]
        y = labels[i: i + batch_size]
        loss, acc = net.forward(x, y)
        if is_training:
            net.backward(y)
            net.update(5e-5)  # learning rate
        loss_list.append(loss)
        acc_list.append(acc)
        if is_training:
            pbar.set_description('loss: %.4f, acc: %.4f' % (loss, acc))
    if not is_training:
        print('average loss:', np.mean(loss_list))
        print('average accuracy:', np.mean(acc_list))
# train N epochs #
num_epochs = 10
for e in range(num_epochs):
    print('training epoch', e + 1)
    loop_over_dataset(net, train_imgs, train_labels, is_training=True)
    print('validation')
    loop_over_dataset(net, valid_set[0], valid_set[1], is_training=False)
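# the same loop can report held-out performance on the test split loaded above #
print('test')
loop_over_dataset(net, test_set[0], test_set[1], is_training=False)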