Skip to content

Instantly share code, notes, and snippets.

Created December 12, 2023 04:45
Show Gist options
  • Save mohitd/609bba8838ff1a473dab74e829d31792 to your computer and use it in GitHub Desktop.
Save mohitd/609bba8838ff1a473dab74e829d31792 to your computer and use it in GitHub Desktop.
Artificial Neural Network Implementation
import numpy as np
from torchvision import datasets
from matplotlib import pyplot as plt
class Sigmoid:
    """Logistic sigmoid activation and its derivative.

    Both methods are stateless and element-wise, so they are exposed as
    static methods and can be called on the class (``Sigmoid.forward(z)``)
    or on an instance.
    """

    @staticmethod
    def forward(z):
        """Return ``1 / (1 + exp(-z))`` evaluated element-wise on ``z``."""
        return 1. / (1. + np.exp(-z))

    @staticmethod
    def backward(z):
        """Return the derivative of the sigmoid with respect to ``z``.

        Uses the identity ``sigma'(z) = sigma(z) * (1 - sigma(z))``.
        """
        # evaluate the sigmoid once and reuse it for both factors
        s = Sigmoid.forward(z)
        return s * (1 - s)
class QuadraticCost:
    """Quadratic (squared-error) cost and its gradient.

    Both methods are stateless, so they are exposed as static methods and
    can be called on the class or on an instance.
    """

    @staticmethod
    def forward(a, y):
        """Return ``0.5 * ||a - y||^2`` for output ``a`` and target ``y``."""
        return 0.5 * np.linalg.norm(a - y) ** 2

    @staticmethod
    def backward(a, y):
        """Return the gradient of the cost with respect to ``a``: ``a - y``."""
        return a - y
class ArtificialNeuralNetwork:
    """Fully connected feed-forward network trained with minibatch SGD.

    Samples are processed one at a time. Because biases are stored with
    shape ``(j, 1)`` and added to ``np.dot(W, a)``, each input is expected
    to be a column vector of shape ``(layer_sizes[0], 1)``.
    """

    def __init__(self, layer_sizes: list[int], activation_fn=Sigmoid):
        """Create a network with randomly initialised parameters.

        Args:
            layer_sizes: number of units in each layer, input layer first.
            activation_fn: object providing ``forward(z)`` and
                ``backward(z)`` (the activation and its derivative).
        """
        self.layer_sizes = layer_sizes
        self.num_layers = len(layer_sizes)
        self.activation_fn = activation_fn

        # use a unit normal distribution to initialize weights and biases
        # performs better in practice than initializing to zeros
        # note that weights are j in layer [i] to k in layer [i-1]
        self.weights = [np.random.randn(j, k)
                        for j, k in zip(layer_sizes[1:], layer_sizes[:-1])]
        # since the first layer is an input layer, we don't have biases for it
        self.biases = [np.random.randn(j, 1) for j in layer_sizes[1:]]

    def forward(self, a):
        """Propagate ``a`` through the network and return the output.

        The pre-activations and activations of every layer are cached on
        ``self.zs`` and ``self.activations`` because ``_backward`` reads them.
        """
        self.activations = [a]
        self.zs = []
        for W, b in zip(self.weights, self.biases):
            z = np.dot(W, a) + b
            a = self.activation_fn.forward(z)
            # cache for backpropagation
            self.zs.append(z)
            self.activations.append(a)
        return a

    def _backward(self, y):
        """Backpropagate the cost for the most recent ``forward`` call.

        Args:
            y: target for the sample last passed to ``forward``.

        Returns:
            ``(nabla_W, nabla_b)``: per-layer gradient lists with the same
            shapes as ``self.weights`` and ``self.biases``.
        """
        nabla_W = [None] * len(self.weights)
        nabla_b = [None] * len(self.biases)

        # error at the output layer
        delta = (self.cost.backward(self.activations[-1], y)
                 * self.activation_fn.backward(self.zs[-1]))
        nabla_W[-1] = np.dot(delta, self.activations[-2].T)
        nabla_b[-1] = delta

        # walk backwards through the remaining layers; ``layer`` counts from
        # the end, so -layer indexes the layer whose gradient is computed
        for layer in range(2, self.num_layers):
            z = self.zs[-layer]
            # weights connecting this layer to the one after it
            W_next = self.weights[-layer + 1]
            delta = np.dot(W_next.T, delta) * self.activation_fn.backward(z)
            nabla_W[-layer] = np.dot(delta, self.activations[-layer - 1].T)
            nabla_b[-layer] = delta
        return nabla_W, nabla_b

    def _zero_grad(self):
        """Reset the accumulated minibatch gradients to zero."""
        self.nabla_W = [np.zeros(W.shape) for W in self.weights]
        self.nabla_b = [np.zeros(b.shape) for b in self.biases]

    def _accumulate_grad(self, d_nabla_W, d_nabla_b):
        """Add one sample's gradients to the running minibatch totals."""
        self.nabla_W = [nw + dnw for nw, dnw in zip(self.nabla_W, d_nabla_W)]
        self.nabla_b = [nb + dnb for nb, dnb in zip(self.nabla_b, d_nabla_b)]

    def _step(self, batch_size=None):
        """Apply one gradient-descent update from the accumulated gradients.

        Args:
            batch_size: number of samples that were accumulated. Defaults to
                ``self.minibatch_size``; pass the real count when the final
                minibatch of an epoch is shorter than the nominal size.
        """
        if batch_size is None:
            batch_size = self.minibatch_size
        scale = self.learning_rate / batch_size
        self.weights = [w - scale * nw
                        for w, nw in zip(self.weights, self.nabla_W)]
        self.biases = [b - scale * nb
                       for b, nb in zip(self.biases, self.nabla_b)]

    def train(self, X_train, y_train, X_test, y_test, **kwargs):
        """Train with minibatch SGD, printing loss and test accuracy per epoch.

        Args:
            X_train, y_train: training inputs and targets, indexed by sample
                along the first axis.
            X_test, y_test: evaluation inputs and targets, same layout.
            **kwargs: required keys are ``num_epochs``, ``minibatch_size``,
                ``cost`` (object with ``forward(a, y)`` / ``backward(a, y)``)
                and ``learning_rate``.

        Raises:
            KeyError: if any required keyword argument is missing.
        """
        num_epochs = kwargs['num_epochs']
        self.minibatch_size = kwargs['minibatch_size']
        self.cost = kwargs['cost']
        self.learning_rate = kwargs['learning_rate']

        num_samples = X_train.shape[0]
        for epoch in range(num_epochs):
            # shuffle data each epoch
            permute_idxes = np.random.permutation(num_samples)
            X_train = X_train[permute_idxes]
            y_train = y_train[permute_idxes]

            epoch_cost = 0
            for start in range(0, num_samples, self.minibatch_size):
                # partition dataset into minibatches
                Xs = X_train[start:start+self.minibatch_size]
                ys = y_train[start:start+self.minibatch_size]
                batch_size = len(Xs)

                # gradients must start from zero for every minibatch
                self._zero_grad()
                minibatch_cost = 0
                for x_i, y_i in zip(Xs, ys):
                    a = self.forward(x_i)
                    d_nabla_W, d_nabla_b = self._backward(y_i)
                    self._accumulate_grad(d_nabla_W, d_nabla_b)
                    minibatch_cost += self.cost.forward(a, y_i)
                # update parameters with the mean gradient of this minibatch
                self._step(batch_size=batch_size)

                # average over the samples actually seen (the last minibatch
                # may be shorter than self.minibatch_size)
                epoch_cost += minibatch_cost / batch_size

            test_set_num_correct = self.num_correct(X_test, y_test)
            test_set_accuracy = test_set_num_correct / X_test.shape[0]
            print(
                f"Epoch {epoch+1}: "
                f"\tLoss: {epoch_cost:.2f} "
                f"\ttest set acc: {test_set_accuracy*100:.2f}% "
                f"({test_set_num_correct} / {X_test.shape[0]})"
            )

    def num_correct(self, X, Y):
        """Count samples whose arg-max prediction equals the arg-max of the target."""
        return sum(
            int(np.argmax(self.forward(x)) == np.argmax(y))
            for x, y in zip(X, Y)
        )
# load MNIST dataset
train_dataset = datasets.MNIST('./data', train=True, download=True)
test_dataset = datasets.MNIST('./data', train=False, download=True)
# pull the raw images out of the torchvision datasets as NumPy arrays
X_train = train_dataset.data.numpy()
X_test = test_dataset.data.numpy()
# normalize training data to [0, 1]
X_train, X_test = X_train / 255., X_test / 255.
# show example img data: a 10x10 grid of the first 100 training images
fig, axes = plt.subplots(10, 10)
idx = 0
for i in range(10):
    for j in range(10):
        img = X_train[idx, :]
        axes[i, j].imshow(img, cmap='gray')
        axes[i, j].axis('off')
        idx += 1
plt.show()
# NOTE(review): sys is not used anywhere in the visible code; kept in case
# another part of the file relies on it
import sys
# flatten image into 1d array
X_train, X_test = X_train.reshape(X_train.shape[0], -1), X_test.reshape(X_test.shape[0], -1)
# add extra trailing dimension for proper matrix/vector sizes
X_train, X_test = X_train[..., np.newaxis], X_test[..., np.newaxis]
print(f"Training set size: {X_train.shape}")
print(f"Testing set size: {X_test.shape}")
def to_onehot(y, num_classes=10):
    """Convert index to one-hot representation.

    Args:
        y: 1-D sequence or array of integer class indices, each in
            ``[0, num_classes)``.
        num_classes: width of the one-hot encoding (10 by default).

    Returns:
        Float array of shape ``(len(y), num_classes)`` in which row ``i`` is
        all zeros except for a 1 in column ``y[i]``.

    Raises:
        IndexError: if any index falls outside ``[-num_classes, num_classes)``.
    """
    # an integer dtype is required for fancy indexing (also covers empty input)
    y = np.asarray(y, dtype=int)
    one_hot = np.zeros((y.shape[0], num_classes))
    one_hot[np.arange(y.shape[0]), y] = 1
    return one_hot
# convert the integer labels into one-hot column vectors
y_train, y_test = train_dataset.targets.numpy(), test_dataset.targets.numpy()
y_train, y_test = to_onehot(y_train), to_onehot(y_test)
# add extra trailing dimension so each target is a column vector
y_train, y_test = y_train[..., np.newaxis], y_test[..., np.newaxis]
print(f"Training target size: {y_train.shape}")
print(f"Test target size: {y_test.shape}")
# 784 inputs (flattened image), one hidden layer of 32 units, 10 output classes
ann = ArtificialNeuralNetwork(layer_sizes=[784, 32, 10])
training_params = {
    'num_epochs': 30,
    'minibatch_size': 16,
    'cost': QuadraticCost,
    'learning_rate': 3.0,
}
print(f'Training params: {training_params}')
ann.train(X_train, y_train, X_test, y_test, **training_params)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment