@mohitd
Created December 12, 2023 04:45
Artificial Neural Network Implementation
import numpy as np
from torchvision import datasets
from matplotlib import pyplot as plt
np.random.seed(42)

class Sigmoid:
    @staticmethod
    def forward(z):
        return 1. / (1. + np.exp(-z))

    @staticmethod
    def backward(z):
        return Sigmoid.forward(z) * (1 - Sigmoid.forward(z))

class QuadraticCost:
    @staticmethod
    def forward(a, y):
        return 0.5 * np.linalg.norm(a - y) ** 2

    @staticmethod
    def backward(a, y):
        return a - y
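
# For the quadratic cost C(a, y) = 0.5 * ||a - y||^2, the gradient with respect
# to the network output is dC/da = a - y, which is exactly what
# QuadraticCost.backward returns.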

class ArtificialNeuralNetwork:
    def __init__(self, layer_sizes: list[int], activation_fn=Sigmoid):
        self.layer_sizes = layer_sizes
        self.num_layers = len(layer_sizes)
        self.activation_fn = activation_fn
        # use a unit normal distribution to initialize weights and biases;
        # this performs better in practice than initializing to zeros.
        # note that the weight matrix for layer [i] maps neuron k in layer [i-1]
        # to neuron j in layer [i], so it has shape (layer_sizes[i], layer_sizes[i-1])
        self.weights = [np.random.randn(j, k)
                        for j, k in zip(layer_sizes[1:], layer_sizes[:-1])]
        # since the first layer is an input layer, it has no biases
        self.biases = [np.random.randn(j, 1) for j in layer_sizes[1:]]

    def forward(self, a):
        self.activations = [a]
        self.zs = []
        for W, b in zip(self.weights, self.biases):
            z = np.dot(W, a) + b
            self.zs.append(z)
            a = self.activation_fn.forward(z)
            self.activations.append(a)
        return a
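
    # _backward implements standard backpropagation. In the notation of this
    # code (delta = layer error, a = activation, z = pre-activation, W = weights):
    #   output layer:  delta^L = dC/da^L * sigma'(z^L)
    #   hidden layer:  delta^l = (W^(l+1)).T @ delta^(l+1) * sigma'(z^l)
    #   gradients:     dC/dW^l = delta^l @ (a^(l-1)).T    and    dC/db^l = delta^l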
    def _backward(self, y):
        nabla_W = [np.zeros(W.shape) for W in self.weights]
        nabla_b = [np.zeros(b.shape) for b in self.biases]

        # error at the output layer
        z = self.zs[-1]
        a_L = self.activations[-1]
        delta = self.cost.backward(a_L, y) * self.activation_fn.backward(z)

        # gradients of the last layer use the activation of the previous layer
        a = self.activations[-2]
        nabla_W[-1] = np.dot(delta, a.T)
        nabla_b[-1] = delta

        # propagate the error backwards through the remaining layers
        for l in range(2, self.num_layers):
            z = self.zs[-l]
            W = self.weights[-l+1]
            delta = np.dot(W.T, delta) * self.activation_fn.backward(z)
            a = self.activations[-l-1]
            nabla_W[-l] = np.dot(delta, a.T)
            nabla_b[-l] = delta
        return nabla_W, nabla_b

    def _zero_grad(self):
        self.nabla_W = [np.zeros(W.shape) for W in self.weights]
        self.nabla_b = [np.zeros(b.shape) for b in self.biases]

    def _accumulate_grad(self, d_nabla_W, d_nabla_b):
        self.nabla_W = [nw + dnw for nw, dnw in zip(self.nabla_W, d_nabla_W)]
        self.nabla_b = [nb + dnb for nb, dnb in zip(self.nabla_b, d_nabla_b)]

    def _step(self):
        self.weights = [w - (self.learning_rate / self.minibatch_size) * nw
                        for w, nw in zip(self.weights, self.nabla_W)]
        self.biases = [b - (self.learning_rate / self.minibatch_size) * nb
                       for b, nb in zip(self.biases, self.nabla_b)]
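
    # Together, _zero_grad / _accumulate_grad / _step implement vanilla minibatch
    # SGD: gradients are summed over the minibatch and the parameters are updated as
    #   w <- w - (eta / m) * sum_i dC_i/dw
    # where eta is the learning rate and m is the minibatch size (and likewise
    # for the biases).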

    def train(self, X_train, y_train, X_test, y_test, **kwargs):
        num_epochs = kwargs['num_epochs']
        self.minibatch_size = kwargs['minibatch_size']
        self.cost = kwargs['cost']
        self.learning_rate = kwargs['learning_rate']

        for epoch in range(num_epochs):
            # shuffle data each epoch
            permute_idxes = np.random.permutation(X_train.shape[0])
            X_train = X_train[permute_idxes]
            y_train = y_train[permute_idxes]

            epoch_cost = 0
            for start in range(0, X_train.shape[0], self.minibatch_size):
                minibatch_cost = 0
                # partition dataset into minibatches
                Xs = X_train[start:start+self.minibatch_size]
                ys = y_train[start:start+self.minibatch_size]

                self._zero_grad()
                for x_i, y_i in zip(Xs, ys):
                    a = self.forward(x_i)
                    d_nabla_W, d_nabla_b = self._backward(y_i)
                    self._accumulate_grad(d_nabla_W, d_nabla_b)
                    minibatch_cost += self.cost.forward(a, y_i)
                self._step()

                minibatch_cost = minibatch_cost / self.minibatch_size
                epoch_cost += minibatch_cost

            test_set_num_correct = self.num_correct(X_test, y_test)
            test_set_accuracy = test_set_num_correct / X_test.shape[0]
            print(f"Epoch {epoch+1}:"
                  f"\tLoss: {epoch_cost:.2f}"
                  f"\ttest set acc: {test_set_accuracy*100:.2f}% "
                  f"({test_set_num_correct} / {X_test.shape[0]})")

    def num_correct(self, X, Y):
        results = [(np.argmax(self.forward(x)), np.argmax(y)) for x, y in zip(X, Y)]
        return sum(int(x == y) for (x, y) in results)
# load MNIST dataset
train_dataset = datasets.MNIST('./data', train=True, download=True)
test_dataset = datasets.MNIST('./data', train=False, download=True)
X_train = train_dataset.data.numpy()
X_test = test_dataset.data.numpy()
# normalize training data to [0, 1]
X_train, X_test = X_train / 255., X_test / 255.
"""
# show example img data
fig, axes = plt.subplots(10, 10)
idx = 0
for i in range(10):
for j in range(10):
img = X_train[idx, :]
axes[i,j].imshow(img)
axes[i,j].axis('off')
idx += 1
plt.show()
import sys
sys.exit(1)
"""
# flatten image into 1d array
X_train, X_test = X_train.reshape(X_train.shape[0], -1), X_test.reshape(X_test.shape[0], -1)
# add extra trailing dimension for proper matrix/vector sizes
X_train, X_test = X_train[..., np.newaxis], X_test[..., np.newaxis]
print(f"Training set size: {X_train.shape}")
print(f"Testing set size: {X_test.shape}")

def to_onehot(y):
    """
    Convert index to one-hot representation
    """
    one_hot = np.zeros((y.shape[0], 10))
    one_hot[np.arange(y.shape[0]), y] = 1
    return one_hot
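
# e.g., to_onehot(np.array([2, 0])) produces two rows with a 1 at indices 2 and 0:
#   [[0, 0, 1, 0, ..., 0],
#    [1, 0, 0, 0, ..., 0]]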
y_train, y_test = train_dataset.targets.numpy(), test_dataset.targets.numpy()
y_train, y_test = to_onehot(y_train), to_onehot(y_test)
y_train, y_test = y_train[..., np.newaxis], y_test[..., np.newaxis]
print(f"Training target size: {y_train.shape}")
print(f"Test target size: {y_test.shape}")
ann = ArtificialNeuralNetwork(layer_sizes=[784, 32, 10])
training_params = {
    'num_epochs': 30,
    'minibatch_size': 16,
    'cost': QuadraticCost,
    'learning_rate': 3.0,
}
print(f'Training params: {training_params}')
ann.train(X_train, y_train, X_test, y_test, **training_params)
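
# Example: predicting a single test image with the trained network. Each row of
# X_test is a (784, 1) column vector, so forward() returns a (10, 1) output:
#   probs = ann.forward(X_test[0])
#   predicted_digit = np.argmax(probs)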