Artificial Neural Network Implementation

import numpy as np
from torchvision import datasets
from matplotlib import pyplot as plt

np.random.seed(42)

class Sigmoid:
    @staticmethod
    def forward(z):
        return 1. / (1. + np.exp(-z))

    @staticmethod
    def backward(z):
        return Sigmoid.forward(z) * (1 - Sigmoid.forward(z))
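
# For example, Sigmoid.forward(0.) returns 0.5 and Sigmoid.backward(0.) returns
# 0.25, since sigma'(z) = sigma(z) * (1 - sigma(z)).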

class QuadraticCost:
    @staticmethod
    def forward(a, y):
        return 0.5 * np.linalg.norm(a - y) ** 2

    @staticmethod
    def backward(a, y):
        return a - y
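
# backward() is the gradient of the quadratic cost C = 0.5 * ||a - y||^2 with
# respect to the output activations a, i.e. dC/da = a - y.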

class ArtificialNeuralNetwork:
    def __init__(self, layer_sizes: list[int], activation_fn=Sigmoid):
        self.layer_sizes = layer_sizes
        self.num_layers = len(layer_sizes)
        self.activation_fn = activation_fn
        # use a unit normal distribution to initialize weights and biases;
        # this performs better in practice than initializing to zeros
        # note that the weight matrix for layer [i] maps neuron k in layer [i-1]
        # to neuron j in layer [i]
        self.weights = [np.random.randn(j, k)
                        for j, k in zip(layer_sizes[1:], layer_sizes[:-1])]
        # since the first layer is an input layer, we don't have biases for it
        self.biases = [np.random.randn(j, 1) for j in layer_sizes[1:]]
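
    # For example, layer_sizes=[784, 32, 10] gives weight matrices of shapes
    # (32, 784) and (10, 32), and bias vectors of shapes (32, 1) and (10, 1).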

    def forward(self, a):
        self.activations = [a]
        self.zs = []
        for W, b in zip(self.weights, self.biases):
            z = np.dot(W, a) + b
            self.zs.append(z)
            a = self.activation_fn.forward(z)
            self.activations.append(a)
        return a
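
    # Each layer computes z^l = W^l a^(l-1) + b^l and a^l = sigma(z^l); the
    # intermediate zs and activations are cached for use in backpropagation.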

    def _backward(self, y):
        nabla_W = [np.zeros(W.shape) for W in self.weights]
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        z = self.zs[-1]
        a_L = self.activations[-1]
        delta = self.cost.backward(a_L, y) * self.activation_fn.backward(z)
        a = self.activations[-1-1]
        nabla_W[-1] = np.dot(delta, a.T)
        nabla_b[-1] = delta
        for l in range(2, self.num_layers):
            z = self.zs[-l]
            W = self.weights[-l+1]
            delta = np.dot(W.T, delta) * self.activation_fn.backward(z)
            a = self.activations[-l-1]
            nabla_W[-l] = np.dot(delta, a.T)
            nabla_b[-l] = delta
        return nabla_W, nabla_b
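
    # Backpropagation recap: at the output layer, delta^L = dC/da^L * sigma'(z^L);
    # for earlier layers, delta^l = (W^(l+1))^T delta^(l+1) * sigma'(z^l); the
    # per-example gradients are dC/dW^l = delta^l (a^(l-1))^T and dC/db^l = delta^l.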

    def _zero_grad(self):
        self.nabla_W = [np.zeros(W.shape) for W in self.weights]
        self.nabla_b = [np.zeros(b.shape) for b in self.biases]

    def _accumulate_grad(self, d_nabla_W, d_nabla_b):
        self.nabla_W = [nw + dnw for nw, dnw in zip(self.nabla_W, d_nabla_W)]
        self.nabla_b = [nb + dnb for nb, dnb in zip(self.nabla_b, d_nabla_b)]

    def _step(self):
        self.weights = [w - (self.learning_rate / self.minibatch_size) * nw
                        for w, nw in zip(self.weights, self.nabla_W)]
        self.biases = [b - (self.learning_rate / self.minibatch_size) * nb
                       for b, nb in zip(self.biases, self.nabla_b)]
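
    # _step applies plain mini-batch gradient descent: each parameter moves by
    # (learning_rate / minibatch_size) times the summed per-example gradient,
    # i.e. the learning rate times the average gradient over the mini-batch.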

    def train(self, X_train, y_train, X_test, y_test, **kwargs):
        num_epochs = kwargs['num_epochs']
        self.minibatch_size = kwargs['minibatch_size']
        self.cost = kwargs['cost']
        self.learning_rate = kwargs['learning_rate']
        for epoch in range(num_epochs):
            # shuffle data each epoch
            permute_idxes = np.random.permutation(X_train.shape[0])
            X_train = X_train[permute_idxes]
            y_train = y_train[permute_idxes]
            epoch_cost = 0
            for start in range(0, X_train.shape[0], self.minibatch_size):
                minibatch_cost = 0
                # partition dataset into minibatches
                Xs = X_train[start:start+self.minibatch_size]
                ys = y_train[start:start+self.minibatch_size]
                self._zero_grad()
                for x_i, y_i in zip(Xs, ys):
                    a = self.forward(x_i)
                    d_nabla_W, d_nabla_b = self._backward(y_i)
                    self._accumulate_grad(d_nabla_W, d_nabla_b)
                    minibatch_cost += self.cost.forward(a, y_i)
                self._step()
                minibatch_cost = minibatch_cost / self.minibatch_size
                epoch_cost += minibatch_cost
            test_set_num_correct = self.num_correct(X_test, y_test)
            test_set_accuracy = test_set_num_correct / X_test.shape[0]
            print(f"Epoch {epoch+1}:"
                  f"\tLoss: {epoch_cost:.2f}"
                  f"\ttest set acc: {test_set_accuracy*100:.2f}% "
                  f"({test_set_num_correct} / {X_test.shape[0]})")

    def num_correct(self, X, Y):
        results = [(np.argmax(self.forward(x)), np.argmax(y)) for x, y in zip(X, Y)]
        return sum(int(x == y) for (x, y) in results)
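
# A minimal smoke-test sketch (illustrative only; the real MNIST training run is
# below): train a tiny network on random data to verify the shapes line up.
# toy_X = np.random.rand(8, 4, 1)
# toy_y = np.zeros((8, 2, 1))
# toy_y[np.arange(8), np.random.randint(2, size=8)] = 1
# toy_ann = ArtificialNeuralNetwork(layer_sizes=[4, 3, 2])
# toy_ann.train(toy_X, toy_y, toy_X, toy_y, num_epochs=1, minibatch_size=4,
#               cost=QuadraticCost, learning_rate=1.0)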

# load MNIST dataset
train_dataset = datasets.MNIST('./data', train=True, download=True)
test_dataset = datasets.MNIST('./data', train=False, download=True)
X_train = train_dataset.data.numpy()
X_test = test_dataset.data.numpy()
# normalize pixel values to [0, 1]
X_train, X_test = X_train / 255., X_test / 255.
""" | |
# show example img data | |
fig, axes = plt.subplots(10, 10) | |
idx = 0 | |
for i in range(10): | |
for j in range(10): | |
img = X_train[idx, :] | |
axes[i,j].imshow(img) | |
axes[i,j].axis('off') | |
idx += 1 | |
plt.show() | |
import sys | |
sys.exit(1) | |
""" | |

# flatten each image into a 1d array
X_train, X_test = X_train.reshape(X_train.shape[0], -1), X_test.reshape(X_test.shape[0], -1)
# add extra trailing dimension for proper matrix/vector sizes
X_train, X_test = X_train[..., np.newaxis], X_test[..., np.newaxis]
print(f"Training set size: {X_train.shape}")
print(f"Testing set size: {X_test.shape}")

def to_onehot(y):
    """
    Convert index to one-hot representation
    """
    one_hot = np.zeros((y.shape[0], 10))
    one_hot[np.arange(y.shape[0]), y] = 1
    return one_hot
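
# For example, to_onehot(np.array([3, 0])) gives
# [[0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
#  [1, 0, 0, 0, 0, 0, 0, 0, 0, 0]].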

y_train, y_test = train_dataset.targets.numpy(), test_dataset.targets.numpy()
y_train, y_test = to_onehot(y_train), to_onehot(y_test)
y_train, y_test = y_train[..., np.newaxis], y_test[..., np.newaxis]
print(f"Training target size: {y_train.shape}")
print(f"Test target size: {y_test.shape}")

ann = ArtificialNeuralNetwork(layer_sizes=[784, 32, 10])
training_params = {
    'num_epochs': 30,
    'minibatch_size': 16,
    'cost': QuadraticCost,
    'learning_rate': 3.0,
}
print(f'Training params: {training_params}')
ann.train(X_train, y_train, X_test, y_test, **training_params)
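
# example: once training finishes, the network can classify a single test image
prediction = np.argmax(ann.forward(X_test[0]))
print(f"Predicted digit for the first test image: {prediction}")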