A Neural Network in Python From Start to Finish
import numpy as np
import matplotlib.pyplot as plt
from urllib import request
import gzip
import pickle
import os
"""Load up the data.
This is not that interesting. Let's just copy paste it from https://github.com/hsjeong5/MNIST-for-Numpy."""
filename = [
    ["training_images", "train-images-idx3-ubyte.gz"],
    ["test_images", "t10k-images-idx3-ubyte.gz"],
    ["training_labels", "train-labels-idx1-ubyte.gz"],
    ["test_labels", "t10k-labels-idx1-ubyte.gz"],
]
def download_mnist():
    base_url = "http://yann.lecun.com/exdb/mnist/"
    for name in filename:
        print("Downloading " + name[1] + "...")
        request.urlretrieve(base_url + name[1], name[1])
    print("Download complete.")
def save_mnist():
    mnist = {}
    for name in filename[:2]:
        with gzip.open(name[1], 'rb') as f:
            mnist[name[0]] = np.frombuffer(f.read(), np.uint8, offset=16).reshape(-1, 28*28)
    for name in filename[-2:]:
        with gzip.open(name[1], 'rb') as f:
            mnist[name[0]] = np.frombuffer(f.read(), np.uint8, offset=8)
    with open("mnist.pkl", 'wb') as f:
        pickle.dump(mnist, f)
    print("Save complete.")
def init():
    download_mnist()
    save_mnist()
def load():
    with open("mnist.pkl", 'rb') as f:
        mnist = pickle.load(f)
    return mnist["training_images"], mnist["training_labels"], mnist["test_images"], mnist["test_labels"]
if 'mnist.pkl' not in os.listdir('.'):
    init()
# Here we have our 28x28 grayscale images of the digits and the matching labels.
x_train, t_train, x_test, t_test = load()
""" End of data loading copy paste """
"""Beginning of the good stuff. Building the network."""
# Use sigmoid as the activation function.
def sigmoid_func(X):
    return 1 / (1 + np.e**(-X))
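# This squashes any real-valued input into (0, 1); np.e**(-X) is equivalent to np.exp(-X).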
# A layer is just a matrix of weights.
def one_layer_init(input_size, output_size):
    Theta = np.random.uniform(low=-.3, high=0.3, size=(output_size, input_size))
    return add_bias(Theta)
# And a bias.
def add_bias(X):
    return np.concatenate([np.ones((X.shape[0], 1)), X], axis=1)
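# For example, one_layer_init(784, 300) yields a (300, 785) matrix: 784 weights plus one bias weight per output unit.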
# Compute the activation of a layer given its weight matrix and the previous
# layer's activation (or input if it's the first layer).
def compute_layer(A, Theta):
    return sigmoid_func(A @ Theta.T)
def one_layer_output(X, Theta):
    return compute_layer(add_bias(X), Theta)
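# A quick shape check (illustration only, not part of the training run below):
# Theta_demo = one_layer_init(784, 10)             # (10, 785)
# one_layer_output(x_train[:5], Theta_demo).shape  # -> (5, 10)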
# A network is just a list of layers with matching input/output dimensions.
def n_layer_init(layer_sizes):
    layers = [one_layer_init(layer_sizes[i-1], layer_sizes[i]) for i in range(1, len(layer_sizes))]
    return layers
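# For example, n_layer_init((784, 300, 100, 10)) returns matrices of shapes (300, 785), (100, 301) and (10, 101).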
# Get an output by pushing the activations through each layer.
def n_layer_output(X, Theta):
    current_output = one_layer_output(X, Theta[0])
    for i in range(1, len(Theta)):
        current_output = one_layer_output(current_output, Theta[i])
    return current_output
# Cross-entropy loss function.
def cost_function(A, Y):
    return -np.sum(Y * np.log(A) + (1-Y) * np.log(1-A))
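# That is, J = -sum over all examples and output units of [y * log(a) + (1 - y) * log(1 - a)];
# it only reaches 0 when the network assigns full confidence to every correct label.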
# Compute delta term for the output layer.
def output_delta(A_j, Y):
    return A_j - Y
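# With sigmoid outputs and the cross-entropy cost above, the gradient of the cost with
# respect to the output layer's pre-activations simplifies to exactly A_j - Y.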
# Compute delta for a hidden layer.
def hidden_delta(A_j, Delta_next, Theta_j):
    return ((1 - A_j) * A_j) * (Delta_next @ Theta_j)
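# (1 - A_j) * A_j is the sigmoid's derivative written in terms of its own output,
# and Delta_next @ Theta_j propagates the next layer's deltas back through its weights.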
# Update weights given activations, the delta terms from the next layer,
# the layer's weight matrix and the learning rate.
def weight_update(A_j, Delta_next, Theta_j, rate):
    return Theta_j - rate * np.dot(Delta_next.T, A_j)
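# A plain batch gradient-descent step: Theta_j := Theta_j - rate * Delta_next.T @ A_j
# (no momentum, regularization or mini-batching).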
# Combine it all in a training routine.
def three_layer_training(X, Y, Theta_0, Theta_1, Theta_2, iters=5000, rate=0.9):
    A_0 = add_bias(X)
    costs = []
    for i in range(iters):
        # Forward pass
        A_1 = add_bias(compute_layer(A_0, Theta_0))
        A_2 = add_bias(compute_layer(A_1, Theta_1))
        A_3 = compute_layer(A_2, Theta_2)
        cost = cost_function(A_3, Y)
        print('Cost is now:', cost)
        costs.append(cost)
        # Backward pass
        Delta_3 = output_delta(A_3, Y)
        Delta_2 = hidden_delta(A_2, Delta_3, Theta_2)[:, 1:]
        Delta_1 = hidden_delta(A_1, Delta_2, Theta_1)[:, 1:]
        # Weight updates
        Theta_2 = weight_update(A_2, Delta_3, Theta_2, rate)
        Theta_1 = weight_update(A_1, Delta_2, Theta_1, rate)
        Theta_0 = weight_update(A_0, Delta_1, Theta_0, rate)
    plt.plot(costs)
    plt.show()
    return [Theta_0, Theta_1, Theta_2]
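# About the [:, 1:] slices in the backward pass: the bias column that add_bias prepends to each
# activation has no incoming weights, so its delta is dropped before propagating further back.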
"""Some helper functions."""
# Reshape the data so that we have a one-hot matrix for the targets.
# https://en.wikipedia.org/wiki/One-hot
def one_hot(t):
    one_hot = np.zeros((len(t), 10))
    one_hot[np.arange(len(t)), t] = 1
    return one_hot
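# e.g. one_hot(np.array([0, 3])) -> [[1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
#                                    [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]]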
# And a validation function to see how the network did.
# This just compares the network's predicted classes to the real labels and returns the fraction correct.
def validate(A, Y):
    return np.sum(np.argmax(A, axis=1) == np.argmax(Y, axis=1)) / Y.shape[0]
train_targets = one_hot(t_train)
test_targets = one_hot(t_test)
"""We have all the functions we need now, let's put it to work."""
Theta = n_layer_init(layer_sizes=(784, 300, 100, 10))
Theta = three_layer_training(x_train, train_targets, Theta[0], Theta[1], Theta[2], 200, 0.000005)
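# The learning rate here is far below the function's default of 0.9, presumably to compensate for
# feeding in raw 0-255 pixel values; scaling the inputs (e.g. x_train / 255) would be a common
# alternative, though that is not what this gist does.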
"""That was that, now to validate the results..."""
output = n_layer_output(x_train, Theta)
targets = one_hot(t_train)
print(f'Here\'s how the network did on the train set: {validate(output, targets)}')
output = n_layer_output(x_test, Theta)
targets = one_hot(t_test)
print(f'Here\'s how the network did on the test set: {validate(output, targets)}')