@kbarbary
Created February 27, 2018 16:39
What does a minimal implementation of a multi-layer dense neural net (with backpropagation) look like?
"""A minimal implementation of a dense neural net with an arbitrary
number of layers, backpropagation, and a few different activation functions."""
import numpy as np
# Activation and cost functions (with gradients)
def sigmoid(x):
    y = 1.0 / (1.0 + np.exp(-x))
    return y, y * (1.0 - y)


def relu(x):
    y = np.maximum(0.0, x)
    grad = (x > 0.0).astype(np.float64)
    return y, grad


def tanh(x):
    y = np.tanh(x)
    return y, 1.0 - y**2


def cross_entropy_cost(A, Y):
    m = Y.shape[1]
    cost = (1.0 / m) * np.sum(-Y * np.log(A) - (1.0 - Y) * np.log(1.0 - A))
    dA = (1.0 / m) * (-(Y / A) + (1.0 - Y) / (1.0 - A))  # dcost/dA
    return cost, dA
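
# Note: cross_entropy_cost assumes A lies strictly in (0, 1). When the output
# layer is a sigmoid, dA multiplied by the sigmoid gradient A * (1 - A) in the
# last layer's backward pass collapses to dZ = (A - Y) / m, the usual
# cross-entropy/sigmoid shortcut; the two factors are kept separate here so
# any activation can be used on the output layer.

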
class Layer(object):
    def __init__(self, n_in: int, n_out: int, activation):
        self.g = activation
        self.W = np.random.normal(scale=0.01, size=(n_out, n_in))
        self.b = np.zeros((n_out, 1))
        self._cache = {}

    def __call__(self, X):
        """Forward propagation (and cache intermediate results)"""
        Z = self.W @ X + self.b
        A, dAdZ = self.g(Z)
        self._cache['X'] = X
        self._cache['Z'] = Z
        self._cache['dAdZ'] = dAdZ
        return A

    def backward(self, dA, alpha):
        """Backward propagation and update parameters"""
        dZ = dA * self._cache['dAdZ']
        dW = dZ @ self._cache['X'].T
        db = np.sum(dZ, axis=1, keepdims=True)
        dX = self.W.T @ dZ

        # update
        self.W -= alpha * dW
        self.b -= alpha * db

        return dX
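
# A quick shape check for a single layer (illustrative sketch, not part of the
# original demo):
#     layer = Layer(3, 2, relu)                         # W: (2, 3), b: (2, 1)
#     A = layer(np.random.randn(3, 5))                  # 5 example columns -> A: (2, 5)
#     dX = layer.backward(np.ones((2, 5)), alpha=0.1)   # dX: (3, 5); W and b are updated

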
class NeuralNetwork(object):
    def __init__(self, layer_sizes, activations):
        assert len(activations) == len(layer_sizes) - 1
        self.layers = [Layer(layer_sizes[i], layer_sizes[i+1], activations[i])
                       for i in range(len(activations))]
        self.costs = []

    def __call__(self, X):
        for layer in self.layers:
            X = layer(X)
        return X

    def train(self, X, Y, niter=100, alpha=0.05):
        for i in range(niter):
            A = self(X)
            cost, dA = cross_entropy_cost(A, Y)

            # backprop and update parameters via gradient descent
            for layer in reversed(self.layers):
                dA = layer.backward(dA, alpha)

            self.costs.append(cost)
            if not (i % 10):
                print(i, "cost =", cost)

        return self
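
# Usage sketch (hypothetical toy problem; the MNIST demo below is the real
# test). Inputs and targets are column-per-example arrays, i.e. shapes
# (n_features, m) and (n_outputs, m):
#     net = NeuralNetwork([2, 8, 1], [tanh, sigmoid])
#     X_toy = np.array([[0., 0., 1., 1.],
#                       [0., 1., 0., 1.]])   # (2, 4)
#     Y_toy = np.array([[0., 1., 1., 0.]])   # (1, 4)
#     net.train(X_toy, Y_toy, niter=1000, alpha=0.5)
#     net(X_toy)                             # forward pass -> (1, 4)

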
# Testing

import os
import gzip
from urllib.request import urlopen


def download_gzip_file(url, file_name):
    """Download a gzipped file from `url` and write the decompressed
    contents to `file_name`."""
    response = gzip.GzipFile(fileobj=urlopen(url))
    with open(file_name, 'wb') as f:
        f.write(response.read())


def read_idx(fname):
    """Read an IDX format file into a numpy array. IDX is a very simple
    binary format described here: http://yann.lecun.com/exdb/mnist/"""
    with open(fname, 'rb') as f:
        # read magic bytes: dtype and ndim
        magic = f.read(4)
        assert magic[0:2] == b'\x00\x00'
        dtypes = {8: np.uint8, 9: np.int8, 11: np.int16,
                  12: np.int32, 13: np.float32, 14: np.float64}
        dtype = np.dtype(dtypes[magic[2]]).newbyteorder('>')  # data is big-endian
        ndim = magic[3]

        # read dimensions (one big-endian 4-byte integer per dimension)
        dims = []
        for i in range(ndim):
            b = f.read(4)
            dims.append(int.from_bytes(b, byteorder='big'))

        # read data
        data = np.fromfile(f, dtype=dtype, count=np.prod(dims))
        data.shape = dims

    return data
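
# For the MNIST files used below, read_idx returns a (60000, 28, 28) uint8
# array for the training images and a (60000,) uint8 array for the training
# labels (10000 for the test set).

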
if __name__ == '__main__':
    # get some data
    root_url = "http://yann.lecun.com/exdb/mnist/"
    urls = {"train_images": root_url + "train-images-idx3-ubyte.gz",
            "train_labels": root_url + "train-labels-idx1-ubyte.gz",
            "test_images": root_url + "t10k-images-idx3-ubyte.gz",
            "test_labels": root_url + "t10k-labels-idx1-ubyte.gz"}
    fnames = {key: url.split('/')[-1][:-3] for key, url in urls.items()}
    for key in urls:
        if not os.path.exists(fnames[key]):
            download_gzip_file(urls[key], fnames[key])

    # Read the data
    data = {key: read_idx(fname) for key, fname in fnames.items()}

    # Munge data: flatten each image to a column and scale to [0, 1]
    X = {}
    for k in ('train', 'test'):
        images = data[k + '_images']
        images = images.reshape((images.shape[0], -1)).T  # (n_pixels, n_examples)
        X[k] = images / images.max(axis=0)

    # Munge data: one-hot encode Y
    Y = {}
    for k in ('train', 'test'):
        labels = data[k + '_labels']
        y = np.zeros((labels.size, labels.max() + 1))
        y[np.arange(labels.size), labels] = 1.0
        Y[k] = y.T  # (n_classes, n_examples)

    # Run training
    network = NeuralNetwork([28*28, 100, 10], [relu, sigmoid])
    network.train(X['train'], Y['train'], niter=300, alpha=0.2)

    # show training cost
    import matplotlib.pyplot as plt
    plt.plot(network.costs)
    plt.ylim(bottom=0.0)
    plt.ylabel("cost")
    plt.xlabel("iteration")
    plt.savefig("costs.png")

    # Validate
    for key in ('train', 'test'):
        print(key, 'set')
        Ypred = network(X[key])
        labels = np.argmax(Ypred, axis=0)
        print("Truth:     ", data[key + '_labels'][:30])
        print("Prediction:", labels[:30])
        correct = data[key + '_labels'] == labels
        print("Correct: {:6.2f}%\n".format(100.0 * correct.mean()))