Skip to content

Instantly share code, notes, and snippets.

@carlosgmartin
Created September 16, 2018 19:19
Show Gist options
  • Save carlosgmartin/8f5dca0795047e21f4ba7971a416d4b0 to your computer and use it in GitHub Desktop.
Save carlosgmartin/8f5dca0795047e21f4ba7971a416d4b0 to your computer and use it in GitHub Desktop.
Pure-numpy neural network on CIFAR10 dataset
import numpy
import torch
import torchvision
import matplotlib.pyplot as plt
def relu(x):
    """Apply the rectified linear unit element-wise.

    Every entry of ``x`` that is below zero is replaced by zero; all other
    entries pass through unchanged.
    """
    rectified = numpy.maximum(0, x)
    return rectified
def relu_derivative(x):
    """Return the element-wise slope of ReLU at ``x`` as a float array.

    The result is 1.0 where ``x`` is strictly positive and 0.0 everywhere
    else (including exactly at zero).
    """
    is_positive = x > 0
    return is_positive.astype(float)
def softmax(x):
    """Normalise ``x`` into probabilities along its last axis.

    The maximum along the last axis is subtracted before exponentiating so
    that large inputs do not overflow; this shift leaves the result unchanged.
    """
    shifted = x - x.max(axis=-1, keepdims=True)
    exponentials = numpy.exp(shifted)
    return exponentials / exponentials.sum(axis=-1, keepdims=True)
class NeuralNetwork:
    """Fully connected classifier trained with mini-batch gradient descent.

    Every layer is affine (``a @ w + b``). Hidden layers are followed by ReLU
    and the last layer by softmax, so the network output is one probability
    row per sample.

    Attributes:
        L: number of weight layers (``len(dimensions) - 1``).
        w: list of weight matrices, ``w[l]`` has shape
            ``(dimensions[l], dimensions[l + 1])``.
        b: list of bias vectors, ``b[l]`` has length ``dimensions[l + 1]``.
        a: activations cached by the most recent ``forward`` call; ``a[0]`` is
            the flattened input and ``a[L]`` the softmax output.
        dw, db: gradients cached by the most recent ``backward`` call.
    """

    def __init__(self, dimensions):
        """Create the layers.

        Args:
            dimensions: sequence of layer widths, input width first and
                number of classes last.
        """
        self.L = len(dimensions) - 1
        # Gaussian weights scaled by sqrt(2 / fan_in); biases start at zero.
        self.w = [
            numpy.random.randn(i, j) * numpy.sqrt(2 / i)
            for i, j in zip(dimensions, dimensions[1:])
        ]
        self.b = [
            numpy.zeros(j)
            for j in dimensions[1:]
        ]
        self.a = {}
        self.dw = {}
        self.db = {}

    def forward(self, x):
        """Run ``x`` through the network and cache every layer's activation.

        Each sample in ``x`` is flattened to a row vector first. Results are
        stored in ``self.a``; nothing is returned.
        """
        self.a[0] = x.reshape(len(x), -1)
        for l in range(self.L):
            # ReLU on hidden layers, softmax on the output layer.
            activation = relu if l + 1 < self.L else softmax
            self.a[l + 1] = activation(self.a[l] @ self.w[l] + self.b[l])

    def backward(self, y):
        """Back-propagate from the activations cached by ``forward``.

        Args:
            y: integer class labels for the batch last passed to ``forward``.

        Gradients are stored in ``self.dw`` / ``self.db``. They are summed
        over the batch, not averaged.
        """
        delta = {}
        for l in reversed(range(self.L)):
            if l + 1 == self.L:
                # Output layer error: softmax output minus one-hot targets.
                delta[l] = self.a[l + 1] - numpy.eye(len(self.b[-1]))[y]
            else:
                # ReLU output is positive exactly where its input is, so the
                # cached activation can be used to evaluate the derivative.
                delta[l] = delta[l + 1] @ self.w[l + 1].T * relu_derivative(self.a[l + 1])
            self.dw[l] = self.a[l].T @ delta[l]
            self.db[l] = delta[l].sum(0)

    def step(self, learning_rate):
        """Update weights and biases in place using the cached gradients."""
        for l in range(self.L):
            self.w[l] -= learning_rate * self.dw[l]
            self.b[l] -= learning_rate * self.db[l]

    def predict(self):
        """Return the most probable class for each sample of the last forward pass."""
        return self.a[self.L].argmax(-1)

    def loss(self, y):
        """Return the mean cross-entropy of the last forward pass against ``y``.

        Args:
            y: integer class labels, one per sample of the last forward pass.
        """
        probabilities = self.a[self.L]
        # Pair every row with its own label. Indexing with ``[:, y]`` would
        # instead select whole columns and produce an (N, N) matrix mixing
        # each sample with every other sample's label.
        true_class_probabilities = probabilities[numpy.arange(len(y)), y]
        return -numpy.log(true_class_probabilities).mean()

    def accuracy(self, y):
        """Return the fraction of samples whose predicted class equals ``y``."""
        return (y == self.predict()).mean()

    def train(self, x_train, x_val, y_train, y_val, epochs, learning_rate, batch_size):
        """Train with shuffled mini-batches, printing accuracy after each epoch.

        Args:
            x_train, y_train: training inputs and integer labels.
            x_val, y_val: held-out inputs and labels, used only for reporting.
            epochs: number of passes over the training set.
            learning_rate: multiplier applied to the gradients in ``step``.
            batch_size: number of samples per update; the last batch of an
                epoch may be smaller.
        """
        for epoch in range(epochs):
            print('epoch {}'.format(epoch))
            # Fresh random order every epoch.
            p = numpy.random.permutation(len(x_train))
            for i in range(0, len(x_train), batch_size):
                x = x_train[p[i:i + batch_size]]
                y = y_train[p[i:i + batch_size]]
                self.forward(x)
                self.backward(y)
                self.step(learning_rate)
            self.forward(x_train)
            print(' train accuracy: {:.3f}'.format(self.accuracy(y_train)))
            self.forward(x_val)
            print(' validation accuracy: {:.3f}'.format(self.accuracy(y_val)))
# Per-image preprocessing: convert to a tensor, normalise each of the three
# channels with mean 0.5 and std 0.5, then convert to a numpy array.
transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(mean=(.5, .5, .5), std=(.5, .5, .5)),
    torchvision.transforms.Lambda(lambda tensor: tensor.numpy()),
])

# Each dataset yields (image, label) pairs; zip(*...) separates them into one
# sequence of images and one of labels, which are then stacked into arrays.
x_train, y_train = (
    numpy.array(column)
    for column in zip(*torchvision.datasets.CIFAR10(
        root='data', train=True, transform=transform, download=True,
    ))
)
x_test, y_test = (
    numpy.array(column)
    for column in zip(*torchvision.datasets.CIFAR10(
        root='data', train=False, transform=transform, download=True,
    ))
)

# Shuffle once, then hold out the first 10% of the training set for validation.
shuffle = numpy.random.permutation(len(x_train))
split = [int(.1 * len(x_train))]
(x_val, x_train), (y_val, y_train) = (
    numpy.split(data[shuffle], split) for data in (x_train, y_train)
)
# One hidden layer of 100 units between the flattened image and one output
# per class (labels are assumed to run from 0 to y_train.max()).
net = NeuralNetwork([numpy.prod(x_train.shape[1:]), 100, y_train.max() + 1])
net.train(
    x_train=x_train,
    x_val=x_val,
    y_train=y_train,
    y_val=y_val,
    epochs=20,
    learning_rate=1e-4,
    batch_size=10,
)

# Final report: loss and accuracy on the training split, accuracy on the test set.
net.forward(x_train)
print('train loss: {:.3f}'.format(net.loss(y_train)))
print('train accuracy: {:.3f}'.format(net.accuracy(y_train)))

net.forward(x_test)
print('test accuracy: {:.3f}'.format(net.accuracy(y_test)))
# Show a rows x cols grid of random test images captioned with the predicted class.
rows, cols = 3, 5
samples = numpy.random.choice(len(x_test), size=rows * cols, replace=False)
# Move channels last for imshow and undo the normalisation (x / 2 + 0.5).
images = numpy.transpose(x_test[samples], (0, 2, 3, 1)) / 2 + .5
classes = numpy.array(['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck'])
# True class names of the sampled images (computed but not drawn on the figure).
labels = classes[y_test[samples]]
net.forward(x_test[samples])
predictions = classes[net.predict()]
# squeeze=False keeps `axes` two-dimensional even when rows or cols is 1.
fig, axes = plt.subplots(rows, cols, squeeze=False)
# axes.flat walks the grid row by row, matching the order of `samples`, so
# every image is paired with its own prediction whatever `cols` is set to
# (the caption index previously hard-coded a row width of 5).
for ax, image, prediction in zip(axes.flat, images, predictions):
    ax.imshow(image)
    ax.set_xlabel(prediction)
plt.tight_layout()
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment