# camargo/mnist-from-scratch.py

## mnist-from-scratch.py
'''
Simple 3-layer fully-connected neural network for recognizing MNIST digits.
Implemented from scratch with Numpy.
Written by Chris Camargo.
MIT License.
'''

from keras.datasets import mnist
import numpy as np

# Seed NumPy's global RNG so the np.random.rand weight initialisation further
# down produces the same values on every run.
np.random.seed(1337)

def relu(x: np.ndarray) -> np.ndarray:
  '''
  Apply the rectified linear unit element-wise: max(x, 0).

  np.maximum is used instead of multiplying by the boolean mask (x > 0),
  because the mask form evaluates 0 * x for non-positive entries, which
  produces -0.0 for negative values and NaN for negative infinity.

  Args:
    x: Array of pre-activation values.

  Returns:
    Array with the same shape as x in which negative entries are replaced
    by 0 and positive entries are passed through unchanged.
  '''
  return np.maximum(x, 0)

def relu_grad(x: np.ndarray) -> np.ndarray:
  '''
  Return the element-wise ReLU derivative of x as a boolean mask.

  The mask is True where x is strictly positive and False elsewhere
  (including at exactly 0).
  '''
  is_positive = x > 0
  return is_positive

def vectorized_result(j: int) -> np.ndarray:
  '''
  One-hot encode a digit label as a (1, 10) row vector.

  Args:
    j: Integer class index in the range 0..9 (Python int or NumPy integer).

  Returns:
    Float array of shape (1, 10) holding 1.0 in column j and 0.0 elsewhere.

  Raises:
    IndexError: If j is outside 0..9. Negative labels are rejected
      explicitly because NumPy indexing would otherwise wrap them around
      and silently mark the wrong class.
  '''
  if not 0 <= j < 10:
    raise IndexError(f'label {j} is out of range for 10 classes')
  # Allocate directly in the returned row shape instead of transposing a column.
  e = np.zeros((1, 10))
  e[0, j] = 1.0
  return e

# Load the MNIST train/test splits through the Keras dataset helper.
(train_x, train_y), (test_x, test_y) = mnist.load_data()

# Flatten every image into a 1x784 row vector and divide by 255 (presumably
# scaling 8-bit pixel intensities into [0, 1] -- confirm against the dataset).
# -1 lets NumPy infer the number of samples instead of hard-coding the
# 60000 / 10000 split sizes.
new_train_x = train_x.reshape(-1, 1, 784) / 255
new_test_x = test_x.reshape(-1, 1, 784) / 255
# One-hot encode each training label; every entry is a (1, 10) row vector.
new_train_y = np.array([vectorized_result(y) for y in train_y])

# Hyperparameters: width of the single hidden layer and the SGD step size.
hidden_layer_size = 16
alpha = 0.01

# Both weight matrices start as uniform samples scaled from [0, 1) to
# [-0.1, 0.1). weights_0_1 is drawn first so the RNG stream order is unchanged.
weights_0_1 = np.random.rand(784, hidden_layer_size) * 0.2 - 0.1
weights_1_2 = np.random.rand(hidden_layer_size, 10) * 0.2 - 0.1

# One pass of per-sample gradient descent over the training set.
for i, (x, y) in enumerate(zip(new_train_x, new_train_y)):
  # Forward pass: input -> ReLU hidden layer -> ReLU output layer.
  layer_0 = x
  layer_1 = relu(layer_0.dot(weights_0_1))
  layer_2 = relu(layer_1.dot(weights_1_2))

  # The loss is only reported and never feeds the weight update, so it is
  # evaluated solely on the iterations that print it.
  if i % 10000 == 0:
    loss = 0.5 * np.sum((y - layer_2) ** 2)
    print(f'loss: {loss:>7f} [{i}/{len(new_train_x)}]')

  # Backward pass for the squared-error loss. Both error terms are computed
  # before either weight matrix is modified, so layer_1_error is propagated
  # through the same weights_1_2 that produced layer_2.
  layer_2_error = (layer_2 - y) * relu_grad(layer_2)
  layer_1_error = layer_2_error.dot(weights_1_2.T) * relu_grad(layer_1)
  weights_1_2 -= alpha * layer_1.T.dot(layer_2_error)
  weights_0_1 -= alpha * layer_0.T.dot(layer_1_error)

# Count the test samples whose highest-scoring output unit matches the label.
correct = 0
for x, label in zip(new_test_x, test_y):
  hidden = relu(x.dot(weights_0_1))
  pred = relu(hidden.dot(weights_1_2))
  if np.argmax(pred) == label:
    correct += 1

# Report accuracy as a percentage of the test set.
print(f'Test Error:\n Accuracy: {(correct / len(new_test_x))*100:>0.1f}%')

'''
loss: 0.520372 [0/60000]
loss: 0.034833 [10000/60000]
loss: 0.065088 [20000/60000]
loss: 0.007295 [30000/60000]
loss: 0.016756 [40000/60000]
loss: 0.029515 [50000/60000]
Test Error:
 Accuracy: 82.2%
'''
	'''
	Simple 3-layer fully-connected neural network for recognizing MNIST digits.
	Implemented from scratch with Numpy.
	Written by Chris Camargo.
	MIT License.
	'''

	from keras.datasets import mnist
	import numpy as np

	# Seed NumPy's global RNG so the np.random.rand weight initialisation further
	# down produces the same values on every run.
	np.random.seed(1337)

	def relu(x: np.ndarray) -> np.ndarray:
	  '''
	  Apply the rectified linear unit element-wise: max(x, 0).

	  np.maximum is used instead of multiplying by the boolean mask (x > 0),
	  because the mask form evaluates 0 * x for non-positive entries, which
	  produces -0.0 for negative values and NaN for negative infinity.

	  Args:
	    x: Array of pre-activation values.

	  Returns:
	    Array with the same shape as x in which negative entries are replaced
	    by 0 and positive entries are passed through unchanged.
	  '''
	  return np.maximum(x, 0)

	def relu_grad(x: np.ndarray) -> np.ndarray:
	  '''
	  Return the element-wise ReLU derivative of x as a boolean mask.

	  The mask is True where x is strictly positive and False elsewhere
	  (including at exactly 0).
	  '''
	  return x > 0

	def vectorized_result(j: int) -> np.ndarray:
	  '''
	  One-hot encode a digit label as a (1, 10) row vector.

	  Args:
	    j: Integer class index in the range 0..9 (Python int or NumPy integer).

	  Returns:
	    Float array of shape (1, 10) holding 1.0 in column j and 0.0 elsewhere.

	  Raises:
	    IndexError: If j is outside 0..9. Negative labels are rejected
	      explicitly because NumPy indexing would otherwise wrap them around
	      and silently mark the wrong class.
	  '''
	  if not 0 <= j < 10:
	    raise IndexError(f'label {j} is out of range for 10 classes')
	  # Allocate directly in the returned row shape instead of transposing a column.
	  e = np.zeros((1, 10))
	  e[0, j] = 1.0
	  return e

	# Load the MNIST train/test splits through the Keras dataset helper.
	(train_x, train_y), (test_x, test_y) = mnist.load_data()

	# Flatten every image into a 1x784 row vector and divide by 255 (presumably
	# scaling 8-bit pixel intensities into [0, 1] -- confirm against the dataset).
	# -1 lets NumPy infer the number of samples instead of hard-coding the
	# 60000 / 10000 split sizes.
	new_train_x = train_x.reshape(-1, 1, 784) / 255
	new_test_x = test_x.reshape(-1, 1, 784) / 255
	# One-hot encode each training label; every entry is a (1, 10) row vector.
	new_train_y = np.array([vectorized_result(y) for y in train_y])

	# Hyperparameters: width of the single hidden layer and the SGD step size.
	hidden_layer_size = 16
	alpha = 0.01

	# Both weight matrices start as uniform samples scaled from [0, 1) to
	# [-0.1, 0.1). weights_0_1 is drawn first so the RNG stream order is unchanged.
	weights_0_1 = np.random.rand(784, hidden_layer_size) * 0.2 - 0.1
	weights_1_2 = np.random.rand(hidden_layer_size, 10) * 0.2 - 0.1

	# One pass of per-sample gradient descent over the training set.
	for i, (x, y) in enumerate(zip(new_train_x, new_train_y)):
	  # Forward pass: input -> ReLU hidden layer -> ReLU output layer.
	  layer_0 = x
	  layer_1 = relu(layer_0.dot(weights_0_1))
	  layer_2 = relu(layer_1.dot(weights_1_2))

	  # The loss is only reported and never feeds the weight update, so it is
	  # evaluated solely on the iterations that print it.
	  if i % 10000 == 0:
	    loss = 0.5 * np.sum((y - layer_2) ** 2)
	    print(f'loss: {loss:>7f} [{i}/{len(new_train_x)}]')

	  # Backward pass for the squared-error loss. Both error terms are computed
	  # before either weight matrix is modified, so layer_1_error is propagated
	  # through the same weights_1_2 that produced layer_2.
	  layer_2_error = (layer_2 - y) * relu_grad(layer_2)
	  layer_1_error = layer_2_error.dot(weights_1_2.T) * relu_grad(layer_1)
	  weights_1_2 -= alpha * layer_1.T.dot(layer_2_error)
	  weights_0_1 -= alpha * layer_0.T.dot(layer_1_error)

	# Count the test samples whose highest-scoring output unit matches the label.
	correct = 0
	for i, x in enumerate(new_test_x):
	  pred = relu(relu(x.dot(weights_0_1)).dot(weights_1_2))
	  correct += int(np.argmax(pred) == test_y[i])

	# Report accuracy as a percentage of the test set.
	print(f'Test Error:\n Accuracy: {(correct / len(new_test_x))*100:>0.1f}%')

	'''
	loss: 0.520372 [0/60000]
	loss: 0.034833 [10000/60000]
	loss: 0.065088 [20000/60000]
	loss: 0.007295 [30000/60000]
	loss: 0.016756 [40000/60000]
	loss: 0.029515 [50000/60000]
	Test Error:
	Accuracy: 82.2%
	'''