A 3-layer neural network with ReLU as the activation function.
# this implementation was given as assignment 3 of the course
# B55.2 WT Ausgewählte Kapitel sozialer Webtechnologien at HTW Berlin
# third party
import numpy as np
import matplotlib.pyplot as plt
# internal
from deep_teaching_commons.data.fundamentals.mnist import Mnist
# create mnist loader from deep_teaching_commons
mnist_loader = Mnist(data_dir='data')
# load all data; labels are one-hot encoded, images are flattened and pixel values are squashed into [0, 1]
train_images, train_labels, test_images, test_labels = mnist_loader.get_all_data(
    one_hot_enc=True, normalized=True)
# shuffle training data
shuffle_index = np.random.permutation(60000)
train_images, train_labels = train_images[shuffle_index], train_labels[shuffle_index]
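# (Assuming the deep_teaching_commons loader returns the standard MNIST split,
# train_images now has shape (60000, 784) and train_labels (60000, 10); indexing
# both with the same permutation keeps images and labels aligned while shuffling.)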
def feed_forward(X, weights):
    """
    calculates the forward pass of our neural network with ReLU as the activation function of every neuron
    Args:
        X: input data of our neural network (in our case - our images)
        weights: the learnable parameters of our network
    Returns:
        a list of activation matrices, one per layer; the last entry is the network output
    """
    a = [X]
    for w in weights:
        # the last item of our list is always the latest activation that was calculated,
        # which is why a[-1] is used as the input of the next layer
        a.append(np.maximum(a[-1].dot(w), 0))
    return a
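# Quick shape check (a sketch, not part of the original assignment): with the layer
# sizes used further below, feed_forward turns a batch of 5 flattened images into
# activations of shape (5, 784) -> (5, 200) -> (5, 100) -> (5, 10).
_demo_weights = [np.random.randn(*w) * 0.1 for w in [(784, 200), (200, 100), (100, 10)]]
_demo_activations = feed_forward(np.random.rand(5, 784), _demo_weights)
assert [act.shape for act in _demo_activations] == [(5, 784), (5, 200), (5, 100), (5, 10)]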
def grads(X, Y, weights):
    """
    calculates the gradients of our network by using an algorithm called backpropagation
    Args:
        X: input data of our neural network (in our case - our images)
        Y: labels of our input data
        weights: the learnable parameters of our network
    Returns:
        the gradients of our loss function with respect to every weight matrix, averaged over the batch
    """
    grads = np.empty_like(weights)
    a = feed_forward(X, weights)
    # gradient of the output layer
    delta = a[-1] - Y
    grads[-1] = a[-2].T.dot(delta)
    # propagate the error backwards through the hidden layers
    for i in range(len(a)-2, 0, -1):
        delta = (a[i] > 0) * delta.dot(weights[i].T)
        grads[i-1] = a[i-1].T.dot(delta)
    return grads / len(X)
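# (Sketch of the math implemented above: with z_l = a_{l-1} W_l and a_l = max(z_l, 0),
# the error is propagated backwards via delta_{l-1} = (a_{l-1} > 0) * (delta_l W_l^T),
# the gradient of each weight matrix is dL/dW_l = a_{l-1}^T delta_l, and the final
# division by len(X) averages the gradients over the batch.)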
# To test whether our implementation works, we first initialize a neural network with
# 3 layers: 784 input neurons, two hidden layers with 200 and 100 neurons, and 10 output neurons.
# The 784 input neurons stand for the pixels of one image (every image has a resolution of 28x28) and
# the 10 output neurons stand for every possible digit the image could represent (0-9).
# We also set up variables for our train and test datasets.
trX, trY, teX, teY = train_images, train_labels, test_images, test_labels
weights = [np.random.randn(*w) * 0.1 for w in [(784, 200), (200, 100), (100, 10)]]
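# (What this creates: three weight matrices of shapes (784, 200), (200, 100) and
# (100, 10), i.e. 784*200 + 200*100 + 100*10 = 177,800 learnable parameters drawn
# from a scaled standard normal distribution; bias terms are not used in this
# implementation.)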
# After initializing our network we train it and then see how accurately it performs.
# The number of epochs is the number of times we repeat the training over the whole training set.
#
# In order to train our network/minimize our loss we use stochastic gradient descent,
# which is the same as gradient descent but uses only a part of the whole data
# - a so called "mini-batch" - to calculate the gradient in each iteration.
#
# Gradient descent tries to minimize our loss function
# by subtracting the gradient of our loss function (scaled by the learning rate) from our current weights,
# so we have W_new = W_old - grad(L) * learning_rate
num_epochs, batch_size, learn_rate = 10, 50, 0.1
for i in range(num_epochs):
    for j in range(0, len(trX), batch_size):
        # creating a mini-batch with the size of batch_size
        X, Y = trX[j:j+batch_size], trY[j:j+batch_size]
        weights -= learn_rate * grads(X, Y, weights)
    prediction_test = np.argmax(feed_forward(teX, weights)[-1], axis=1)
    # print our accuracy on the test data after every epoch
    print(i, np.mean(prediction_test == np.argmax(teY, axis=1)))
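# Example usage after training (a sketch, not part of the original gist): classify a
# single test image with the trained weights and compare the result against its label.
single_prediction = np.argmax(feed_forward(teX[:1], weights)[-1], axis=1)[0]
print('predicted digit:', single_prediction, 'true digit:', np.argmax(teY[0]))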