A 3-layer neural network with ReLU as the activation function.
# this implementation was given as assignment 3 of the course
# B55.2 WT Ausgewählte Kapitel sozialer Webtechnologien at HTW Berlin

# third party
import numpy as np
import matplotlib.pyplot as plt

# internal
from deep_teaching_commons.data.fundamentals.mnist import Mnist

# create the mnist loader from deep_teaching_commons
mnist_loader = Mnist(data_dir='data')

# load all data: labels are one-hot encoded, images are flattened and their pixel values squashed into [0, 1]
train_images, train_labels, test_images, test_labels = mnist_loader.get_all_data(
    one_hot_enc=True, normalized=True)

# shuffle the training data
shuffle_index = np.random.permutation(60000)
train_images, train_labels = train_images[shuffle_index], train_labels[shuffle_index]
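
# Quick sanity check (an illustrative addition, not part of the original
# assignment): the loader above should give us 60000 flattened training
# images with 784 pixels each and one-hot labels over the 10 digit classes.
print(train_images.shape, train_labels.shape)  # expected: (60000, 784) (60000, 10)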
def feed_forward(X, weights):
    """
    calculates the forward pass of our neural network with ReLU as the activation function of every neuron

    Args:
        X: input data of our neural network (in our case: our images)
        weights: the learnable parameters of our network

    Returns:
        a list containing the activations of every layer, starting with the input and ending with the output layer
    """
    a = [X]
    for w in weights:
        # the last item of our list is always the latest activation that was
        # calculated, which is why a[-1] is used as the input of the next layer
        a.append(np.maximum(a[-1].dot(w), 0))
    return a
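
# Illustrative sketch (an addition with made-up toy shapes): feed_forward
# returns one activation per layer, so a toy network with two weight
# matrices yields the input plus two activations.
_toy_weights = [np.random.randn(4, 3) * 0.1, np.random.randn(3, 2) * 0.1]
_toy_activations = feed_forward(np.random.randn(5, 4), _toy_weights)
print([act.shape for act in _toy_activations])  # expected: [(5, 4), (5, 3), (5, 2)]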
def grads(X, Y, weights):
    """
    calculates the gradient of our network using an algorithm called backpropagation

    Args:
        X: input data of our neural network (in our case: our images)
        Y: labels of our input data
        weights: the learnable parameters of our network

    Returns:
        the gradient of our loss function with respect to every weight matrix
    """
    # one gradient matrix per weight matrix (a plain list avoids the ragged
    # numpy object arrays that recent numpy versions reject)
    grads = [np.empty_like(w) for w in weights]
    a = feed_forward(X, weights)
    # calculating the gradient, starting with the error at the output layer
    delta = a[-1] - Y
    grads[-1] = a[-2].T.dot(delta)
    for i in range(len(a)-2, 0, -1):
        # propagate the error backwards, masked by the ReLU derivative (a[i] > 0)
        delta = (a[i] > 0) * delta.dot(weights[i].T)
        grads[i-1] = a[i-1].T.dot(delta)
    # average the gradients over the mini-batch
    return [g / len(X) for g in grads]
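
# Illustrative check (an addition, reusing the toy network from above): each
# gradient should have the same shape as the weight matrix it belongs to.
_toy_labels = np.eye(2)[np.random.randint(0, 2, size=5)]  # made-up one-hot toy labels
_toy_grads = grads(np.random.randn(5, 4), _toy_labels, _toy_weights)
print([g.shape == w.shape for g, w in zip(_toy_grads, _toy_weights)])  # expected: [True, True]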
# To test whether our implementation works, we first initialize our 3-layer neural network:
# 784 input neurons, two hidden layers with 200 and 100 neurons, and 10 output neurons.
# The 784 input neurons stand for every pixel of one image (every image has a resolution of 28x28) and
# the 10 output neurons stand for every possible digit the image could represent (0-9).
# We also set up variables for our train and test datasets.
trX, trY, teX, teY = train_images, train_labels, test_images, test_labels
weights = [np.random.randn(*w) * 0.1
           for w in [(784, 200), (200, 100), (100, 10)]]
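
# Illustrative check (an addition): the three weight matrices connect the
# layers 784 -> 200 -> 100 -> 10.
print([w.shape for w in weights])  # expected: [(784, 200), (200, 100), (100, 10)]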
# After initializing our network we are going to train it and then see how accurately it performs.
# The number of epochs stands for the number of times we repeat the training over the whole dataset.
#
# In order to train our network and minimize our loss we use the stochastic gradient descent method,
# which is the same as gradient descent but uses only a part of the whole data
# - a so-called "mini-batch" - to calculate the gradient in each iteration.
#
# Gradient descent tries to minimize our loss function
# by subtracting the scaled gradient of our loss function from our current weights,
# so we have W_new = W_old - grad(L) * learning_rate
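#
# Worked one-step example for the update rule (an illustrative addition with
# made-up numbers): with W_old = 0.5, grad(L) = 2.0 and learning_rate = 0.1,
# W_new = 0.5 - 2.0 * 0.1 = 0.3, i.e. the weight moves against the gradient.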
num_epochs, batch_size, learn_rate = 10, 50, 0.1
for i in range(num_epochs):
    for j in range(0, len(trX), batch_size):
        # creating a mini-batch with the size of batch_size
        X, Y = trX[j:j+batch_size], trY[j:j+batch_size]
        # gradient descent update: W_new = W_old - grad(L) * learning_rate
        for w, g in zip(weights, grads(X, Y, weights)):
            w -= learn_rate * g
    prediction_test = np.argmax(feed_forward(teX, weights)[-1], axis=1)
    # prints our accuracy on the test data after every epoch
    print(i, np.mean(prediction_test == np.argmax(teY, axis=1)))
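
# Illustrative usage (an addition): after training, the network classifies a
# single image by taking the argmax over the 10 output neurons.
single_prediction = np.argmax(feed_forward(teX[:1], weights)[-1], axis=1)[0]
print('predicted digit:', single_prediction, 'true digit:', np.argmax(teY[0]))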