import numpy as np

# define the neural network architecture

# Input Data
a_0 = [1, 2]
inputs = a_0

# Layer 1
weights_1 = [[-2.0,  1.5],
             [ 1.5, -1.0],
             [-1.0,  0.5]]
weights_1 = np.array(weights_1)
biases_1 = [-0.5, -1.0, 1.0]
biases_1 = np.array(biases_1)

# Layer 2
weights_2 = [[ 3.0,  0.5,  0.5],
             [-2.0,  0.5,  0.5],
             [-1.0, -2.0, -1.0]]
weights_2 = np.array(weights_2)
biases_2 = [-1.0, -2.0, 2.0]
biases_2 = np.array(biases_2)
# Neural Network Flow
# 0. Input a set of training data, X
# 1. For each x in X:
#      Feed forward: get z, a for each layer
#      Get the output error (derivative of the cost function w.r.t. the weighted input)
#      Backpropagate the error
# 2. Gradient Descent: in each layer l, update the weights and biases
#      learning rate = h
#      w_l = w_l - h * del_weight
#      b_l = b_l - h * del_bias

# Backpropagation Algorithm
# 1. Input x
# 2. For each layer l (1, 2, ..., L), compute:
#      z_l = w_l * a_{l-1} + b_l
#      a_l = sigma(z_l)
# 3. Compute the error of the last layer, e_L:
#      e_L = del_Cost (.) dsigma(z_L)        ((.) = elementwise/Hadamard product)
#      where del_Cost is the gradient of the cost with respect to a_L
# 4. For each l (L-1, L-2, ..., 2), compute:
#      e_l = ((w_{l+1}).T * e_{l+1}) (.) dsigma(z_l)
# 5. Output the gradient of the cost function:
#      del_weight = e_l * (a_{l-1}).T        (outer product)
#      del_bias = e_l
# do the feed forward step
z_1 = weights_1.dot(inputs) + biases_1
print("weighted sum of first layer (z_1): ", z_1)

# given a numpy array, return the relu'd np.array
def relu(z):
    # np.maximum avoids mutating the caller's array in place,
    # so z_1 keeps its negative entries for the backprop step later
    return np.maximum(z, 0)
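# sanity check (added for illustration, not part of the original gist):
# negative entries clamp to 0, positive entries pass through unchanged
assert np.array_equal(relu(np.array([-1.0, 2.0])), np.array([0.0, 2.0]))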
# activate!
a_1 = relu(z_1)
z_2 = weights_2.dot(a_1) + biases_2
print("weighted sum of middle layer (z_2): ", z_2)

# the last layer uses a Sigmoid
# aka the input squishifier between 0 and 1
def sigmoid(z):
    return 1 / (1 + np.exp(-z))
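# sanity check (added for illustration): sigmoid is 0.5 at 0 and stays in (0, 1)
assert sigmoid(0.0) == 0.5
assert 0 < sigmoid(-10.0) < sigmoid(10.0) < 1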
a_2 = sigmoid(z_2)
outputs = a_2
print("outputs dimension is: ", outputs.shape)
print("outputs is: ", outputs)

# mean squared error loss
def mse(outputs, labels):
    assert(len(outputs) == len(labels))
    return np.sum(np.square(outputs - labels)) / len(outputs)
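# sanity check (illustrative values, not part of the original gist):
# identical vectors cost 0; a single unit difference across two entries costs 0.5
assert mse(np.array([1.0, 2.0]), np.array([1.0, 2.0])) == 0.0
assert mse(np.array([1.0, 2.0]), np.array([1.0, 3.0])) == 0.5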
# cross entropy
def cross_entropy(outputs, labels):
    assert(len(outputs) == len(labels))
    first_term = np.sum(labels * np.log(outputs))
    second_term = np.sum(labels * np.log(labels))
    return -(first_term - second_term)

labels = np.array([0.3, 0.000001, 0.7])
print("labels is {}".format(labels))
print("MSE Cost is: {}".format(mse(outputs, labels)))
# print("Cross entropy cost is: {}".format(cross_entropy(outputs, labels)))
# back prop
# given 2 numpy arrays, return the derivative of the squared-error cost
# with respect to the outputs (the 1/n averaging factor from mse is omitted here)
def d_mse(outputs, labels):
    return 2 * (outputs - labels)

def d_sigmoid(x):
    return sigmoid(x) * (1 - sigmoid(x))
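# added for the backprop step below: the hidden layer's activation is relu
# (a_1 = relu(z_1)), so its error term needs the relu derivative; a minimal
# sketch that treats the (sub)gradient at exactly 0 as 0
def d_relu(z):
    return (z > 0).astype(float)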
# error of the output layer
errors_2 = d_mse(outputs, labels) * d_sigmoid(z_2)

# let's get the error of the hidden layer:
# transpose the weights of the second layer to propagate the error backwards,
# then multiply elementwise by the derivative of that layer's activation
# (the hidden layer used relu, so d_relu rather than d_sigmoid)
errors_1 = np.multiply(np.dot(weights_2.T, errors_2), d_relu(z_1))
print("errors_1", errors_1, "errors_2", errors_2)