@mfekadu
Last active July 18, 2019 17:23
import numpy as np
# define the neural network architecture
# Input Data
a_0 = np.array([1, 2])
inputs = a_0
# Layer 1
weights_1 = np.array([[-2.0,  1.5],
                      [ 1.5, -1.0],
                      [-1.0,  0.5]])
biases_1 = np.array([-0.5, -1.0, 1.0])
# Layer 2
weights_2 = np.array([[ 3.0,  0.5,  0.5],
                      [-2.0,  0.5,  0.5],
                      [-1.0, -2.0, -1.0]])
biases_2 = np.array([-1.0, -2.0, 2.0])
# Neural Network Flow
# 0. Input a set of training data, X
# 1. For each x in X:
#      Feed forward: get z, a for each layer
#      Get the output error (derivative of the cost function w.r.t. the weighted input)
#      Backpropagate the error
# 2. Gradient descent: in each layer l, update weights and biases
#      with learning rate h:
#      w_l = w_l - h * del_weight
#      b_l = b_l - h * del_bias
# Backpropagation Algorithm
# 1. Input x
# 2. For each layer l (1, 2, ..., L), compute:
#      z_l = w_l * a_(l-1) + b_l
#      a_l = sigma(z_l)
# 3. Compute the error of the last layer, e_L:
#      e_L = del_Cost (*) dsigma(z_L)
#      where del_Cost is the gradient of the cost with respect to a_L
#      and (*) denotes the elementwise product
# 4. For each l (L-1, L-2, ..., 2) compute:
#      e_l = ((w_(l+1)).T * e_(l+1)) (*) dsigma(z_l)
# 5. Output is the gradient of the cost function:
#      del_weight = e_l * (a_(l-1)).T   (outer product)
#      del_bias = e_l
# (a compact NumPy sketch of these steps follows below)
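# A minimal sketch of the loop above, assuming all layers share one
# activation `act` with derivative `d_act` (the actual code below mixes
# ReLU and Sigmoid); `forward_backward`, `params`, and `d_cost` are
# illustrative names, not used elsewhere in this gist.
def forward_backward(x, y, params, act, d_act, d_cost):
    # feed forward: keep every z_l and a_l
    a, zs, activations = x, [], [x]
    for w, b in params:
        z = w.dot(a) + b
        zs.append(z)
        a = act(z)
        activations.append(a)
    # output error: e_L = del_Cost (*) dsigma(z_L)
    e = d_cost(a, y) * d_act(zs[-1])
    grads = [(np.outer(e, activations[-2]), e)]
    # backpropagate: e_l = (w_(l+1).T * e_(l+1)) (*) dsigma(z_l)
    for l in range(len(params) - 2, -1, -1):
        e = params[l + 1][0].T.dot(e) * d_act(zs[l])
        grads.insert(0, (np.outer(e, activations[l]), e))
    # gradient of the cost: [(del_weight, del_bias), ...] per layer
    return grads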
# do the feed forward step
z_1 = weights_1.dot(inputs) + biases_1
print("", z_1)
# given a numpy array, return the relu'd np.array
# (np.maximum avoids mutating the caller's array in place)
def relu(z):
    return np.maximum(z, 0)
# activate!
a_1 = relu(z_1)
z_2 = weights_2.dot(a_1) + biases_2
print("weighted sum of middle layer (z_2): ", z_2)
# the last layer uses a Sigmoid
# aka the input squishifier between 0 and 1
def sigmoid(z):
    return 1 / (1 + np.exp(-z))
a_2 = sigmoid(z_2)
outputs = a_2
print("outputs dimension is: ", outputs.shape)
print("outputs is: ", outputs)
# mean squared error loss
def mse(outputs, labels):
    assert len(outputs) == len(labels)
    return np.sum(np.square(outputs - labels)) / len(outputs)
# cross entropy (this form, with the labels' own entropy subtracted,
# is the KL divergence between the labels and the outputs)
def cross_entropy(outputs, labels):
    assert len(outputs) == len(labels)
    first_term = np.sum(labels * np.log(outputs))
    second_term = np.sum(labels * np.log(labels))
    return -(first_term - second_term)
labels = np.array([0.3, 0.000001, 0.7])
print("labels is {}".format(labels))
print("MSE Cost is: {}".format(mse(outputs, labels)))
# print("Cross entropy cost is: {}".format(cross_entropy(outputs, labels)))
# back prop
# derivative of MSE with respect to the outputs, given 2 numpy arrays
# (the 1/len(outputs) factor from mse is dropped; it only rescales the gradient)
def d_mse(outputs, labels):
    return 2 * (outputs - labels)
# derivative of the sigmoid
def d_sigmoid(x):
    return sigmoid(x) * (1 - sigmoid(x))
# error of the output layer
errors_2 = d_mse(outputs, labels) * d_sigmoid(z_2)
# derivative of the ReLU, since the hidden layer used ReLU
def d_relu(z):
    return (z > 0).astype(float)
# error of the hidden layer:
# transpose the weights of the second layer, propagate the error back,
# and multiply elementwise by the activation derivative of layer 1
errors_1 = np.multiply(np.dot(weights_2.T, errors_2), d_relu(z_1))
print("errors_1", errors_1, "errors_2", errors_2)