import numpy as np

# define the neural network architecture

# Input Data
a_0 = [1, 2]
inputs = a_0

# Layer 1
weights_1 = [[-2.0,  1.5],
             [ 1.5, -1.0],
             [-1.0,  0.5]]
weights_1 = np.array(weights_1)
biases_1 = [-0.5, -1.0, 1.0]
biases_1 = np.array(biases_1)

# Layer 2
weights_2 = [[ 3.0,  0.5,  0.5],
             [-2.0,  0.5,  0.5],
             [-1.0, -2.0, -1.0]]
weights_2 = np.array(weights_2)
biases_2 = [-1.0, -2.0, 2.0]
biases_2 = np.array(biases_2)
# Neural Network Flow
# 0. Input a set of training data, X
# 1. For each x in X:
#      Feed forward: get z, a for each layer
#      Get the output error (derivative of the cost function w.r.t. the weighted input)
#      Backpropagate the error
# 2. Gradient Descent: in each layer l, update the weights and biases
#      learning rate = h
#      w_l = w_l - h * del_weight
#      b_l = b_l - h * del_bias

# Backpropagation Algorithm
# 1. Input x
# 2. For each layer l (1, 2, ..., L), compute:
#      z_l = w_l * a_{l-1} + b_l
#      a_l = sigma(z_l)
# 3. Compute the error of the last layer, e_L:
#      e_L = del_Cost (.) dsigma(z_L)        ((.) = elementwise/Hadamard product)
#      where del_Cost is the gradient of the cost with respect to a_L
# 4. For each l (L-1, L-2, ..., 2), compute:
#      e_l = ((w_{l+1}).T * e_{l+1}) (.) dsigma(z_l)
# 5. Output the gradient of the cost function:
#      del_weight = e_l * (a_{l-1}).T        (outer product)
#      del_bias = e_l
# do the feed forward step
z_1 = weights_1.dot(inputs) + biases_1
print("weighted sum of first layer (z_1): ", z_1)

# given a numpy array, return the relu'd np.array
def relu(z):
    # np.maximum avoids mutating the caller's array in place,
    # so z_1 keeps its negative entries for the backprop step later
    return np.maximum(z, 0)
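# sanity check (added for illustration, not part of the original gist):
# negative entries clamp to 0, positive entries pass through unchanged
assert np.array_equal(relu(np.array([-1.0, 2.0])), np.array([0.0, 2.0]))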
# activate!
a_1 = relu(z_1)
z_2 = weights_2.dot(a_1) + biases_2
print("weighted sum of middle layer (z_2): ", z_2)

# the last layer uses a Sigmoid
# aka the input squishifier between 0 and 1
def sigmoid(z):
    return 1 / (1 + np.exp(-z))
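# sanity check (added for illustration): sigmoid is 0.5 at 0 and stays in (0, 1)
assert sigmoid(0.0) == 0.5
assert 0 < sigmoid(-10.0) < sigmoid(10.0) < 1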
a_2 = sigmoid(z_2)
outputs = a_2
print("outputs dimension is: ", outputs.shape)
print("outputs is: ", outputs)

# mean squared error loss
def mse(outputs, labels):
    assert(len(outputs) == len(labels))
    return np.sum(np.square(outputs - labels)) / len(outputs)
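# sanity check (illustrative values, not part of the original gist):
# identical vectors cost 0; a single unit difference across two entries costs 0.5
assert mse(np.array([1.0, 2.0]), np.array([1.0, 2.0])) == 0.0
assert mse(np.array([1.0, 2.0]), np.array([1.0, 3.0])) == 0.5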
# cross entropy
def cross_entropy(outputs, labels):
    assert(len(outputs) == len(labels))
    first_term = np.sum(labels * np.log(outputs))
    second_term = np.sum(labels * np.log(labels))
    return -(first_term - second_term)

labels = np.array([0.3, 0.000001, 0.7])
print("labels is {}".format(labels))
print("MSE Cost is: {}".format(mse(outputs, labels)))
# print("Cross entropy cost is: {}".format(cross_entropy(outputs, labels)))
# back prop
# given 2 numpy arrays, return the derivative of the squared-error cost
# with respect to the outputs (the 1/n averaging factor from mse is omitted here)
def d_mse(outputs, labels):
    return 2 * (outputs - labels)

def d_sigmoid(x):
    return sigmoid(x) * (1 - sigmoid(x))
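# added for the backprop step below: the hidden layer's activation is relu
# (a_1 = relu(z_1)), so its error term needs the relu derivative; a minimal
# sketch that treats the (sub)gradient at exactly 0 as 0
def d_relu(z):
    return (z > 0).astype(float)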
# error of the output layer
errors_2 = d_mse(outputs, labels) * d_sigmoid(z_2)

# let's get the error of the hidden layer:
# transpose the weights of the second layer to propagate the error backwards,
# then multiply elementwise by the derivative of that layer's activation
# (the hidden layer used relu, so d_relu rather than d_sigmoid)
errors_1 = np.multiply(np.dot(weights_2.T, errors_2), d_relu(z_1))
print("errors_1", errors_1, "errors_2", errors_2)