# Basic 3 Layer Feed Forward Neural Network with Back Propagation from Scratch
import numpy as np
from scipy import optimize
import matplotlib.pyplot as plt
# Sample Input Data
x = np.array(([3,5],[5,1],[10,2]), dtype=float)
y = np.array(([75],[82],[93]),dtype=float)
# Normalizing the Data
X = x / np.amax(x,axis=0)
y = y / 100
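# With the data above, np.amax(x, axis=0) is [10., 5.], so each input feature
# is scaled into the range [0, 1]; y is divided by 100 (the maximum test score).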
# -------------------- 3-Layer Feed Forward Neural Network with Back Propagation ----------------------
class NeuralNetwork(object):
    # Define Hyperparameters
    # Hyperparameters are fixed constants that define the structure and behaviour of the NN
    def __init__(self):
        self.inputLayerSize = 2
        self.outputLayerSize = 1
        self.hiddenLayerSize = 3
        # Initialize Weights (Parameters)
        # W(1): All weights from Input Layer to Hidden Layer
        # W(2): All weights from Hidden Layer to Output Layer
        # np.random.randn(x,y,...): Returns standard-normal random values in the given shape
        # Returns a matrix of shape 2x3
        # Values in 1st row: W11, W12, W13 => W(1)
        # Values in 2nd row: W21, W22, W23 => W(1)
        self.W1 = np.random.randn(self.inputLayerSize, self.hiddenLayerSize)
        # Returns a matrix of random values of shape 3x1
        # 3 rows, 1 column
        # Values: W11, W21, W31 => W(2)
        self.W2 = np.random.randn(self.hiddenLayerSize, self.outputLayerSize)
    # Propagate Inputs through the Network
    def forward(self, X):
        # Pass in matrices instead of individual values,
        # leading to a speedup.
        # Input to Hidden Layer Z(2) values
        self.z2 = np.dot(X, self.W1)
        # print('Z2:\n', self.z2)
        # Use the Sigmoid Activation Function to squash values into the range (0,1)
        self.a2 = self.sigmoid(self.z2)
        # print('a2:\n', self.a2)
        # Hidden Layer to Output Layer Z(3) values
        self.z3 = np.dot(self.a2, self.W2)
        # print('Z3:\n', self.z3)
        # Final Activation Function gives the Output
        y_Hat = self.sigmoid(self.z3)
        return y_Hat
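    # In matrix form, the whole forward pass is:
    #   y_Hat = f( f(X . W1) . W2 ),  where f is the sigmoid applied element-wise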
    # Activation Function
    # Applied element-wise to the matrix "Z = X.W"
    def sigmoid(self, z):
        return 1 / (1 + np.exp(-z))
    # Derivative of f(Z) with respect to Z
    def sigmoidPrime(self, z):
        return np.exp(-z)/((1+np.exp(-z))**2)
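    # Note: since f(z) = 1/(1+e^(-z)), an equivalent form of this derivative is
    #   f'(z) = f(z) * (1 - f(z))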
    # Calculate the Cost, i.e. the error between predicted and actual values
    def costFunction(self, X, y):
        self.y_Hat = self.forward(X)
        J = 0.5*np.sum((y-self.y_Hat)**2)
        return J
    # Calculation of the Back Propagation error
    # This helps to update the weights and tune them to reduce the cost function
    def costFunctionPrime(self, X, y):
        self.y_Hat = self.forward(X)
        delta3 = np.multiply(-(y-self.y_Hat), self.sigmoidPrime(self.z3))
        dJdW2 = np.dot(self.a2.T, delta3)
        delta2 = np.dot(delta3, self.W2.T)*self.sigmoidPrime(self.z2)
        dJdW1 = np.dot(X.T, delta2)
        return dJdW1, dJdW2
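    # Derivation of the gradients above via the chain rule, for reference:
    #   J      = 0.5 * sum((y - y_Hat)^2)
    #   delta3 = dJ/dz3 = -(y - y_Hat) * f'(z3)
    #   dJdW2  = a2^T . delta3
    #   delta2 = dJ/dz2 = (delta3 . W2^T) * f'(z2)
    #   dJdW1  = X^T . delta2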
    # ---------------- Testing the Gradient Computation Part (costFunctionPrime) ----------------------
    # Test whether the values of dJdW1 and dJdW2 obtained are accurate or not.
    # Helper Functions for interacting with other classes:
    def getParams(self):
        # Get W1 and W2 unrolled into a single vector:
        params = np.concatenate((self.W1.ravel(), self.W2.ravel()))
        return params
    def setParams(self, params):
        # Set W1 and W2 using a single parameter vector.
        W1_start = 0
        W1_end = self.hiddenLayerSize * self.inputLayerSize
        self.W1 = np.reshape(params[W1_start:W1_end], (self.inputLayerSize, self.hiddenLayerSize))
        W2_end = W1_end + self.hiddenLayerSize * self.outputLayerSize
        self.W2 = np.reshape(params[W1_end:W2_end], (self.hiddenLayerSize, self.outputLayerSize))
    # Compute the gradient values for each weight in W1 and W2, unrolled into a vector.
    def computeGradients(self, X, y):
        dJdW1, dJdW2 = self.costFunctionPrime(X, y)
        return np.concatenate((dJdW1.ravel(), dJdW2.ravel()))
# Numerically estimate the slope of the cost function with respect to each weight
# using the central difference (J(w + e) - J(w - e)) / (2e).
# If the gradient vector computed by computeGradients matches the vector of
# Numerical Gradients, then the backpropagation code works fine.
def computeNumericalGradient(N, X, y):
    paramsInitial = N.getParams()
    numgrad = np.zeros(paramsInitial.shape)
    perturb = np.zeros(paramsInitial.shape)
    e = 1e-4
    for p in range(len(paramsInitial)):
        # Set the perturbation vector
        perturb[p] = e
        # Compute J(w + epsilon): value above the test point
        N.setParams(paramsInitial + perturb)
        loss2 = N.costFunction(X, y)
        # Compute J(w - epsilon): value below the test point
        N.setParams(paramsInitial - perturb)
        loss1 = N.costFunction(X, y)
        # Compute the Numerical Gradient (central difference)
        numgrad[p] = (loss2 - loss1) / (2 * e)
        # Reset the value we changed back to zero:
        perturb[p] = 0
    # Return Params to their original values:
    N.setParams(paramsInitial)
    return numgrad
# ------------------------ Training the Neural Network ------------------------------
class trainer(object):
    def __init__(self, N):
        # Make a local reference to the network:
        self.N = N
    def callbackF(self, params):
        self.N.setParams(params)
        self.J.append(self.N.costFunction(self.X, self.y))
    def costFunctionWrapper(self, params, X, y):
        self.N.setParams(params)
        cost = self.N.costFunction(X, y)
        grad = self.N.computeGradients(X, y)
        return cost, grad
    def train(self, X, y):
        # Make internal variables for the callback function:
        self.X = X
        self.y = y
        # Make an empty list to store costs:
        self.J = []
        params0 = self.N.getParams()
        options = {'maxiter': 200, 'disp': True}
        # jac=True tells scipy that costFunctionWrapper returns (cost, gradient)
        _res = optimize.minimize(self.costFunctionWrapper, params0, jac=True, method='BFGS',
                                 args=(X, y), options=options, callback=self.callbackF)
        self.N.setParams(_res.x)
        self.optimizationResults = _res
# -------------------- Testing the Neural Network --------------------------
nn = NeuralNetwork()
y_Hat = nn.forward(X)
print('Estimated Values (y_Hat): \n',y_Hat)
print('\n Actual Values (y): \n',y)
# Cost with the initial (random) weights, before any Back Propagation updates
J = nn.costFunction(X,y)
print('Cost1 (initial weights) J: \n', J)
# Back propagated derivatives of cost function
dJdW1, dJdW2 = nn.costFunctionPrime(X,y)
print('dJdW1: \n',dJdW1)
print('\ndJdW2: ',dJdW2)
# Adding scalar times the gradient to the weights moves uphill on the
# cost surface, so the cost should increase.
scalar = 3
nn.W1 = nn.W1 + scalar*dJdW1
nn.W2 = nn.W2 + scalar*dJdW2
cost2 = nn.costFunction(X,y)
print('cost2: \n', cost2)
# Subtracting scalar times the gradient moves downhill on the cost surface,
# so the cost should decrease. Recompute the gradient at the current weights
# first; otherwise the subtraction merely undoes the addition above.
dJdW1, dJdW2 = nn.costFunctionPrime(X,y)
nn.W1 = nn.W1 - scalar*dJdW1
nn.W2 = nn.W2 - scalar*dJdW2
cost3 = nn.costFunction(X,y)
print('cost3: \n', cost3)
# Check the analytic (backpropagated) gradient against the Numerical Gradient
grad = nn.computeGradients(X,y)
print('Analytic Gradient: \n',grad)
num_grad = computeNumericalGradient(nn,X,y)
print('Numerical Gradient: \n', num_grad)
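# A compact way to quantify how closely the two gradients agree; a value
# near zero (e.g. below ~1e-8) means backpropagation computes the right gradients.
rel_error = np.linalg.norm(grad - num_grad) / np.linalg.norm(grad + num_grad)
print('Relative error between gradients: \n', rel_error)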
# Training the NN
T = trainer(nn)
T.train(X,y)
plt.plot(T.J)
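# Label the training curve: the x-axis is the optimizer iteration recorded by
# the callback, the y-axis is the cost J.
plt.grid(True)
plt.xlabel('Iterations')
plt.ylabel('Cost J')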
plt.show()
print('\n')
# --------------------- Testing Trained Neural Network ---------------
# Check Cost after Training
cost = nn.costFunction(X,y)
print('Cost after Training: \n',cost)
print('\n')
print('Actual value: \n', y)
print('\n')
predicted_val = nn.forward(X)
print('Predicted Values: \n',predicted_val)
print('\n')
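# A quick sketch of predicting on unseen data: a new input must be normalized
# by the same per-feature maxima as the training data. (The values [8, 3]
# below are just an example input, not part of the dataset above.)
x_new = np.array(([8, 3],), dtype=float)
x_new = x_new / np.amax(x, axis=0)
print('Prediction for new input: \n', nn.forward(x_new))
print('\n')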
# ----------------------------------- EOC ----------------------------------