Created June 24, 2017 17:12
Python module containing basic neural networks functions.
# -*- coding: utf-8 -*-
'''
A module containing artificial neural network functions.
Created by Hamza Abbad.
'''
from numpy import exp,ones,hstack,ndarray,array,zeros,sum,log,zeros_like,any,abs,sqrt
from numbers import Real, Integral
def sigmoid(Z):
    '''Applies the logistic function 1 / (1 + e^-z) to Z.

    - Z : The values to transform (a 2 dimensional array in this module).

    Returns the result of the NumPy expression, computed element by element
    when Z is an array.
    '''
    denominator = 1 + exp(-Z)
    return 1 / denominator
def sigmoid_gradient(Z):
    '''Evaluates the derivative of the sigmoid function at Z.

    - Z : The values at which the derivative is evaluated (a 2 dimensional
      array in this module).

    Returns s * (1 - s), where s = sigmoid(Z).
    '''
    activation = sigmoid(Z)
    complement = 1 - activation
    return activation * complement
def one_of_m(y, possible_labels):
    '''Converts an array of labels to its one-of-M (one-hot) representation.

    - y : 1 dimensional array of labels, each one used as a column index.
    - possible_labels : The number K of possible labels.

    Returns a M*K integer array, M being the length of y (the number of
    examples): row i holds a 1 in column y[i] and 0 everywhere else.
    '''
    examples = y.shape[0]
    encoded = zeros((examples, possible_labels), dtype=int)
    for row, label in enumerate(y):
        encoded[row, label] = 1
    return encoded
def feedforward(X, Thetas, activation_function = sigmoid, keep_Z = False):
    '''Performs the forward propagation of an artificial neural network.

    - X : M*N array of examples, M is the number of examples, N is the number of features.
    - Thetas : List of L weight arrays, L is the number of layers of the network minus one.
      Thetas[i] has a dimension of Ui*Vi, where Ui is the number of neurons of the layer it
      produces (bias unit excluded) and Vi is the number of neurons of the layer it reads
      (bias unit included). Thetas[0] therefore needs Vi = N + 1, and the first dimension of
      Thetas[-1] is the number of outputs.
    - activation_function : Function applied to the weighted-input array of every layer.
    - keep_Z : When True, the weighted inputs of every layer are returned as well.

    Returns the array of outputs of the last layer (one row per example), or the tuple
    (outputs, Zs) when keep_Z is True, Zs being the list of the L weighted-input arrays
    (Zs[i] has a dimension of M*Ui).
    '''
    A = hstack((ones((X.shape[0], 1)), X)) # Add the column X0 (bias unit)
    Zs = []
    for Theta in Thetas:
        Z = A.dot(Theta.transpose()) # Weighted input of the layer, M*Ui
        if keep_Z:
            Zs.append(Z)
        # The column of ones represents the bias unit read by the next layer.
        A = hstack((ones((A.shape[0], 1)), activation_function(Z)))
    outputs = A[:, 1:] # Drop the bias column
    if keep_Z:
        return (outputs, Zs)
    return outputs
def backpropagation(X, Y, H, Thetas, Zs, activation_function = sigmoid, activation_gradient = sigmoid_gradient, reg_lambda = 0):
    '''Performs a backpropagation pass and calculates the gradient of an artificial neural network.

    - X : M*N array of examples, M is the number of examples, N is the number of features.
    - Y : M*K array of labels, M is the number of examples, K is the number of units in the output layer.
    - H : M*K array of predicted values returned by the feedforward pass.
    - Thetas : List of L arrays, L is the number of layers of the network minus one, in other words,
      the number of weight arrays.
    - Zs : List of L arrays returned by the feedforward pass.
    - activation_function : Function used by the feedforward pass to turn Zs into activations.
    - activation_gradient : Derivative of activation_function, evaluated on the arrays of Zs.
    - reg_lambda : The constant of regularization.

    Returns a list of L floating point arrays, the i-th one having the dimension of Thetas[i] and
    holding the gradient of the cost with respect to it. The bias column (column 0) is not regularized.
    The inputs are not modified.
    '''
    m = X.shape[0]
    bias = ones((m, 1))
    # Input read by each weight array, bias column included: the examples for
    # Thetas[0], then the activations of every hidden layer. The output layer
    # activation is never needed here, hence Zs[:-1].
    inputs = [hstack((bias, X))]
    inputs += [hstack((bias, activation_function(Z))) for Z in Zs[:-1]]

    gradients = [None] * len(Thetas)
    # One column of output errors per example (K*M). Processing all the
    # examples at once turns the sum of the per-example outer products
    # delta_m . a_m into a single matrix product.
    delta = (H - Y).transpose()
    for i in range(len(Thetas) - 1, -1, -1):
        gradient = delta.dot(inputs[i]) / m
        gradient[:, 1:] += reg_lambda / m * Thetas[i][:, 1:]
        gradients[i] = gradient
        if i > 0:
            # Propagate the error to the previous layer; row 0 belongs to the
            # bias unit, which has no incoming weights, so it is dropped.
            delta = Thetas[i].transpose().dot(delta)[1:, :] * activation_gradient(Zs[i - 1]).transpose()
    return gradients
def cost(X, Y, H, Thetas, reg_lambda = 0):
    '''Calculates the cost of the weights in an artificial neural network.

    - X : Array of examples; only its number of rows M is used.
    - Y : Array of expected outputs.
    - H : Array of predicted outputs, same dimension as Y.
    - Thetas : List of weight arrays; column 0 of each one (the bias weights) is not penalized.
    - reg_lambda : The constant of regularization, 0 disables the penalty.

    Returns the cross-entropy between Y and H averaged over the M examples, plus
    reg_lambda / (2 * M) times the sum of the squared non-bias weights when
    reg_lambda is not 0.
    '''
    examples = X.shape[0]
    cross_entropy = Y * log(H) + (1 - Y) * log(1 - H)
    J = -1 / examples * sum(cross_entropy)
    if reg_lambda == 0:
        return J
    penalty = 0
    for weights in Thetas:
        penalty += sum(weights[:, 1:] ** 2)
    penalty *= reg_lambda / (2 * examples)
    return J + penalty
def _numerical_gradient_check(cost_function, Thetas, epsilon = 1e-4):
    '''Approximates the gradient of a cost function with central differences.

    Used only for checking the implementation of the backpropagation.

    - cost_function : Function taking the list of weight arrays and returning the cost.
    - Thetas : List of 2 dimensional weight arrays. Its items are temporarily replaced by
      perturbed copies while the cost is evaluated; the original arrays are put back afterwards.
    - epsilon : The perturbation added to and subtracted from each weight.

    Returns a list of arrays shaped like the items of Thetas; each entry holds
    (cost(theta + epsilon) - cost(theta - epsilon)) / (2 * epsilon) for the matching weight.
    '''
    gradients = [zeros_like(weights) for weights in Thetas]
    for layer, gradient in enumerate(gradients):
        original = Thetas[layer]
        bump = zeros_like(original)
        for row in range(original.shape[0]):
            for col in range(original.shape[1]):
                # Perturb a single weight, in both directions.
                bump[row, col] = epsilon
                Thetas[layer] = original - bump
                cost_below = cost_function(Thetas)
                Thetas[layer] = original + bump
                cost_above = cost_function(Thetas)
                gradient[row, col] = (cost_above - cost_below) / (2 * epsilon)
                bump[row, col] = 0
        # Put the unperturbed weights back in the list.
        Thetas[layer] = original
    return gradients
def _check_parameters(X = None, y = None, possible_labels = None, Y = None, H = None, Thetas = None, Zs = None, reg_lambda = None, epsilon = None):
    '''Checks the parameters sent to other functions.

    Every parameter is optional: only the ones that are not None are checked, and a check
    involving two parameters runs only when both of them are given.

    - X : 2 dimensional array of examples.
    - y : 1 dimensional array of labels.
    - possible_labels : Positive integer, every label of y must be in [0, possible_labels).
    - Y : 2 dimensional array of expected outputs.
    - H : Array of predicted outputs, must have the dimensions of Y.
    - Thetas : List of 2 dimensional weight arrays whose dimensions must chain from X to Y.
    - Zs : List of 2 dimensional arrays, one per item of Thetas.
    - reg_lambda : Non-negative real number.
    - epsilon : Positive real number.

    Raises TypeError when a parameter has the wrong type, and ValueError when it has the wrong
    number of dimensions, an invalid value or dimensions that disagree with another parameter.
    '''
    def require_array(value, name, ndim):
        # Type and rank check shared by all the NumPy array parameters.
        if not isinstance(value, ndarray):
            raise TypeError(name + " must be a NumPy array")
        if value.ndim != ndim:
            raise ValueError(name + " must be " + str(ndim) + " dimensional array")

    if X is not None:
        require_array(X, "X", 2)
    if y is not None:
        require_array(y, "y", 1)
    if possible_labels is not None:
        if not isinstance(possible_labels, Integral):
            raise TypeError("possible_labels must be an integer")
        if possible_labels <= 0:
            raise ValueError("possible_labels must be positive")
        # The range of the labels can only be checked when y is given.
        if y is not None and (any(y < 0) or any(y >= possible_labels)):
            raise ValueError("All labels in y must be between 0 inclusive and possible_labels exclusive")
    if Y is not None:
        require_array(Y, "Y", 2)
    if H is not None:
        if not isinstance(H, ndarray):
            raise TypeError("H must be a NumPy array")
        if Y is not None and H.shape != Y.shape:
            raise ValueError("H and Y must have the same dimensions")
    if Thetas is not None:
        if not isinstance(Thetas, list):
            raise TypeError("Thetas must be a list")
        for i, Theta in enumerate(Thetas):
            require_array(Theta, "Thetas[" + str(i) + "]", 2)
        if Thetas: # The checks below index the first and last items.
            if X is not None and Thetas[0].shape[1] - 1 != X.shape[1]:
                raise ValueError("The second dimension of X must be equal to the second dimension of Thetas[0] minus one")
            # Every weight array, the last one included, must read what the previous one produces.
            for i in range(1, len(Thetas)):
                if Thetas[i].shape[1] - 1 != Thetas[i - 1].shape[0]:
                    raise ValueError("The first dimension of Thetas[" + str(i - 1) + "] must be equal to the second dimension of Thetas[" + str(i) + "] minus one")
            if Y is not None and Thetas[-1].shape[0] != Y.shape[1]:
                raise ValueError("The first dimension of Thetas[" + str(len(Thetas) - 1) + "] must be equal to the second dimension of Y")
    if Zs is not None:
        if not isinstance(Zs, list):
            raise TypeError("Zs must be a list")
        if Thetas is not None and len(Zs) != len(Thetas):
            raise ValueError("Zs and Thetas must have the same length")
        for i, Z in enumerate(Zs):
            require_array(Z, "Zs[" + str(i) + "]", 2)
            if X is not None and Z.shape[0] != X.shape[0]:
                raise ValueError("The first dimension of Zs[" + str(i) + "] must be equal to the first dimension of X")
            if Thetas is not None and Z.shape[1] != Thetas[i].shape[0]:
                raise ValueError("The second dimension of Zs[" + str(i) + "] must be equal to the first dimension of Thetas[" + str(i) + "]")
    if reg_lambda is not None:
        if not isinstance(reg_lambda, Real):
            raise TypeError("reg_lambda must be a real number")
        if reg_lambda < 0:
            raise ValueError("reg_lambda can't be negative")
    if epsilon is not None:
        if not isinstance(epsilon, Real):
            raise TypeError("epsilon must be a real number")
        if epsilon <= 0:
            raise ValueError("epsilon must be positive")
def gradient_descent(X, Y, Thetas, regularization_const, alpha, max_iterations, max_cost, max_gradient):
    '''Trains the network with batch gradient descent, updating Thetas in place.

    - X : Array of examples passed to feedforward, backpropagation and cost.
    - Y : Array of expected outputs.
    - Thetas : List of weight arrays; every step subtracts alpha times the gradient from each item.
    - regularization_const : The constant of regularization passed to cost and backpropagation.
    - alpha : The learning rate.
    - max_iterations : The descent stops once this number of steps has been made.
    - max_cost : The descent stops once the cost is no longer greater than this value.
    - max_gradient : The descent stops once the norm of the gradient is no longer greater than this value.

    At least one step is always made. The cost tested before the first loop iteration is the one
    of the initial weights; after that it is evaluated with the updated weights and the predictions
    computed just before the update.

    Returns the number of steps made.
    '''
    def take_step(H, Zs):
        # Moves the weights against the gradient and returns the norm of that gradient.
        steps = backpropagation(X, Y, H, Thetas, Zs, reg_lambda = regularization_const)
        for j in range(len(Thetas)):
            Thetas[j] -= alpha * steps[j]
        return sqrt(sum(array([sum(step ** 2) for step in steps])))

    H, Zs = feedforward(X, Thetas, keep_Z = True)
    J = cost(X, Y, H, Thetas, regularization_const)
    gradient_value = take_step(H, Zs)
    i = 1
    while i < max_iterations and J > max_cost and gradient_value > max_gradient:
        H, Zs = feedforward(X, Thetas, keep_Z = True)
        gradient_value = take_step(H, Zs)
        J = cost(X, Y, H, Thetas, regularization_const)
        i += 1
    return i
if __name__ == '__main__':
    # Demonstration: train a small network (2 inputs, hidden layers of 2 and 4
    # neurons, 4 outputs) on three examples and print the cost before and
    # after the optimization.
    X = array([
        [0.54030, -0.41615],
        [-0.98999, -0.65364],
        [0.28366, 0.96017]
    ])
    Theta1 = array([
        [ 0.66294134,  0.5955722 ,  0.55686872],
        [ 0.89952311, -0.25349201,  0.20393105]
    ])
    Theta2 = array([
        [-0.12538753,  0.71265549,  0.70997811],
        [ 1.07216431,  0.02318498,  0.39948136],
        [ 1.01592775, -0.20193439, -0.2109316 ],
        [ 1.17180091, -0.21056782,  0.63941167]
    ])
    Theta3 = array([
        [-0.12275993,  1.3893123 ,  1.46562893,  0.29220495,  1.45709864],
        [-0.40565099,  1.05983036,  1.04174971,  0.27491176,  1.27062198],
        [ 0.99863495, -0.27216449,  1.39228261,  1.30152133, -0.03507692],
        [ 0.96217801,  0.62635206, -0.00421884,  1.37500325, -0.33949522]
    ])
    Thetas = [Theta1, Theta2, Theta3]
    y = array([3,1,2])
    reg = 1
    # _check_parameters(Thetas=Thetas, y=y, reg_lambda=reg)
    H, Zs = feedforward(X, Thetas, keep_Z = True)
    Y = one_of_m(y, Thetas[-1].shape[0])
    # _check_parameters(X=X, Y=Y, H=H, Zs=Zs, Thetas=Thetas)
    # One forward/backward pass on the initial weights; the result is not used below.
    grad = backpropagation(X, Y, H, Thetas, Zs, reg_lambda = reg)

    def current_cost(weights):
        # Regularized cost of the network for the given list of weight arrays.
        return cost(X, Y, feedforward(X, weights), weights, reg)

    print("Cost before optimization :", current_cost(Thetas))
    iterations = gradient_descent(X, Y, Thetas, reg, alpha=0.1, max_iterations=5000, max_cost=1e-3, max_gradient=0)
    print("Cost after optimization :", current_cost(Thetas), end='\n'*2)
    print("Iterations :", iterations)
    print("Thetas :", end='\n'*2)
    for Theta in Thetas:
        print(Theta, end='\n'*2)
