Python module containing basic neural network functions.
# -*- coding: utf-8 -*-
'''
A module containing artificial neural network functions.
Created by Hamza Abbad.
'''
from numpy import exp, ones, hstack, ndarray, array, zeros, sum, log, zeros_like, any, abs, sqrt
from numbers import Real, Integral
def sigmoid(Z):
    # Z is a 2 dimensional array.
    '''Calculates the sigmoid of the elements of the array'''
    return 1 / (1 + exp(-Z))
def sigmoid_gradient(Z):
    # Z is a 2 dimensional array.
    '''Calculates the gradient of the sigmoid function for an array'''
    sig = sigmoid(Z)
    return sig * (1 - sig)
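# Quick sanity check (illustrative values, not part of the original gist):
# sigmoid(0) = 0.5 and the gradient there is 0.25, its maximum slope.
#   >>> sigmoid(array([[0.0]]))
#   array([[0.5]])
#   >>> sigmoid_gradient(array([[0.0]]))
#   array([[0.25]])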
def one_of_m(y, possible_labels):
    # y is a 1 dimensional labels array, possible_labels is the number of possible labels.
    '''
    Returns a M*K array, M is the length of y (the number of examples), K is the number of possible labels.
    '''
    Y = zeros((y.shape[0], possible_labels), dtype=int)
    for i in range(y.shape[0]):
        Y[i, y[i]] = 1
    return Y
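# Illustrative example: labels are encoded one-hot, one row per example.
#   >>> one_of_m(array([2, 0]), 3)
#   array([[0, 0, 1],
#          [1, 0, 0]])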
def feedforward(X, Thetas, activation_function=sigmoid, keep_Z=False):
    # X is a 2 dimensional array of data, Thetas is a list of 2 dimensional arrays of weights.
    '''
    Performs the forward propagation of an artificial neural network and calculates the output from :
    - X : M*N array of examples, M is the number of examples, N is the number of features.
    - Thetas : List of L arrays, L is the number of layers of the network minus one, each array has a dimension of Ui*Vi
    where Ui is the number of neurons in the layer i excluding the bias unit and Vi is the number of neurons in the layer i-1
    including the bias. For the weights of the first hidden layer, Vi = N + 1 (the number of inputs plus the bias).
    For the weights of the output layer, Ui = K (the number of outputs).
    '''
    A = hstack((ones((X.shape[0], 1)), X))  # Add the column X0
    if keep_Z:
        Zs = []
    for i in range(len(Thetas)):
        Z = A.dot(Thetas[i].transpose())
        if keep_Z:
            Zs.append(Z)
        A = hstack((ones((A.shape[0], 1)), activation_function(Z)))  # Column of ones represents the bias unit
    if keep_Z:
        return (A[:, 1:], Zs)
    return A[:, 1:]
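# Shape sketch (layer sizes assumed for illustration): a network with 2 inputs,
# one hidden layer of 3 units and 4 outputs needs Thetas of shapes (3, 3) and
# (4, 4) (the extra column holds the bias weights); feedforward then maps an
# M*2 input array to an M*4 output array.
#   >>> H = feedforward(X, Thetas)               # X : M*2, H : M*4
#   >>> H, Zs = feedforward(X, Thetas, keep_Z=True)  # also keep pre-activations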
def backpropagation(X, Y, H, Thetas, Zs, activation_function=sigmoid, activation_gradient=sigmoid_gradient, reg_lambda=0):
    # X, Y, H are 2 dimensional arrays; Thetas and Zs are lists of 2 dimensional arrays.
    '''
    Performs a backpropagation pass and calculates the gradient of an artificial neural network from :
    - X : M*N array of examples, M is the number of examples, N is the number of features.
    - Y : M*K array of labels, M is the number of examples, K is the number of units in the output layer.
    - H : M*K array of predicted values returned by the feedforward pass.
    - Thetas : List of L arrays, L is the number of layers of the network minus one, in other words, the number of weight arrays.
    - Zs : List of L arrays returned by the feedforward pass.
    - reg_lambda : The regularization constant.
    '''
    X = hstack((ones((X.shape[0], 1)), X))  # Add the column X0
    Deltas = [zeros_like(Theta) for Theta in Thetas]
    As = [hstack((ones((X.shape[0], 1)), activation_function(Z))) for Z in Zs]  # Activation values
    for m in range(X.shape[0]):  # For each example
        delta = (H[m:m+1, :] - Y[m:m+1, :]).transpose()
        for i in range(len(Deltas)-1, 0, -1):
            Deltas[i] += delta.dot(As[i-1][m:m+1, :])
            delta = Thetas[i].transpose().dot(delta)[1:, :] * activation_gradient(Zs[i-1][m:m+1, :]).transpose()
        Deltas[0] += delta.dot(X[m:m+1, :])
    for i in range(len(Deltas)):
        Deltas[i] /= X.shape[0]
        Deltas[i][:, 1:] += reg_lambda / X.shape[0] * Thetas[i][:, 1:]
    return Deltas
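# Sanity check sketch: each returned array matches the shape of the corresponding
# weight array, so a plain gradient step is Thetas[i] -= alpha * Deltas[i].
#   >>> Deltas = backpropagation(X, Y, H, Thetas, Zs)
#   >>> all(D.shape == T.shape for D, T in zip(Deltas, Thetas))
#   True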
def cost(X, Y, H, Thetas, reg_lambda=0):
    '''
    Calculates the cost of the weights in an artificial neural network.
    '''
    J = -1 / X.shape[0] * sum(Y * log(H) + (1 - Y) * log(1 - H))  # Original cost function.
    if reg_lambda != 0:
        regularization = 0  # Add the regularization.
        for Theta in Thetas:
            regularization += sum(Theta[:, 1:] ** 2)
        regularization *= reg_lambda / (2 * X.shape[0])
        J += regularization
    return J
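# Worked example of the formula (reusing names from the demo below): for a
# chance-level network that outputs 0.5 everywhere, every term equals log(0.5),
# so the unregularized cost is K * log(2) whatever the labels are.
#   >>> cost(X, Y, zeros(Y.shape) + 0.5, Thetas)
#   2.772588722239781  # 4 * log(2), up to rounding, since K = 4 here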
def _numerical_gradient_check(cost_function, Thetas, epsilon=1e-4):
    '''
    Calculates the numerical gradient of an artificial neural network.
    Used only for checking the implementation of the backpropagation.
    '''
    numerical_gradient = [zeros_like(Theta) for Theta in Thetas]
    for i in range(len(Thetas)):
        Theta = Thetas[i]
        perturb = zeros_like(Theta)
        for p1 in range(Theta.shape[0]):
            for p2 in range(Theta.shape[1]):
                perturb[p1, p2] = epsilon
                Thetas[i] = Theta - perturb
                loss1 = cost_function(Thetas)
                Thetas[i] = Theta + perturb
                loss2 = cost_function(Thetas)
                numerical_gradient[i][p1, p2] = (loss2 - loss1) / (2 * epsilon)
                perturb[p1, p2] = 0
        Thetas[i] = Theta
    return numerical_gradient
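# Typical use (a hedged sketch, reusing names from the demo below): compare the
# analytical gradient with the numerical one; with central differences the
# discrepancy should be tiny, on the order of epsilon**2.
#   >>> num = _numerical_gradient_check(lambda t: cost(X, Y, feedforward(X, t), t, reg), Thetas)
#   >>> max(abs(g - n).max() for g, n in zip(grad, num))  # expect ~1e-9 or smaller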
def _check_parameters(X=None, y=None, possible_labels=None, Y=None, H=None, Thetas=None, Zs=None, reg_lambda=None, epsilon=None):
    '''
    Checks the parameters sent to other functions.
    '''
    if X is not None:
        if not isinstance(X, ndarray):
            raise TypeError("X must be a NumPy array")
        if X.ndim != 2:
            raise ValueError("X must be a 2 dimensional array")
    if y is not None:
        if not isinstance(y, ndarray):
            raise TypeError("y must be a NumPy array")
        if y.ndim != 1:
            raise ValueError("y must be a 1 dimensional array")
        if possible_labels is not None:
            if not isinstance(possible_labels, Integral):
                raise TypeError("possible_labels must be an integer")
            if possible_labels <= 0:
                raise ValueError("possible_labels must be positive")
            if any(y < 0) or any(y >= possible_labels):
                raise ValueError("All labels in y must be between 0 inclusive and possible_labels exclusive")
    if Y is not None:
        if not isinstance(Y, ndarray):
            raise TypeError("Y must be a NumPy array")
        if Y.ndim != 2:
            raise ValueError("Y must be a 2 dimensional array")
        if H is not None:
            if not isinstance(H, ndarray):
                raise TypeError("H must be a NumPy array")
            if H.shape != Y.shape:
                raise ValueError("H and Y must have the same dimensions")
    if Thetas is not None:
        if not isinstance(Thetas, list):
            raise TypeError("Thetas must be a list")
        for i in range(len(Thetas)):
            if not isinstance(Thetas[i], ndarray):
                raise TypeError("Thetas[" + str(i) + "] must be a NumPy array")
            if Thetas[i].ndim != 2:
                raise ValueError("Thetas[" + str(i) + "] must be a 2 dimensional array")
        if X is not None:
            if Thetas[0].shape[1] - 1 != X.shape[1]:
                raise ValueError("The second dimension of X must be equal to the second dimension of Thetas[0] minus one")
        for i in range(1, len(Thetas)):  # Check every consecutive pair of weight arrays.
            if Thetas[i].shape[1] - 1 != Thetas[i-1].shape[0]:
                raise ValueError("The first dimension of Thetas[" + str(i-1) + "] must be equal to the second dimension of Thetas[" + str(i) + "] minus one")
        if Y is not None and Thetas[-1].shape[0] != Y.shape[1]:
            raise ValueError("The first dimension of Thetas[" + str(len(Thetas)-1) + "] must be equal to the second dimension of Y")
    if Zs is not None:
        if not isinstance(Zs, list):
            raise TypeError("Zs must be a list")
        if Thetas is not None and len(Zs) != len(Thetas):
            raise ValueError("Zs and Thetas must have the same length")
        for i in range(len(Zs)):
            if not isinstance(Zs[i], ndarray):
                raise TypeError("Zs[" + str(i) + "] must be a NumPy array")
            if Zs[i].ndim != 2:
                raise ValueError("Zs[" + str(i) + "] must be a 2 dimensional array")
            if X is not None and Zs[i].shape[0] != X.shape[0]:
                raise ValueError("The first dimension of Zs[" + str(i) + "] must be equal to the first dimension of X")
            if Thetas is not None and Zs[i].shape[1] != Thetas[i].shape[0]:
                raise ValueError("The first dimension of Zs[" + str(i) + "] must be equal to the second dimension of Thetas[" + str(i) + "]")
    if reg_lambda is not None:
        if not isinstance(reg_lambda, Real):
            raise TypeError("reg_lambda must be a real number")
        if reg_lambda < 0:
            raise ValueError("reg_lambda can't be negative")
    if epsilon is not None:
        if not isinstance(epsilon, Real):
            raise TypeError("epsilon must be a real number")
        if epsilon <= 0:
            raise ValueError("epsilon must be positive")
def gradient_descent(X, Y, Thetas, regularization_const, alpha, max_iterations, max_cost, max_gradient):
    '''
    Optimizes the weights in Thetas (in place) by batch gradient descent with learning rate alpha.
    Stops after max_iterations iterations, or earlier if the cost drops below max_cost or the norm
    of the gradient drops below max_gradient. Returns the number of iterations performed.
    '''
    H, Zs = feedforward(X, Thetas, keep_Z=True)
    J = cost(X, Y, H, Thetas, regularization_const)
    gradient = backpropagation(X, Y, H, Thetas, Zs, reg_lambda=regularization_const)
    for j in range(len(Thetas)):
        Thetas[j] -= alpha * gradient[j]
    i = 1
    gradient_value = sqrt(sum(array([sum(g**2) for g in gradient])))  # Euclidean norm of the full gradient
    while i < max_iterations and J > max_cost and gradient_value > max_gradient:
        H, Zs = feedforward(X, Thetas, keep_Z=True)
        gradient = backpropagation(X, Y, H, Thetas, Zs, reg_lambda=regularization_const)
        for j in range(len(Thetas)):
            Thetas[j] -= alpha * gradient[j]
        J = cost(X, Y, H, Thetas, regularization_const)
        gradient_value = sqrt(sum(array([sum(g**2) for g in gradient])))
        i += 1
    return i
if __name__ == '__main__':
    X = array([
        [ 0.54030, -0.41615],
        [-0.98999, -0.65364],
        [ 0.28366,  0.96017]
    ])
    Theta1 = array([
        [ 0.66294134,  0.5955722 ,  0.55686872],
        [ 0.89952311, -0.25349201,  0.20393105]
    ])
    Theta2 = array([
        [-0.12538753,  0.71265549,  0.70997811],
        [ 1.07216431,  0.02318498,  0.39948136],
        [ 1.01592775, -0.20193439, -0.2109316 ],
        [ 1.17180091, -0.21056782,  0.63941167]
    ])
    Theta3 = array([
        [-0.12275993,  1.3893123 ,  1.46562893,  0.29220495,  1.45709864],
        [-0.40565099,  1.05983036,  1.04174971,  0.27491176,  1.27062198],
        [ 0.99863495, -0.27216449,  1.39228261,  1.30152133, -0.03507692],
        [ 0.96217801,  0.62635206, -0.00421884,  1.37500325, -0.33949522]
    ])
    Thetas = [Theta1, Theta2, Theta3]
    y = array([3, 1, 2])
    reg = 1
    # _check_parameters(Thetas=Thetas, y=y, reg_lambda=reg)
    H, Zs = feedforward(X, Thetas, keep_Z=True)
    print("H", H)
    Y = one_of_m(y, Thetas[-1].shape[0])
    print("Y", Y)
    # _check_parameters(X=X, Y=Y, H=H, Zs=Zs, Thetas=Thetas)
    grad = backpropagation(X, Y, H, Thetas, Zs, reg_lambda=reg)
    J = lambda t: cost(X, Y, feedforward(X, t), t, reg)
    print("Cost before optimization :", J(Thetas))
    iterations = gradient_descent(X, Y, Thetas, reg, alpha=0.1, max_iterations=5000, max_cost=1e-3, max_gradient=0)
    print("Cost after optimization :", J(Thetas), end='\n'*2)
    print("Iterations :", iterations)
    print("Thetas :", end='\n'*2)
    for Theta in Thetas:
        print(Theta, end='\n'*2)