Gradient descent regression snippets
import numpy as np
# get the overall cost of the model
def compute_cost(X, y, coeff):
    '''
    inputs:
        * 'X': features matrix (independent variables)
        * 'y': target values (dependent variable)
        * 'coeff': regression coefficients
    output:
        * the mean squared error (MSE)
    '''
    squared_errors = np.power((predict_output(X, coeff) - y), 2)
    return np.sum(squared_errors) / len(X)
# returns the dependent-variable (y-axis) values which the model assigns to given independent-variable (x-axis) values
def predict_output(feature_matrix, coefficients):
    '''
    inputs:
        * feature_matrix: two-dimensional array of data points, where each column is a feature and each row is a point
        * coefficients: one-dimensional array of estimated feature coefficients
    output:
        * one-dimensional array of predictions
    '''
    predictions = np.dot(feature_matrix, coefficients)
    return predictions
# derivative of the cost function, the heart of the process which gradient descent uses to minimize the cost
def feature_derivative(errors, feature):
    N = len(feature)
    # (2/N) * np.dot(errors, feature) is the partial derivative of the MSE cost
    # (sum of squared errors divided by N) with respect to this feature's coefficient
    derivative = (2 / N) * np.dot(errors, feature)
    return derivative
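As a quick sanity check on the analytic gradient, the sketch below compares feature_derivative against a central-difference numerical derivative of compute_cost. It assumes the functions above are in scope; the data and the names X_check, y_check, w_check, and eps are illustrative, not part of the original snippets.

import numpy as np

rng = np.random.default_rng(0)                      # illustrative synthetic data
X_check = rng.normal(size=(50, 2))
y_check = X_check @ np.array([1.5, -2.0]) + rng.normal(scale=0.1, size=50)
w_check = np.array([0.5, -0.5])
errors = predict_output(X_check, w_check) - y_check
eps = 1e-6
for i in range(len(w_check)):
    w_plus, w_minus = w_check.copy(), w_check.copy()
    w_plus[i] += eps
    w_minus[i] -= eps
    # central difference of the cost along coefficient i
    numeric = (compute_cost(X_check, y_check, w_plus)
               - compute_cost(X_check, y_check, w_minus)) / (2 * eps)
    analytic = feature_derivative(errors, X_check[:, i])
    assert abs(numeric - analytic) < 1e-4, (numeric, analytic)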
# gradient descent optimization algorithm (GD)
def gradient_descent_regression(H, y, initial_coefficients, alpha, max_iterations=10000):
    '''
    inputs:
        * H: two-dimensional array of data points, where each column is a feature and each row is a point
        * y: one-dimensional array of target values (dependent variable)
        * initial_coefficients: one-dimensional array of initial coefficients (weights of the regression)
        * alpha: float, the step size with which GD "walks" towards the optimum values
        * max_iterations: int, tells the program when to terminate, based on the number of epochs (i.e. passes of the algorithm)
    outputs:
        * one-dimensional array of estimated regression coefficients
        * list with the cost after each iteration
    '''
    w = np.array(initial_coefficients, dtype=float)  # float copy, so updates neither truncate nor mutate the caller's array
    iteration = 0
    cost = []
    while iteration < max_iterations:
        pred = predict_output(H, w)
        residuals = pred - y
        # move each coefficient one step along the negative gradient of the cost
        for i in range(len(w)):
            partial = feature_derivative(residuals, H[:, i])
            w[i] = w[i] - alpha * partial
        iteration += 1
        cost.append(compute_cost(H, y, w))
    return w, cost
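A minimal end-to-end usage sketch follows. The synthetic data, the alpha value, and the comparison against NumPy's closed-form least-squares solver are all assumptions for illustration, not part of the original snippets; it assumes the functions above are in scope.

import numpy as np

rng = np.random.default_rng(42)
n = 100
x = rng.uniform(0, 2, n)
H = np.column_stack([np.ones(n), x])                # column of ones models the intercept
y = 3.0 + 5.0 * x + rng.normal(scale=0.5, size=n)   # true line: y = 3 + 5x, plus noise

w_gd, cost = gradient_descent_regression(H, y, np.zeros(2), alpha=0.05)
w_ls, *_ = np.linalg.lstsq(H, y, rcond=None)        # closed-form least squares for reference

print('GD coefficients:   ', w_gd)                  # should be close to [3, 5]
print('lstsq coefficients:', w_ls)
print('final cost (MSE):  ', cost[-1])

The cost list also makes it easy to eyeball convergence: if cost[-1] is still dropping noticeably relative to earlier entries, either raise max_iterations or tune alpha.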