Gradient Descent Algorithm
from __future__ import division
import numpy as np
from sklearn import linear_model
# Adapted from http://stackoverflow.com/questions/17784587/gradient-descent-using-python-and-numpy
def genData(numPoints, bias, variance):
    x = np.zeros(shape=(numPoints, 1))
    y = np.zeros(shape=numPoints)
    # Generate points along a noisy straight line
    for i in range(0, numPoints):
        x[i] = i                                                # single feature
        y[i] = (i + bias) + np.random.uniform(0, 1) * variance  # target with uniform noise
    return x, y
X, y = genData(100, 25, 10)
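# X has shape (100, 1) and y has shape (100,): a line with slope 1, intercept
# `bias`, plus uniform noise in [0, variance) -- so the fitted thetas should
# come out near [30, 1] (the intercept absorbs the mean noise of ~5).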
def prependOnesColumn(X):
    # Preprocessing: prepend a column of ones for the intercept (x^0) term.
    # Works for any n x 1 or n x m array.
    return np.column_stack((np.ones(X.shape[0]), X))
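# For example (a quick shape check, assuming a 2x1 input):
#   prependOnesColumn(np.array([[5.], [7.]])) -> [[1., 5.], [1., 7.]]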
# A much simpler, faster, and far more efficient way of doing linear regression.
def normalEquation(X, y):
    X, y = np.array(X), np.array(y)
    X = prependOnesColumn(X)
    # Normal equation: theta = (X^T X)^-1 X^T y
    # (solve for x at http://mathworld.wolfram.com/NormalEquation.html)
    A_inv = np.linalg.inv(np.dot(X.T, X))
    B = np.dot(X.T, y)
    theta = np.dot(A_inv, B)
    print('Thetas are ' + str(theta))
    return theta
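# Sanity check (an added suggestion, not in the original gist): the closed-form
# thetas should match numpy's least-squares solver on the same design matrix:
#   np.linalg.lstsq(prependOnesColumn(X), y, rcond=None)[0]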
class linearRegression:
    learning_rate = 0.0005
    max_iterations = 100000

    def hypothesis(self, X, thetas):
        # h(x) = X . theta  (X already includes the ones column)
        return np.dot(X, thetas)

    def getCost(self, X, Y, thetas):
        # J(theta) = (1 / 2m) * sum((h(x) - y)^2)
        m = X.shape[0]
        return (1.0 / (2 * m)) * np.sum((self.hypothesis(X, thetas) - Y) ** 2)

    def solve(self, X, Y):
        X = prependOnesColumn(X)
        num_of_rows, num_of_columns = X.shape
        thetas = np.ones(num_of_columns)  # create and initialize thetas

        for i in range(self.max_iterations):
            error = self.hypothesis(X, thetas) - Y
            j_derivative = np.dot(error, X)  # gradient of J (scaled by m)
            thetas -= (self.learning_rate / num_of_rows) * j_derivative

        print('Cost ' + str(self.getCost(X, Y, thetas)))
        print('Thetas ' + str(thetas))
        return thetas
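
# Example usage (a sketch added for illustration, not part of the original gist):
# run both solvers on the synthetic data above and compare against sklearn,
# which the script already imports. All three should land on similar thetas.
if __name__ == '__main__':
    model = linearRegression()
    gd_thetas = model.solve(X, y)          # gradient descent
    ne_thetas = normalEquation(X, y)       # closed-form normal equation

    reg = linear_model.LinearRegression()  # sklearn, for comparison
    reg.fit(X, y)
    print('sklearn thetas are ' + str(np.append(reg.intercept_, reg.coef_)))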