Skip to content

Instantly share code, notes, and snippets.

@Andrew62
Last active August 27, 2017 08:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Andrew62/1ec169fad18b6df6ef464ec8df246fc7 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import time
def computeError(intercept, slope, x_data, y_data):
    """Return the mean squared error of the line y = intercept + slope * x.

    x_data and y_data are array-like (numpy/pandas), so the arithmetic
    broadcasts element-wise — no explicit loop needed.
    """
    predictions = intercept + slope * x_data
    residuals = predictions - y_data
    # Mean of the squared residuals over all samples.
    return (residuals ** 2).sum() / len(x_data)
def costFunction(intercept, slope, y_train, x_train, learning_rate):
    """Perform one gradient-descent update step for simple linear regression.

    Computes the gradient of the mean squared residual with respect to the
    intercept and the slope, then steps each parameter opposite its gradient.
    Returns the updated (intercept, slope) pair as a tuple.
    """
    n = len(x_train)
    # Broadcasts across the whole series at once — no per-element loop.
    residuals = (intercept + slope * x_train) - y_train
    grad_intercept = (residuals / n).sum()
    grad_slope = ((residuals * x_train) / n).sum()
    return (intercept - learning_rate * grad_intercept,
            slope - learning_rate * grad_slope)
def graphResult(value0, value1, x_train, y_train):
    """Scatter the training data and overlay the fitted regression line."""
    plt.title('Linear Regression')
    plt.xlabel('X Training Data')
    plt.ylabel('Y Training Data')
    plt.scatter(x_train, y_train)
    # Evaluate the fitted line y = value0 + value1 * x across the observed x range.
    line_x = np.linspace(x_train.min(), x_train.max())
    line_y = value0 + value1 * line_x
    plt.plot(line_x, line_y)
    plt.show()
def main():
    """Fit a line to data.csv by gradient descent, plot it, and report stats.

    Reads two unlabeled columns (X, y) from data.csv, runs a fixed number of
    gradient-descent iterations starting from theta0 = theta1 = 0, then shows
    the fitted line and prints the final parameters, elapsed time, and error.
    """
    start = time.time()
    # header=None: the CSV has no header row, so the first line of data
    # must not be consumed as column names.
    d = pd.read_csv('data.csv', header=None)
    d.columns = ["X", "y"]
    x_train = d['X']
    y_train = d['y']
    value0 = 0  # theta0 (intercept), initialized at zero
    value1 = 0  # theta1 (slope), initialized at zero
    learning_rate = 0.00001
    iterations = 1000
    print("Starting gradient descent linear regression with theta0: "
          + str(value0) + " theta1: " + str(value1))
    print("Working...")
    # Repeatedly apply the gradient-descent update; each call returns the
    # next (theta0, theta1) pair.
    for _ in range(iterations):
        value0, value1 = costFunction(value0, value1, y_train, x_train, learning_rate)
    graphResult(value0, value1, x_train, y_train)
    error = computeError(value0, value1, x_train, y_train)
    end = time.time()
    # Fixed typo in the original message ("Finsihed" -> "Finished").
    print("Finished with theta0: " + str(value0) +
          " theta1: " + str(value1) + " iterations: "
          + str(iterations) + " time elapsed: "
          + str(round(end - start, 3)) + "s"
          + " error: " + str(error))
# Standard script-entry guard: run main() only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment