-
-
Save Andrew62/1ec169fad18b6df6ef464ec8df246fc7 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# -*- coding: utf-8 -*- | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
import numpy as np | |
import time | |
def computeError(intercept, slope, x_data, y_data):
    """Return the mean squared error of the line y = intercept + slope*x
    against the observed targets.

    x_data / y_data are numpy arrays or pandas Series; the arithmetic
    broadcasts element-wise and .sum() reduces over all samples.
    """
    residuals = (intercept + slope * x_data) - y_data
    squared_total = (residuals ** 2).sum()
    return squared_total / len(x_data)
def costFunction(intercept, slope, y_train, x_train, learning_rate):
    """Perform a single gradient-descent update for simple linear regression.

    NOTE(review): despite the name, this returns the *updated parameters*,
    not a cost value.

    Returns the new (intercept, slope) as a tuple.
    """
    n = len(x_train)
    # residuals broadcast element-wise across the Series/array
    residuals = (intercept + slope * x_train) - y_train
    # divide each term by n before summing (matches original float order)
    grad_intercept = (residuals / n).sum()
    grad_slope = ((residuals * x_train) / n).sum()
    new_intercept = intercept - learning_rate * grad_intercept
    new_slope = slope - learning_rate * grad_slope
    return new_intercept, new_slope
def graphResult(value0, value1, x_train, y_train):
    """Scatter-plot the training data and overlay the fitted regression line.

    value0 is the intercept and value1 the slope of the fitted line.
    Blocks on plt.show() until the window is closed.
    """
    plt.title('Linear Regression')
    plt.xlabel('X Training Data')
    plt.ylabel('Y Training Data')
    plt.scatter(x_train, y_train)
    # sample evenly across the observed x range for a smooth line
    xs = np.linspace(x_train.min(), x_train.max())
    plt.plot(xs, value0 + value1 * xs)
    plt.show()
def main():
    """Fit a line to data.csv via gradient descent, plot it, and report stats."""
    start = time.time()
    # header=None: the file has no header row, so without it the first
    # line of data would be consumed as the column names
    d = pd.read_csv('data.csv', header=None)
    d.columns = ["X", "y"]
    x_train = d['X']
    y_train = d['y']
    value0 = 0  # theta0 (intercept), initial guess
    value1 = 0  # theta1 (slope), initial guess
    learning_rate = 0.00001
    iterations = 1000
    print(f"Starting gradient descent linear regression with theta0: {value0} theta1: {value1}")
    print("Working...")
    # run one gradient-descent update per iteration; loop index is unused
    for _ in range(iterations):
        value0, value1 = costFunction(value0, value1, y_train, x_train, learning_rate)
    graphResult(value0, value1, x_train, y_train)
    error = computeError(value0, value1, x_train, y_train)
    end = time.time()
    # fixed typo in user-facing message: "Finsihed" -> "Finished"
    print(f"Finished with theta0: {value0} theta1: {value1} "
          f"iterations: {iterations} time elapsed: {round(end - start, 3)}s "
          f"error: {error}")


# this construct ensures the main function is run when this file is run as
# the primary entry point
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment