Last active
August 6, 2017 18:34
-
-
Save codeboy101/d9c17515a0f2c3a5be0745d357d5dfe5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pandas as pd | |
def normalize(data):
    """Scale ``data`` by dividing it by its own mean.

    Args:
        data: Object that provides ``.mean()`` and supports division by the
            result (e.g. a NumPy array or matrix).

    Returns:
        ``data`` divided by ``data.mean()``. The mean is taken with no axis
        argument, so it is whatever ``data.mean()`` yields by default.
    """
    mean_value = data.mean()
    return data / mean_value
def predict(x, weights):
    """Return the linear-model output for ``x`` under ``weights``.

    Args:
        x: Input feature vector or matrix.
        weights: Model weights, with a shape compatible with ``x`` for
            ``np.dot``.

    Returns:
        The result of ``np.dot(x, weights)``.
    """
    return np.dot(x, weights)
def gradient_descent(X, weights, y, lr):
    """Run one pass of per-sample gradient descent over ``X``.

    For each row of ``X`` the prediction error ``predict(row, w) - y[i]`` is
    computed and the weights are moved *against* the squared-error gradient:
    ``w <- w - lr * err * row.T``.

    Args:
        X: Sample collection; ``X[i]`` must support ``.T`` and be accepted by
            ``predict``.
        weights: Starting weights. Not modified in place.
        y: Targets, indexed positionally in step with the rows of ``X``.
        lr: Learning rate (step size).

    Returns:
        The weights after every sample has been visited once. If ``X`` is
        empty, ``weights`` is returned unchanged.
    """
    updated = weights
    for i, row in enumerate(X):
        err = predict(row, updated) - y[i]
        step = np.multiply(np.multiply(row.T, lr), err)
        # Subtract the step: err * row is the gradient of 0.5 * err**2, and
        # adding it (as the code previously did) increases the error.
        # Rebinding rather than using ``-=`` keeps the caller's array intact.
        updated = updated - step
    return updated
def calculate_error(hypothesis, y):
    """Return half the summed squared difference between prediction and target.

    Args:
        hypothesis: Predicted value(s).
        y: Target value(s), with a shape compatible with ``hypothesis``.

    Returns:
        ``np.sum((hypothesis - y) ** 2) / 2``.
    """
    residual = hypothesis - y
    squared_total = np.sum(residual ** 2)
    return squared_total / 2
def mean_error(total, length):
    """Return the average error: ``total`` divided by ``length``.

    Args:
        total: Accumulated error.
        length: Number of items the error was accumulated over.

    Returns:
        ``total / length``.
    """
    average = total / length
    return average
def train(X, y, weights, lr, epoch, show=True, test_set=None):
    """Fit ``weights`` with repeated gradient-descent passes, then optionally test.

    Args:
        X: Training samples, passed through to ``gradient_descent``.
        y: Training targets, passed through to ``gradient_descent``.
        weights: Initial weights.
        lr: Learning rate passed to ``gradient_descent``.
        epoch: Number of full passes over ``(X, y)``.
        show: Not used by this function; kept so existing callers that pass
            it keep working.
        test_set: Optional ``(features, targets)`` pair. When given, the
            per-sample error of the trained weights is averaged over the
            pair and printed.

    Returns:
        The weights after ``epoch`` passes.
    """
    print('training for {} epochs'.format(epoch))
    for _ in range(epoch):
        weights = gradient_descent(X, weights, y, lr)

    # Truthiness (not ``is not None``) is kept so an empty tuple is still
    # treated as "no test set", as before.
    if test_set:
        print('testing')
        test_features, test_targets = test_set[0], test_set[1]
        sample_count = test_features.shape[0]
        total = 0
        for i in range(sample_count):
            prediction = predict(test_features[i], weights)
            total += calculate_error(prediction, test_targets[i])
        # Average over the number of test samples. The divisor used to be
        # the last sample's error, which is not a mean. Skip the report
        # for an empty test set rather than dividing by zero.
        if sample_count:
            print(mean_error(total, sample_count))
    return weights
# Load the data set, keeping only the single input column and the target.
df = pd.read_csv('train.csv')[['LotArea', 'SalePrice']]

# Add a constant column of ones as a second feature
# (presumably the bias/intercept input -- confirm).
extra_col = pd.Series(np.ones(len(df)), name='Ones')
df = df.join(extra_col, how='inner')

# np.matrix is kept deliberately: row indexing then yields 1xN rows, which
# is the shape the training code's ``.T`` / ``np.dot`` calls rely on.
features = np.matrix(df[['LotArea', 'Ones']])
labels = np.array(df['SalePrice'])

# One random starting weight per feature column, as a column vector.
weights = np.random.random((features.shape[1], 1))

# Scale inputs and targets by their respective means.
features = normalize(features)
labels = normalize(labels).reshape(features.shape[0], 1)

# Train on the first 100 rows and evaluate on the following 100.
train(
    features[:100],
    labels[:100],
    weights,
    1e-9,
    100,
    test_set=(features[100:200], labels[100:200]),
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment