Skip to content

Instantly share code, notes, and snippets.

@codeboy101
Last active August 6, 2017 18:34
Show Gist options
  • Save codeboy101/d9c17515a0f2c3a5be0745d357d5dfe5 to your computer and use it in GitHub Desktop.
Save codeboy101/d9c17515a0f2c3a5be0745d357d5dfe5 to your computer and use it in GitHub Desktop.
import numpy as np
import pandas as pd
def normalize(data):
    """Scale ``data`` by its own mean.

    Every element is divided by ``data.mean()``. For NumPy arrays and
    matrices, ``mean()`` without an axis is the mean over all elements, so a
    multi-column input is scaled by one shared value rather than per column.

    Args:
        data: An object that provides ``.mean()`` and supports division by
            the result (this file passes a NumPy matrix and a NumPy array).

    Returns:
        ``data / data.mean()``; the input is not modified.
    """
    return data / data.mean()
def predict(x, weights):
    """Return the linear-model output for ``x``.

    Args:
        x: A feature vector (or a matrix of feature rows).
        weights: The model weights, with a shape compatible with
            ``np.dot(x, weights)``.

    Returns:
        ``np.dot(x, weights)``.
    """
    return np.dot(x, weights)
def gradient_descent(X, weights, y, lr):
    """Run one pass of per-sample gradient descent over ``X``.

    For each row ``X[i]`` the prediction ``np.dot(X[i], w)`` is compared with
    ``y[i]`` and the weights take one step against the squared-error
    gradient::

        w <- w - lr * (prediction - y[i]) * X[i].T

    Args:
        X: Feature rows; ``X[i]`` must support ``.T`` (NumPy array or matrix).
        weights: Starting weights, shaped so that ``np.dot(X[i], weights)``
            is defined.
        y: Targets, indexed in step with the rows of ``X``.
        lr: Learning rate (step size).

    Returns:
        The weights after every row of ``X`` has been visited once. The
        ``weights`` argument is not modified in place.
    """
    updated = weights
    for i, sample in enumerate(X):
        # Same computation as predict(sample, updated).
        err = np.dot(sample, updated) - y[i]
        step = np.multiply(np.multiply(sample.T, lr), err)
        # Descent moves *against* the gradient; adding the step would make
        # the error grow on every update.
        updated = updated - step
    return updated
def calculate_error(hypothesis, y):
    """Return half the sum of squared differences between two inputs.

    Both inputs are converted to plain ndarrays first so that the square is
    always elementwise. On ``np.matrix`` operands ``** 2`` is a matrix power,
    which fails for non-square shapes and is not a squared error otherwise.

    Args:
        hypothesis: Predicted value(s).
        y: Target value(s), broadcastable against ``hypothesis``.

    Returns:
        ``sum((hypothesis - y) ** 2) / 2`` computed elementwise.
    """
    residual = np.asarray(hypothesis) - np.asarray(y)
    return np.sum(np.square(residual)) / 2
def mean_error(total, length):
    """Return the average error, ``total / length``.

    Args:
        total: Accumulated error.
        length: Number of items the error was accumulated over.

    Returns:
        The quotient ``total / length``.
    """
    return total / length
def train(X, y, weights, lr, epoch, show=True, test_set=None):
    """Fit the weights by running ``gradient_descent`` for ``epoch`` passes.

    After each pass, when ``show`` is true and a non-empty ``test_set`` is
    given, the mean per-sample error on the test set is printed.

    Args:
        X: Training feature rows.
        y: Training targets, indexed in step with ``X``.
        weights: Initial weights.
        lr: Learning rate forwarded to ``gradient_descent``.
        epoch: Number of passes over the training data.
        show: When false, nothing is printed and the test set is not
            evaluated.
        test_set: Optional ``(features, targets)`` pair; ``features`` must
            expose ``.shape``.

    Returns:
        The weights after the final pass.
    """
    if show:
        print('training for {} epochs'.format(epoch))

    evaluate = show and bool(test_set)
    for _ in range(epoch):
        weights = gradient_descent(X, weights, y, lr)
        if not evaluate:
            continue

        test_features = test_set[0]
        test_targets = test_set[1]
        n_test = test_features.shape[0]
        if n_test == 0:
            # Nothing to average over; avoid dividing by zero.
            continue

        print('testing')
        total = 0
        for i in range(n_test):
            prediction = predict(test_features[i], weights)
            total += calculate_error(prediction, test_targets[i])
        # Average over the number of test samples, not over the last
        # sample's error value.
        print(mean_error(total, n_test))
    return weights
# Read the training CSV and keep only the feature and target columns.
df = pd.read_csv('train.csv')[['LotArea', 'SalePrice']]

# Append a constant column of ones named 'Ones'
# (presumably the intercept term of the linear model -- confirm).
extra_col = pd.Series(np.ones(len(df)), name='Ones')
df = df.join(extra_col, how='inner')

features = np.matrix(df[['LotArea', 'Ones']])
labels = np.array(df['SalePrice'])

# One random starting weight per feature column, stored as a column vector.
weights = np.random.random((features.shape[1], 1))

# Each array is divided by its own overall mean; labels become a column.
features = normalize(features)
labels = normalize(labels).reshape(features.shape[0], 1)

# Fit on rows 0-99 and report the error on rows 100-199.
# The weights returned by train() are not kept.
train(
    features[:100],
    labels[:100],
    weights,
    1e-9,
    100,
    test_set=(features[100:200], labels[100:200]),
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment