Linear Regression in Python
'''
Linear Regression From First Principles
Author: Gaurav Menghani (gaurav.menghani@gmail.com)
'''
import numpy as np
import matplotlib.pyplot as plt
def linear_sum(X, W, b):
    return X.dot(W) + b
def data_gen(num_rows, num_feats, op_gen_fn=linear_sum):
    # Generate a noiseless dataset from a random linear model.
    data = {}
    W = 0.1 * np.random.randn(num_feats)
    b = 0.1 * np.random.randn()
    data['X'] = np.random.randn(num_rows, num_feats)
    data['y'] = op_gen_fn(data['X'], W, b)
    return data
class LinearRegression(object):
    def __init__(self):
        self.W = None
        self.b = 0

    def init_matrix(self, X):
        # Small random initialization of the parameters.
        self.W = 0.1 * np.random.randn(X.shape[1])
        self.b = 0.1 * np.random.randn()

    def predict(self, X):
        # Lazily initialize the parameters on first use.
        if self.W is None:
            self.init_matrix(X)
        return X.dot(self.W) + self.b
    def diff(self, pred, y):
        return pred - y

    def l1_loss(self, pred, y):
        # Mean absolute error.
        return np.mean(np.abs(self.diff(pred, y)))

    def l2_loss(self, pred, y):
        # Mean squared error.
        return np.mean(self.diff(pred, y) ** 2)

    def loss(self, pred, y):
        return self.l2_loss(pred, y)
    def train(self, data, iters, lr):
        X = data['X']
        y = data['y']
        orig_pred = self.predict(X)
        orig_l1 = self.l1_loss(orig_pred, y)
        orig_l2 = self.l2_loss(orig_pred, y)
        print_every = 50
        l1 = orig_l1
        l2 = orig_l2
        l1_losses = [l1]
        l2_losses = [l2]
        for it in range(iters):
            pred = self.predict(X)
            err = pred - y
            # Gradient of the mean squared error w.r.t. W and b (the
            # constant factor of 2 is folded into the learning rate).
            wGrad = X.T.dot(err) / X.shape[0]
            bGrad = np.sum(err) / X.shape[0]
            self.W -= lr * wGrad
            self.b -= lr * bGrad
            l1 = self.l1_loss(pred, y)
            l2 = self.l2_loss(pred, y)
            l1_losses.append(l1)
            l2_losses.append(l2)
            if it % print_every == 0:
                print('Iteration: %d' % it)
                print('L1 loss: %f, L2 loss: %f' % (l1, l2))
                print('---')
        print('\n====')
        print('Original L1 loss: %f, L2 loss: %f' % (orig_l1, orig_l2))
        print('Final L1 loss: %f, L2 loss: %f' % (l1, l2))
        print('Estimated Params:')
        print('- b:', self.b)
        print('- W:', self.W)
        print('====\n')
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.set_title('Linear Regression Over %d Variables' % X.shape[1])
        ax.set_xlabel('Iterations')
        ax.set_ylabel('L2 Loss')
        ax.set_ylim(bottom=0)
        ax.plot(l2_losses)
        plt.show()
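
# A minimal finite-difference gradient check, as a sketch: `numerical_w_grad`
# is a hypothetical helper (not used by the class above) that estimates the
# gradient of the L2 loss w.r.t. W by central differences. Because wGrad in
# train() folds the constant factor of 2 into the learning rate, this estimate
# should come out close to 2 * wGrad. Call it only after model.predict() has
# initialized W.
def numerical_w_grad(model, X, y, eps=1e-6):
    grad = np.zeros_like(model.W)
    for i in range(len(model.W)):
        orig = model.W[i]
        model.W[i] = orig + eps   # perturb one weight upward
        loss_plus = model.l2_loss(model.predict(X), y)
        model.W[i] = orig - eps   # and downward
        loss_minus = model.l2_loss(model.predict(X), y)
        model.W[i] = orig         # restore the weight
        grad[i] = (loss_plus - loss_minus) / (2 * eps)
    return grad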
np.random.seed(2)
data = data_gen(100, 5)

# Persist the training data as CSV rows: y first, then the features.
with open('lin_reg_data', 'w') as data_file:
    for xd, yd in zip(data['X'], data['y']):
        data_file.write(str(yd) + ',' + ','.join(str(x) for x in xd) + '\n')
regression = LinearRegression()
# Loss with the randomly initialized parameters, before any training.
predictions = regression.predict(data['X'])
loss = regression.loss(predictions, data['y'])
regression.train(data, iters=1000, lr=0.01)
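
# Illustrative sanity check, assuming the noiseless data generated above: the
# closed-form least-squares fit should recover nearly the same parameters as
# gradient descent. `X_aug` (the feature matrix with a bias column of ones
# appended) is an assumption of this sketch, not part of the gist.
X_aug = np.hstack([data['X'], np.ones((data['X'].shape[0], 1))])
theta = np.linalg.lstsq(X_aug, data['y'], rcond=None)[0]
print('Closed-form W:', theta[:-1])
print('Closed-form b:', theta[-1])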