Linear Regression in Python
'''
Linear Regression From First Principles
Author: Gaurav Menghani (gaurav.menghani@gmail.com)
'''
import numpy as np
import matplotlib.pyplot as plt


def linear_sum(X, W, b):
    return X.dot(W) + b

def data_gen(num_rows, num_feats, op_gen_fn=linear_sum):
    # Generate a synthetic dataset whose targets are an exact (noiseless)
    # linear function of the features.
    data = {}
    W = 0.1 * np.random.randn(num_feats)
    b = 0.1 * np.random.randn()
    data['X'] = np.random.randn(num_rows, num_feats)
    data['y'] = op_gen_fn(data['X'], W, b)
    return data
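
# data_gen above produces noiseless targets, so gradient descent can drive
# the loss arbitrarily close to zero. A noisy variant (hypothetical; not
# part of the original gist) only needs Gaussian noise added to y:
def noisy_data_gen(num_rows, num_feats, noise_std=0.01):
    data = data_gen(num_rows, num_feats)
    data['y'] += noise_std * np.random.randn(num_rows)
    return data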

class LinearRegression(object):
    def __init__(self):
        self.W = None
        self.b = 0

    def init_matrix(self, X):
        # Small random initialization of the parameters.
        self.W = 0.1 * np.random.randn(X.shape[1])
        self.b = 0.1 * np.random.randn()

    def predict(self, X):
        # Lazily initialize the parameters on the first call.
        if self.W is None:
            self.init_matrix(X)
        return X.dot(self.W) + self.b
    def diff(self, pred, y):
        return pred - y

    def l1_loss(self, pred, y):
        # Mean absolute error.
        return np.mean(np.abs(self.diff(pred, y)))

    def l2_loss(self, pred, y):
        # Mean squared error.
        return np.mean(self.diff(pred, y) ** 2)

    def loss(self, pred, y):
        return self.l2_loss(pred, y)
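
    # Worked example (not in the original): if pred - y = [1, -2], then the
    # L1 loss is (|1| + |-2|) / 2 = 1.5 and the L2 loss is (1 + 4) / 2 = 2.5.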
    def train(self, data, iters, lr):
        X = data['X']
        y = data['y']
        orig_pred = self.predict(X)
        orig_l1 = self.l1_loss(orig_pred, y)
        orig_l2 = self.l2_loss(orig_pred, y)
        print_every = 50
        l1, l2 = orig_l1, orig_l2
        l1_losses = [orig_l1]
        l2_losses = [orig_l2]
        for it in range(iters):
            pred = self.predict(X)
            # Gradient of the mean squared error with respect to W and b
            # (the conventional factor of 2 is absorbed into the learning
            # rate): dL/dW = X^T (pred - y) / N, dL/db = mean(pred - y).
            s1 = pred - y
            wGrad = X.T.dot(s1) / X.shape[0]
            bGrad = np.mean(s1)
            self.W -= lr * wGrad
            self.b -= lr * bGrad
            # Record the losses after the parameter update so the final
            # reported values reflect the trained parameters.
            pred = self.predict(X)
            l1 = self.l1_loss(pred, y)
            l2 = self.l2_loss(pred, y)
            l1_losses.append(l1)
            l2_losses.append(l2)
            if it % print_every == 0:
                print('Iteration: %d' % it)
                print('L1 loss: %f, L2 loss: %f' % (l1, l2))
                print('---')
        print('\n====')
        print('Original L1 loss: %f, L2 loss: %f' % (orig_l1, orig_l2))
        print('Final L1 loss: %f, L2 loss: %f' % (l1, l2))
        print('Estimated Params:')
        print('- b:', self.b)
        print('- W:', self.W)
        print('====\n')
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.set_title('Linear Regression Over %d Variables' % X.shape[1])
        ax.set_xlabel('Iterations')
        ax.set_ylabel('L2 Loss')
        ax.set_ylim(bottom=0)  # ymin= was removed in Matplotlib 3.0
        ax.plot(l2_losses, label='L2 loss')
        ax.legend()
        plt.show()
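
# A numerical gradient check is a common way to validate the analytic
# gradient above; this helper is a sketch and not part of the original
# gist. Since np.mean((pred - y) ** 2) has gradient 2 * X^T (pred - y) / N,
# the result should come out at exactly twice the wGrad used in train(),
# where the factor of 2 is folded into the learning rate.
def numerical_w_grad(model, X, y, eps=1e-6):
    grad = np.zeros_like(model.W)
    for j in range(len(model.W)):
        # Central difference along weight j.
        model.W[j] += eps
        loss_plus = model.loss(model.predict(X), y)
        model.W[j] -= 2 * eps
        loss_minus = model.loss(model.predict(X), y)
        model.W[j] += eps  # restore the original weight
        grad[j] = (loss_plus - loss_minus) / (2 * eps)
    return grad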

np.random.seed(2)
data = data_gen(100, 5)

# Persist training data as CSV: the target first, then the features.
with open('lin_reg_data', 'w') as data_file:
    for xd, yd in zip(data['X'], data['y']):
        line = str(yd) + ',' + ','.join(str(x) for x in xd) + '\n'
        data_file.write(line)
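
# Sanity-check the persisted file by reading it back (illustrative; not
# part of the original gist):
loaded = np.loadtxt('lin_reg_data', delimiter=',')
assert loaded.shape == (100, 6)  # one target column plus five feature columns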

regression = LinearRegression()
# An initial forward pass; predict() also lazily initializes W and b.
predictions = regression.predict(data['X'])
loss = regression.loss(predictions, data['y'])
regression.train(data, iters=1000, lr=0.01)
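
# Cross-check against the closed-form least-squares solution (not in the
# original gist): augment X with a column of ones for the bias term;
# gradient descent above should converge toward these values.
X_aug = np.hstack([data['X'], np.ones((data['X'].shape[0], 1))])
theta, _, _, _ = np.linalg.lstsq(X_aug, data['y'], rcond=None)
print('Closed-form W:', theta[:-1])
print('Closed-form b:', theta[-1])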
Validating the solution in R:
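
A minimal sketch of what that validation might look like, assuming the
lin_reg_data file written above (the original comment's code was not
preserved):

    df <- read.csv('lin_reg_data', header = FALSE)
    fit <- lm(V1 ~ ., data = df)
    coef(fit)  # the intercept should match b; the remaining coefficients, W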