Linear Regression in Python
'''
Linear Regression From First Principles
Author: Gaurav Menghani (gaurav.menghani@gmail.com)
'''
import numpy as np
import matplotlib.pyplot as plt
def linear_sum(X, W, b):
    return X.dot(W) + b


def data_gen(num_rows, num_feats, op_gen_fn=linear_sum):
    data = {}
    W = 0.1 * np.random.randn(num_feats)
    b = 0.1 * np.random.randn()
    data['X'] = np.random.randn(num_rows, num_feats)
    data['y'] = op_gen_fn(data['X'], W, b)
    return data
class LinearRegression(object):
    def __init__(self):
        self.W = None
        self.b = 0

    def init_matrix(self, X):
        self.W = 0.1 * np.random.randn(X.shape[1])
        self.b = 0.1 * np.random.randn()

    def predict(self, X):
        if self.W is None:
            self.init_matrix(X)
        return X.dot(self.W) + self.b
    def diff(self, pred, y):
        return pred - y

    def l1_loss(self, pred, y):
        # Mean absolute error.
        return np.mean(np.abs(self.diff(pred, y)))

    def l2_loss(self, pred, y):
        # Mean squared error.
        return np.mean(self.diff(pred, y) ** 2)

    def loss(self, pred, y):
        return self.l2_loss(pred, y)
    def train(self, data, iters, lr):
        X = data['X']
        y = data['y']
        orig_pred = self.predict(X)
        orig_l1 = self.l1_loss(orig_pred, y)
        orig_l2 = self.l2_loss(orig_pred, y)
        print_every = 50
        l1 = orig_l1
        l2 = orig_l2
        l1_losses = [l1]
        l2_losses = [l2]
        for it in range(iters):
            pred = self.predict(X)
            # Residuals, and the gradient of the (halved) mean squared error:
            # dW = X^T (pred - y) / N, db = mean(pred - y).
            s1 = pred - y
            wGrad = X.T.dot(s1) / X.shape[0]
            bGrad = np.sum(s1) / X.shape[0]
            self.W += -lr * wGrad
            self.b += -lr * bGrad
            l1 = self.l1_loss(pred, y)
            l2 = self.l2_loss(pred, y)
            l1_losses.append(l1)
            l2_losses.append(l2)
            if it % print_every == 0:
                print('Iteration: ' + str(it))
                print('L1 loss: %f, L2 loss: %f' % (l1, l2))
                print('---')
        print('\n====')
        print('Original L1 loss: %f, L2 loss: %f' % (orig_l1, orig_l2))
        print('Final L1 loss: %f, L2 loss: %f' % (l1, l2))
        print('Estimated Params:')
        print('- b:', self.b)
        print('- W:', self.W)
        print('====\n')
        # Plot the L2 loss curve over training iterations.
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.set_title('Linear Regression Over %d Variables' % X.shape[1])
        ax.set_xlabel('Iterations')
        ax.set_ylabel('L2 Loss')
        ax.set_ylim(bottom=0)
        ax.plot(l2_losses, label='lr')
        plt.show()
np.random.seed(2)
data = data_gen(100, 5)

# Persist the training data so it can be cross-checked externally (e.g. in R).
with open('lin_reg_data', 'w') as data_file:
    for xd, yd in zip(data['X'], data['y']):
        data_file.write(str(yd) + ',' + ','.join(str(x) for x in xd) + '\n')

regression = LinearRegression()
predictions = regression.predict(data['X'])
loss = regression.loss(predictions, data['y'])
regression.train(data, iters=1000, lr=0.01)
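The gradient step in train() uses wGrad = X^T (pred - y) / N and bGrad = mean(pred - y), i.e. the gradient of half the mean squared error. Below is a minimal sanity-check sketch of my own (not part of the gist; the names halved_mse and check_gradients are mine) that compares those analytic gradients with central finite differences:

# Sanity check (my addition): analytic vs. numerical gradients of
# J(W, b) = mean((X.W + b - y)**2) / 2, the objective the update rule descends.
import numpy as np

def halved_mse(X, y, W, b):
    return np.mean((X.dot(W) + b - y) ** 2) / 2.0

def check_gradients(X, y, W, b, eps=1e-6):
    resid = X.dot(W) + b - y
    analytic_w = X.T.dot(resid) / X.shape[0]
    analytic_b = np.mean(resid)
    numeric_w = np.zeros_like(W)
    for j in range(W.size):
        Wp, Wm = W.copy(), W.copy()
        Wp[j] += eps
        Wm[j] -= eps
        numeric_w[j] = (halved_mse(X, y, Wp, b) - halved_mse(X, y, Wm, b)) / (2 * eps)
    numeric_b = (halved_mse(X, y, W, b + eps) - halved_mse(X, y, W, b - eps)) / (2 * eps)
    print('max |dW| error:', np.max(np.abs(analytic_w - numeric_w)))
    print('|db| error:', abs(analytic_b - numeric_b))

X = np.random.randn(100, 5)
y = X.dot(0.1 * np.random.randn(5)) + 0.05
check_gradients(X, y, 0.1 * np.random.randn(5), 0.0)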
@reddragon (Author) commented:
Validating the solution in R:

> df <- read.delim(file="lin_reg_data", header=F, sep=",")
> lm(df[,1] ~ df[,2]+df[,3]+df[,4]+df[,5]+df[,6])

Call:
lm(formula = df[, 1] ~ df[, 2] + df[, 3] + df[, 4] + df[, 5] +
    df[, 6])

Coefficients:
(Intercept)      df[, 2]      df[, 3]      df[, 4]      df[, 5]      df[, 6]
  -0.084175    -0.041676    -0.005627    -0.213620     0.164027    -0.179344
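The same data file can also be cross-checked in Python with NumPy's closed-form least squares. This is a sketch of my own (not part of the gist); it assumes lin_reg_data was written by the script above, and the fitted intercept and weights should line up with lm()'s coefficients as well as the gradient-descent estimates:

# Cross-check (my addition): ordinary least squares on the persisted data.
import numpy as np

data = np.loadtxt('lin_reg_data', delimiter=',')
y, X = data[:, 0], data[:, 1:]
A = np.hstack([np.ones((X.shape[0], 1)), X])  # leading column of ones for the intercept
coef, residuals, rank, sv = np.linalg.lstsq(A, y, rcond=None)
print('Intercept:', coef[0])
print('Weights:', coef[1:])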
