EE 298: Linear Regression via Gradient Descent
#!/usr/bin/env python3

import sys

import numpy as np
import matplotlib.pyplot as plt

EPSILON = np.finfo(float).eps
NUM_SAMPLES = 1000
# Make the learning rate inversely proportional to the number of samples:
# the gradient updates grow proportionally with the sample size.
LR = 0.1 / NUM_SAMPLES
# Noise amplitude as a fraction of the output range (y_max - y_min)
NOISE = 0.25


def make_polynomial_func(coefficients):
    while len(coefficients) < 4:
        coefficients.insert(0, 0.)
    coefficients = np.array(coefficients)

    def f(x):
        return x.dot(coefficients)

    return f
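
# Usage note (illustrative): make_polynomial_func([1., -2., 3.]) pads the
# coefficients to [0., 1., -2., 3.], so with the feature columns laid out as
# [x**3, x**2, x, 1] (see generate_training_data below) the returned f
# evaluates x**2 - 2*x + 3. Note that f expects the stacked feature matrix,
# not the raw scalar inputs.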


def glorot_uniform(fan_in, fan_out):
    """Glorot/Xavier initializer"""
    scale = 1. / max(1., float(fan_in + fan_out) / 2)
    limit = np.sqrt(3. * scale)
    shape = (fan_in, fan_out)
    return np.random.uniform(-limit, limit, shape)
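
# For fan_in + fan_out >= 2, the limit above reduces to sqrt(6 / (fan_in + fan_out)),
# the standard Glorot/Xavier uniform bound; e.g. for the (4, 1) weight matrix used
# here, limit = sqrt(6 / 5) ≈ 1.095.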


def generate_training_data(f):
    # Generate inputs
    x = np.random.uniform(-2., 2., (NUM_SAMPLES, 1))
    x = np.concatenate([x*x*x, x*x, x, np.ones_like(x)], axis=-1)
    # Generate outputs
    y = f(x).reshape(-1, 1)
    # Add noise
    n = NOISE * (y.max() - y.min())
    y += np.random.uniform(-n, n, y.shape).astype(y.dtype)
    return x, y


def mean_squared_error(y_true, y_pred):
    return np.mean(np.square(y_pred - y_true), axis=-1)
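
# Note: mean_squared_error returns the per-sample MSE over the output
# dimension (shape (NUM_SAMPLES,)); train() then averages it over samples.
# The exact gradient of that scalar loss w.r.t. the predictions would carry a
# 1/NUM_SAMPLES factor; the code below instead uses the unscaled 2 * (yp - y)
# and compensates by dividing the learning rate by NUM_SAMPLES (see LR above).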


def train(x, y):
    # Initialize weights
    W = glorot_uniform(x.shape[-1], y.shape[-1])
    loss = sys.float_info.max
    steps = 0
    lr = LR
    while True:
        # Get predictions
        yp = x.dot(W)
        # Use MSE as loss
        last_loss = loss
        loss = mean_squared_error(y, yp).mean()
        # Decay when the loss increased since the last iteration or didn't change
        if loss > last_loss or np.abs(last_loss - loss) < 2 * EPSILON:
            lr *= 0.9
            # Stop entirely when the learning rate has become too low
            if lr < 2 * EPSILON:
                break
        # Gradient: dloss/dyp
        dloss = 2 * (yp - y)
        # Gradient: dyp/dW
        dyp = x
        # Backprop to W: dloss/dW = dloss/dyp * dyp/dW
        dW = dyp.T.dot(dloss)
        # Update weights
        W -= lr * dW
        steps += 1
        print('step:', steps, 'loss:', loss, 'lr:', lr)
    return W.squeeze()
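
# Optional sanity check (not part of the original script): gradient descent
# should approach the closed-form least-squares fit. A minimal sketch, assuming
# x and y come from generate_training_data:
#
#     W_ls = np.linalg.lstsq(x, y, rcond=None)[0].squeeze()
#     print('LS:', W_ls)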


def visualize(x, y, W):
    fp = make_polynomial_func(W)
    x_plot = x[:, 2]
    plt.plot(x_plot, y, 'bo', label='y_true')
    plt.plot(x_plot, fp(x), 'ro', label='y_pred')
    plt.legend()
    plt.xlabel('x')
    plt.ylabel('y')
    plt.show()


def main():
    poly_degree = len(sys.argv) - 2
    if poly_degree < 1 or poly_degree > 3:
        print('Polynomial degree should be between 1 and 3')
        sys.exit(1)
    coefficients = sys.argv[1:]
    coefficients = list(map(float, coefficients))
    f = make_polynomial_func(coefficients)
    x, y = generate_training_data(f)
    W = train(x, y)
    print('GT:', coefficients)
    print('P: ', W)
    visualize(x, y, W)


if __name__ == '__main__':
    main()
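
# Example invocation (the script filename is hypothetical):
#     python3 linreg.py 1 -2 3
# fits the quadratic y = x**2 - 2*x + 3 to noisy samples, prints the
# ground-truth (GT) and predicted (P) coefficients, and plots both.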