Univariate Gradient Descent, in Python
def gradient_descent(training_examples, alpha=0.01): | |
""" | |
Apply gradient descent on the training examples to learn a line that fits through the examples | |
:param examples: set of all examples in (x,y) format | |
:param alpha = learning rate | |
:return: | |
""" | |
# initialize w0 and w1 to some small value, here just using 0 for simplicity | |
w0 = 0 | |
w1 = 0 | |
# repeat until "convergence", meaning that w0 and w1 aren't changing very much | |
# --> need to define what 'not very much' means, and that may depend on problem domain | |
convergence = False | |
while not convergence: | |
# initialize temporary variables, and set them to 0 | |
delta_w0 = 0 | |
delta_w1 = 0 | |
for pair in training_examples: | |
# grab our data points from the example | |
x_i = pair[0] | |
y_i = pair[1] | |
# calculate a prediction, and find the error | |
h_of_x_i = model_prediction(w0,w1,x_i) | |
delta_w0 += prediction_error(w0,w1, x_i, y_i) | |
delta_w1 += prediction_error(w0,w1,x_i,y_i)*x_i | |
# store previous weighting values | |
prev_w0 = w0 | |
prev_w1 = w1 | |
# get new weighting values | |
w0 = w0 + alpha*delta_w0 | |
w1 = w1 + alpha*delta_w1 | |
alpha -= 0.001 | |
# every few iterations print out current model | |
# 1. --> (w0 + w1x1 + w2x2 + ... + wnxn) | |
print "Current model is: ("+str(w0)+" + "+str(w1)+"x1)" | |
# 2. --> averaged squared error over training set, using the current line | |
summed_error = sum_of_squared_error_over_entire_dataset(w0, w1, training_examples) | |
avg_error = summed_error/len(training_examples) | |
print "Average Squared Error="+str(avg_error) | |
# check if we have converged | |
if abs(prev_w0 - w0) < 0.00001 and abs(prev_w1 - w1) < 0.00001: | |
convergence = True | |
# after convergence, print out the parameters of the trained model (w0, ... wn) | |
print "Parameters of trained model are: w0="+str(w0)+", w1="+str(w1) | |
return w0, w1 | |
############################ | |
##### TRAINING HELPERS ##### | |
############################ | |
def model_prediction(w0, w1, x_i): | |
return w0 + (w1 * x_i) | |
def prediction_error(w0, w1, x_i, y_i): | |
# basically, we just take the true value (y_i) | |
# and we subtract the predicted value from it | |
# this gives us an error, or J(w0,w1) value | |
return y_i - model_prediction(w0, w1, x_i) | |
def sum_of_squared_error_over_entire_dataset(w0, w1, training_examples): | |
# find the squared error over the whole training set | |
sum = 0 | |
for pair in training_examples: | |
x_i = pair[0] | |
y_i = pair[1] | |
sum += prediction_error(w0,w1,x_i,y_i) ** 2 | |
return sum |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment