adpoe/uni_gd.py

## uni_gd.py
def gradient_descent(training_examples, alpha=0.01):
    """
    Apply gradient descent on the training examples to learn a line that fits through the examples
    :param examples: set of all examples in (x,y) format
    :param alpha = learning rate
    :return:
    """
    # initialize w0 and w1 to some small value, here just using 0 for simplicity
    w0 = 0
    w1 = 0

    # repeat until "convergence", meaning that w0 and w1 aren't changing very much
    # --> need to define what 'not very much' means, and that may depend on problem domain
    convergence = False
    while not convergence:
        # initialize temporary variables, and set them to 0
        delta_w0 = 0
        delta_w1 = 0

        for pair in training_examples:
            # grab our data points from the example
            x_i = pair[0]
            y_i = pair[1]

            # calculate a prediction, and find the error
            h_of_x_i = model_prediction(w0,w1,x_i)
            delta_w0 += prediction_error(w0,w1, x_i, y_i)
            delta_w1 += prediction_error(w0,w1,x_i,y_i)*x_i

        # store previous weighting values
        prev_w0 = w0
        prev_w1 = w1

        # get new weighting values
        w0 = w0 + alpha*delta_w0
        w1 = w1 + alpha*delta_w1
        alpha -= 0.001

        # every few iterations print out current model
        #     1.  -->  (w0 + w1x1 + w2x2 + ... + wnxn)
        print "Current model is: ("+str(w0)+" + "+str(w1)+"x1)"
        #     2.  -->  averaged squared error over training set, using the current line
        summed_error = sum_of_squared_error_over_entire_dataset(w0, w1, training_examples)
        avg_error = summed_error/len(training_examples)
        print "Average Squared Error="+str(avg_error)


        # check if we have converged
        if abs(prev_w0 - w0) < 0.00001 and abs(prev_w1 - w1) < 0.00001:
            convergence = True

    # after convergence, print out the parameters of the trained model (w0, ... wn)
    print "Parameters of trained model are: w0="+str(w0)+", w1="+str(w1)
    return w0, w1


############################
##### TRAINING HELPERS #####
############################
def model_prediction(w0, w1, x_i):
    return w0 + (w1 * x_i)

def prediction_error(w0, w1, x_i, y_i):
    # basically, we just take the true value (y_i)
    # and we subtract the predicted value from it
    # this gives us an error, or J(w0,w1) value
    return y_i - model_prediction(w0, w1, x_i)

def sum_of_squared_error_over_entire_dataset(w0, w1, training_examples):
    # find the squared error over the whole training set
    sum = 0
    for pair in training_examples:
        x_i = pair[0]
        y_i = pair[1]
        sum += prediction_error(w0,w1,x_i,y_i) ** 2
    return sum
	def gradient_descent(training_examples, alpha=0.01):
	"""
	Apply gradient descent on the training examples to learn a line that fits through the examples
	:param examples: set of all examples in (x,y) format
	:param alpha = learning rate
	:return:
	"""
	# initialize w0 and w1 to some small value, here just using 0 for simplicity
	w0 = 0
	w1 = 0

	# repeat until "convergence", meaning that w0 and w1 aren't changing very much
	# --> need to define what 'not very much' means, and that may depend on problem domain
	convergence = False
	while not convergence:
	# initialize temporary variables, and set them to 0
	delta_w0 = 0
	delta_w1 = 0

	for pair in training_examples:
	# grab our data points from the example
	x_i = pair[0]
	y_i = pair[1]

	# calculate a prediction, and find the error
	h_of_x_i = model_prediction(w0,w1,x_i)
	delta_w0 += prediction_error(w0,w1, x_i, y_i)
	delta_w1 += prediction_error(w0,w1,x_i,y_i)*x_i

	# store previous weighting values
	prev_w0 = w0
	prev_w1 = w1

	# get new weighting values
	w0 = w0 + alpha*delta_w0
	w1 = w1 + alpha*delta_w1
	alpha -= 0.001

	# every few iterations print out current model
	# 1. --> (w0 + w1x1 + w2x2 + ... + wnxn)
	print "Current model is: ("+str(w0)+" + "+str(w1)+"x1)"
	# 2. --> averaged squared error over training set, using the current line
	summed_error = sum_of_squared_error_over_entire_dataset(w0, w1, training_examples)
	avg_error = summed_error/len(training_examples)
	print "Average Squared Error="+str(avg_error)


	# check if we have converged
	if abs(prev_w0 - w0) < 0.00001 and abs(prev_w1 - w1) < 0.00001:
	convergence = True

	# after convergence, print out the parameters of the trained model (w0, ... wn)
	print "Parameters of trained model are: w0="+str(w0)+", w1="+str(w1)
	return w0, w1


	############################
	##### TRAINING HELPERS #####
	############################
	def model_prediction(w0, w1, x_i):
	return w0 + (w1 * x_i)

	def prediction_error(w0, w1, x_i, y_i):
	# basically, we just take the true value (y_i)
	# and we subtract the predicted value from it
	# this gives us an error, or J(w0,w1) value
	return y_i - model_prediction(w0, w1, x_i)

	def sum_of_squared_error_over_entire_dataset(w0, w1, training_examples):
	# find the squared error over the whole training set
	sum = 0
	for pair in training_examples:
	x_i = pair[0]
	y_i = pair[1]
	sum += prediction_error(w0,w1,x_i,y_i) ** 2
	return sum