# WUSTL CSE 417 Intro to ML, HW2 solution: logistic regression trained with
# gradient descent on the cross-entropy loss.
# Course website: http://chienjuho.com/courses/cse417t/
# HW2: http://chienjuho.com/courses/cse417t/hw2.pdf
# License: CC-BY
# Homework 2 Code
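# For reference (LFD notation), the model and loss being minimized below are:
#   h(x) = sigmoid(w^T x), where sigmoid(s) = 1 / (1 + e^(-s))
#   E_in(w) = (1/N) * sum_{n=1}^{N} ln(1 + exp(-y_n * w^T x_n)),  y_n in {-1, +1}
# Gradient descent updates the weights as w(t+1) = w(t) - eta * grad E_in(w(t)).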
import numpy as np
import pandas as pd
import time
import scipy.stats
def find_binary_error(w, X, y):
    # find_binary_error: compute the binary error of a linear classifier w on data set (X, y)
    # Inputs:
    #   w: weight vector
    #   X: data matrix (without an initial column of 1s)
    #   y: data labels (plus or minus 1)
    # Outputs:
    #   binary_error: binary classification error of w on the data set (X, y);
    #                 this should be between 0 and 1.
    probability_threshold = 0.5
    X = np.concatenate((np.ones((X.shape[0], 1)), X), axis=1)  # prepend bias column of 1s
    y_hat = 1 / (1 + np.exp(-np.dot(X, w)))                    # sigmoid output, in [0, 1]
    y_hat = np.where(y_hat >= probability_threshold, 1, -1)    # threshold to {-1, +1}
    binary_error = np.mean(y_hat != y)
    return binary_error
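# A minimal sanity check for find_binary_error (an illustrative helper, not
# part of the assignment): a perfect separator on a toy set should give error 0.
def _check_find_binary_error():
    X = np.array([[1.0], [2.0], [-1.0], [-2.0]])  # one feature, four examples
    y = np.array([1, 1, -1, -1])
    w = np.array([0.0, 1.0])                      # bias 0, weight 1 separates the data
    assert find_binary_error(w, X, y) == 0.0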
def logistic_reg(X, y, w_init, max_its, eta, grad_threshold):
    # logistic_reg: learn a logistic regression model using gradient descent
    # Inputs:
    #   X: data matrix (without an initial column of 1s)
    #   y: data labels (plus or minus 1)
    #   w_init: initial value of the w vector (d+1 dimensional)
    #   max_its: maximum number of iterations to run for
    #   eta: learning rate
    #   grad_threshold: one of the termination conditions;
    #       terminate if the magnitude of every element of the gradient is smaller than grad_threshold
    # Outputs:
    #   t: number of iterations gradient descent ran for
    #   w: weight vector
    #   e_in: in-sample error (the cross-entropy error as defined in LFD)
    X = np.concatenate((np.ones((X.shape[0], 1)), X), axis=1)  # prepend bias column of 1s
    w = np.array(w_init, dtype=float)  # start from the supplied initial weights
    t = 0
    for _ in range(max_its):
        gradient_val = gradient(X, w, y)
        # stop once every component of the gradient is below the threshold
        if np.max(np.abs(gradient_val)) < grad_threshold:
            break
        t = t + 1
        w = w - eta * gradient_val
    # cross-entropy in-sample error: E_in(w) = (1/N) * sum_n ln(1 + exp(-y_n * w^T x_n))
    e_in = np.mean(np.log(1 + np.exp(-y * np.dot(X, w))))
    return t, w, e_in
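# Derivation sketch for the gradient below: differentiating E_in(w) above with
# the chain rule gives
#   grad E_in(w) = -(1/N) * sum_n (y_n * x_n) / (1 + exp(y_n * w^T x_n)),
# which gradient() computes in vectorized form.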
def gradient(X, w, y):
    # gradient of the cross-entropy error with respect to w
    denominator = 1 + np.exp(y * np.dot(X, w))  # N-vector, one entry per example
    numerator = y[:, None] * X                  # N x (d+1) matrix of y_n * x_n rows
    tmp = numerator / denominator[:, None]
    return -np.sum(tmp, axis=0) / y.shape[0]
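# To verify gradient() independently (an illustrative helper, not part of the
# assignment), compare it against a centered finite-difference estimate:
def _numerical_gradient(X, w, y, h=1e-6):
    e_in = lambda w: np.mean(np.log(1 + np.exp(-y * np.dot(X, w))))
    grad = np.zeros_like(w, dtype=float)
    for j in range(w.shape[0]):
        step = np.zeros_like(w, dtype=float)
        step[j] = h
        grad[j] = (e_in(w + step) - e_in(w - step)) / (2 * h)
    return grad  # should agree with gradient(X, w, y) up to roughly 1e-6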
# Alternative formulation, equivalent when the labels are 0/1 instead of -1/+1
# (kept for reference; the code above uses the {-1, +1} convention from LFD):
# def sigmoid(z):
#     return 1 / (1 + np.exp(-z))
#
# def cost(h, y):
#     return (-y * np.log(h) - (1 - y) * np.log(1 - h)).mean()
#
# def gradient(X, h, y):
#     return np.dot(X.T, (h - y)) / y.shape[0]
def main():
    # Load training data
    train_data = pd.read_csv('clevelandtrain.csv')
    # Extract features and labels
    feature_cols = ['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg',
                    'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal']
    X = train_data[feature_cols]
    y = train_data['heartdisease::category|0|1']
    y = (2 * y - 1).to_numpy()  # map {0, 1} labels to {-1, +1}

    w_init = np.zeros(X.shape[1] + 1)
    eta = 0.01
    max_its = 10**6  # also worth trying 10**4 and 10**5
    grad_threshold = 10**(-6)

    start_time = time.time()
    X = scipy.stats.zscore(X, axis=0)  # standardize each feature (z-score)
    t, w, e_in = logistic_reg(X, y, w_init, max_its, eta, grad_threshold)
    print(t, e_in, time.time() - start_time)
    print(find_binary_error(w, X, y))
    # Load test data and evaluate the trained w out of sample
    # (assumption: the test CSV shares the training CSV's schema; the test set
    # is standardized with its own statistics, mirroring the training treatment)
    test_data = pd.read_csv('clevelandtest.csv')
    X_test = scipy.stats.zscore(test_data[feature_cols], axis=0)
    y_test = (2 * test_data['heartdisease::category|0|1'] - 1).to_numpy()
    print(find_binary_error(w, X_test, y_test))
if __name__ == "__main__":
    main()
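# Optional demo on synthetic data (an assumption, not part of the original
# assignment), handy when the Cleveland CSV files are unavailable:
def demo_synthetic(n=200, seed=0):
    rng = np.random.default_rng(seed)
    X = rng.normal(size=(n, 2))
    w_true = np.array([0.5, 2.0, -1.0])  # hypothetical bias + two weights
    p = 1 / (1 + np.exp(-(w_true[0] + X @ w_true[1:])))
    y = np.where(rng.random(n) < p, 1, -1)  # sample {-1, +1} labels from the model
    t, w, e_in = logistic_reg(X, y, np.zeros(3), 10**4, 0.1, 10**(-6))
    print(t, e_in, find_binary_error(w, X, y))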