Logistic regression classifier
import numpy as np
def logistic_regression(data, label, iteration, learning_rate):
    '''
    Logistic regression classifier.

    Arguments
        data: The training data with shape (n, d): n samples, each with d features.
        label: The training labels with shape (n, 1); each entry is +1 or -1.
        iteration: The number of gradient descent iterations to run.
        learning_rate: The learning rate for the weight update.

    Returns
        w: The learned separator with shape (d, 1).
    '''
    dim = data.shape[1]
    w = np.zeros((dim, 1))  # weight vector, learned by gradient descent on the cross-entropy error
    for i in range(iteration):  # fixed number of gradient descent steps
        g = np.zeros((dim, 1))
        for j in range(data.shape[0]):  # accumulate the gradient of the cross-entropy error
            gradient = (-1 * label[j] * data[j]) / (1 + np.exp(label[j] * np.dot(np.transpose(w), data[j])))
            g = g + gradient[:, np.newaxis]
        g = (1 / data.shape[0]) * g  # average over the n samples
        # move in the direction v = -learning_rate * gradient(w(t))
        v = -learning_rate * g
        # update weights: w(t + 1) = w(t) + v
        w = w + v
    return w
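
For reference, the inner loop above computes the average gradient g = -(1/n) * sum_i y_i * x_i / (1 + exp(y_i * w^T x_i)). A minimal vectorized sketch of the same quantity, assuming the (n, d) / (n, 1) shapes documented above; cross_entropy_gradient is a hypothetical helper, not part of the original gist:

def cross_entropy_gradient(data, label, w):
    # y_i * w^T x_i for every sample at once, shape (n, 1)
    margins = label * data.dot(w)
    # per-sample coefficient -y_i / (1 + exp(y_i * w^T x_i)), shape (n, 1)
    coeffs = -label / (1.0 + np.exp(margins))
    # sum of coeffs[i] * x_i over all rows, averaged over n samples, shape (d, 1)
    return data.T.dot(coeffs) / data.shape[0]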
def sig(z):
    return 1 / (1 + np.exp(-z))  # logistic sigmoid
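
For large negative z, np.exp(-z) can overflow. A numerically stable variant, sketched here as an optional alternative (sig_stable is a hypothetical name, not in the original gist), exponentiates only non-positive arguments:

def sig_stable(z):
    # exp is evaluated only at -|z| <= 0, so it never overflows
    e = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + e^-z); z < 0: e^z / (1 + e^z), which is the same value
    return np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))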
def accuracy(x, y, w):
    '''
    Computes the accuracy of a logistic regression model.

    Arguments
        x: The input data with shape (n, d): n samples, each with d features.
        y: The labels for x with shape (n, 1); each entry is +1 or -1.
        w: The separator learned by the logistic regression function, with shape (d, 1).

    Returns
        accuracy: The fraction of correctly classified samples. The threshold is 0.5:
        if the predicted probability is at least 0.5, the classification is +1,
        otherwise it is -1.
    '''
    correct = 0
    wt = np.transpose(w)  # w is fixed during evaluation, so transpose it once
    for i in range(x.shape[0]):
        prediction = sig(np.dot(wt, x[i]))  # predicted probability of class +1
        if (prediction >= 0.5 and y[i] == 1) or (prediction < 0.5 and y[i] == -1):
            correct = correct + 1  # classification matches the label
    return correct / x.shape[0]  # fraction of correct classifications
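
A minimal end-to-end sketch of how these functions fit together, on synthetic linearly separable data; the seed, shapes, ground-truth separator, and hyperparameters below are illustrative assumptions, not part of the original gist:

if __name__ == "__main__":
    rng = np.random.RandomState(0)
    n, d = 200, 2
    x = rng.randn(n, d)
    true_w = np.array([[2.0], [-1.0]])       # hypothetical ground-truth separator
    y = np.where(x.dot(true_w) >= 0, 1, -1)  # labels in {+1, -1}, shape (n, 1)
    w = logistic_regression(x, y, iteration=500, learning_rate=0.1)
    print("training accuracy:", accuracy(x, y, w))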