Logistic regression classifier
import numpy as np
def logistic_regression(data, label, iteration, learning_rate):
    '''
    Logistic regression classifier.

    Arguments
        data: The training data with shape (n, d): n samples, each with d features.
        label: The training labels with shape (n, 1); each entry is +1 or -1.
        iteration: The number of gradient descent iterations to run.
        learning_rate: The learning rate for the weight update.

    Returns
        w: The learned separator with shape (d, 1).
    '''
    dim = data.shape[1]
    w = np.zeros((dim, 1))  # weight vector, learned by gradient descent on the cross-entropy error
    for i in range(iteration):  # fixed number of gradient descent steps
        g = np.zeros((dim, 1))
        for j in range(data.shape[0]):  # accumulate the gradient of the cross-entropy error
            gradient = (-1 * label[j] * data[j]) / (1 + np.exp(label[j] * np.dot(np.transpose(w), data[j])))
            g = g + gradient[:, np.newaxis]
        g = (1 / data.shape[0]) * g  # average over the n samples
        # move in the direction v = -learning_rate * gradient(w(t))
        v = -learning_rate * g
        # update weights: w(t + 1) = w(t) + v
        w = w + v
    return w
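
For reference, the inner loop above computes the average gradient g = -(1/n) * sum_i y_i * x_i / (1 + exp(y_i * w^T x_i)). A minimal vectorized sketch of the same quantity, assuming the (n, d) / (n, 1) shapes documented above; cross_entropy_gradient is a hypothetical helper, not part of the original gist:

def cross_entropy_gradient(data, label, w):
    # y_i * w^T x_i for every sample at once, shape (n, 1)
    margins = label * data.dot(w)
    # per-sample coefficient -y_i / (1 + exp(y_i * w^T x_i)), shape (n, 1)
    coeffs = -label / (1.0 + np.exp(margins))
    # sum of coeffs[i] * x_i over all rows, averaged over n samples, shape (d, 1)
    return data.T.dot(coeffs) / data.shape[0]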
def sig(z):
    return 1 / (1 + np.exp(-z))  # logistic sigmoid
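
For large negative z, np.exp(-z) can overflow. A numerically stable variant, sketched here as an optional alternative (sig_stable is a hypothetical name, not in the original gist), exponentiates only non-positive arguments:

def sig_stable(z):
    # exp is evaluated only at -|z| <= 0, so it never overflows
    e = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + e^-z); z < 0: e^z / (1 + e^z), which is the same value
    return np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))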
def accuracy(x, y, w):
    '''
    Computes the accuracy of a logistic regression model.

    Arguments
        x: The input data with shape (n, d): n samples, each with d features.
        y: The labels for x with shape (n, 1); each entry is +1 or -1.
        w: The separator learned by the logistic regression function, with shape (d, 1).

    Returns
        accuracy: The fraction of correctly classified samples. The threshold is 0.5:
        if the predicted probability is at least 0.5, the classification is +1,
        otherwise it is -1.
    '''
    correct = 0
    wt = np.transpose(w)  # w is fixed during evaluation, so transpose it once
    for i in range(x.shape[0]):
        prediction = sig(np.dot(wt, x[i]))  # predicted probability of class +1
        if (prediction >= 0.5 and y[i] == 1) or (prediction < 0.5 and y[i] == -1):
            correct = correct + 1  # classification matches the label
    return correct / x.shape[0]  # fraction of correct classifications
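
A minimal end-to-end sketch of how these functions fit together, on synthetic linearly separable data; the seed, shapes, ground-truth separator, and hyperparameters below are illustrative assumptions, not part of the original gist:

if __name__ == "__main__":
    rng = np.random.RandomState(0)
    n, d = 200, 2
    x = rng.randn(n, d)
    true_w = np.array([[2.0], [-1.0]])       # hypothetical ground-truth separator
    y = np.where(x.dot(true_w) >= 0, 1, -1)  # labels in {+1, -1}, shape (n, 1)
    w = logistic_regression(x, y, iteration=500, learning_rate=0.1)
    print("training accuracy:", accuracy(x, y, w))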