Skip to content

Instantly share code, notes, and snippets.

@sinadalvand
Last active April 8, 2022 04:07
Show Gist options
  • Save sinadalvand/af1e5fa09488bc773f1bbb12c40717bb to your computer and use it in GitHub Desktop.
Save sinadalvand/af1e5fa09488bc773f1bbb12c40717bb to your computer and use it in GitHub Desktop.
Logistic Regression Implementation
# Build the one-vs-rest (0/1) target column for a single class label
def desireY(y, specificClass):
    """Return a column vector marking which entries of ``y`` equal ``specificClass``.

    Args:
        y: Labels, either a 1-D sequence (one label per sample) or a 2-D
            array whose first column holds the label.
        specificClass: The label treated as the positive class.

    Returns:
        An integer array of shape ``(len(y), 1)`` holding 1 where the label
        equals ``specificClass`` and 0 elsewhere.
    """
    labels = np.asarray(y)
    if labels.ndim > 1:
        # Column-vector input: the label is the first entry of every row.
        labels = labels[:, 0]
    # Compare whole labels; indexing each label with [0] would only look at
    # the first character of a string label and fail on scalar numbers.
    return (labels == specificClass).astype(int).reshape(-1, 1)
def gradientDescent(x, y, alpha, iter):
    """Train one-vs-all logistic regression with batch gradient descent.

    One binary classifier is fitted per distinct label found in ``y``: the
    targets are rebuilt with ``desireY`` (1 for that label, 0 otherwise) and
    the parameters are updated ``iter`` times, starting from zeros.

    Args:
        x: Feature matrix with one row per sample; ``x.shape[1]`` is the
            number of features.
        y: Iterable of hashable class labels, one per row of ``x``.
        alpha: Learning rate.
        iter: Number of gradient-descent iterations run for each class.

    Returns:
        A ``(thetas, class_labels)`` tuple where ``thetas[k]`` is the
        flattened parameter vector of the classifier for ``class_labels[k]``.
    """
    n = x.shape[1]  # number of features
    # Sorted so that position k always refers to the same label from run to
    # run; the iteration order of a plain set of strings is not reproducible.
    class_labels = sorted(set(y))
    thetas = []
    for label in class_labels:
        theta = np.zeros((n, 1))  # start every classifier from the zero vector
        desire_y = desireY(y, label)  # 0/1 targets for this label
        m = len(desire_y)
        step = alpha / m  # loop-invariant: learning rate averaged over m samples
        for _ in range(iter):
            theta = theta - step * np.dot(x.T, hypothesis(x, theta) - desire_y)
        thetas.append(theta.flatten())
    return thetas, class_labels
def gradientDescent(x, y, alpha, iter):
    """Fit a single logistic-regression parameter vector by gradient descent.

    NOTE(review): an earlier definition in this file uses the same name and
    returns ``(thetas, class_labels)``; if both live in one module, the later
    definition replaces the earlier one.

    Args:
        x: Feature matrix; ``x.shape[1]`` is the number of features.
        y: Targets subtracted from the hypothesis output.
        alpha: Learning rate (applied to the summed, un-averaged gradient).
        iter: Number of update steps.

    Returns:
        The parameter vector after ``iter`` updates, starting from zeros.
    """
    weights = np.zeros(x.shape[1])
    for _ in range(iter):
        residual = hypothesis(x, weights) - y
        gradient = np.dot(x.T, residual)
        weights = weights - alpha * gradient
    return weights
# Logistic-regression hypothesis: sigmoid applied to the linear scores
def hypothesis(X, theta):
    """Return ``sigmoid(np.dot(X, theta))``.

    Args:
        X: Feature matrix (or vector) multiplied with ``theta``.
        theta: Model parameters.
    """
    linear_scores = np.dot(X, theta)
    return sigmoid(linear_scores)
# import libraries, then load the dataset and convert it to a NumPy array
import random

import numpy as np
import pandas as pd
# Read the 'Data' sheet of the workbook and keep its values as a NumPy array.
df = pd.read_excel('dataset.xls', sheet_name='Data').to_numpy()
def normalize(data):
    """Standardize each column to zero mean and unit standard deviation.

    Args:
        data: NumPy array whose values can be cast to ``float``; statistics
            are computed along axis 0 (per column for a 2-D array).

    Returns:
        A float array of the same shape as ``data``. Columns whose standard
        deviation is zero (constant features) come back as all zeros.
    """
    data = data.astype(float)
    mean = np.mean(data, axis=0)
    std = np.std(data, axis=0)
    # A constant column has zero spread; dividing by it would give NaN/inf,
    # so divide by 1 instead and let the centred column stay at 0.
    std = np.where(std == 0, 1.0, std)
    return (data - mean) / std
# Standardize both splits with the TRAINING statistics: the model is fitted on
# features scaled by the training mean/std, so the test features must be put
# on that same scale rather than re-centred with their own statistics.
X_test = X_test.astype(float)
train_mean = np.mean(X_train.astype(float), axis=0)
train_std = np.std(X_train.astype(float), axis=0)
# Avoid dividing by zero for features that are constant in the training set.
train_std = np.where(train_std == 0, 1.0, train_std)
X_test = (X_test - train_mean) / train_std
X_train = normalize(X_train)
def predict(X, theta):
    """Return, for every sample, the index of the best-scoring classifier.

    Args:
        X: Feature matrix with one sample per row (a single 1-D sample is
            treated as a one-row matrix).
        theta: Sequence of per-class parameter vectors, one per class, or a
            single 1-D parameter vector.

    Returns:
        A 1-D integer array; entry ``i`` is the position in ``theta`` of the
        classifier with the highest score for row ``i`` of ``X``.
    """
    samples = np.atleast_2d(np.asarray(X))
    weights = np.atleast_2d(np.asarray(theta))
    # The sigmoid is strictly increasing, so the arg-max of the raw linear
    # scores is the arg-max of the probabilities. Using the raw scores avoids
    # false ties when several probabilities saturate to exactly 1.0.
    scores = samples @ weights.T
    return np.argmax(scores, axis=1)
# Train the one-vs-all model (learning rate 0.03, 500 iterations per class).
# The call unpacks two results, so it needs the gradientDescent variant that
# returns (thetas, class_labels).
thetas, classes = gradientDescent(X_train, Y_train, alpha=0.03, iter=500)
# Demo: a set keeps one copy of each label, so converting it back to a list
# yields the distinct class labels (in no guaranteed order).
datas = ["A", "A", "B", "A", "C", "C", "A"]
list({*datas})
# Numerically stable sigmoid function
def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))`` elementwise.

    Args:
        x: Scalar or array-like of real numbers.

    Returns:
        Values in ``[0, 1]`` with the same shape as ``x``.
    """
    x = np.asarray(x)
    # log(1 + exp(-x)) is evaluated by logaddexp without ever forming
    # exp(-x) directly, so large negative inputs no longer overflow.
    return np.exp(-np.logaddexp(0, -x))
# Translate each predicted class index into its label, then report the share
# of test samples whose predicted label matches the true one.
test_set_value = [classes[index] for index in predict(X_test, thetas)]
test_hits = test_set_value == Y_test
test_set_percent = sum(test_hits) / len(Y_test)
print(f"Accuracy for Test Set: {test_set_percent*100}%")
# Excerpt of the gradient-descent inner loop: repeat the update `iter` times.
# x, theta, alpha, m, desire_y and iter are expected from the enclosing
# function (presumably m is the number of samples - confirm against caller).
for _ in range(iter):
    # Difference between the current predictions and the 0/1 targets.
    error = hypothesis(x, theta) - desire_y
    # Step against the gradient, scaled by alpha / m.
    theta = theta - (alpha / m * np.dot(x.T, error))
# Translate each predicted class index into its label, then report the share
# of training samples whose predicted label matches the true one.
train_set_value = [classes[index] for index in predict(X_train, thetas)]
train_hits = train_set_value == Y_train
train_set_percent = sum(train_hits) / len(Y_train)
print(f"Accuracy for Train Set: {train_set_percent*100}%")
# Split the dataset into train and test sets with a 70/30 ratio.
threshold = int(len(df) * 0.7)
# One flag per row with exactly `threshold` True values; shuffling the flags
# selects the training rows at random while rows keep their original order.
randomArray = [i < threshold for i in range(len(df))]
random.shuffle(randomArray)
# Pair every row with its flag, then route it to the matching split.
randomArray = list(zip(randomArray, df.tolist()))
test_set = np.array([row for in_train, row in randomArray if not in_train])
train_set = np.array([row for in_train, row in randomArray if in_train])
# The last column is used as the label; all columns before it are features.
featureCounts = len(df[0]) - 1
X_train = train_set[:, :featureCounts]
Y_train = train_set[:, featureCounts]
X_test = test_set[:, :featureCounts]
Y_test = test_set[:, featureCounts]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment