Last active
April 8, 2022 04:07
-
-
Save sinadalvand/af1e5fa09488bc773f1bbb12c40717bb to your computer and use it in GitHub Desktop.
Logistic Regression Implementation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build the one-vs-rest target column for a single class label.
def desireY(y, specificClass):
    """Return a binary column marking which entries of ``y`` equal ``specificClass``.

    Args:
        y: Class labels, either a 1-D sequence or a 2-D array whose first
            column holds the label.
        specificClass: The label treated as the positive class.

    Returns:
        An integer array of shape ``(len(y), 1)`` holding 1 where the label
        equals ``specificClass`` and 0 elsewhere.
    """
    labels = np.asarray(y)
    if labels.ndim > 1:
        # Keep the original convention of reading the label from column 0.
        labels = labels[:, 0]
    # Compare whole labels: indexing each label with [0] fails on scalar
    # labels and only inspects the first character of string labels.
    return (labels == specificClass).astype(int).reshape(-1, 1)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def gradientDescent(x, y, alpha, iter):
    """Fit one-vs-rest logistic regression with batch gradient descent.

    One parameter vector is trained per distinct label in ``y``; for each
    label the targets are 1 for rows of that class and 0 for all others.

    Args:
        x: Feature matrix; ``x.shape[1]`` is taken as the feature count.
        y: Class label for each row of ``x`` (1-D, or a single column).
        alpha: Learning rate.
        iter: Number of gradient steps performed for each class. The name
            shadows the builtin but is kept so keyword callers still work.

    Returns:
        A tuple ``(thetas, class_labels)``: a list with one flattened
        parameter vector per class, and the list of class labels in the
        same order.
    """
    n = x.shape[1]  # feature count
    # Work on a column so desireY can read the label from position 0 of
    # every row regardless of whether y arrived 1-D or as a column.
    y_column = np.asarray(y).reshape(-1, 1)
    m = len(y_column)  # sample count
    # First-occurrence order is reproducible; the iteration order of a set
    # of string labels can change from one interpreter run to the next.
    class_labels = list(dict.fromkeys(y_column.ravel().tolist()))
    thetas = []
    for label in class_labels:
        theta = np.zeros((n, 1))  # start every class from the zero vector
        desire_y = desireY(y_column, label)  # 1 for this class, 0 otherwise
        for _ in range(iter):
            gradient = np.dot(x.T, hypothesis(x, theta) - desire_y)
            theta = theta - alpha / m * gradient
        thetas.append(theta.flatten())
    return thetas, class_labels
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def gradientDescent(x, y, alpha, iter):
    """Fit a single logistic-regression parameter vector by gradient descent.

    Args:
        x: Feature matrix; ``x.shape[1]`` is taken as the feature count.
        y: Target value for each row of ``x``.
        alpha: Learning rate. The step is not divided by the number of
            samples, so it scales with the size of ``x``.
        iter: Number of gradient steps. The name shadows the builtin but is
            kept so keyword callers still work.

    Returns:
        The parameter vector after ``iter`` updates, starting from zeros.
    """
    n = x.shape[1]  # feature count
    theta = np.zeros(n)
    for _ in range(iter):
        residual = hypothesis(x, theta) - y
        theta = theta - alpha * np.dot(x.T, residual)
    return theta
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Logistic-regression hypothesis: sigmoid of the linear score X . theta.
def hypothesis(X, theta):
    """Return ``sigmoid(X . theta)``.

    Args:
        X: Feature matrix (or vector) multiplied with ``theta`` via ``np.dot``.
        theta: Parameter vector or column.

    Returns:
        The result of applying ``sigmoid`` to ``np.dot(X, theta)``.
    """
    linear_score = np.dot(X, theta)
    return sigmoid(linear_score)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Import libraries and load the 'Data' sheet of the workbook as a NumPy array.
import numpy as np
import pandas as pd

# to_numpy() drops the DataFrame wrapper so later code can index rows and
# columns positionally.
df = pd.read_excel('dataset.xls', sheet_name='Data').to_numpy()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def normalize(data):
    """Standardize every column of ``data`` to zero mean and unit variance.

    Args:
        data: Array-like of numeric values; statistics are taken along
            axis 0 (per column for a 2-D input).

    Returns:
        A float array of the same shape with the column mean subtracted and
        the result divided by the column standard deviation. Columns whose
        standard deviation is zero come back as all zeros.
    """
    data = np.asarray(data).astype(float)
    mean = np.mean(data, axis=0)
    std = np.std(data, axis=0)
    # A constant column has std 0 and would turn into NaN; divide by 1
    # instead so its centred values simply stay at 0.
    safe_std = np.where(std == 0, 1.0, std)
    return (data - mean) / safe_std
# Standardize the feature matrices before training and evaluation.
# NOTE(review): each set is scaled with its own mean/std; confirm whether the
# test set should reuse the training statistics instead.
X_train = normalize(X_train)
X_test = normalize(X_test)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def predict(X, theta):
    """Return, for each row of ``X``, the index of the best-scoring class.

    Args:
        X: 2-D feature matrix, one sample per row.
        theta: Sequence of per-class parameter vectors (one row per class
            once stacked into an array).

    Returns:
        A 1-D integer array with the index into ``theta`` of the winning
        class for every row of ``X``.
    """
    scores = np.asarray(X) @ np.asarray(theta).T
    # The sigmoid is strictly increasing, so the arg-max of the raw scores
    # picks the same class; skipping it also avoids false ties when several
    # large scores all saturate to exactly 1.0.
    return np.argmax(scores, axis=1)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Train on the training split with learning rate 0.03 and 500 iterations;
# the call yields the learned parameters and the matching class labels.
thetas, classes = gradientDescent(X_train, Y_train, 0.03, 500)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Small demonstration of collapsing repeated labels into the distinct ones.
datas = ["A", "A", "B", "A", "C", "C", "A"]
list(set(datas))  # distinct labels; a set does not promise any particular order
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Logistic (sigmoid) function.
def sigmoid(x):
    """Return ``1 / (1 + exp(-x))`` element-wise.

    Args:
        x: Scalar or array-like of real numbers.

    Returns:
        Values in ``[0, 1]`` with the same shape as ``x``.
    """
    z = np.asarray(x, dtype=float)
    # logaddexp(0, -z) is log(1 + exp(-z)) computed without ever forming
    # exp(-z) on its own, so very negative inputs cannot overflow.
    return np.exp(-np.logaddexp(0.0, -z))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Translate predicted class indices back into labels for the test set.
test_set_value = [classes[class_index] for class_index in predict(X_test, thetas)]
# Compare as arrays so the element-wise equality is explicit, then take the
# fraction of matches.
test_set_percent = np.mean(np.asarray(test_set_value) == Y_test)
print(f"Accuracy for Test Set: {test_set_percent*100}%")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Excerpt of the training loop: the names used here (iter, theta, alpha, m,
# x, desire_y) must already be defined by the surrounding code.
for i in range(iter):  # one gradient-descent step per iteration
    theta = theta - (alpha / m * (np.dot(x.T, (hypothesis(x, theta) - desire_y))))  # update thetas
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Translate predicted class indices back into labels for the training set.
train_set_value = [classes[class_index] for class_index in predict(X_train, thetas)]
# Compare as arrays so the element-wise equality is explicit, then take the
# fraction of matches.
train_set_percent = np.mean(np.asarray(train_set_value) == Y_train)
print(f"Accuracy for Train Set: {train_set_percent*100}%")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Split the dataset into train and test sets with a 70/30 ratio.
import random  # random.shuffle is used below but no import of it is visible

threshold = int(len(df) * 0.7)
# One flag per row: True marks a training row. Exactly `threshold` flags are
# True, and shuffling them picks which rows those are.
randomArray = [i < threshold for i in range(len(df))]
random.shuffle(randomArray)
randomArray = list(zip(randomArray, df.tolist()))
test_set = np.array([row for is_train, row in randomArray if not is_train])
train_set = np.array([row for is_train, row in randomArray if is_train])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Separate features from labels: the last column holds the label and every
# column before it is a feature.
featureCounts = len(df[0]) - 1
X_train = train_set[:, :featureCounts]
Y_train = train_set[:, featureCounts]
X_test = test_set[:, :featureCounts]
Y_test = test_set[:, featureCounts]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment