sinadalvand/Logestic_regression_Imports.py

## Logestic_regression_desire_y.py
# extract special label from Y_train
def desireY(y, specificClass):
    """Build a 0/1 column vector marking the entries of ``y`` equal to a class.

    Args:
        y: Class labels, either a 1-D sequence/array or a 2-D array whose
            first column holds the label.
        specificClass: The label treated as the positive class.

    Returns:
        Integer array of shape ``(len(y), 1)`` holding 1 where the label
        equals ``specificClass`` and 0 elsewhere.
    """
    labels = np.asarray(y)
    if labels.ndim > 1:
        # Column-shaped input: the label lives in the first column.
        labels = labels[:, 0]
    # Compare whole labels at once; indexing each element with [0] would
    # fail on numeric scalars and only look at the first character of strings.
    return (labels == specificClass).astype(int).reshape(-1, 1)

## Logestic_regression_gradientDescent.py
def gradientDescent(x, y, alpha, iter):
    """Fit one logistic-regression weight vector per distinct label in ``y``.

    Args:
        x: 2-D feature matrix; ``x.shape[1]`` is taken as the feature count.
        y: Iterable of hashable class labels (it is passed to ``set``).
        alpha: Learning rate; each step is scaled by ``alpha / m`` where
            ``m`` is the number of target rows.
        iter: Number of gradient steps performed for every class.

    Returns:
        Tuple ``(thetas, class_labels)``: ``thetas`` is a list of flattened
        weight vectors, one per entry of ``class_labels`` in the same order.
        The label order is whatever ``set`` iteration yields.
    """
    feature_count = x.shape[1]
    class_labels = list(set(y))
    thetas = []
    for label in class_labels:
        # Every class starts from an all-zero column of weights.
        weights = np.zeros((feature_count, 1))
        targets = desireY(y, label)  # 1 for this class, 0 for all others
        m = len(targets)
        for _ in range(iter):
            residual = hypothesis(x, weights) - targets
            weights = weights - (alpha / m * np.dot(x.T, residual))
        thetas.append(weights.flatten())
    return thetas, class_labels

## Logestic_regression_gradientDescent_Binary.py
def gradientDescent(x, y, alpha, iter):
    """Fit a single logistic-regression weight vector by gradient descent.

    The gradient is summed over the rows of ``x`` and is not divided by the
    number of rows, so the step taken per iteration is ``alpha`` times that
    sum.

    Args:
        x: 2-D feature matrix; ``x.shape[1]`` is taken as the feature count.
        y: Target values subtracted from the hypothesis output.
        alpha: Learning rate.
        iter: Number of gradient steps.

    Returns:
        The weight vector after ``iter`` updates, starting from zeros.
    """
    weights = np.zeros(x.shape[1])
    for _ in range(iter):
        residual = hypothesis(x, weights) - y
        weights = weights - alpha * np.dot(x.T, residual)
    return weights

## Logestic_regression_hypothesis.py
# calculate hypothesis for logistic regression
def hypothesis(X, theta):
    """Return the sigmoid of the product ``np.dot(X, theta)``."""
    linear_scores = np.dot(X, theta)
    return sigmoid(linear_scores)

## Logestic_regression_Imports.py
# import libraries, then load the dataset sheet and convert it to a NumPy array
import numpy as np
import pandas as pd
# Read the 'Data' sheet of the workbook and keep it as a plain NumPy array.
df = pd.read_excel(
    'dataset.xls',
    sheet_name='Data',
).to_numpy()

## Logestic_regression_normalize.py
def normalize(data):
    """Standardise each column of ``data`` to zero mean and unit variance.

    Args:
        data: Array-like of numeric values; statistics are taken along
            axis 0.

    Returns:
        Float array of the same shape with the per-column mean subtracted
        and the result divided by the per-column standard deviation.
        Columns whose standard deviation is zero come back as all zeros
        instead of NaN.
    """
    data = np.asarray(data).astype(float)
    mean = np.mean(data, axis=0)
    std = np.std(data, axis=0)
    # A constant column has std == 0; dividing by 1 keeps it at 0 rather
    # than producing 0/0 = NaN.
    safe_std = np.where(std == 0, 1.0, std)
    return (data - mean) / safe_std

# Standardise both splits with statistics computed on the training split
# only, so the test features are put on the same scale the model is fitted on.
X_train = X_train.astype(float)
X_test = X_test.astype(float)
train_mean = np.mean(X_train, axis=0)
train_std = np.std(X_train, axis=0)
train_std = np.where(train_std == 0, 1.0, train_std)  # constant feature: avoid 0/0
X_train = (X_train - train_mean) / train_std
X_test = (X_test - train_mean) / train_std

## Logestic_regression_predict.py
def predict(X, theta):
    """Return, for each row of ``X``, the index of the highest-scoring theta.

    Args:
        X: Feature matrix with one sample per row; a single 1-D sample is
            treated as one row.
        theta: Sequence of weight vectors (one per class) or a single
            weight vector.

    Returns:
        1-D integer array with one index into ``theta`` per sample.
    """
    samples = np.atleast_2d(X)
    weights = np.atleast_2d(np.asarray(theta))
    scores = sigmoid(samples @ weights.T)  # shape: (samples, classes)
    # Vectorised arg-max over the class axis replaces a per-row Python map.
    return np.argmax(scores, axis=1)

## Logestic_regression_run.py
# Train on the training split: learning rate 0.03, 500 iterations.
thetas, classes = gradientDescent(
    X_train,
    Y_train,
    alpha=0.03,
    iter=500,
)

## Logestic_regression_set_sample.py
# Sample showing how repeated labels collapse to their distinct values;
# the order of the resulting list is whatever set iteration yields.
datas = ["A", "A", "B", "A", "C", "C", "A"]
[*set(datas)]

## Logestic_regression_sigmoid.py
# simple sigmoid function
def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))`` element-wise.

    The value is computed from ``exp(-|x|)``, which never exceeds 1, so
    large-magnitude negative inputs do not overflow.

    Args:
        x: Scalar or array-like of numbers.

    Returns:
        A float scalar for scalar input, otherwise a float array with the
        same shape as ``x``.
    """
    x = np.asarray(x, dtype=float)
    decay = np.exp(-np.abs(x))  # always in (0, 1], cannot overflow
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with an empty tuple turns a 0-d result back into a scalar
    # and leaves n-d arrays untouched.
    return result[()]

## Logestic_regression_test_set_eval.py
# Translate each predicted index into its class label, then report the
# share of test rows whose label matches Y_test.
test_set_value = [classes[index] for index in predict(X_test, thetas)]
test_set_percent = sum(test_set_value == Y_test) / len(Y_test)
print(f"Accuracy for Test Set: {test_set_percent*100}%")

## Logestic_regression_theta.py
# Excerpt of the training loop: repeat the theta update `iter` times.
for i in range(iter):
    residual = hypothesis(x, theta) - desire_y
    theta = theta - (alpha / m * np.dot(x.T, residual))  # update thetas

## Logestic_regression_train_set_eval.py
# Translate each predicted index into its class label, then report the
# share of training rows whose label matches Y_train.
train_set_value = [classes[index] for index in predict(X_train, thetas)]
train_set_percent = sum(train_set_value == Y_train) / len(Y_train)
print(f"Accuracy for Train Set: {train_set_percent*100}%")

## Logestic_regression_train_test.py
# split the dataset into train and test sets with a 70/30 ratio
import random  # used by the shuffle below; not imported by the imports snippet

# Build a boolean mask with 70% True entries, shuffle it, and use it to
# route every row of df to either the training or the test split.
threshold = int(len(df) * 0.7)
randomArray = [i < threshold for i in range(len(df))]
random.shuffle(randomArray)
randomArray = list(zip(randomArray, df.tolist()))
test_set = np.array([row for is_train, row in randomArray if not is_train])
train_set = np.array([row for is_train, row in randomArray if is_train])

## Logestic_regression_X_Y.py
# The last column of each row is the label; everything before it is a feature.
featureCounts = len(df[0]) - 1
X_train, Y_train = train_set[:, :featureCounts], train_set[:, featureCounts]
X_test, Y_test = test_set[:, :featureCounts], test_set[:, featureCounts]
	# extract special label from Y_train
	def desireY(y, specificClass):
	    """Build a 0/1 column vector marking the entries of ``y`` equal to a class.

	    Args:
	        y: Class labels, either a 1-D sequence/array or a 2-D array whose
	            first column holds the label.
	        specificClass: The label treated as the positive class.

	    Returns:
	        Integer array of shape ``(len(y), 1)`` holding 1 where the label
	        equals ``specificClass`` and 0 elsewhere.
	    """
	    labels = np.asarray(y)
	    if labels.ndim > 1:
	        # Column-shaped input: the label lives in the first column.
	        labels = labels[:, 0]
	    # Compare whole labels at once; indexing each element with [0] would
	    # fail on numeric scalars and only look at the first character of strings.
	    return (labels == specificClass).astype(int).reshape(-1, 1)
	def gradientDescent(x, y, alpha, iter):
	    """Fit one logistic-regression weight vector per distinct label in ``y``.

	    Args:
	        x: 2-D feature matrix; ``x.shape[1]`` is taken as the feature count.
	        y: Iterable of hashable class labels (it is passed to ``set``).
	        alpha: Learning rate; each step is scaled by ``alpha / m`` where
	            ``m`` is the number of target rows.
	        iter: Number of gradient steps performed for every class.

	    Returns:
	        Tuple ``(thetas, class_labels)``: ``thetas`` is a list of flattened
	        weight vectors, one per entry of ``class_labels`` in the same order.
	    """
	    n = x.shape[1]
	    class_labels = list(set(y))
	    thetas = []
	    for label in class_labels:
	        theta = np.zeros((n, 1))  # every class starts from zero weights
	        desire_y = desireY(y, label)  # 1 for this class, 0 for all others
	        m = len(desire_y)
	        for _ in range(iter):
	            residual = hypothesis(x, theta) - desire_y
	            theta = theta - (alpha / m * np.dot(x.T, residual))
	        thetas.append(theta.flatten())
	    return thetas, class_labels
	def gradientDescent(x, y, alpha, iter):
	    """Fit a single logistic-regression weight vector by gradient descent.

	    The gradient is summed over the rows of ``x`` and is not divided by the
	    number of rows.

	    Args:
	        x: 2-D feature matrix; ``x.shape[1]`` is taken as the feature count.
	        y: Target values subtracted from the hypothesis output.
	        alpha: Learning rate.
	        iter: Number of gradient steps.

	    Returns:
	        The weight vector after ``iter`` updates, starting from zeros.
	    """
	    n = x.shape[1]
	    theta = np.zeros(n)
	    for _ in range(iter):
	        theta = theta - alpha * np.dot(x.T, (hypothesis(x, theta) - y))
	    return theta
	# calculate hypothesis for logistic regression
	def hypothesis(X, theta):
	    """Return the sigmoid of the product ``np.dot(X, theta)``."""
	    return sigmoid(np.dot(X, theta))
	# import libraries, then load the dataset sheet and convert it to a NumPy array
	import numpy as np
	import pandas as pd
	# Read the 'Data' sheet of the workbook and keep it as a plain NumPy array.
	df = pd.read_excel(
	    'dataset.xls',
	    sheet_name='Data',
	).to_numpy()
	def normalize(data):
	    """Standardise each column of ``data`` to zero mean and unit variance.

	    Args:
	        data: Array-like of numeric values; statistics are taken along
	            axis 0.

	    Returns:
	        Float array of the same shape with the per-column mean subtracted
	        and the result divided by the per-column standard deviation.
	        Columns whose standard deviation is zero come back as all zeros
	        instead of NaN.
	    """
	    data = np.asarray(data).astype(float)
	    mean = np.mean(data, axis=0)
	    std = np.std(data, axis=0)
	    # A constant column has std == 0; dividing by 1 keeps it at 0 rather
	    # than producing 0/0 = NaN.
	    safe_std = np.where(std == 0, 1.0, std)
	    return (data - mean) / safe_std

	# Standardise both splits with statistics computed on the training split
	# only, so the test features are put on the same scale the model is fitted on.
	X_train = X_train.astype(float)
	X_test = X_test.astype(float)
	train_mean = np.mean(X_train, axis=0)
	train_std = np.std(X_train, axis=0)
	train_std = np.where(train_std == 0, 1.0, train_std)  # constant feature: avoid 0/0
	X_train = (X_train - train_mean) / train_std
	X_test = (X_test - train_mean) / train_std
	def predict(X, theta):
	    """Return, for each row of ``X``, the index of the highest-scoring theta.

	    Args:
	        X: Feature matrix with one sample per row; a single 1-D sample is
	            treated as one row.
	        theta: Sequence of weight vectors (one per class) or a single
	            weight vector.

	    Returns:
	        1-D integer array with one index into ``theta`` per sample.
	    """
	    samples = np.atleast_2d(X)
	    weights = np.atleast_2d(np.asarray(theta))
	    scores = sigmoid(samples @ weights.T)  # shape: (samples, classes)
	    # Vectorised arg-max over the class axis replaces a per-row Python map.
	    return np.argmax(scores, axis=1)
	# simple sigmoid function
	def sigmoid(x):
	    """Return the logistic function ``1 / (1 + exp(-x))`` element-wise.

	    The value is computed from ``exp(-|x|)``, which never exceeds 1, so
	    large-magnitude negative inputs do not overflow.

	    Args:
	        x: Scalar or array-like of numbers.

	    Returns:
	        A float scalar for scalar input, otherwise a float array with the
	        same shape as ``x``.
	    """
	    x = np.asarray(x, dtype=float)
	    decay = np.exp(-np.abs(x))  # always in (0, 1], cannot overflow
	    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
	    # Indexing with an empty tuple turns a 0-d result back into a scalar
	    # and leaves n-d arrays untouched.
	    return result[()]
	# Translate each predicted index into its class label, then report the
	# share of test rows whose label matches Y_test.
	test_set_value = [classes[index] for index in predict(X_test, thetas)]
	test_set_percent = sum(test_set_value == Y_test) / len(Y_test)
	print(f"Accuracy for Test Set: {test_set_percent*100}%")
	# Translate each predicted index into its class label, then report the
	# share of training rows whose label matches Y_train.
	train_set_value = [classes[index] for index in predict(X_train, thetas)]
	train_set_percent = sum(train_set_value == Y_train) / len(Y_train)
	print(f"Accuracy for Train Set: {train_set_percent*100}%")