Skip to content

Instantly share code, notes, and snippets.

@addy1997
Last active December 10, 2021 00:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save addy1997/325aa1dc560ae3e4217ccbfbb6d7d3fb to your computer and use it in GitHub Desktop.
CSI assignment 2
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Dec 5 00:24:39 2021
@author: adwaitnaik
"""
import pandas as pd
import numpy as np
# Globally silence pandas' SettingWithCopyWarning: the script below assigns
# into a column sliced out of a DataFrame (y_test label remapping).
pd.options.mode.chained_assignment = None # default='warn'
def train_test(X1_train, X2_train, X_test, prior=0.5):
    """Classify each row of X_test with a Gaussian naive-Bayes rule.

    A per-feature Gaussian is fitted to each class (mean/std over the
    class's training rows); a test row is assigned to the class with the
    larger discriminant log P(x|class) + log(prior).

    Parameters
    ----------
    X1_train : DataFrame of training rows for class 1 ('smile'); a trailing
        non-numeric 'Class' column, if present, is ignored.
    X2_train : DataFrame of training rows for class 0 ('frown'); same note.
    X_test : DataFrame of test rows, numeric feature columns only.
    prior : class prior P(Ck), identical for both classes (default 0.5,
        matching the original script's global ``Ck``). Made an explicit
        parameter so the function no longer depends on a module global.

    Returns
    -------
    ndarray of 0/1 per test row: 1 = class of X1_train, 0 = class of X2_train.
    """
    # Keep only numeric columns: the callers pass class-filtered frames that
    # still contain the string 'Class' column, which would make .mean()/.std()
    # raise TypeError on modern pandas.
    X1 = X1_train.select_dtypes(include=[np.number])
    X2 = X2_train.select_dtypes(include=[np.number])

    smile_mean, smile_std = X1.mean(axis=0), X1.std(axis=0)
    frown_mean, frown_std = X2.mean(axis=0), X2.std(axis=0)

    def _log_likelihood(mean, std):
        # Sum of per-feature Gaussian log-densities for every test row:
        # log N(x | mu, s) = -log(sqrt(2*pi)*s) - (x - mu)^2 / (2*s^2)
        neg_terms = np.zeros(X_test.shape)
        for i in range(X_test.shape[1]):
            norm_const = np.log(np.sqrt(2 * np.pi) * std.iloc[i])
            sq_dev = np.square(X_test.iloc[:, i] - mean.iloc[i]) / (2 * np.square(std.iloc[i]))
            neg_terms[:, i] = norm_const + sq_dev
        # Negate the accumulated positive terms to get the log-likelihood.
        return -np.sum(neg_terms, axis=1)

    log_P_X_C1 = _log_likelihood(smile_mean, smile_std)
    log_P_X_C2 = _log_likelihood(frown_mean, frown_std)

    # Discriminant per class; column 0 holds 'frown', column 1 holds 'smile',
    # so argmax yields the 0/1 labels used by the caller.
    gamma_prime_1 = log_P_X_C1 + np.log(prior)
    gamma_prime_0 = log_P_X_C2 + np.log(prior)
    G = np.column_stack((gamma_prime_0, gamma_prime_1))
    return np.argmax(G, axis=1)
if __name__ == "__main__":
    # Paths to the data files (edit to match your machine).
    test_path = r'/Users/adwaitnaik/test-part-2.csv' #insert path to the test file
    training_path = r'/Users/adwaitnaik/training-part-2.csv' #insert path to the train file'
    test_data = pd.read_csv(test_path)
    train_data = pd.read_csv(training_path)

    # Split the training rows by class label.
    smile_train = train_data.loc[train_data["Class"] == 'smile']
    frown_train = train_data.loc[train_data["Class"] == 'frown']

    # Columns 0..16 are the 17 features; column 17 is the class label.
    X_test = test_data.iloc[:, 0:17]
    # Series.map builds a new array instead of assigning into a slice of
    # test_data (the chained-assignment pattern the file-level option mutes).
    y_test = test_data.iloc[:, 17].map({'smile': 1, 'frown': 0}).to_numpy()

    # Equal class priors. Kept as a module global because train_test (as
    # originally written) reads Ck from module scope.
    Ck = 0.5

    # Training and testing: result[i] is 1 for 'smile', 0 for 'frown'.
    result = train_test(smile_train, frown_train, X_test)

    # Vectorized accuracy instead of a manual counting loop.
    accuracy = float(np.mean(y_test == result))
    error_rate = 1 - accuracy
    print("The accuracy is:", accuracy)
    print("The error rate is:", error_rate)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment