Skip to content

Instantly share code, notes, and snippets.

@addy1997
Last active December 10, 2021 00:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save addy1997/325aa1dc560ae3e4217ccbfbb6d7d3fb to your computer and use it in GitHub Desktop.
CSI assignment 2
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Dec 5 00:24:39 2021
@author: adwaitnaik
"""
import pandas as pd
import numpy as np
# Globally silence pandas' SettingWithCopyWarning: the script below assigns
# into a column sliced out of a DataFrame (y_test label remapping).
pd.options.mode.chained_assignment = None # default='warn'
def train_test(X1_train, X2_train, X_test, prior=0.5):
    """Classify each row of X_test with a Gaussian naive-Bayes rule.

    A per-feature Gaussian is fitted to each class (mean/std over the
    class's training rows); a test row is assigned to the class with the
    larger discriminant log P(x|class) + log(prior).

    Parameters
    ----------
    X1_train : DataFrame of training rows for class 1 ('smile'); a trailing
        non-numeric 'Class' column, if present, is ignored.
    X2_train : DataFrame of training rows for class 0 ('frown'); same note.
    X_test : DataFrame of test rows, numeric feature columns only.
    prior : class prior P(Ck), identical for both classes (default 0.5,
        matching the original script's global ``Ck``). Made an explicit
        parameter so the function no longer depends on a module global.

    Returns
    -------
    ndarray of 0/1 per test row: 1 = class of X1_train, 0 = class of X2_train.
    """
    # Keep only numeric columns: the callers pass class-filtered frames that
    # still contain the string 'Class' column, which would make .mean()/.std()
    # raise TypeError on modern pandas.
    X1 = X1_train.select_dtypes(include=[np.number])
    X2 = X2_train.select_dtypes(include=[np.number])

    smile_mean, smile_std = X1.mean(axis=0), X1.std(axis=0)
    frown_mean, frown_std = X2.mean(axis=0), X2.std(axis=0)

    def _log_likelihood(mean, std):
        # Sum of per-feature Gaussian log-densities for every test row:
        # log N(x | mu, s) = -log(sqrt(2*pi)*s) - (x - mu)^2 / (2*s^2)
        neg_terms = np.zeros(X_test.shape)
        for i in range(X_test.shape[1]):
            norm_const = np.log(np.sqrt(2 * np.pi) * std.iloc[i])
            sq_dev = np.square(X_test.iloc[:, i] - mean.iloc[i]) / (2 * np.square(std.iloc[i]))
            neg_terms[:, i] = norm_const + sq_dev
        # Negate the accumulated positive terms to get the log-likelihood.
        return -np.sum(neg_terms, axis=1)

    log_P_X_C1 = _log_likelihood(smile_mean, smile_std)
    log_P_X_C2 = _log_likelihood(frown_mean, frown_std)

    # Discriminant per class; column 0 holds 'frown', column 1 holds 'smile',
    # so argmax yields the 0/1 labels used by the caller.
    gamma_prime_1 = log_P_X_C1 + np.log(prior)
    gamma_prime_0 = log_P_X_C2 + np.log(prior)
    G = np.column_stack((gamma_prime_0, gamma_prime_1))
    return np.argmax(G, axis=1)
if __name__ == "__main__":
    # Paths to the data files (edit to match your machine).
    test_path = r'/Users/adwaitnaik/test-part-2.csv' #insert path to the test file
    training_path = r'/Users/adwaitnaik/training-part-2.csv' #insert path to the train file'
    test_data = pd.read_csv(test_path)
    train_data = pd.read_csv(training_path)

    # Split the training rows by class label.
    smile_train = train_data.loc[train_data["Class"] == 'smile']
    frown_train = train_data.loc[train_data["Class"] == 'frown']

    # Columns 0..16 are the 17 features; column 17 is the class label.
    X_test = test_data.iloc[:, 0:17]
    # Series.map builds a new array instead of assigning into a slice of
    # test_data (the chained-assignment pattern the file-level option mutes).
    y_test = test_data.iloc[:, 17].map({'smile': 1, 'frown': 0}).to_numpy()

    # Equal class priors. Kept as a module global because train_test (as
    # originally written) reads Ck from module scope.
    Ck = 0.5

    # Training and testing: result[i] is 1 for 'smile', 0 for 'frown'.
    result = train_test(smile_train, frown_train, X_test)

    # Vectorized accuracy instead of a manual counting loop.
    accuracy = float(np.mean(y_test == result))
    error_rate = 1 - accuracy
    print("The accuracy is:", accuracy)
    print("The error rate is:", error_rate)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment