Skip to content

Instantly share code, notes, and snippets.

View EvanGertis's full-sized avatar
crush it.

Evan Gertis EvanGertis

crush it.
View GitHub Profile
# Bug fix: the two filenames were swapped — the test-set variable loaded the
# training CSV and vice versa.  Variable names kept so downstream calls work.
test_set_Bayes = pd.read_csv("Assignment 2--Test set for Bayes.csv")
training_set_Bayes = pd.read_csv("Assignment 2--Training set for Bayes.csv")
@EvanGertis
EvanGertis / BayesClassifier
Created January 16, 2022 15:10
BayesianClassifier
def BayesClassifier(training_set, test_set):
    """For each distinct value of the 'Volume' class attribute in the
    training set, compute the class prior P(class = x) and print a
    separator line.

    NOTE(review): this fragment only computes priors; `test_set` is accepted
    for interface compatibility but is not used here — confirm against the
    rest of the gist.  Fixes vs original: restored the indentation lost in
    the paste and removed dead locals (`products`, `classWithMaxValue`, and
    a `max` that shadowed the builtin and was never updated).
    """
    classAttribute = 'Volume'
    for x in training_set[classAttribute].unique():
        # prior = (# rows with class x) / (total # rows)
        D = len(training_set[classAttribute].index)
        d = len(training_set[training_set[classAttribute] == x].index)
        pClassAttribute = d / D
        print("********")
def prob_continous_value(A, v, classAttribute, dataset, x):
    """Gaussian likelihood of observing value *v* for continuous attribute
    *A*, conditioned on ``dataset[classAttribute] == x``.

    A              -- column name of the continuous attribute.
    v              -- the observed value whose density is evaluated.
    classAttribute -- column holding the class label.
    dataset        -- pandas DataFrame containing both columns.
    x              -- class value restricting the sample.
    Returns the normal pdf N(v; mean, std) estimated from that subset.

    Bug fixes vs original: the caller-supplied ``v`` was clobbered by
    ``v = dataset[A].iloc[0]`` (the parameter was ignored entirely), and
    ``stdev = 1`` was a dead assignment immediately overwritten.
    """
    # calculate the average for all values of A in dataset with class = x
    a = dataset[dataset[classAttribute] == x][A].mean()
    # calculate the (sample, ddof=1) standard deviation for the same subset
    stdev = dataset[dataset[classAttribute] == x][A].std()
    if stdev == 0.0:
        # avoid division by zero for constant-valued attributes
        stdev = 0.00000000000001
    return (1/(math.sqrt(2*math.pi)*stdev))*math.exp(-((v-a)*(v-a))/(2*stdev*stdev))
@EvanGertis
EvanGertis / calculate_metrics
Created January 16, 2022 15:08
Calculates the metrics for a Bayesian classifier
def calculate_metrics(training_set, test_set, classAttribute, classValue):
    """Tally confusion-matrix style counts for *classValue* and print the
    table header.

    NOTE(review): the counting scheme here is unusual — tp/tn are taken from
    training_set and fp/fn from test_set by label equality rather than from
    model predictions; preserved as-is, but confirm the intent.  The table
    rows appear to have been truncated in the original paste; only the
    headers are printed.

    Bug fix vs original: ``tn`` duplicated ``tp`` (it filtered on
    ``== classValue``); negatives are the rows with ``!= classValue``.
    """
    # calculate the accuracy, error rate, sensitivity, specificity, and precision for the selected classifier in reference to the corresponding test set.
    tp = len(training_set[training_set[classAttribute] == classValue].index)
    fp = len(test_set[test_set[classAttribute] == classValue].index)
    tn = len(training_set[training_set[classAttribute] != classValue].index)
    fn = len(test_set[test_set[classAttribute] != classValue].index)
    p = tp + fp
    n = tn + fn
    print(f" \t \t\t {classValue} \t not {classValue} \t \t TOTAL")
    print(f" \t \t\t \t \t \t ")
def pca(s):
    """Pull the A1/A2 (and optional A3) columns of *s* out as NumPy arrays.

    Prints the dimensionality of the A1 array; when an 'A3' column is
    present, also computes its L2-normalised form.  Nothing is returned —
    the arrays are only materialised (matches the original behaviour).
    """
    # Normalize each s
    first_col = s[['A1']].to_numpy()
    second_col = s[['A2']].to_numpy()
    print(first_col.ndim)
    if 'A3' in s:
        third_col = s[['A3']].to_numpy()
        third_col_norm = third_col / np.linalg.norm(third_col)
def calculate_correlation(s):
    """Report the Pearson correlation between columns A1 and A3 and drop A3
    when |correlation| > 0.6 (i.e. A3 is strongly redundant with A1).

    Returns the (possibly reduced) DataFrame.

    Bug fix vs original: ``s.drop`` rebound only the local ``s`` and the
    function returned None, so the caller never received the reduced frame;
    it now returns ``s``.  Also fixed the comment typo (``< 0.6`` for the
    lower bound should be ``< -0.6``).
    """
    s_temp = s[['A1','A3']]
    correlation = s_temp.corr().iloc[1,0]
    print("******************************")
    print(f'Correlation between A1 & A3: {correlation}')
    print("******************************")
    # if correlation > 0.6 or correlation < -0.6 remove A3
    if correlation > 0.6 or correlation < -0.6:
        s = s.drop(['A3'], axis=1)
        print(f'A3 was removed {correlation} > 0.6 or {correlation} < -0.6')
    return s
def segmentation_by_natural_partitioning(s):
    """Print the 5th/95th percentiles over every value in *s* plus the
    [min, max] range of column A2 — the preliminaries for 3-4-5
    natural-partitioning segmentation.

    Fixes vs original: removed the zero-width characters that preceded the
    ``def`` keyword (a syntax error), printed the range as [min, max]
    instead of [max, min], and corrected the misspelled identifiers and
    labels ("fith" -> "fifth", "nienty_fith" -> "ninety_fifth").
    """
    # calculate 5th and 95th percentiles over the flattened frame.
    s_as_array = np.array(s)
    fifth_percentile = np.percentile(s_as_array, 5)
    ninety_fifth_percentile = np.percentile(s_as_array, 95)
    print(f"range [{s['A2'].min()},{s['A2'].max()}]")
    print()
    print("*****************************")
    print(f'fifth_percentile {fifth_percentile}')
    print(f'ninety_fifth_percentile {ninety_fifth_percentile}')
def entropy(s):
    """Entropy of dataset *s* — INCOMPLETE in this fragment.

    The function echoes the input and initialises the accumulator, but the
    per-class ``-p * log(p)`` summation was never written below the final
    comment, so it currently returns None.
    """
    print("calculating the entropy for s")
    print("*****************************")
    print(s)
    print("*****************************")
    # initialize ent
    ent = 0
    # calculate the number of classes in s
    # NOTE(review): computation truncated here in the original — ent is
    # never updated or returned; confirm against the full gist.
def maxf(s):
    """Return the largest value in *s* (delegates to its ``.max()``)."""
    largest = s.max()
    return largest
def minf(s):
    """Return the smallest value in *s* (delegates to its ``.min()``)."""
    smallest = s.min()
    return smallest
def uniqueValue(s):
    """Return True when every record in *s* carries the same 'Class' label.

    Bug fix vs original: the code returned False when ``nunique == 1`` —
    the opposite of its own comment ("are records in s the same? return
    true") — and fell through to an implicit None otherwise.  Now returns a
    plain bool either way.
    """
    # are records in s the same? return true
    return bool(s.nunique()['Class'] == 1)
function processResponse(){
if(asyncRequest.readyState==4 && asyncRequest.status==200 && asyncRequest.responseXML){
var emails = asyncRequest.responseXML.getElementsByTagName("email");
var removeWarning = false;
for(var i=0; i < emails.length;i++){
removeWarning = true;
if(email.value==emails[i].textContent){
addBlackListEmailWarning(email, " |black listed|");
removeWarning = false;
}