This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the data for the naive Bayes exercise. Each variable is bound to the
# CSV whose file name matches its role; the two were previously crossed, so
# the "Training set" file ended up in test_set_Bayes and vice versa.
training_set_Bayes = pd.read_csv("Assignment 2--Training set for Bayes.csv")
test_set_Bayes = pd.read_csv("Assignment 2--Test set for Bayes.csv")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def BayesClassifier(training_set,test_set): | |
classAttribute = 'Volume' | |
products = [] | |
max = -math.inf | |
classWithMaxValue = "" | |
for x in training_set[classAttribute].unique(): | |
D = len(training_set[classAttribute].index) | |
d = len(training_set[training_set[classAttribute] == x].index) | |
pClassAttribute = d/D | |
print("********") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def prob_continous_value(A, v, classAttribute, dataset, x):
    """Return the normal density of ``v`` for attribute ``A`` within class ``x``.

    The mean and the sample standard deviation of column ``A`` are taken over
    the rows of ``dataset`` whose ``classAttribute`` column equals ``x``; the
    Gaussian probability density with those parameters is evaluated at ``v``.

    Args:
        A: Label of the continuous attribute column.
        v: Value at which the density is evaluated.
        classAttribute: Label of the column holding the class.
        dataset: pandas DataFrame containing both columns.
        x: Class value used to select the rows.

    Returns:
        The density as a float. It is NaN when no row of ``dataset`` belongs
        to class ``x``, because the mean is then undefined.
    """
    # Values of A restricted to the rows of class x.
    class_values = dataset[dataset[classAttribute] == x][A]
    mean = class_values.mean()
    stdev = class_values.std()
    # A constant column yields 0 and a single-row class yields NaN; both are
    # replaced by a tiny positive spread so the division below stays defined.
    if stdev == 0.0 or math.isnan(stdev):
        stdev = 0.00000000000001
    # Evaluate at the caller's v (it used to be overwritten with the first row
    # of the dataset, which made the argument meaningless).
    coefficient = 1 / (math.sqrt(2 * math.pi) * stdev)
    exponent = -((v - mean) * (v - mean)) / (2 * stdev * stdev)
    return coefficient * math.exp(exponent)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def calculate_metrics(training_set,test_set,classAttribute,classValue): | |
# calculate the accuracy, error rate, sensitivity, specificity, and precision for the selected classifier in reference to the corresponding test set. | |
tp = len(training_set[training_set[classAttribute] == classValue].index) | |
fp = len(test_set[test_set[classAttribute] == classValue].index) | |
tn = len(training_set[training_set[classAttribute] == classValue].index) | |
fn = len(test_set[test_set[classAttribute] != classValue].index) | |
p = tp + fp | |
n = tn + fn | |
print(f" \t \t\t {classValue} \t not {classValue} \t \t TOTAL") | |
print(f" \t \t\t \t \t \t ") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def pca(s): | |
# Normalize each s | |
A1 = s[['A1']].to_numpy() | |
A2 = s[['A2']].to_numpy() | |
print(A1.ndim) | |
if 'A3' in s: | |
A3 = s[['A3']].to_numpy() | |
A3_norm = A3/np.linalg.norm(A3) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def calculate_correlation(s): | |
s_temp = s[['A1','A3']] | |
correlation = s_temp.corr().iloc[1,0] | |
print("******************************") | |
print(f'Correlation between A1 & A3: {correlation}') | |
print("******************************") | |
# if correlation > 0.6 or correlation < 0.6 remove A3 | |
if correlation > 0.6 or correlation < -0.6: | |
s = s.drop(['A3'], axis=1) | |
print(f'A3 was removed {correlation} > 0.6 or {correlation} < -0.6') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def segmentation_by_natural_partitioning(s): | |
# calculate 5th and 95th percentiles. | |
s_as_array = np.array(s) | |
fith_percentile = np.percentile(s_as_array, 5) | |
nienty_fith_percentile = np.percentile(s_as_array, 95) | |
print(f"range [{s['A2'].max()},{s['A2'].min()}]") | |
print() | |
print("*****************************") | |
print(f'fith_percentile {fith_percentile}') | |
print(f'nienty_fith_percentile {nienty_fith_percentile}') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def entropy(s): | |
print("calculating the entropy for s") | |
print("*****************************") | |
print(s) | |
print("*****************************") | |
# initialize ent | |
ent = 0 | |
# calculate the number of classes in s |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def maxf(s):
    """Return whatever ``s.max()`` reports as the maximum of ``s``."""
    return s.max()
def minf(s):
    """Return whatever ``s.min()`` reports as the minimum of ``s``."""
    return s.min()
def uniqueValue(s): | |
# are records in s the same? return true | |
if s.nunique()['Class'] == 1: | |
return False |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function processResponse(){ | |
if(asyncRequest.readyState==4 && asyncRequest.status==200 && asyncRequest.responseXML){ | |
var emails = asyncRequest.responseXML.getElementsByTagName("email"); | |
var removeWarning = false; | |
for(var i=0; i < emails.length;i++){ | |
removeWarning = true; | |
if(email.value==emails[i].textContent){ | |
addBlackListEmailWarning(email, " |black listed|"); | |
removeWarning = false; | |
} |
NewerOlder