Skip to content

Instantly share code, notes, and snippets.

View EvanGertis's full-sized avatar
crush it.

Evan Gertis EvanGertis

crush it.
View GitHub Profile
# Bug fix: the two filenames were swapped — the test-set variable loaded the
# training CSV and vice versa.  Variable names kept so downstream calls work.
test_set_Bayes = pd.read_csv("Assignment 2--Test set for Bayes.csv")
training_set_Bayes = pd.read_csv("Assignment 2--Training set for Bayes.csv")
@EvanGertis
EvanGertis / BayesClassifier
Created January 16, 2022 15:10
BayesianClassifier
def BayesClassifier(training_set, test_set):
    """For each distinct value of the 'Volume' class attribute in the
    training set, compute the class prior P(class = x) and print a
    separator line.

    NOTE(review): this fragment only computes priors; `test_set` is accepted
    for interface compatibility but is not used here — confirm against the
    rest of the gist.  Fixes vs original: restored the indentation lost in
    the paste and removed dead locals (`products`, `classWithMaxValue`, and
    a `max` that shadowed the builtin and was never updated).
    """
    classAttribute = 'Volume'
    for x in training_set[classAttribute].unique():
        # prior = (# rows with class x) / (total # rows)
        D = len(training_set[classAttribute].index)
        d = len(training_set[training_set[classAttribute] == x].index)
        pClassAttribute = d / D
        print("********")
def prob_continous_value(A, v, classAttribute, dataset, x):
    """Gaussian likelihood of observing value *v* for continuous attribute
    *A*, conditioned on ``dataset[classAttribute] == x``.

    A              -- column name of the continuous attribute.
    v              -- the observed value whose density is evaluated.
    classAttribute -- column holding the class label.
    dataset        -- pandas DataFrame containing both columns.
    x              -- class value restricting the sample.
    Returns the normal pdf N(v; mean, std) estimated from that subset.

    Bug fixes vs original: the caller-supplied ``v`` was clobbered by
    ``v = dataset[A].iloc[0]`` (the parameter was ignored entirely), and
    ``stdev = 1`` was a dead assignment immediately overwritten.
    """
    # calculate the average for all values of A in dataset with class = x
    a = dataset[dataset[classAttribute] == x][A].mean()
    # calculate the (sample, ddof=1) standard deviation for the same subset
    stdev = dataset[dataset[classAttribute] == x][A].std()
    if stdev == 0.0:
        # avoid division by zero for constant-valued attributes
        stdev = 0.00000000000001
    return (1/(math.sqrt(2*math.pi)*stdev))*math.exp(-((v-a)*(v-a))/(2*stdev*stdev))
@EvanGertis
EvanGertis / calculate_metrics
Created January 16, 2022 15:08
Calculates the metrics for a Bayesian classifier
def calculate_metrics(training_set, test_set, classAttribute, classValue):
    """Tally confusion-matrix style counts for *classValue* and print the
    table header.

    NOTE(review): the counting scheme here is unusual — tp/tn are taken from
    training_set and fp/fn from test_set by label equality rather than from
    model predictions; preserved as-is, but confirm the intent.  The table
    rows appear to have been truncated in the original paste; only the
    headers are printed.

    Bug fix vs original: ``tn`` duplicated ``tp`` (it filtered on
    ``== classValue``); negatives are the rows with ``!= classValue``.
    """
    # calculate the accuracy, error rate, sensitivity, specificity, and precision for the selected classifier in reference to the corresponding test set.
    tp = len(training_set[training_set[classAttribute] == classValue].index)
    fp = len(test_set[test_set[classAttribute] == classValue].index)
    tn = len(training_set[training_set[classAttribute] != classValue].index)
    fn = len(test_set[test_set[classAttribute] != classValue].index)
    p = tp + fp
    n = tn + fn
    print(f" \t \t\t {classValue} \t not {classValue} \t \t TOTAL")
    print(f" \t \t\t \t \t \t ")
def pca(s):
    """Pull the A1/A2 (and optional A3) columns of *s* out as NumPy arrays.

    Prints the dimensionality of the A1 array; when an 'A3' column is
    present, also computes its L2-normalised form.  Nothing is returned —
    the arrays are only materialised (matches the original behaviour).
    """
    # Normalize each s
    first_col = s[['A1']].to_numpy()
    second_col = s[['A2']].to_numpy()
    print(first_col.ndim)
    if 'A3' in s:
        third_col = s[['A3']].to_numpy()
        third_col_norm = third_col / np.linalg.norm(third_col)
def calculate_correlation(s):
    """Report the Pearson correlation between columns A1 and A3 and drop A3
    when |correlation| > 0.6 (i.e. A3 is strongly redundant with A1).

    Returns the (possibly reduced) DataFrame.

    Bug fix vs original: ``s.drop`` rebound only the local ``s`` and the
    function returned None, so the caller never received the reduced frame;
    it now returns ``s``.  Also fixed the comment typo (``< 0.6`` for the
    lower bound should be ``< -0.6``).
    """
    s_temp = s[['A1','A3']]
    correlation = s_temp.corr().iloc[1,0]
    print("******************************")
    print(f'Correlation between A1 & A3: {correlation}')
    print("******************************")
    # if correlation > 0.6 or correlation < -0.6 remove A3
    if correlation > 0.6 or correlation < -0.6:
        s = s.drop(['A3'], axis=1)
        print(f'A3 was removed {correlation} > 0.6 or {correlation} < -0.6')
    return s
def segmentation_by_natural_partitioning(s):
    """Print the 5th/95th percentiles over every value in *s* plus the
    [min, max] range of column A2 — the preliminaries for 3-4-5
    natural-partitioning segmentation.

    Fixes vs original: removed the zero-width characters that preceded the
    ``def`` keyword (a syntax error), printed the range as [min, max]
    instead of [max, min], and corrected the misspelled identifiers and
    labels ("fith" -> "fifth", "nienty_fith" -> "ninety_fifth").
    """
    # calculate 5th and 95th percentiles over the flattened frame.
    s_as_array = np.array(s)
    fifth_percentile = np.percentile(s_as_array, 5)
    ninety_fifth_percentile = np.percentile(s_as_array, 95)
    print(f"range [{s['A2'].min()},{s['A2'].max()}]")
    print()
    print("*****************************")
    print(f'fifth_percentile {fifth_percentile}')
    print(f'ninety_fifth_percentile {ninety_fifth_percentile}')
def entropy(s):
    """Entropy of dataset *s* — INCOMPLETE in this fragment.

    The function echoes the input and initialises the accumulator, but the
    per-class ``-p * log(p)`` summation was never written below the final
    comment, so it currently returns None.
    """
    print("calculating the entropy for s")
    print("*****************************")
    print(s)
    print("*****************************")
    # initialize ent
    ent = 0
    # calculate the number of classes in s
    # NOTE(review): computation truncated here in the original — ent is
    # never updated or returned; confirm against the full gist.
def maxf(s):
    """Return the largest value in *s* (delegates to its ``.max()``)."""
    largest = s.max()
    return largest
def minf(s):
    """Return the smallest value in *s* (delegates to its ``.min()``)."""
    smallest = s.min()
    return smallest
def uniqueValue(s):
    """Return True when every record in *s* carries the same 'Class' label.

    Bug fix vs original: the code returned False when ``nunique == 1`` —
    the opposite of its own comment ("are records in s the same? return
    true") — and fell through to an implicit None otherwise.  Now returns a
    plain bool either way.
    """
    # are records in s the same? return true
    return bool(s.nunique()['Class'] == 1)
function processResponse(){
if(asyncRequest.readyState==4 && asyncRequest.status==200 && asyncRequest.responseXML){
var emails = asyncRequest.responseXML.getElementsByTagName("email");
var removeWarning = false;
for(var i=0; i < emails.length;i++){
removeWarning = true;
if(email.value==emails[i].textContent){
addBlackListEmailWarning(email, " |black listed|");
removeWarning = false;
}