JohnDeJesus22 / jb_scratch.py
Created August 5, 2022 02:13
jb_scratch.py
def jb_test_statistic(column):
    """
    Compute the Jarque-Bera test statistic from a pandas dataframe column
    """
    # remove all nulls, get the data length, and convert column values to an array
    column = column.dropna()
    n = column.shape[0]
    column_values = column.values
    # sample skewness and kurtosis from the centered values
    diffs = column_values - column_values.mean()
    skewness = (diffs ** 3).mean() / (diffs ** 2).mean() ** 1.5
    kurtosis = (diffs ** 4).mean() / (diffs ** 2).mean() ** 2
    # Jarque-Bera statistic: (n / 6) * (skewness**2 + (kurtosis - 3)**2 / 4)
    return (n / 6) * (skewness ** 2 + (kurtosis - 3) ** 2 / 4)
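To sanity-check the scratch implementation, it can be run next to scipy's jarque_bera on the same column. A minimal sketch, not part of the original gist, assuming the Popular_Baby_Names.csv data and its "Count" column from jb_set_up.py below:
# compare the scratch statistic against scipy's on the same column
import pandas as pd
from scipy.stats import jarque_bera

baby_names = pd.read_csv("Popular_Baby_Names.csv")
counts = baby_names["Count"]
print(jb_test_statistic(counts))   # scratch version of the statistic
print(jarque_bera(counts)[0])      # scipy's statistic, for comparison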
JohnDeJesus22 / jb_each_year.py
Created August 5, 2022 01:47
jb_each_year.py
# apply the Jarque-Bera test to the baby name counts for each year
years = baby_names["Year of Birth"].sort_values().unique().tolist()
for year in years:
    baby_year = baby_names[baby_names["Year of Birth"] == year]
    jb_results = jarque_bera(baby_year["Count"])
    print(f"Jarque-Bera Count Results for {year}: {jb_results}")
JohnDeJesus22 / jb-applied.py
Created August 4, 2022 03:49
jb_applied.py
# compute the Jarque-Bera test for the Count column
jarque_bera(baby_names["Count"])
JohnDeJesus22 / jb_set_up.py
Last active August 5, 2022 02:55
jb_set_up.py
# import libraries
import pandas as pd
from scipy.stats import jarque_bera
# load data
baby_names = pd.read_csv("Popular_Baby_Names.csv")
# inspect data
baby_names.info()
JohnDeJesus22 / fmi.py
Created July 19, 2020 03:49
fowlkes-mallows index
from math import sqrt

# Fowlkes-Mallows index from a 2x2 confusion matrix cm: geometric mean of precision and recall
recall = cm[1, 1] / (cm[1, 1] + cm[1, 0])
precision = cm[1, 1] / (cm[1, 1] + cm[0, 1])
fmi = sqrt(precision * recall)
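The metric gists above and below all index a variable cm that is not defined in the snippets. A minimal sketch of one way to produce it, assuming a binary problem and sklearn's confusion_matrix; y_true and y_pred are placeholder label lists, not from the original gists.
from math import sqrt
from sklearn.metrics import confusion_matrix

y_true = [0, 0, 1, 1, 1, 0, 1, 0]        # placeholder actual labels
y_pred = [0, 1, 1, 1, 0, 0, 1, 0]        # placeholder predicted labels
cm = confusion_matrix(y_true, y_pred)    # 2x2 array: rows are actual, columns are predicted

recall = cm[1, 1] / (cm[1, 1] + cm[1, 0])
precision = cm[1, 1] / (cm[1, 1] + cm[0, 1])
print(sqrt(precision * recall))          # Fowlkes-Mallows index for the toy labels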
JohnDeJesus22 / prevalence_threshold.py
Created July 19, 2020 03:47
prevalence threshold
from math import sqrt

# prevalence threshold from a 2x2 confusion matrix cm
recall = cm[1, 1] / (cm[1, 1] + cm[1, 0])
specificity = cm[0, 0] / (cm[0, 0] + cm[0, 1])
numerator = sqrt(recall * (1 - specificity)) + specificity - 1
denominator = recall + specificity - 1
prevalence_threshold = numerator / denominator
JohnDeJesus22 / mcc.py
Created July 19, 2020 03:44
Matthews correlation coefficient
from math import sqrt

# Matthews correlation coefficient from a 2x2 confusion matrix cm
numerator = (cm[1, 1] * cm[0, 0]) - (cm[0, 1] * cm[1, 0])
denominator = sqrt((cm[1, 1] + cm[0, 1]) * (cm[1, 1] + cm[1, 0]) * (cm[0, 0] + cm[0, 1]) * (cm[0, 0] + cm[1, 0]))
mcc = numerator / denominator

# markedness: precision + negative predictive value - 1
precision = cm[1, 1] / (cm[1, 1] + cm[0, 1])
negative_predictive_value = cm[0, 0] / (cm[0, 0] + cm[1, 0])
markedness = precision + negative_predictive_value - 1

# informedness (Youden's J): recall + specificity - 1
recall = cm[1, 1] / (cm[1, 1] + cm[1, 0])
specificity = cm[0, 0] / (cm[0, 0] + cm[0, 1])
informedness = recall + specificity - 1

# F1 score: harmonic mean of precision and recall
precision = cm[1, 1] / (cm[1, 1] + cm[0, 1])
recall = cm[1, 1] / (cm[1, 1] + cm[1, 0])
numerator = precision * recall
denominator = precision + recall
f1_score = 2 * numerator / denominator
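Where it helps, the hand-computed values can be cross-checked against sklearn's built-ins. A hedged sketch, not part of the original gist, assuming cm (and therefore mcc and f1_score above) was built from the placeholder y_true and y_pred lists in the fowlkes-mallows sketch:
from sklearn.metrics import matthews_corrcoef, f1_score as sk_f1_score

print(mcc, matthews_corrcoef(y_true, y_pred))   # the two MCC values should agree
print(f1_score, sk_f1_score(y_true, y_pred))    # the two F1 values should agree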