Skip to content

Instantly share code, notes, and snippets.

View michelkana's full-sized avatar

Michel Kana michelkana

View GitHub Profile
# Akaike information criterion: AIC = 2*k - 2*ln(L), evaluated with k = 5
# for the likelihoods ml_A and ml_B.
aic_A, aic_B = (2*5 - 2*math.log(likelihood) for likelihood in (ml_A, ml_B))
print("AIC for Vaccine A: {:.2f}".format(aic_A))
print("AIC for Vaccine B: {:.2f}".format(aic_B))
import math
import numpy as np
def f(n):
    """Return the factorial of ``n`` (thin wrapper around ``math.factorial``)."""
    factorial_of_n = math.factorial(n)
    return factorial_of_n
def mle(R, theta):
    """Return the multinomial likelihood of the counts ``R`` under ``theta``.

    Evaluates ``(sum(R))! / prod(r_i!) * prod(theta_i ** r_i)``.

    The multinomial coefficient is built from exact Python integers. Taking
    ``np.prod`` over a list of small factorials would use a fixed-width
    integer dtype and wrap around silently (``20! * 20!`` already exceeds
    int64), which yields a wrong likelihood without any warning.

    Args:
        R: Sequence of non-negative integer counts, one per category.
        theta: Sequence of category probabilities, indexed like ``R``.

    Returns:
        The likelihood as a float. Empty ``R`` gives ``1.0``.
    """
    # Exact big-integer product of the per-category factorials.
    denominator = 1
    for count in R:
        denominator *= math.factorial(count)
    # Built-in sum keeps the total an integer even when R is empty.
    coefficient = math.factorial(sum(R)) / denominator
    probability_term = np.prod([theta[i] ** count for i, count in enumerate(R)])
    return coefficient * probability_term
# Number of reviews per star rating for each product
R_A = [10, 6, 10, 27, 109]
R_B = [57, 33, 29, 45, 246]
# Maximum likelihood estimators: every count divided by the total count
theta_A, theta_B = (counts / np.sum(counts) for counts in (R_A, R_B))
print("MLE for Product A: ", np.round(theta_A, 3))
print("MLE for Product B: ", np.round(theta_B, 3))
import matplotlib.pyplot as plt
def running_predictions(prediction_dataset, targets):
    """Return the accuracy of a growing majority vote, one value per column.

    For each k from 1 to the number of columns, the first k columns of
    ``prediction_dataset`` are averaged per row, the row is labelled 1 when
    that average exceeds 0.5, and the label is compared with ``targets``.

    Args:
        prediction_dataset: 2-D array; each column holds one tree's
            predictions (presumably 0/1 values - confirm with the caller).
        targets: 1-D array of true labels, one per row.

    Returns:
        1-D array whose k-th entry is the fraction of rows predicted
        correctly by the first k + 1 columns.
    """
    tree_count = prediction_dataset.shape[1]
    votes_so_far = np.cumsum(prediction_dataset, axis=1)
    ensemble_sizes = np.arange(1, tree_count + 1)
    fraction_ones = votes_so_far / ensemble_sizes
    majority_says_one = fraction_ones > 0.5
    # Compare every running conclusion against its row's target label.
    is_correct = majority_says_one == targets.reshape(-1, 1)
    return np.mean(is_correct, axis=0)
def plot_bagging_predictions(bagging_size, bagging_train, y_train, bagging_test, y_test,
from sklearn.metrics import accuracy_score
# Visible portion of a bagging routine: allocates one prediction column per
# bootstrap model for the train and test sets, then fits a depth-limited
# decision tree on a resampled copy of the training data in each iteration.
# NOTE(review): this snippet appears to be cut off after the fit call - the
# visible lines never fill bagging_train / bagging_test, never append to
# trees, and return nothing. Confirm against the full source.
def run_bagging(X_train, y_train, X_test, y_test, tree_depth, bagging_size=45):
# One row per sample, one column per bootstrap model (zero-initialised).
bagging_train = np.zeros((X_train.shape[0], bagging_size))
bagging_test = np.zeros((X_test.shape[0], bagging_size))
trees = []
for i in range(bagging_size):
# Draw a bootstrap sample of the training data.
# NOTE(review): presumably sklearn.utils.resample - the import is not visible here.
X_train_bootstrap, y_train_bootstrap = resample(X_train, y_train)
# Fit one tree limited to tree_depth on the bootstrap sample.
tree = DecisionTreeClassifier(max_depth=tree_depth).fit(X_train_bootstrap,
y_train_bootstrap)
from sklearn.ensemble import RandomForestClassifier
import numpy as np
import pandas as pd
def run_single_tree(X_train, y_train, X_test, y_test, depth):
    """Fit one decision tree of the given depth and print its training accuracy.

    Args:
        X_train: Training features.
        y_train: Training labels.
        X_test: Test features.
        y_test: Test labels.
        depth: Value passed as ``max_depth`` to the classifier.
    """
    classifier = DecisionTreeClassifier(max_depth=depth)
    model = classifier.fit(X_train, y_train)
    accuracy_train = model.score(X_train, y_train)
    # The test score is computed but not reported by the visible code.
    accuracy_test = model.score(X_test, y_test)
    train_percent = round(accuracy_train*100, 5)
    print('Single tree depth: ', depth)
    print('Accuracy, Training Set: ', train_percent, '%')
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import SGD
from sklearn.metrics import r2_score
# Neural network made of a single Dense unit with a linear activation,
# taking one input feature.
model = Sequential([Dense(1, activation='linear', input_dim=1)])
import numpy as np
import matplotlib.pyplot as plt
# Draw 100 x-values uniformly from [-1.5, 4.0) and put them in ascending order
x = np.random.uniform(low=-1.5, high=4.0, size=100)
x.sort()
# Points on the line y = -0.2*x + 0.9
y = -0.2*x + 0.9
# Perturb every point with standard normal noise (mean 0, std 1)
y += np.random.normal(loc=0, scale=1, size=100)
import numpy as np
# Visible portion of a disparate-impact check: validates that B and m have
# the same number of entries, then collects the positions where B equals 1.
# NOTE(review): the body appears to be cut off after the index lookup - the
# visible lines never use fairness_threshold or the values of m, and return
# nothing. Confirm against the full source.
def disparate_impact(B, m, fairness_threshold=.8):
# Reject inputs whose lengths differ.
if len(B) != len(m):
raise ValueError('Input arrays do not have same number of entries')
# "positive class" are those where predictions = Charge Off
# "majority class" are those where protected class status = 1
# NOTE(review): presumably B holds the predictions and m the protected-class
# flags, both as 1-D NumPy arrays - confirm with the caller.
# The trailing comma unpacks the one-element tuple np.where returns for 1-D input.
indices_pos_class, = np.where(B == 1)
import numpy as np
# Visible portion of a statistical-parity check: validates that B and m have
# the same number of entries, then collects the positions where B equals 1
# and where B equals 0.
# NOTE(review): the body appears to be cut off after the index lookups - the
# visible lines never use fairness_threshold or the values of m, and return
# nothing. Confirm against the full source.
def statistical_parity(B, m, fairness_threshold=.01):
# Reject inputs whose lengths differ.
if len(B) != len(m):
raise ValueError('Input arrays do not have same number of entries')
# NOTE(review): presumably B is a 1-D NumPy array of 0/1 values - confirm.
# The trailing comma unpacks the one-element tuple np.where returns for 1-D input.
indices_pos_class, = np.where(B == 1)
indices_neg_class, = np.where(B == 0)