This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Akaike information criterion: AIC = 2*k - 2*ln(L).
# k = 5 is the parameter count applied to both models; ml_A and ml_B are the
# likelihood values computed elsewhere (presumably via mle() -- confirm).
n_params = 5
aic_A = 2 * n_params - 2 * math.log(ml_A)
aic_B = 2 * n_params - 2 * math.log(ml_B)
print("AIC for Vaccine A: {:.2f}".format(aic_A))
print("AIC for Vaccine B: {:.2f}".format(aic_B))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import math | |
import numpy as np | |
def f(n):
    """Return n! (the factorial of the non-negative integer ``n``)."""
    return math.factorial(n)
def mle(R, theta):
    """Return the multinomial likelihood of the counts ``R`` under ``theta``.

    Computes ``(sum(R))! / prod(r_i!) * prod(theta_i ** r_i)``.

    Args:
        R: sequence of non-negative integer counts, one per category.
        theta: sequence of category probabilities; ``theta[i]`` is paired
            with ``R[i]`` (entries beyond ``len(R)`` are ignored).

    Returns:
        The likelihood as a float (1.0 for empty ``R``).

    Raises:
        ValueError: if a count is negative (raised by ``math.factorial``).
        IndexError: if ``theta`` has fewer entries than ``R``.
        OverflowError: if the multinomial coefficient exceeds float range.
    """
    # Build the multinomial coefficient with exact Python integers. Taking
    # np.prod over the factorials would silently wrap around in int64 once
    # the product no longer fits (e.g. 20! * 20!), giving a wrong result.
    # Every intermediate quotient is itself an integer, so // is exact.
    coefficient = math.factorial(sum(R))
    for r in R:
        coefficient //= math.factorial(r)

    # Floating-point part: prod(theta_i ** r_i). Note 0.0 ** 0 == 1.0, so a
    # zero-probability category with a zero count does not zero the product.
    kernel = np.prod([theta[i] ** r for i, r in enumerate(R)])

    return float(coefficient) * kernel
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Number of reviews per star rating for each product
R_A = [10, 6, 10, 27, 109]
R_B = [57, 33, 29, 45, 246]

# Maximum likelihood estimators: each count divided by the total count
theta_A = np.asarray(R_A) / np.sum(R_A)
theta_B = np.asarray(R_B) / np.sum(R_B)

print("MLE for Product A: ", np.round(theta_A, 3))
print("MLE for Product B: ", np.round(theta_B, 3))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import matplotlib.pyplot as plt | |
def running_predictions(prediction_dataset, targets):
    """Return majority-vote accuracy for every prefix of the prediction columns.

    For each k from 1 to the number of columns, the first k columns of
    ``prediction_dataset`` are averaged per row; a row is classified as 1 when
    that average is strictly greater than 0.5 (so an exact tie counts as 0).
    The result is the fraction of rows whose classification equals the target.

    Args:
        prediction_dataset: 2-D array-like of shape (n_samples, n_trees);
            assumed to hold 0/1 predictions, one column per tree -- confirm
            against the caller.
        targets: 1-D array-like of length n_samples with the true labels.
            Lists and pandas Series are accepted as well as NumPy arrays.

    Returns:
        1-D NumPy array of length n_trees; entry k-1 is the accuracy obtained
        when voting over the first k columns.
    """
    predictions = np.asarray(prediction_dataset)
    # Column vector, so each row's target broadcasts across that row's votes.
    # np.asarray first: plain lists and pandas Series have no .reshape().
    expected = np.asarray(targets).reshape(-1, 1)

    n_trees = predictions.shape[1]
    running_fraction_of_ones = (
        np.cumsum(predictions, axis=1) / np.arange(1, n_trees + 1)
    )
    running_conclusions = running_fraction_of_ones > 0.5
    running_correctness = running_conclusions == expected
    return np.mean(running_correctness, axis=0)
def plot_bagging_predictions(bagging_size, bagging_train, y_train, bagging_test, y_test, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.metrics import accuracy_score | |
def run_bagging(X_train, y_train, X_test, y_test, tree_depth, bagging_size=45): | |
bagging_train = np.zeros((X_train.shape[0], bagging_size)) | |
bagging_test = np.zeros((X_test.shape[0], bagging_size)) | |
trees = [] | |
for i in range(bagging_size): | |
X_train_bootstrap, y_train_bootstrap = resample(X_train, y_train) | |
tree = DecisionTreeClassifier(max_depth=tree_depth).fit(X_train_bootstrap, | |
y_train_bootstrap) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.ensemble import RandomForestClassifier | |
import numpy as np | |
import pandas as pd | |
def run_single_tree(X_train, y_train, X_test, y_test, depth): | |
model = DecisionTreeClassifier(max_depth=depth).fit(X_train, y_train) | |
accuracy_train = model.score(X_train, y_train) | |
accuracy_test = model.score(X_test, y_test) | |
print('Single tree depth: ', depth) | |
print('Accuracy, Training Set: ', round(accuracy_train*100,5), '%') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import keras | |
from keras.models import Sequential | |
from keras.layers import Dense | |
from keras.optimizers import SGD | |
from sklearn.metrics import r2_score | |
# Neural network made of a single Dense layer: 1 neuron, linear activation,
# one input feature.
model = Sequential([Dense(1, activation='linear', input_dim=1)])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import matplotlib.pyplot as plt | |
# Sample 100 sorted x positions uniformly from [-1.5, 4.0)
x = np.sort(np.random.uniform(low=-1.5, high=4.0, size=100))
# Points on the line y = -0.2*x + 0.9, perturbed by standard normal noise
y = (-0.2 * x + 0.9) + np.random.normal(loc=0, scale=1, size=100)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
def disparate_impact(B, m, fairness_threshold=.8): | |
if len(B) != len(m): | |
raise ValueError('Input arrays do not have same number of entries') | |
# "positive class" are those where predictions = Charge Off | |
# "majority class" are those where protected class status = 1 | |
indices_pos_class, = np.where(B == 1) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
def statistical_parity(B, m, fairness_threshold=.01): | |
if len(B) != len(m): | |
raise ValueError('Input arrays do not have same number of entries') | |
indices_pos_class, = np.where(B == 1) | |
indices_neg_class, = np.where(B == 0) |