Michel Kana michelkana

## gist:f03cbea2f0c701690649dd3d41e3ace4
# Akaike information criterion, AIC = 2k - 2*ln(L), evaluated with k = 5
# for the likelihood values ml_A and ml_B.
aic_A, aic_B = (2*5 - 2*math.log(likelihood) for likelihood in (ml_A, ml_B))

# Report both criteria rounded to two decimals.
print("AIC for Vaccine A: {:.2f}".format(aic_A))
print("AIC for Vaccine B: {:.2f}".format(aic_B))

## ml_products.py
import math
import numpy as np

def f(n):
    """Return ``n!``, delegating to ``math.factorial``."""
    n_factorial = math.factorial(n)
    return n_factorial

def mle(R, theta):
    """Return the multinomial likelihood of counts ``R`` under ``theta``.

    Evaluates ``(sum(R))! / prod(r! for r in R) * prod(theta[i] ** R[i])``.

    Args:
        R: Sequence of non-negative integer counts, one per category.
        theta: Sequence of category probabilities, indexed like ``R``.

    Returns:
        The likelihood as a floating-point number.
    """
    # Build the multinomial coefficient with exact Python integers. Feeding
    # the factorials to np.prod can silently wrap around int64 when every
    # factorial fits in 64 bits but their product does not (e.g. R=[15, 15]).
    coefficient = math.factorial(int(np.sum(R)))
    for count in R:
        # Every partial quotient is itself an integer, so // stays exact.
        coefficient //= math.factorial(count)

    probability_term = np.prod([theta[i] ** count for i, count in enumerate(R)])
    return float(coefficient) * probability_term

## mle_products.py
# Review counts per star rating for each product
# (five categories; ordering presumably 1 to 5 stars -- confirm).
R_A = [10, 6, 10, 27, 109]
R_B = [57, 33, 29, 45, 246]

# Maximum likelihood estimators: each category's share of all reviews.
theta_A = np.divide(R_A, np.sum(R_A))
theta_B = np.divide(R_B, np.sum(R_B))

# Show the estimates rounded to three decimals.
print("MLE for Product A: ", np.round(theta_A, 3))
print("MLE for Product B: ", np.round(theta_B, 3))

## bagging_model_size.py
import matplotlib.pyplot as plt

def running_predictions(prediction_dataset, targets):
    """Return the accuracy of the majority vote over the first k columns, for each k.

    Args:
        prediction_dataset: 2-D array of 0/1 predictions; columns are
            accumulated left to right along axis 1.
        targets: 1-D array of true labels, one per row of
            ``prediction_dataset``.

    Returns:
        1-D array with one accuracy value per column count ``k = 1..n``.
    """
    ensemble_sizes = np.arange(1, prediction_dataset.shape[1] + 1)
    # Share of 1-votes among the first k columns, for every row and every k.
    vote_share = np.cumsum(prediction_dataset, axis=1) / ensemble_sizes
    # A strict majority is needed to predict 1; an exact tie predicts 0.
    majority_vote = vote_share > 0.5
    column_targets = targets.reshape(-1, 1)
    is_correct = majority_vote == column_targets
    return np.mean(is_correct, axis=0)

def plot_bagging_predictions(bagging_size, bagging_train, y_train, bagging_test, y_test,

## bagging_model.py
from sklearn.metrics import accuracy_score

def run_bagging(X_train, y_train, X_test, y_test, tree_depth, bagging_size=45):
    """Fit ``bagging_size`` depth-limited decision trees on resampled training data.

    NOTE(review): this excerpt looks truncated. In the visible code the two
    prediction matrices and the ``trees`` list are allocated but never filled,
    ``y_test`` is unused, and nothing is returned. ``resample`` and
    ``DecisionTreeClassifier`` are not imported in the visible excerpt --
    presumably ``sklearn.utils.resample`` and
    ``sklearn.tree.DecisionTreeClassifier``; confirm.

    Args:
        X_train: Training features; must expose ``.shape[0]``.
        y_train: Training labels, resampled together with ``X_train``.
        X_test: Test features; must expose ``.shape[0]``.
        y_test: Test labels (not used in the visible code).
        tree_depth: ``max_depth`` passed to every tree.
        bagging_size: Number of trees to fit.
    """
    # One row per sample, one column per tree.
    bagging_train = np.zeros((X_train.shape[0], bagging_size))
    bagging_test = np.zeros((X_test.shape[0], bagging_size))
    trees = []
    for i in range(bagging_size):
        # Draw a fresh resample of the training set for this tree.
        X_train_bootstrap, y_train_bootstrap = resample(X_train, y_train)
        tree = DecisionTreeClassifier(max_depth=tree_depth).fit(X_train_bootstrap,
                                                                y_train_bootstrap)

## bagging_single_tree_overfit.py
from sklearn.ensemble import RandomForestClassifier
import numpy as np
import pandas as pd

def run_single_tree(X_train, y_train, X_test, y_test, depth):
    """Fit one decision tree of the given depth and print its training accuracy.

    Args:
        X_train: Training features.
        y_train: Training labels.
        X_test: Test features.
        y_test: Test labels.
        depth: ``max_depth`` passed to the tree.
    """
    classifier = DecisionTreeClassifier(max_depth=depth)
    fitted_tree = classifier.fit(X_train, y_train)
    train_score = fitted_tree.score(X_train, y_train)
    # Scored on the test split as well, although this excerpt does not print it.
    test_score = fitted_tree.score(X_test, y_test)
    print('Single tree depth: ', depth)
    train_percent = round(train_score*100, 5)
    print('Accuracy, Training Set: ', train_percent, '%')

## linreg_keras.py
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import SGD
from sklearn.metrics import r2_score

# Neural network consisting of a single neuron with a linear activation,
# taking a one-dimensional input.
model = Sequential([
    Dense(units=1, activation='linear', input_dim=1),
])

## sample_line.py
import numpy as np
import matplotlib.pyplot as plt

# Draw 100 x-values uniformly from [-1.5, 4.0) and put them in ascending order.
x = np.random.uniform(low=-1.5, high=4.0, size=100)
x.sort()

# Points on the noise-free line y = -0.2*x + 0.9.
y = x * -0.2 + 0.9

# Perturb every point with normal noise (mean 0, standard deviation 1).
y += np.random.normal(loc=0, scale=1, size=x.shape)

## ml_fairness_check2.py
import numpy as np

def disparate_impact(B, m, fairness_threshold=.8):
    """Start of a disparate-impact fairness check.

    NOTE(review): this excerpt looks truncated. The visible code only
    validates the input lengths and locates the entries of ``B`` equal to 1;
    ``m`` and ``fairness_threshold`` are not used yet and nothing is returned.

    Args:
        B: Array compared element-wise against 1 (presumably binary
            predictions -- confirm against the caller).
        m: Array of the same length as ``B`` (presumably the protected-class
            indicator mentioned in the comments below -- confirm).
        fairness_threshold: Defaults to 0.8; unused in the visible code.

    Raises:
        ValueError: If ``B`` and ``m`` differ in length.
    """
    if len(B) != len(m):
        raise ValueError('Input arrays do not have same number of entries')

    # "positive class" are those where predictions = Charge Off
    # "majority class" are those where protected class status = 1

    # np.where returns a 1-tuple for 1-D input; the trailing comma unpacks it.
    indices_pos_class, = np.where(B == 1)

## ml_fairness_check1.py
import numpy as np

def statistical_parity(B, m, fairness_threshold=.01):
    """Start of a statistical-parity fairness check.

    NOTE(review): this excerpt looks truncated. The visible code only
    validates the input lengths and splits the indices of ``B`` into entries
    equal to 1 and entries equal to 0; ``m`` and ``fairness_threshold`` are
    not used yet and nothing is returned.

    Args:
        B: Array compared element-wise against 1 and 0 (presumably a binary
            indicator -- confirm against the caller).
        m: Array of the same length as ``B``.
        fairness_threshold: Defaults to 0.01; unused in the visible code.

    Raises:
        ValueError: If ``B`` and ``m`` differ in length.
    """

    if len(B) != len(m):
        raise ValueError('Input arrays do not have same number of entries')

    # np.where returns a 1-tuple for 1-D input; the trailing comma unpacks it.
    indices_pos_class, = np.where(B == 1)
    indices_neg_class, = np.where(B == 0)
	# Akaike information criterion, AIC = 2*k - 2*ln(L), evaluated with k = 5
	# for the likelihood values ml_A and ml_B.
	aic_A = 2*5 - 2*math.log(ml_A)
	aic_B = 2*5 - 2*math.log(ml_B)

	# Report both criteria rounded to two decimals.
	print("AIC for Vaccine A: {:.2f}".format(aic_A))
	print("AIC for Vaccine B: {:.2f}".format(aic_B))
	import math
	import numpy as np

	def f(n):
		"""Return ``n!``, delegating to ``math.factorial``."""
		return math.factorial(n)

	def mle(R, theta):
		"""Return the multinomial likelihood of counts ``R`` under ``theta``.

		Evaluates ``(sum(R))! / prod(r! for r in R) * prod(theta[i] ** R[i])``.

		Args:
			R: Sequence of non-negative integer counts, one per category.
			theta: Sequence of category probabilities, indexed like ``R``.

		Returns:
			The likelihood as a floating-point number.
		"""
		# Build the multinomial coefficient with exact Python integers. Feeding
		# the factorials to np.prod can silently wrap around int64 when every
		# factorial fits in 64 bits but their product does not (e.g. R=[15, 15]).
		coefficient = math.factorial(int(np.sum(R)))
		for count in R:
			# Every partial quotient is itself an integer, so // stays exact.
			coefficient //= math.factorial(count)

		probability_term = np.prod([theta[i] ** count for i, count in enumerate(R)])
		return float(coefficient) * probability_term
	# Review counts per star rating for each product
	# (five categories; ordering presumably 1 to 5 stars -- confirm).
	R_A = [10, 6, 10, 27, 109]
	R_B = [57, 33, 29, 45, 246]

	# Maximum likelihood estimators: each category's share of all reviews.
	theta_A = np.divide(R_A, np.sum(R_A))
	theta_B = np.divide(R_B, np.sum(R_B))

	# Show the estimates rounded to three decimals.
	print("MLE for Product A: ", np.round(theta_A, 3))
	print("MLE for Product B: ", np.round(theta_B, 3))
	import matplotlib.pyplot as plt

	def running_predictions(prediction_dataset, targets):
		"""Return the accuracy of the majority vote over the first k columns, for each k.

		Args:
			prediction_dataset: 2-D array of 0/1 predictions; columns are
				accumulated left to right along axis 1.
			targets: 1-D array of true labels, one per row of
				``prediction_dataset``.

		Returns:
			1-D array with one accuracy value per column count ``k = 1..n``.
		"""
		n_trees = prediction_dataset.shape[1]
		# Share of 1-votes among the first k columns, for every row and every k.
		running_percent_1s = np.cumsum(prediction_dataset, axis=1)/np.arange(1,n_trees+1)
		# A strict majority is needed to predict 1; an exact tie predicts 0.
		running_conclusions = running_percent_1s > 0.5
		running_correctness = running_conclusions == targets.reshape(-1,1)
		return np.mean(running_correctness, axis=0)

	def plot_bagging_predictions(bagging_size, bagging_train, y_train, bagging_test, y_test,
	from sklearn.metrics import accuracy_score

	def run_bagging(X_train, y_train, X_test, y_test, tree_depth, bagging_size=45):
		"""Fit ``bagging_size`` depth-limited decision trees on resampled training data.

		NOTE(review): this excerpt looks truncated. In the visible code the two
		prediction matrices and the ``trees`` list are allocated but never
		filled, ``y_test`` is unused, and nothing is returned. ``resample`` and
		``DecisionTreeClassifier`` are not imported in the visible excerpt --
		presumably ``sklearn.utils.resample`` and
		``sklearn.tree.DecisionTreeClassifier``; confirm.

		Args:
			X_train: Training features; must expose ``.shape[0]``.
			y_train: Training labels, resampled together with ``X_train``.
			X_test: Test features; must expose ``.shape[0]``.
			y_test: Test labels (not used in the visible code).
			tree_depth: ``max_depth`` passed to every tree.
			bagging_size: Number of trees to fit.
		"""
		# One row per sample, one column per tree.
		bagging_train = np.zeros((X_train.shape[0], bagging_size))
		bagging_test = np.zeros((X_test.shape[0], bagging_size))
		trees = []
		for i in range(bagging_size):
			# Draw a fresh resample of the training set for this tree.
			X_train_bootstrap, y_train_bootstrap = resample(X_train, y_train)
			tree = DecisionTreeClassifier(max_depth=tree_depth).fit(
				X_train_bootstrap, y_train_bootstrap)
	from sklearn.ensemble import RandomForestClassifier
	import numpy as np
	import pandas as pd

	def run_single_tree(X_train, y_train, X_test, y_test, depth):
		"""Fit one decision tree of the given depth and print its training accuracy.

		Args:
			X_train: Training features.
			y_train: Training labels.
			X_test: Test features.
			y_test: Test labels.
			depth: ``max_depth`` passed to the tree.
		"""
		model = DecisionTreeClassifier(max_depth=depth).fit(X_train, y_train)
		accuracy_train = model.score(X_train, y_train)
		# Scored on the test split as well, although this excerpt does not print it.
		accuracy_test = model.score(X_test, y_test)
		print('Single tree depth: ', depth)
		print('Accuracy, Training Set: ', round(accuracy_train*100,5), '%')
	import keras
	from keras.models import Sequential
	from keras.layers import Dense
	from keras.optimizers import SGD
	from sklearn.metrics import r2_score

	# Neural network consisting of a single neuron with a linear activation,
	# taking a one-dimensional input.
	model = Sequential([
		Dense(units=1, activation='linear', input_dim=1),
	])
	import numpy as np
	import matplotlib.pyplot as plt

	# Draw 100 x-values uniformly from [-1.5, 4.0) and put them in ascending order.
	x = np.random.uniform(low=-1.5, high=4.0, size=100)
	x.sort()

	# Points on the noise-free line y = -0.2*x + 0.9.
	y = x * -0.2 + 0.9

	# Perturb every point with normal noise (mean 0, standard deviation 1).
	y += np.random.normal(loc=0, scale=1, size=x.shape)
	import numpy as np

	def disparate_impact(B, m, fairness_threshold=.8):
		"""Start of a disparate-impact fairness check.

		NOTE(review): this excerpt looks truncated. The visible code only
		validates the input lengths and locates the entries of ``B`` equal to
		1; ``m`` and ``fairness_threshold`` are not used yet and nothing is
		returned.

		Args:
			B: Array compared element-wise against 1 (presumably binary
				predictions -- confirm against the caller).
			m: Array of the same length as ``B`` (presumably the
				protected-class indicator mentioned in the comments below --
				confirm).
			fairness_threshold: Defaults to 0.8; unused in the visible code.

		Raises:
			ValueError: If ``B`` and ``m`` differ in length.
		"""
		if len(B) != len(m):
			raise ValueError('Input arrays do not have same number of entries')

		# "positive class" are those where predictions = Charge Off
		# "majority class" are those where protected class status = 1

		# np.where returns a 1-tuple for 1-D input; the trailing comma unpacks it.
		indices_pos_class, = np.where(B == 1)
	import numpy as np

	def statistical_parity(B, m, fairness_threshold=.01):

	if len(B) != len(m):
	raise ValueError('Input arrays do not have same number of entries')

	indices_pos_class, = np.where(B == 1)
	indices_neg_class, = np.where(B == 0)