This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def confidence_intervals(data, confidence_level=0.99):
    """Print and return a percentile-based confidence interval.

    The interval is read off the empirical quantiles of the first column of
    ``data``, trimming an equal share from each tail.

    Parameters
    ----------
    data : pd.DataFrame
        Object whose first column (selected positionally with
        ``.iloc[:, 0]``) holds the values to summarise.
    confidence_level : float, default 0.99
        Fraction of the distribution the interval should cover.
        Must lie in ``[0, 1]``.

    Returns
    -------
    tuple
        ``(bottom_percentile, top_percentile)``, each rounded to 4 decimals.

    Raises
    ------
    ValueError
        If ``confidence_level`` is outside ``[0, 1]``.
    """
    # Levels above 1 make the quantile call fail with an opaque message, and
    # negative levels silently produce an inverted interval; reject both here.
    if not 0 <= confidence_level <= 1:
        raise ValueError(
            'confidence_level must be between 0 and 1, got {}'.format(
                confidence_level))

    # Split the excluded probability mass evenly between the two tails.
    low_end = (1 - confidence_level) / 2
    high_end = 1 - low_end

    values = data.iloc[:, 0]
    bottom_percentile = np.round(values.quantile(low_end), 4)
    top_percentile = np.round(values.quantile(high_end), 4)

    print('The {}% confidence interval is [{}, {}]'.format(
        confidence_level * 100, bottom_percentile, top_percentile))
    # Returned as well as printed so callers can reuse the bounds.
    return bottom_percentile, top_percentile
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def bootstrap(data, col, st_dev=False, rep=1000): | |
if not st_dev: | |
means = [] | |
n = len(data) | |
for i in range(rep): | |
sample = data.sample(n=n, replace=True) | |
mean = sample[col].mean() | |
means.append(mean) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def permutation_test(control, treatment, alpha, r=1000): | |
""" | |
Runs a permutation test to check whether the difference in means | |
between control and treatment is statistically significant. | |
Parameters: | |
control: pd.Series | |
A pandas series with all the control (A) observations | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import division | |
import numpy as np | |
import pandas as pd | |
import random | |
from datetime import date | |
def expected(A, B): | |
""" | |
Calculate expected score of team A in a match against team B |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def plot_learning_curves(estimator, X_train, y_train, X_val, y_val, | |
suptitle='', title='', xlabel='', ylabel=''): | |
""" | |
Plots learning curves for a given estimator. | |
Parameters | |
---------- | |
estimator : sklearn estimator | |
X_train : pd.DataFrame |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Convert X and y to numpy arrays (assumes both are pandas objects).
# ``as_matrix()`` was deprecated in pandas 0.23 and removed in 1.0;
# ``.values`` returns the same ndarray and exists on every pandas version.
X = X.values
y = y.values
# create stratified k-fold split generators for inner and outer loops
# (10 shuffled folds each, with a fixed seed so the splits are repeatable)
outer_kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=12)
inner_kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=12)
# set up hyperparameter tuning: candidate values 1e-4 ... 1e2, one per decade
Cs = 10.0 ** np.arange(-4, 3)