Last active
July 2, 2022 23:27
-
-
Save Rob-wine/525494f4ea9370fe830b630bccb885e4 to your computer and use it in GitHub Desktop.
Bootstrapping in Pandas example
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pandas as pd | |
import random | |
def bootstrapped_conf_interval(data, metric, num_runs=1000, conf=.95):
    """Estimate a bootstrapped confidence interval for a model metric.

    Resamples ``data`` with replacement ``num_runs`` times, evaluates
    ``get_metric`` on every resample, and re-centres the spread of those
    values (measured around their own mean) on the metric computed from the
    full, un-resampled ``data``.

    Parameters
    ----------------
    data: list of data points in a format that is acceptable to get_metric
        (it is resampled with np.random.choice, which requires 1-D input)
    metric: metric name forwarded unchanged to get_metric; options include
        'accuracy', 'precision', 'recall' and 'f1-score'
    num_runs: int designating how many bootstrapped samples of data you
        would like; must be at least 1
    conf: how certain you want to be about the range of possible values of
        your metric; must satisfy 0 < conf <= 1

    Returns
    ----------
    Two-element list [lower, upper] with floats as entries ex: [.2, .3]

    Raises
    ----------
    ValueError: if num_runs < 1 or conf is outside (0, 1]
    """
    if num_runs < 1:
        raise ValueError("num_runs must be at least 1")
    if not 0 < conf <= 1:
        raise ValueError("conf must be in the interval (0, 1]")

    # Metric value on each of num_runs bootstrapped (with replacement)
    # resamples of the data, in ascending order.
    results = sorted(
        get_metric(np.random.choice(data, len(data)), metric)
        for _ in range(num_runs)
    )
    bootstrapped_mean = sum(results) / float(len(results))
    x_bar = get_metric(data, metric)

    # How many of the measured metrics to cut off on either end.  The clamp
    # keeps the lower index from crossing the upper one when conf is tiny.
    left_index = min(int(num_runs * (1 - conf) / 2), (num_runs - 1) // 2)
    # Mirror the lower index from the top with an explicit positive index:
    # results[-left_index] would silently become results[0] when
    # left_index == 0, collapsing the interval to a single point.
    right_index = num_runs - 1 - left_index

    # Deviations from the bootstrapped mean.
    delta_interval = [results[left_index] - bootstrapped_mean,
                      results[right_index] - bootstrapped_mean]
    # Apply those deviations to the metric of the actual sample, not the
    # bootstrapped mean.
    interval = [delta_interval[0] + x_bar, delta_interval[1] + x_bar]
    return interval
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Bootstrapping example for the "What is Bootstrapping" article.