Rob-wine/gist:525494f4ea9370fe830b630bccb885e4

## gistfile1.txt
import numpy as np
import pandas as pd
import random


def bootstrapped_conf_interval(data, metric, num_runs=1000, conf=.95):
    """Estimate a bootstrap confidence interval for a performance metric.

    The data is resampled with replacement ``num_runs`` times, the metric is
    evaluated on every resample, and the spread of those values around their
    own mean is re-centred on the metric of the original sample.

    Parameters
    ----------
    data : sequence
        Data points in a format accepted by the metric function. Items are
        resampled by position, so they may be scalars, tuples, rows, etc.
    metric : str or callable
        Either a metric name forwarded to ``get_metric(sample, metric)``
        (``get_metric`` is defined outside this block; the original
        documentation lists 'accuracy', 'precision', 'recall' and
        'f1-score'), or a callable ``metric(sample)`` returning a number.
    num_runs : int
        Number of bootstrapped samples to draw (must be >= 1).
    conf : float
        Confidence level of the interval, in the range (0, 1].

    Returns
    -------
    list of float
        ``[lower, upper]`` bounds of the confidence interval, e.g. [.2, .3].

    Raises
    ------
    ValueError
        If ``data`` is empty, ``num_runs`` < 1 or ``conf`` is outside (0, 1].
    """
    if num_runs < 1:
        raise ValueError("num_runs must be a positive integer")
    if not 0 < conf <= 1:
        raise ValueError("conf must be in the interval (0, 1]")
    num_points = len(data)
    if num_points == 0:
        raise ValueError("data must not be empty")

    def compute(sample):
        # A callable metric is applied directly; anything else is treated as
        # a metric name and delegated to the external get_metric helper.
        if callable(metric):
            return metric(sample)
        return get_metric(sample, metric)

    def resample():
        # Draw positions rather than items: np.random.choice only accepts
        # 1-D input, which rules out data made of pairs/rows.
        picks = np.random.randint(0, num_points, size=num_points)
        if isinstance(data, np.ndarray):
            return data[picks]
        if hasattr(data, "iloc"):  # pandas Series / DataFrame
            return data.iloc[picks]
        return [data[i] for i in picks]

    results = sorted(compute(resample()) for _ in range(num_runs))

    bootstrapped_mean = sum(results) / float(len(results))
    x_bar = compute(data)

    # Number of sorted results to cut off at each end of the distribution.
    left_index = int(num_runs * (1 - conf) / 2)
    # Mirror of left_index counted from the top. results[-left_index] would
    # wrap around to results[0] when left_index == 0 and would otherwise trim
    # one value fewer from the top than from the bottom.
    right_index = num_runs - 1 - left_index

    # Deviations of the cut-off points from the bootstrapped mean ...
    lower_delta = results[left_index] - bootstrapped_mean
    upper_delta = results[right_index] - bootstrapped_mean

    # ... applied around the metric of the actual (non-bootstrapped) sample.
    return [lower_delta + x_bar, upper_delta + x_bar]
	import numpy as np
	import pandas as pd
	import random


	def bootstrapped_conf_interval(data, metric, num_runs=1000, conf=.95):
		"""Estimate a bootstrap confidence interval for a performance metric.

		The data is resampled with replacement ``num_runs`` times, the metric
		is evaluated on every resample, and the spread of those values around
		their own mean is re-centred on the metric of the original sample.

		Parameters
		----------
		data : sequence
			Data points in a format accepted by the metric function. Items are
			resampled by position, so they may be scalars, tuples, rows, etc.
		metric : str or callable
			Either a metric name forwarded to ``get_metric(sample, metric)``
			(``get_metric`` is defined outside this block; the original
			documentation lists 'accuracy', 'precision', 'recall' and
			'f1-score'), or a callable ``metric(sample)`` returning a number.
		num_runs : int
			Number of bootstrapped samples to draw (must be >= 1).
		conf : float
			Confidence level of the interval, in the range (0, 1].

		Returns
		-------
		list of float
			``[lower, upper]`` bounds of the confidence interval, e.g. [.2, .3].

		Raises
		------
		ValueError
			If ``data`` is empty, ``num_runs`` < 1 or ``conf`` is outside (0, 1].
		"""
		if num_runs < 1:
			raise ValueError("num_runs must be a positive integer")
		if not 0 < conf <= 1:
			raise ValueError("conf must be in the interval (0, 1]")
		num_points = len(data)
		if num_points == 0:
			raise ValueError("data must not be empty")

		def compute(sample):
			# A callable metric is applied directly; anything else is treated
			# as a metric name and delegated to the external get_metric helper.
			if callable(metric):
				return metric(sample)
			return get_metric(sample, metric)

		def resample():
			# Draw positions rather than items: np.random.choice only accepts
			# 1-D input, which rules out data made of pairs/rows.
			picks = np.random.randint(0, num_points, size=num_points)
			if isinstance(data, np.ndarray):
				return data[picks]
			if hasattr(data, "iloc"):  # pandas Series / DataFrame
				return data.iloc[picks]
			return [data[i] for i in picks]

		results = sorted(compute(resample()) for _ in range(num_runs))

		bootstrapped_mean = sum(results) / float(len(results))
		x_bar = compute(data)

		# Number of sorted results to cut off at each end of the distribution.
		left_index = int(num_runs * (1 - conf) / 2)
		# Mirror of left_index counted from the top. results[-left_index]
		# would wrap around to results[0] when left_index == 0 and would
		# otherwise trim one value fewer from the top than from the bottom.
		right_index = num_runs - 1 - left_index

		# Deviations of the cut-off points from the bootstrapped mean ...
		lower_delta = results[left_index] - bootstrapped_mean
		upper_delta = results[right_index] - bootstrapped_mean

		# ... applied around the metric of the actual (non-bootstrapped) sample.
		return [lower_delta + x_bar, upper_delta + x_bar]