# Source: roncho12/bootstrap_estimate_and_ci.py

## bootstrap_estimate_and_ci.py
import numpy as np
from mlxtend.evaluate import bootstrap_point632_score


def bootstrap_estimate_and_ci(estimator, X, y, scoring_func=None, random_seed=0,
                              method='.632', alpha=0.05, n_splits=200):
    """Summarize bootstrap scores as a point estimate, percentile interval and spread.

    The per-split scores are produced by
    ``mlxtend.evaluate.bootstrap_point632_score``; this function only
    aggregates them.

    Parameters
    ----------
    estimator, X, y, scoring_func, random_seed, method, n_splits
        Forwarded unchanged to ``bootstrap_point632_score``.
    alpha : float, default 0.05
        Significance level in ``[0, 1]``. The interval spans the
        ``100 * alpha / 2`` and ``100 * (1 - alpha / 2)`` percentiles of the
        bootstrap scores, e.g. ``alpha=0.05`` gives the 2.5th and 97.5th
        percentiles.

    Returns
    -------
    estimate : float
        Mean of the bootstrap scores.
    lower_bound, upper_bound : float
        Percentile bounds of the bootstrap scores.
    stderr : float
        ``np.std`` of the bootstrap scores (NumPy's default ``ddof=0``).

    Raises
    ------
    ValueError
        If ``alpha`` is outside ``[0, 1]`` (including NaN).
    """
    # Validate before bootstrapping. Without this check an alpha in (1, 2]
    # silently yields lower_bound > upper_bound, and any other out-of-range
    # value is only rejected by np.percentile after every split has been fit.
    alpha_values = np.asarray(alpha)
    if not np.all((alpha_values >= 0) & (alpha_values <= 1)):
        raise ValueError(f"alpha must lie in [0, 1], got {alpha!r}")

    scores = bootstrap_point632_score(estimator, X, y, scoring_func=scoring_func,
                                      n_splits=n_splits, random_seed=random_seed,
                                      method=method)
    estimate = np.mean(scores)
    lower_bound = np.percentile(scores, 100*(alpha/2))
    upper_bound = np.percentile(scores, 100*(1-alpha/2))
    stderr = np.std(scores)

    return estimate, lower_bound, upper_bound, stderr


#================#
#    Examples    #
#================#
from sklearn.base import clone
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import recall_score, f1_score, precision_score, roc_auc_score


# Synthetic classification data and the model shared by all examples below.
X, y = make_classification(n_redundant=0)
estimator = LogisticRegression(solver='lbfgs')

# Accuracy: bootstrap estimate with a 95% confidence interval (alpha defaults
# to 0.05 and no scoring_func is passed).
est, low, up, stderr = bootstrap_estimate_and_ci(estimator, X, y)

print(
    f"estimate: {est:.2f}, confidence interval: [{low:.2f}, {up:.2f}], "
    f"standard error: {stderr:.2f}"
)

# Recall: bootstrap estimate with a 95% confidence interval.
est, low, up, stderr = bootstrap_estimate_and_ci(
    estimator, X, y, scoring_func=recall_score)

# Precision: bootstrap estimate with a 99% confidence interval.
est, low, up, stderr = bootstrap_estimate_and_ci(
    estimator, X, y, scoring_func=precision_score, alpha=0.01)

# F1-score: bootstrap estimate with a 90% confidence interval.
est, low, up, stderr = bootstrap_estimate_and_ci(
    estimator, X, y, scoring_func=f1_score, alpha=0.1)

# ROC AUC: bootstrap estimate with a 95% confidence interval.
# Hack (short and simple): roc_auc_score needs continuous scores, so the
# clone's predict is pointed at decision_function.
# NOTE(review): if bootstrap_point632_score clones the estimator internally,
# this instance-level patch would be dropped -- confirm against the installed
# mlxtend version.
cloned_estimator = clone(estimator)
cloned_estimator.predict = cloned_estimator.decision_function
est, low, up, stderr = bootstrap_estimate_and_ci(
    cloned_estimator, X, y, scoring_func=roc_auc_score)
	import numpy as np
	from mlxtend.evaluate import bootstrap_point632_score


	def bootstrap_estimate_and_ci(estimator, X, y, scoring_func=None, random_seed=0,
	                              method='.632', alpha=0.05, n_splits=200):
	    """Summarize bootstrap scores as a point estimate, percentile interval and spread.

	    The per-split scores are produced by
	    ``mlxtend.evaluate.bootstrap_point632_score``; this function only
	    aggregates them.

	    Parameters
	    ----------
	    estimator, X, y, scoring_func, random_seed, method, n_splits
	        Forwarded unchanged to ``bootstrap_point632_score``.
	    alpha : float, default 0.05
	        Significance level in ``[0, 1]``. The interval spans the
	        ``100 * alpha / 2`` and ``100 * (1 - alpha / 2)`` percentiles of the
	        bootstrap scores.

	    Returns
	    -------
	    estimate : float
	        Mean of the bootstrap scores.
	    lower_bound, upper_bound : float
	        Percentile bounds of the bootstrap scores.
	    stderr : float
	        ``np.std`` of the bootstrap scores (NumPy's default ``ddof=0``).

	    Raises
	    ------
	    ValueError
	        If ``alpha`` is outside ``[0, 1]`` (including NaN).
	    """
	    # Validate before bootstrapping. Without this check an alpha in (1, 2]
	    # silently yields lower_bound > upper_bound, and any other out-of-range
	    # value is only rejected by np.percentile after every split has been fit.
	    alpha_values = np.asarray(alpha)
	    if not np.all((alpha_values >= 0) & (alpha_values <= 1)):
	        raise ValueError(f"alpha must lie in [0, 1], got {alpha!r}")

	    scores = bootstrap_point632_score(estimator, X, y, scoring_func=scoring_func,
	                                      n_splits=n_splits, random_seed=random_seed,
	                                      method=method)
	    estimate = np.mean(scores)
	    lower_bound = np.percentile(scores, 100*(alpha/2))
	    upper_bound = np.percentile(scores, 100*(1-alpha/2))
	    stderr = np.std(scores)

	    return estimate, lower_bound, upper_bound, stderr


	#================#
	# Examples #
	#================#
	from sklearn.base import clone
	from sklearn.datasets import make_classification
	from sklearn.linear_model import LogisticRegression
	from sklearn.metrics import recall_score, f1_score, precision_score, roc_auc_score


	# Synthetic classification data and the model shared by all examples below.
	X, y = make_classification(n_redundant=0)
	estimator = LogisticRegression(solver='lbfgs')

	# Accuracy: bootstrap estimate with a 95% confidence interval (alpha defaults
	# to 0.05 and no scoring_func is passed).
	est, low, up, stderr = bootstrap_estimate_and_ci(estimator, X, y)

	print(
	    f"estimate: {est:.2f}, confidence interval: [{low:.2f}, {up:.2f}], "
	    f"standard error: {stderr:.2f}"
	)

	# Recall: bootstrap estimate with a 95% confidence interval.
	est, low, up, stderr = bootstrap_estimate_and_ci(
	    estimator, X, y, scoring_func=recall_score)

	# Precision: bootstrap estimate with a 99% confidence interval.
	est, low, up, stderr = bootstrap_estimate_and_ci(
	    estimator, X, y, scoring_func=precision_score, alpha=0.01)

	# F1-score: bootstrap estimate with a 90% confidence interval.
	est, low, up, stderr = bootstrap_estimate_and_ci(
	    estimator, X, y, scoring_func=f1_score, alpha=0.1)

	# ROC AUC: bootstrap estimate with a 95% confidence interval.
	# Hack (short and simple): roc_auc_score needs continuous scores, so the
	# clone's predict is pointed at decision_function.
	# NOTE(review): if bootstrap_point632_score clones the estimator internally,
	# this instance-level patch would be dropped -- confirm against the installed
	# mlxtend version.
	cloned_estimator = clone(estimator)
	cloned_estimator.predict = cloned_estimator.decision_function
	est, low, up, stderr = bootstrap_estimate_and_ci(
	    cloned_estimator, X, y, scoring_func=roc_auc_score)