import numpy as np
from statsmodels.nonparametric.smoothers_lowess import lowess
from sklearn.datasets import load_breast_cancer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold, RepeatedKFold, GridSearchCV, cross_val_score
from sklearn.metrics import make_scorer, brier_score_loss
from sklearn.utils import resample
import matplotlib.pyplot as plt
# Load in the data to be used.
data = load_breast_cancer()
X, y = data['data'], data['target']
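# Quick look at what we loaded (a sanity check, not in the original gist):
# 569 samples, 30 features, binary target; the prevalence of the positive
# class matters when judging the calibration curve later.
print(X.shape, y.mean())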
# The model we will be using will be an L2 penalized logistic regression.
# We will have to select the appropriate penalty strength, and that requires cross validation.
# There are some convergence issues, so pick a stupidly large max iteration count.
pipeline_components = [
    ('scaling', StandardScaler()),
    ('logistic_regression', LogisticRegression(penalty='l2', max_iter=1_000_000))
]
a_model = Pipeline(pipeline_components)
param_grid = {'logistic_regression__C': 1/np.logspace(-2, 2, base = np.exp(1))}
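# Sanity check on the grid (my note, not in the original gist): np.logspace
# defaults to 50 points, so this grid runs from C = e^2 down to C = e^-2.
print(param_grid['logistic_regression__C'].min(), param_grid['logistic_regression__C'].max())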
# Choose a proper scoring rule for model selection.
# brier_score_loss is a loss (lower is better), so we need greater_is_better=False;
# sklearn then negates the score internally so grid search still maximizes.
brier_score = make_scorer(brier_score_loss, greater_is_better=False, needs_proba=True, pos_label=1)
# According to Frank Harrell, 100 repeats of 10-fold CV is about as good as the optimism-corrected bootstrap
outer_cv = RepeatedKFold(n_splits=10, n_repeats=100)
# We will choose our regularization strength through 10-fold CV
inner_cv = KFold(n_splits=10)
gscv = GridSearchCV(a_model, param_grid=param_grid, cv=inner_cv, scoring=brier_score, verbose=0)
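# Rough cost accounting (my arithmetic): each GridSearchCV call fits
# 50 C values x 10 inner folds (plus one refit), and the outer loop below
# runs 10 folds x 100 repeats = 1,000 such calls, i.e. on the order of
# 500,000 pipeline fits.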
# Now let's estimate how well a model selected via 10-fold CV would do, using repeated CV.
# This is going to take a long time, so it is gated behind `if False`.
if False:
    cv_results = cross_val_score(gscv, X, y, cv=outer_cv, scoring=brier_score)
    # Scores come back negated because the scorer has greater_is_better=False.
    print(-cv_results.mean())
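    # A rough sense of the spread across the 1,000 scores (not a formal CI):
    print(np.percentile(-cv_results, [2.5, 97.5]))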
# Construct the apparent calibration
prange = np.linspace(0, 1, 25)
# Fit our model on all the data
best_model = gscv.fit(X, y).best_estimator_
# Estimate the risks from the best model
predicted_p = best_model.predict_proba(X)[:, 1]
# Compute the apparent calibration
apparent_cal = lowess(y, predicted_p, it=0, xvals=prange)
# Do the optimism-corrected bootstrap for the calibration curve
nsim = 500
optimism = np.zeros((nsim, prange.size))
for i in range(nsim):
    # Bootstrap the original dataset
    Xb, yb = resample(X, y)
    # Fit the model, including the hyperparameter selection, on the bootstrapped data
    fit = gscv.fit(Xb, yb).best_estimator_
    # Get the risk estimates from the model fit on the bootstrapped data
    predicted_risk_bs = fit.predict_proba(Xb)[:, 1]
    # Fit a calibration curve to the predicted risk on the bootstrapped data
    smooth_p_bs = lowess(yb, predicted_risk_bs, it=0, xvals=prange)
    # Apply the bootstrap model to the original data
    predicted_risk_orig = fit.predict_proba(X)[:, 1]
    # Fit a calibration curve on the original data using predictions from the bootstrap model
    smooth_p_bs_orig = lowess(y, predicted_risk_orig, it=0, xvals=prange)
    # Optimism: apparent (bootstrap) performance minus performance on the original data
    optimism[i] = smooth_p_bs - smooth_p_bs_orig
# Here is the bias corrected calibration curve
bias_corrected_cal = apparent_cal - optimism.mean(0)
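# Optional guard (an assumption on my part, not in the original gist): lowess
# will extrapolate to xvals outside the range of the predicted risks, so you
# may want to mask prange, apparent_cal, and bias_corrected_cal with this
# boolean index before plotting.
support = (prange >= predicted_p.min()) & (prange <= predicted_p.max())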
# and finally the plot
fig, ax = plt.subplots(dpi=240)
ax.set_aspect('equal')
ax.scatter(predicted_p, y, s=1, alpha=0.4, c='k')
ax.plot(prange, apparent_cal, 'red', label='Non-Parametric Estimate')
ax.plot([0, 1], [0, 1], 'k--', label='Perfect Calibration')
ax.plot(prange, bias_corrected_cal, 'C0', label='Optimism Corrected Calibration')
ax.legend(loc='upper left', prop={'size': 6})
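# Finish the figure with axis labels and render it (standard matplotlib calls;
# the label wording is mine).
ax.set_xlabel('Predicted risk')
ax.set_ylabel('Observed outcome')
plt.show()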