Skip to content

Instantly share code, notes, and snippets.

@halflearned
Created March 16, 2023 21:12
Show Gist options
  • Save halflearned/e95967b8b9d3098f775ad2d421b63ffb to your computer and use it in GitHub Desktop.
Save halflearned/e95967b8b9d3098f775ad2d421b63ffb to your computer and use it in GitHub Desktop.
Quantile estimation with confidence interval
import numpy as np
from scipy.stats import norm, expon
def quantile_with_ci(x, q, alpha=.05, num_boot=10_000):
n = len(x)
quantile_estimate = np.quantile(x, q)
idx_boot = np.random.randint(n, size=(num_boot, n))
quantile_boot = np.quantile(x[idx_boot], q=q, axis=1)
quantile_se = np.std(quantile_boot)
half_ci = norm.ppf(1 - alpha/2) * quantile_se
quantile_ci = (
quantile_estimate - half_ci,
quantile_estimate + half_ci
)
return quantile_estimate, quantile_se, quantile_ci
# Check!
# True parameter we're trying to enclose with our confidence interval
np.random.seed(1)
q90 = expon.ppf(.9)
coverage = []
for i in range(1000):
if i % 10 == 0:
print(i, end=" ")
# random data from Expon(1) distribution
data = expon.rvs(size=500)
# compute 90-quantile confidence interval as above
q90_est, q90_se, (q90_ci_lower, q90_ci_upper) = quantile_with_ci(data, q=.9)
# Since we're computing a 95% confidence interval,
# it should contain the truth about 95% of the time
coverage.append(
q90_ci_lower < q90 < q90_ci_upper
)
# How often does the 95% CI contain the truth?
# [Should be around 95%, and tend to it as n -> \infty]
print("Coverage:", np.mean(coverage))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment