halflearned/quantile_with_ci.py

## quantile_with_ci.py
import numpy as np
from scipy.stats import norm, expon

def quantile_with_ci(x, q, alpha=.05, num_boot=10_000):
    n = len(x)
    quantile_estimate = np.quantile(x, q)
    idx_boot = np.random.randint(n, size=(num_boot, n))
    quantile_boot = np.quantile(x[idx_boot], q=q, axis=1)
    quantile_se = np.std(quantile_boot)
    half_ci = norm.ppf(1 - alpha/2) * quantile_se
    quantile_ci = (
        quantile_estimate - half_ci,
        quantile_estimate + half_ci
    )
    return quantile_estimate, quantile_se, quantile_ci

# Check!
# True parameter we're trying to enclose with our confidence interval
np.random.seed(1)
q90 = expon.ppf(.9)
coverage = []
for i in range(1000):
    if i % 10 == 0:
        print(i, end=" ")
    # random data from Expon(1) distribution
    data = expon.rvs(size=500)

    # compute 90-quantile confidence interval as above
    q90_est, q90_se, (q90_ci_lower, q90_ci_upper) = quantile_with_ci(data, q=.9)

    # Since we're computing a 95% confidence interval,
    # it should contain the truth about 95% of the time
    coverage.append(
        q90_ci_lower < q90 < q90_ci_upper
    )

# How often does the 95% CI contain the truth?
# [Should be around 95%, and tend to it as n -> \infty]
print("Coverage:", np.mean(coverage))
	import numpy as np
	from scipy.stats import norm, expon

	def quantile_with_ci(x, q, alpha=.05, num_boot=10_000):
	n = len(x)
	quantile_estimate = np.quantile(x, q)
	idx_boot = np.random.randint(n, size=(num_boot, n))
	quantile_boot = np.quantile(x[idx_boot], q=q, axis=1)
	quantile_se = np.std(quantile_boot)
	half_ci = norm.ppf(1 - alpha/2) * quantile_se
	quantile_ci = (
	quantile_estimate - half_ci,
	quantile_estimate + half_ci
	)
	return quantile_estimate, quantile_se, quantile_ci

	# Check!
	# True parameter we're trying to enclose with our confidence interval
	np.random.seed(1)
	q90 = expon.ppf(.9)
	coverage = []
	for i in range(1000):
	if i % 10 == 0:
	print(i, end=" ")
	# random data from Expon(1) distribution
	data = expon.rvs(size=500)

	# compute 90-quantile confidence interval as above
	q90_est, q90_se, (q90_ci_lower, q90_ci_upper) = quantile_with_ci(data, q=.9)

	# Since we're computing a 95% confidence interval,
	# it should contain the truth about 95% of the time
	coverage.append(
	q90_ci_lower < q90 < q90_ci_upper
	)

	# How often does the 95% CI contain the truth?
	# [Should be around 95%, and tend to it as n -> \infty]
	print("Coverage:", np.mean(coverage))