Skip to content

Instantly share code, notes, and snippets.

@johnmyleswhite
Last active September 8, 2016 17:38
Show Gist options
  • Save johnmyleswhite/dbbf242c6a017dd217036abac3ddef91 to your computer and use it in GitHub Desktop.
Save johnmyleswhite/dbbf242c6a017dd217036abac3ddef91 to your computer and use it in GitHub Desktop.
import Distributions: LogNormal, Normal, cdf, cquantile
function coverage(cis, truth)
s = 0
n = length(cis)
for ci in cis
s += ci[1] <= truth <= ci[2]
end
return s / n
end
function normal_ci(x)
m = mean(x)
sem = std(x) / sqrt(length(x))
c_l = quantile(Normal(0, 1), 0.05 / 2)
c_u = cquantile(Normal(0, 1), 0.05 / 2)
return m + c_l * sem, m + c_u * sem
end
function clt_bootstrap_ci(x, n_replicates)
replicates = Array(Float64, n_replicates)
x_resampled = similar(x)
n = length(x)
for replicate in 1:n_replicates
for i in 1:n
j = rand(1:n)
x_resampled[i] = x[j]
end
replicates[replicate] = mean(x_resampled)
end
m, s = mean(replicates), std(replicates)
l = quantile(Normal(m, s), 0.05 / 2)
u = cquantile(Normal(m, s), 0.05 / 2)
return l, u
end
function percentile_bootstrap_ci(x, n_replicates)
replicates = Array(Float64, n_replicates)
x_resampled = similar(x)
n = length(x)
for replicate in 1:n_replicates
for i in 1:n
j = rand(1:n)
x_resampled[i] = x[j]
end
replicates[replicate] = mean(x_resampled)
end
l = quantile(replicates, 0.05 / 2)
u = quantile(replicates, 1 - 0.05 / 2)
return l, u
end
function simulate()
src = LogNormal(0, 1)
ns = 2:30
n_sims = 100_000
means = Array(Float64, n_sims)
normal_cis = Array(Tuple{Float64, Float64}, n_sims)
bootstrap_1000_cis = Array(Tuple{Float64, Float64}, n_sims)
bootstrap_10000_cis = Array(Tuple{Float64, Float64}, n_sims)
bootstrap_clt_cis = Array(Tuple{Float64, Float64}, n_sims)
io = open("coverage.tsv", "w")
@printf(io, "n\tcoverage\tmethod\n")
for n in ns
x = rand(src, n)
for sim in 1:n_sims
rand!(src, x)
means[sim] = mean(x)
normal_cis[sim] = normal_ci(x)
bootstrap_1000_cis[sim] = percentile_bootstrap_ci(x, 1_000)
bootstrap_10000_cis[sim] = percentile_bootstrap_ci(x, 10_000)
bootstrap_clt_cis[sim] = clt_bootstrap_ci(x, 100)
end
empirical_mean = mean(means)
true_mean = mean(src)
empirical_sem = std(means)
true_sem = sqrt(var(src) / n)
@printf(
io,
"%d\t%s\t%s\n",
n,
coverage(normal_cis, true_mean),
"CLT Normal",
)
@printf(
io,
"%d\t%s\t%s\n",
n,
coverage(bootstrap_1000_cis, true_mean),
"Percentile Bootstrap w/ 1,000 Replicates",
)
@printf(
io,
"%d\t%s\t%s\n",
n,
coverage(bootstrap_10000_cis, true_mean),
"Percentile Bootstrap w/ 10,000 Replicates",
)
@printf(
io,
"%d\t%s\t%s\n",
n,
coverage(bootstrap_clt_cis, true_mean),
"CLT Bootstrap w/ 100 Replicates",
)
println(n)
end
close(io)
end
simulate()
library("ggplot2")
library("reshape2")
coverage <- read.csv("coverage.tsv", sep = "\t")
ggplot(coverage, aes(x = n, y = coverage, color = method)) +
geom_hline(yintercept = 0.95, alpha = 0.25) +
geom_line() +
xlab("N") +
ylab("Empirical Coverage Probability") +
ggtitle("Anti-Conservative Coverage Probabilities for Small N") +
ylim(0.4, 1) +
theme_bw()
ggsave("coverage.png", height = 8, width = 12)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment