Created
December 8, 2016 17:30
-
-
Save johnmyleswhite/429dfb1b618e6f45f83fc42ef5b0fd94 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Ground rules: | |
# | |
# (1) Must collect at least 10 observations | |
# (2) After that, keep collecting observations until | |
# we satisfy condition: s / sqrt(n) < tolerance | |
# (3) Then output a tuple of s and n. | |
using Distributions | |
function run_simulation() | |
# Run enough simulations to have a reasonable sense of joint P(s, n) | |
n_sims = 100_000 | |
# Per-simulation estimates of standard deviation. | |
ss = Array(Float64, n_sims) | |
# Per-simulation estimates of sample size required. | |
ns = Array(Int, n_sims) | |
# Each individual observation is a draw from this distribution. | |
dgp = Normal(10, 1) | |
# We stop when the SEM is smaller than this tolerance. | |
tolerance = 0.10 | |
# Or we stop when we collect our full budget of samples. | |
max_n = 1_000 | |
# Monte Carlo loop | |
for sim in 1:n_sims | |
# Collect initial 10 observations. | |
x = rand(dgp, 10) | |
# Estimate initial parameters. | |
s = std(x) | |
n = 10 | |
sem = s / sqrt(n) | |
# Keep going until we hit tolerance or run out of budget. | |
while sem > tolerance && n < max_n | |
# Add one more observation. | |
push!(x, rand(dgp)) | |
# Update parameter estimates. | |
s = std(x) | |
n += 1 | |
sem = s / sqrt(n) | |
end | |
# Store final outcome as a single draw from joint distribution. | |
ss[sim] = s | |
ns[sim] = n | |
end | |
return ss, ns | |
end | |
ss, ns = run_simulation() | |
# Write data to disk for later analysis in R. | |
writecsv("sequential.csv", hcat(ss, ns)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library("ggplot2") | |
library("dplyr") | |
tuples <- read.csv("sequential.csv", header = FALSE) | |
names(tuples) <- c("s", "n") | |
ggplot(tuples, aes(x = n, y = s)) + | |
geom_point() | |
biases <- tuples %>% | |
group_by(n) %>% | |
summarize(bias = mean(s - 1)) | |
ggplot(biases, aes(x = n, y = bias)) + | |
geom_line() | |
tuples %>% | |
summarize( | |
bias = mean(s - 1) | |
) | |
t.test(tuples$s - 1) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment