Skip to content

Instantly share code, notes, and snippets.

@roualdes
Last active November 8, 2018 18:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save roualdes/1de1c9a4a26581ba18a7ae9b96019970 to your computer and use it in GitHub Desktop.
Save roualdes/1de1c9a4a26581ba18a7ae9b96019970 to your computer and use it in GitHub Desktop.
Bootstrap Confidence Interval with dplyr
library(boot)
library(dplyr)
## assumed symmetric bootstrapped confidence interval
# function: bootstrap estimate of the standard error of a statistic
#
# x   : data handed to boot(); each resample is taken as x[idx]
# fun : statistic to bootstrap, resolved with match.fun()
#       (so a function or a function name both work)
# R   : number of bootstrap replicates
#
# Returns sd() of the replicates stored in the `t` component of the boot
# result.
boot_sd <- function(x, fun=mean, R=1001) {
  # match.fun() must see the untouched `fun` argument: it is what lets a
  # caller pass a bare name such as `median` even when a data column of the
  # same name shadows the function at the call site.
  statistic <- match.fun(fun)
  replicates <- boot(
    data = x,
    statistic = function(values, picks) statistic(values[picks]),
    R = R
  )
  sd(replicates$t)
}
# example:
# symmetric confidence interval for the median of MPG, grouped by number of
# cylinders: median +/- t quantile * bootstrap standard error
mtcars %>%
  group_by(cyl) %>%
  summarise(median = median(mpg),
            n = n(),
            # stats::median rather than bare `median`: at this point in
            # summarise() the bare name refers to the `median` column
            # created above, not to the function.
            meMPG = qt(0.975, n - 1) * boot_sd(mpg, stats::median, 1001),
            lower_bound = median - meMPG,
            upper_bound = median + meMPG) %>%
  # naming the wanted columns already drops meMPG and n
  select(cyl, lower_bound, median, upper_bound)
## bootstrapped percentile confidence interval
# statistic function in the (data, indices) form that boot() calls:
# returns the mean of the resample d[i]
#
# d : the data being resampled
# i : indices selecting the current resample
boot_fn <- function(d, i) {
  resample <- d[i]
  mean(resample)
}
# function: calculate percentiles of bootstrap replicates and return them as
# a one-row data frame
#
# b     : object whose `t` component holds the bootstrap replicates
#         (e.g. the value returned by boot())
# probs : probabilities passed to quantile()
# nms   : column names of the result, one per element of `probs`
#
# Returns a data.frame with one row and one column per requested percentile.
# Stops with an error when `probs` and `nms` differ in length, since the
# columns could not be labelled unambiguously.
percentile <- function(b, probs=c(0.025, 0.5, 0.975),
                       nms=c("lower_bound", "median", "upper_bound")) {
  if (length(probs) != length(nms)) {
    stop("`probs` and `nms` must have the same length", call. = FALSE)
  }
  cutoffs <- stats::quantile(b$t, probs = probs)
  # as.list() turns each percentile into its own column of the data frame
  data.frame(stats::setNames(as.list(cutoffs), nms))
}
# example:
# percentile confidence interval for the mean of MPG (boot_fn bootstraps the
# mean), grouped by number of cylinders; the `median` column is the median of
# the bootstrap replicates, not the median of MPG
mtcars %>%
  group_by(cyl) %>%
  # one-row data frame of percentiles per group, kept in a list column
  summarise(bs = list(percentile(boot(mpg, boot_fn, R = 1001)))) %>%
  tidyr::unnest(bs)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment