Last active
September 27, 2017 05:38
-
-
Save t-redactyl/5ffef9200ea51cc81510 to your computer and use it in GitHub Desktop.
Code associated with blog post: http://t-redactyl.github.io/blog/2015/09/a-gentle-introduction-to-bootstrapping.html
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require(ggplot2); require(gridExtra) | |
# Set the colours for the graphs | |
barfill <- "#4271AE" | |
barlines <- "#1F3552" | |
line1 <- "black" | |
line2 <- "#FF3721" | |
# Plotting histogram of sample of daily page views | |
g1 <- ggplot(data=as.data.frame(sample), aes(sample)) + | |
geom_histogram(aes(y = ..density..), binwidth = 6, | |
col = barlines, | |
fill = barfill) + | |
xlab("Number of page views per day") + | |
ylab("Density") + | |
theme_bw() + | |
ggtitle("Sample distribution of page views over 30 days") + | |
theme(plot.title = element_text(lineheight=.8, face="bold")) | |
print(g1) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Plotting a histogram with the +/- 1 and 2 standard error intervals. | |
sem1 <- data.frame(SEMs="+/- 1 SEM", | |
vals = c(mean(mn_vector) - sd(mn_vector), mean(mn_vector) + sd(mn_vector))) | |
sem2 <- data.frame(SEMs="+/- 2 SEMs", | |
vals = c(mean(mn_vector) - 2 * sd(mn_vector), mean(mn_vector) + 2 * sd(mn_vector))) | |
sems <- rbind(sem1, sem2) | |
g1 <- ggplot(data=as.data.frame(mn_vector), aes(mn_vector)) + | |
geom_histogram(aes(y = ..density..), binwidth = 1, | |
col = barlines, | |
fill = barfill) + | |
xlab("Mean of each resample") + | |
ylab("Density") + | |
theme_bw() + | |
ggtitle("Distribution of Means of 1,000 Resamples") + | |
theme(plot.title = element_text(lineheight=.8, face="bold")) + | |
geom_vline(data=sems, aes(xintercept=vals, linetype = SEMs, | |
colour = SEMs), size = 1, show_guide = TRUE) + | |
scale_color_manual(values=c("+/- 1 SEM" = line1, | |
"+/- 2 SEMs" = line2)) | |
print(g1) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Setting seed | |
set.seed(567) | |
# Generate the mean of each resample and store in a vector, and store each resample in a dataframe | |
mn_vector <- NULL | |
resample_frame <- data.frame(row.names = seq(from = 1, to = 30, by = 1)) | |
for (i in 1 : 1000) { | |
s <- sample(sample, 30, replace = TRUE) | |
resample_frame <- cbind(resample_frame, s) | |
mn_vector <- c(mn_vector, mean(s)) | |
} | |
# Name the columns in the resample dataframe | |
names(resample_frame) <- paste0("n", seq(from = 1, to = 1000, by = 1)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Setting seed and drawing non-Poisson sample | |
set.seed(567) | |
sample_np <- c(rpois(10, lambda = 220), runif(20, min = 180, max = 245)) | |
# Generate the mean of each resample and store in a vector | |
mn_vector_np <- NULL | |
for (i in 1 : 1000) { | |
s <- sample(sample_np, 30, replace = TRUE) | |
mn_vector_np <- c(mn_vector_np, mean(s)) | |
} | |
# Generate bootstrapped SEM | |
b_sem <- sd(mn_vector_np) | |
# Generate formula-derived SEM | |
f_sem <- sqrt(mean(sample_np) / 30) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Defining lambda and n | |
lambda <- mean(sample) | |
n <- 30 | |
# Calculating SEM | |
sem <- sqrt(lambda / n) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
mean1 <- data.frame(Means="Population mean", vals = 220) | |
mean2 <- data.frame(Means="Sample mean", vals = mean(resample_frame$n1)) | |
means <- rbind(mean1, mean2) | |
g1 <- ggplot(data=resample_frame, aes(resample_frame$n1)) + | |
geom_histogram(aes(y = ..density..), | |
binwidth = 5, fill = barfill, colour = barlines) + | |
xlab("Daily page views") + | |
ylab("Density") + | |
theme_bw() + | |
ggtitle("Sample 1") + | |
theme(plot.title = element_text(lineheight=1.1, face="bold")) + | |
geom_vline(data=means, aes(xintercept=vals, linetype = Means, | |
colour = Means), size = 1, show_guide = TRUE) + | |
scale_color_manual(values=c("Population mean" = line1, "Sample mean" = line2)) | |
mean1 <- data.frame(Means="Population mean", vals = 220) | |
mean2 <- data.frame(Means="Sample mean", vals = mean(resample_frame$n2)) | |
means <- rbind(mean1, mean2) | |
g2 <- ggplot(data=resample_frame, aes(resample_frame$n2)) + | |
geom_histogram(aes(y = ..density..), | |
binwidth = 5, fill = barfill, colour = barlines) + | |
xlab("Daily page views") + | |
ylab("Density") + | |
theme_bw() + | |
ggtitle("Sample 2") + | |
theme(plot.title = element_text(lineheight=1.1, face="bold")) + | |
geom_vline(data=means, aes(xintercept=vals, linetype = Means, | |
colour = Means), size = 1, show_guide = TRUE) + | |
scale_color_manual(values=c("Population mean" = line1, "Sample mean" = line2)) | |
grid.arrange(g1, g2, nrow = 1, ncol = 2) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Setting seed and generating a single sample | |
set.seed(567) | |
sample <- rpois(30, lambda = 220) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Plotting a histogram with the +/- 1 standard error intervals. | |
sem1 <- data.frame(SEMs="+/- 1 SEM (bootstrap)", | |
vals = c(mean(mn_vector_np) - b_sem, mean(mn_vector_np) + b_sem)) | |
sem2 <- data.frame(SEMs="+/- 1 SEM (formula)", | |
vals = c(mean(mn_vector_np) - f_sem, mean(mn_vector_np) + f_sem)) | |
sems <- rbind(sem1, sem2) | |
g1 <- ggplot(data=as.data.frame(mn_vector_np), aes(mn_vector_np)) + | |
geom_histogram(aes(y = ..density..), binwidth = 1, | |
col = barlines, | |
fill = barfill) + | |
xlab("Mean of each resample") + | |
ylab("Density") + | |
theme_bw() + | |
ggtitle("Distribution of Means of 1,000 Resamples") + | |
theme(plot.title = element_text(lineheight=.8, face="bold")) + | |
geom_vline(data=sems, aes(xintercept=vals, linetype = SEMs, | |
colour = SEMs), size = 1, show_guide = TRUE) + | |
scale_color_manual(values=c("+/- 1 SEM (bootstrap)" = line1, | |
"+/- 1 SEM (formula)" = line2)) | |
print(g1) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment