Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
# Defining lambda and n
lambda <- 220
n <- 30
# Calculating SEM
sem <- sqrt(lambda / n)
# Clear the workspace
rm(list = ls())
# Set seed to replicate random variable generation
set.seed(567)
# Generate the mean of each sample and store in a vector, and store each sample in a dataframe
mn_vector <- NULL
sample_frame <- data.frame(row.names = seq(from = 1, to = 30, by = 1))
for (i in 1 : 1000) {
s <- rpois(30, lambda = 220)
sample_frame <- cbind(sample_frame, s)
mn_vector <- c(mn_vector, mean(s))
}
# Name the columns in the sample dataframe
names(sample_frame) <- paste0("n", seq(from = 1, to = 1000, by = 1))
# Plotting histogram of the distribution of sample means
g1 <- ggplot(data=as.data.frame(mn_vector), aes(mn_vector)) +
geom_histogram(aes(y = ..density..), binwidth = 1,
col = barlines,
fill = barfill) +
xlab("Mean of each sample") +
ylab("Density") +
theme_bw() +
ggtitle("Distribution of Means of 1,000 Samples") +
theme(plot.title = element_text(lineheight=.8, face="bold")) +
geom_line(aes(y = ..density.., colour = "Empirical"), stat = "density") +
stat_function(fun = dnorm, aes(colour = "Normal"),
arg = list(mean = 220, sd = sd(mn_vector))) +
scale_colour_manual(name = "Density", values = c(line1, line2))
print(g1)
# Plotting a histogram with the +/- 1 and 2 standard error intervals.
sem1 <- data.frame(SEMs="+/- 1 SEM",
vals = c(mean(mn_vector) - sd(mn_vector), mean(mn_vector) + sd(mn_vector)))
sem2 <- data.frame(SEMs="+/- 2 SEMs",
vals = c(mean(mn_vector) - 2 * sd(mn_vector), mean(mn_vector) + 2 * sd(mn_vector)))
sems <- rbind(sem1, sem2)
g1 <- ggplot(data=as.data.frame(mn_vector), aes(mn_vector)) +
geom_histogram(aes(y = ..density..), binwidth = 1,
col = barlines,
fill = barfill) +
xlab("Mean of each sample") +
ylab("Density") +
theme_bw() +
ggtitle("Distribution of Means of 1,000 Samples") +
theme(plot.title = element_text(lineheight=.8, face="bold")) +
geom_vline(data=sems, aes(xintercept=vals, linetype = SEMs,
colour = SEMs), size = 1, show_guide = TRUE) +
scale_color_manual(values=c("+/- 1 SEM" = line1,
"+/- 2 SEMs" = line2))
print(g1)
# Load required packages
require(ggplot2); require(gridExtra)
# Set the colours for the graphs
barfill <- "#4271AE"
barlines <- "#1F3552"
line1 <- "black"
line2 <- "#FF3721"
# Plotting histogram of sample 1
mean1 <- data.frame(Means="Population mean", vals = 220)
mean2 <- data.frame(Means="Sample mean", vals = mean(sample_frame$n1))
means <- rbind(mean1, mean2)
g1 <- ggplot(data=sample_frame, aes(sample_frame$n1)) +
geom_histogram(aes(y = ..density..),
binwidth = 4, fill = barfill, colour = barlines) +
xlab("Daily page views") +
ylab("Density") +
theme_bw() +
ggtitle("Sample 1") +
theme(plot.title = element_text(lineheight=1.1, face="bold")) +
geom_vline(data=means, aes(xintercept=vals, linetype = Means,
colour = Means), size = 1, show_guide = TRUE) +
scale_color_manual(values=c("Population mean" = line1, "Sample mean" = line2))
# Plotting histogram of sample 2
mean1 <- data.frame(Means="Population mean", vals = 220)
mean2 <- data.frame(Means="Sample mean", vals = mean(sample_frame$n2))
means <- rbind(mean1, mean2)
g2 <- ggplot(data=sample_frame, aes(sample_frame$n2)) +
geom_histogram(aes(y = ..density..),
binwidth = 4, fill = barfill, colour = barlines) +
xlab("Daily page views") +
ylab("Density") +
theme_bw() +
ggtitle("Sample 2") +
theme(plot.title = element_text(lineheight=1.1, face="bold")) +
geom_vline(data=means, aes(xintercept=vals, linetype = Means,
colour = Means), size = 1, show_guide = TRUE) +
scale_color_manual(values=c("Population mean" = line1, "Sample mean" = line2))
# Printing histograms
grid.arrange(g1, g2, nrow = 1, ncol = 2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment