Last active
October 22, 2015 01:43
-
-
Save t-redactyl/3d993638a7d395b84958 to your computer and use it in GitHub Desktop.
Code associated with blog post: http://t-redactyl.github.io/blog/2015/10/two-group-hypothesis-testing-permutation-tests.html
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load required packages.
# library() stops with an error when a package is missing, whereas require()
# only returns FALSE and lets the script fail later on an unrelated line.
library(ggplot2)
library(gridExtra)

# Set the colours for the graphs
barfill <- "#4271AE"
barlines <- "#1F3552"
line1 <- "black"
line2 <- "#FF3721"

# Build the histogram of amount purchased for one campaign.
#
# df:       data frame with a `group` column and an `amount.purchased` column.
# campaign: value of `group` whose rows are plotted; also used as the title.
# fill:     bar fill colour (defaults to `barfill` set above).
# outline:  bar outline colour (defaults to `barlines` set above).
#
# Returns a ggplot object; nothing is drawn until it is printed or arranged.
campaign.histogram <- function(df, campaign,
                               fill = barfill, outline = barlines) {
  # Subset the rows first and map the column by name. Referring to
  # `data$...` inside aes() bypasses the data given to ggplot().
  campaign.rows <- df[df$group == campaign, , drop = FALSE]

  ggplot(data = campaign.rows, aes(x = amount.purchased)) +
    geom_histogram(binwidth = 20, fill = fill, colour = outline) +
    xlab("Amount spent per site visit ($)") +
    ylab("Frequency") +
    theme_bw() +
    ggtitle(campaign) +
    theme(plot.title = element_text(lineheight = 1.1, face = "bold"))
}

# `data` must already exist in the workspace with columns `group` and
# `amount.purchased`.

# Plotting histogram of sample 1
g1 <- campaign.histogram(data, "Campaign 1")

# Plotting histogram of sample 2
g2 <- campaign.histogram(data, "Campaign 2")

# Printing histograms side by side
grid.arrange(g1, g2, nrow = 1, ncol = 2)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Run one permutation: shuffle the group labels across observations and
# return the difference in means between the two relabelled groups.
#
# grouping: vector of group labels, one per observation.
# variable: numeric vector of outcomes, the same length as `grouping`.
# group1:   label of the group whose mean is subtracted.
# group2:   label of the group whose mean is subtracted from.
#
# Returns mean(variable | group2) - mean(variable | group1) under the
# shuffled labels. The defaults reproduce the original two-campaign call.
one.test <- function(grouping, variable,
                     group1 = "Campaign 1", group2 = "Campaign 2") {
  # Mismatched lengths would silently recycle or introduce NAs below.
  stopifnot(length(grouping) == length(variable))

  # Same permutation as sample(grouping), but indexing explicitly avoids
  # sample()'s special case where a single number n is treated as 1:n.
  resampled.group <- grouping[sample.int(length(grouping))]

  mean(variable[resampled.group == group2]) -
    mean(variable[resampled.group == group1])
}
# Worked example of a single relabelling.
# `data` must already exist with columns `group` and `amount.purchased`.
set.seed(567)

# Store the shuffled labels next to the original ones.
data[["resampled.group"]] <- sample(data[["group"]])

# Difference in means (Campaign 2 minus Campaign 1) under the shuffled labels.
rs.mean <- with(
  data,
  mean(amount.purchased[resampled.group == "Campaign 2"]) -
    mean(amount.purchased[resampled.group == "Campaign 1"])
)

# Show original and shuffled labels side by side for the first rows.
head(data[, c("group", "resampled.group", "amount.purchased")])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# One-row data frame describing the vertical reference line.
# Named `test.stat` rather than `mean` so it does not mask base::mean().
# `diff.means` must already exist in the workspace.
test.stat <- data.frame(Means = "Test statistic", vals = diff.means)

# Histogram of `perm.means` with `diff.means` overlaid as a vertical line.
# `perm.means` must already exist; it is wrapped in a data frame with an
# explicit column name so aes() maps a column, not a workspace variable.
g1 <- ggplot(data = data.frame(perm.means = perm.means),
             aes(x = perm.means)) +
  geom_histogram(binwidth = 10, fill = barfill, colour = barlines) +
  xlab("Permuted Means") +
  ylab("Frequency") +
  theme_bw() +
  ggtitle("Distribution of Permuted Means") +
  theme(plot.title = element_text(lineheight = 1.1, face = "bold")) +
  # show.legend replaces the deprecated show_guide argument.
  # NOTE(review): newer ggplot2 prefers `linewidth` over `size` for lines;
  # `size` is kept so older installations still work -- confirm target version.
  geom_vline(data = test.stat,
             aes(xintercept = vals, linetype = Means, colour = Means),
             size = 1, show.legend = TRUE) +
  scale_color_manual(values = c("Test statistic" = line2))

# Draw the plot
g1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Simulate 80 observations: 40 per campaign, each a run of zeros followed by
# exponential draws scaled by 100.
# The seed is set before any draw and the Campaign 1 values are drawn first,
# so the random stream is consumed in a fixed order.
data <- local({
  set.seed(567)

  # Campaign 1: 7 zeros, then 33 draws at rate 1.
  spend.1 <- c(rep.int(0, 7), rexp(33, rate = 1) * 100)
  # Campaign 2: 10 zeros, then 30 draws at rate 2.5.
  spend.2 <- c(rep.int(0, 10), rexp(30, rate = 2.5) * 100)

  data.frame(
    group = rep(c("Campaign 1", "Campaign 2"), each = 40),
    amount.purchased = c(spend.1, spend.2)
  )
})
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment