# t-redactyl/perm_alt_sample_plots.R

## perm_alt_sample_plots.R
# Load required packages
require(ggplot2); require(gridExtra)

# Colour palette shared by the plots in this script
line2 <- "#FF3721"     # reference-line colour (red)
line1 <- "black"       # reference-line colour (black)
barlines <- "#1F3552"  # outline colour of the histogram bars
barfill <- "#4271AE"   # fill colour of the histogram bars

# Histogram of the amount purchased by Campaign 1 visitors.
# The Campaign 1 rows are passed as the layer data and the aesthetic names a
# column of that data, so the mapping no longer reaches back into the global
# `data` object (via `$`) at the moment the plot is drawn.
g1 <- ggplot(data = data[data$group == "Campaign 1", , drop = FALSE],
             aes(x = amount.purchased)) +
        geom_histogram(binwidth = 20, fill = barfill, colour = barlines) +
        xlab("Amount spent per site visit ($)") +
        ylab("Frequency") +
        theme_bw() +
        ggtitle("Campaign 1") +
        theme(plot.title = element_text(lineheight = 1.1, face = "bold"))

# Histogram of the amount purchased by Campaign 2 visitors.
# The Campaign 2 rows are passed as the layer data and the aesthetic names a
# column of that data, so the mapping no longer reaches back into the global
# `data` object (via `$`) at the moment the plot is drawn.
g2 <- ggplot(data = data[data$group == "Campaign 2", , drop = FALSE],
             aes(x = amount.purchased)) +
        geom_histogram(binwidth = 20, fill = barfill, colour = barlines) +
        xlab("Amount spent per site visit ($)") +
        ylab("Frequency") +
        theme_bw() +
        ggtitle("Campaign 2") +
        theme(plot.title = element_text(lineheight = 1.1, face = "bold"))

# Draw the two campaign histograms side by side (one row, two columns)
grid.arrange(g1, g2, ncol = 2, nrow = 1)

## permutation_function_and_example.R
# Permute the group labels once and return the difference in group means.
#
# The labels in `grouping` are shuffled at random across the observations (an
# observation may end up with the label it already had), then the mean of
# `variable` under the first label is subtracted from the mean under the
# second label.
#
# Args:
#   grouping: vector of group labels, one per observation.
#   variable: numeric vector of observed values, same length as `grouping`.
#   groups:   the two labels to compare. Defaults to the campaign labels used
#             in this analysis; the result is
#             mean(groups[2]) - mean(groups[1]).
#
# Returns:
#   A single number: the mean difference under one random relabelling.
one.test <- function(grouping, variable,
                     groups = c("Campaign 1", "Campaign 2")) {
  # Mismatched lengths would silently misalign labels and values.
  stopifnot(length(grouping) == length(variable), length(groups) == 2)
  # Index-based shuffle: consumes the same random draw as sample(grouping),
  # but stays a permutation of the labels even when `grouping` is a single
  # number (sample(n) would permute 1:n instead).
  resampled.group <- grouping[sample.int(length(grouping))]
  mean(variable[resampled.group == groups[2]]) -
    mean(variable[resampled.group == groups[1]])
}

# Worked example of a single permutation: shuffle the group labels once,
# compute the Campaign 2 minus Campaign 1 mean difference under the shuffled
# labels, and show the first rows with original and shuffled labels together.
set.seed(567)
data$resampled.group <- sample(data$group)
rs.mean <- with(data,
                mean(amount.purchased[resampled.group == "Campaign 2"]) -
                  mean(amount.purchased[resampled.group == "Campaign 1"]))
head(data[c("group", "resampled.group", "amount.purchased")])

## permutation_plot.R
# One-row data frame describing the vertical reference line for the observed
# test statistic. It is stored as `test.stat` rather than `mean` so that the
# name of the base mean() function is not reused for a data frame.
test.stat <- data.frame(Means = "Test statistic", vals = diff.means)

# Histogram of the permuted mean differences, with the observed test
# statistic drawn as a coloured vertical line that appears in the legend.
g1 <- ggplot(data = as.data.frame(perm.means), aes(perm.means)) +
        geom_histogram(binwidth = 10, fill = barfill, colour = barlines) +
        xlab("Permuted Means") +
        ylab("Frequency") +
        theme_bw() +
        ggtitle("Distribution of Permuted Means") +
        theme(plot.title = element_text(lineheight = 1.1, face = "bold")) +
        # show.legend replaces the deprecated show_guide argument
        geom_vline(data = test.stat,
                   aes(xintercept = vals, linetype = Means, colour = Means),
                   size = 1, show.legend = TRUE) +
        scale_color_manual(values = c("Test statistic" = line2))
g1

## simulating_data.R
# Skeleton data set: 40 observations per campaign, purchase amounts
# initialised to zero.
data <- data.frame(group = rep(c("Campaign 1", "Campaign 2"), each = 40),
                   amount.purchased = 0)

# Fill in the amounts from a fixed seed. Each campaign gets a run of zeros
# followed by exponential draws scaled by 100 (7 zeros + 33 draws at rate 1
# for Campaign 1; 10 zeros + 30 draws at rate 2.5 for Campaign 2).
set.seed(567)
data$amount.purchased[data$group == "Campaign 1"] <-
  c(numeric(7), 100 * rexp(33, rate = 1))
data$amount.purchased[data$group == "Campaign 2"] <-
  c(numeric(10), 100 * rexp(30, rate = 2.5))
	# Load required packages
	require(ggplot2); require(gridExtra)

	# Colour palette shared by the plots in this script
	line2 <- "#FF3721"     # reference-line colour (red)
	line1 <- "black"       # reference-line colour (black)
	barlines <- "#1F3552"  # outline colour of the histogram bars
	barfill <- "#4271AE"   # fill colour of the histogram bars

	# Histogram of the amount purchased by Campaign 1 visitors.
	# The Campaign 1 rows are passed as the layer data and the aesthetic names a
	# column of that data, so the mapping no longer reaches back into the global
	# `data` object (via `$`) at the moment the plot is drawn.
	g1 <- ggplot(data = data[data$group == "Campaign 1", , drop = FALSE],
		aes(x = amount.purchased)) +
		geom_histogram(binwidth = 20, fill = barfill, colour = barlines) +
		xlab("Amount spent per site visit ($)") +
		ylab("Frequency") +
		theme_bw() +
		ggtitle("Campaign 1") +
		theme(plot.title = element_text(lineheight = 1.1, face = "bold"))

	# Histogram of the amount purchased by Campaign 2 visitors.
	# The Campaign 2 rows are passed as the layer data and the aesthetic names a
	# column of that data, so the mapping no longer reaches back into the global
	# `data` object (via `$`) at the moment the plot is drawn.
	g2 <- ggplot(data = data[data$group == "Campaign 2", , drop = FALSE],
		aes(x = amount.purchased)) +
		geom_histogram(binwidth = 20, fill = barfill, colour = barlines) +
		xlab("Amount spent per site visit ($)") +
		ylab("Frequency") +
		theme_bw() +
		ggtitle("Campaign 2") +
		theme(plot.title = element_text(lineheight = 1.1, face = "bold"))

	# Draw the two campaign histograms side by side (one row, two columns)
	grid.arrange(g1, g2, ncol = 2, nrow = 1)
	# Permute the group labels once and return the difference in group means.
	#
	# The labels in `grouping` are shuffled at random across the observations (an
	# observation may end up with the label it already had), then the mean of
	# `variable` under the first label is subtracted from the mean under the
	# second label.
	#
	# Args:
	#   grouping: vector of group labels, one per observation.
	#   variable: numeric vector of observed values, same length as `grouping`.
	#   groups:   the two labels to compare. Defaults to the campaign labels used
	#             in this analysis; the result is
	#             mean(groups[2]) - mean(groups[1]).
	#
	# Returns:
	#   A single number: the mean difference under one random relabelling.
	one.test <- function(grouping, variable,
		groups = c("Campaign 1", "Campaign 2")) {
		# Mismatched lengths would silently misalign labels and values.
		stopifnot(length(grouping) == length(variable), length(groups) == 2)
		# Index-based shuffle: consumes the same random draw as sample(grouping),
		# but stays a permutation of the labels even when `grouping` is a single
		# number (sample(n) would permute 1:n instead).
		resampled.group <- grouping[sample.int(length(grouping))]
		mean(variable[resampled.group == groups[2]]) -
			mean(variable[resampled.group == groups[1]])
	}

	# Worked example of a single permutation: shuffle the group labels once,
	# compute the Campaign 2 minus Campaign 1 mean difference under the shuffled
	# labels, and show the first rows with original and shuffled labels together.
	set.seed(567)
	data$resampled.group <- sample(data$group)
	rs.mean <- with(data,
		mean(amount.purchased[resampled.group == "Campaign 2"]) -
			mean(amount.purchased[resampled.group == "Campaign 1"]))
	head(data[c("group", "resampled.group", "amount.purchased")])
	# One-row data frame describing the vertical reference line for the observed
	# test statistic. It is stored as `test.stat` rather than `mean` so that the
	# name of the base mean() function is not reused for a data frame.
	test.stat <- data.frame(Means = "Test statistic", vals = diff.means)

	# Histogram of the permuted mean differences, with the observed test
	# statistic drawn as a coloured vertical line that appears in the legend.
	g1 <- ggplot(data = as.data.frame(perm.means), aes(perm.means)) +
		geom_histogram(binwidth = 10, fill = barfill, colour = barlines) +
		xlab("Permuted Means") +
		ylab("Frequency") +
		theme_bw() +
		ggtitle("Distribution of Permuted Means") +
		theme(plot.title = element_text(lineheight = 1.1, face = "bold")) +
		# show.legend replaces the deprecated show_guide argument
		geom_vline(data = test.stat,
			aes(xintercept = vals, linetype = Means, colour = Means),
			size = 1, show.legend = TRUE) +
		scale_color_manual(values = c("Test statistic" = line2))
	g1
	# Skeleton data set: 40 observations per campaign, purchase amounts
	# initialised to zero.
	data <- data.frame(group = rep(c("Campaign 1", "Campaign 2"), each = 40),
		amount.purchased = 0)

	# Fill in the amounts from a fixed seed. Each campaign gets a run of zeros
	# followed by exponential draws scaled by 100 (7 zeros + 33 draws at rate 1
	# for Campaign 1; 10 zeros + 30 draws at rate 2.5 for Campaign 2).
	set.seed(567)
	data$amount.purchased[data$group == "Campaign 1"] <-
		c(numeric(7), 100 * rexp(33, rate = 1))
	data$amount.purchased[data$group == "Campaign 2"] <-
		c(numeric(10), 100 * rexp(30, rate = 2.5))