# t-redactyl/generate_95CI_around_t_value.R

## generate_95CI_around_t_value.R
# Critical t-values enclosing the central 95% of a t distribution with 78
# degrees of freedom: the upper bound is the 97.5th percentile and the lower
# bound is its mirror image about zero.
uci <- qt(0.975, df = 78)
lci <- -uci

## plots_of_each_sample.R
# Draw side-by-side density histograms of the two campaign samples.
# Relies on the numeric vectors campaign.1 and campaign.2 already existing.

# Attach the plotting packages. library() stops immediately when a package is
# missing, whereas require() only returns FALSE and lets the script fail later
# with a less helpful "could not find function" error.
library(ggplot2)
library(gridExtra)

# Set the colours for the graphs
barfill <- "#4271AE"
barlines <- "#1F3552"
line1 <- "black"    # not referenced by the histograms in this section
line2 <- "#FF3721"  # not referenced by the histograms in this section

# Plotting histogram of sample 1 (density scale, bins 25 units wide).
# as.data.frame() names the single column "campaign.1", which aes() maps to x.
g1 <- ggplot(data = as.data.frame(campaign.1), aes(campaign.1)) +
        geom_histogram(aes(y = ..density..),
                       binwidth = 25, fill = barfill, colour = barlines) +
        xlab("Amount spent per site visit ($)") +
        ylab("Density") +
        theme_bw() +
        ggtitle("Campaign 1") +
        theme(plot.title = element_text(lineheight = 1.1, face = "bold"))

# Plotting histogram of sample 2 (density scale, bins 20 units wide)
g2 <- ggplot(data = as.data.frame(campaign.2), aes(campaign.2)) +
        geom_histogram(aes(y = ..density..),
                       binwidth = 20, fill = barfill, colour = barlines) +
        xlab("Amount spent per site visit ($)") +
        ylab("Density") +
        theme_bw() +
        ggtitle("Campaign 2") +
        theme(plot.title = element_text(lineheight = 1.1, face = "bold"))

# Printing histograms next to each other (one row, two columns)
grid.arrange(g1, g2, nrow = 1, ncol = 2)

## rejection_rejection_plots.R
# Plot the t density with its one-sided (top panel) and two-sided (bottom
# panel) rejection boundaries at the 0.05 significance level.

# library() fails fast when a package is missing; require() would only
# return FALSE.
library(ggplot2)
library(gridExtra)

col1 <- "black"
col2 <- "#FF3721"

# Degrees of freedom shared by the critical values and the plotted density, so
# the vertical lines belong to the same distribution as the curve. The density
# was previously drawn with df = 28 while the critical values used 78.
rr.df <- 78

# One-sided test: a single upper critical value (95th percentile).
one.sided.rr <- qt(.95, rr.df)
line1 <- data.frame(Values = "Critical values for 0.05 significance",
                    vals = one.sided.rr)
line2 <- data.frame(Values = "Mean", vals = 0)
lines <- rbind(line1, line2)

# show.legend replaces the deprecated show_guide argument.
g1 <- ggplot(data.frame(x = c(-4, 4)), aes(x)) +
        stat_function(fun = dt, args = list(df = rr.df)) +
        xlab("Standardised difference in mean income") +
        ylab("Density") +
        theme_bw() +
        geom_vline(data = lines, aes(xintercept = vals, linetype = Values,
                            colour = Values), size = 1, show.legend = TRUE) +
        scale_color_manual(values = c("Critical values for 0.05 significance" = col1,
                                      "Mean" = col2))

# Two-sided test: lower and upper critical values (2.5th and 97.5th
# percentiles).
two.sided.rr <- qt(c(.025, .975), rr.df)
line1 <- data.frame(Values = "Critical values for 0.05 significance",
                    vals = two.sided.rr)
line2 <- data.frame(Values = "Mean", vals = 0)
lines <- rbind(line1, line2)

g2 <- ggplot(data.frame(x = c(-4, 4)), aes(x)) +
        stat_function(fun = dt, args = list(df = rr.df)) +
        xlab("Standardised difference in mean income") +
        ylab("Density") +
        theme_bw() +
        geom_vline(data = lines, aes(xintercept = vals, linetype = Values,
                            colour = Values), size = 1, show.legend = TRUE) +
        scale_color_manual(values = c("Critical values for 0.05 significance" = col1,
                                      "Mean" = col2))

# Stack the one-sided plot above the two-sided plot.
grid.arrange(g1, g2, nrow = 2, ncol = 1)

## simulating_campaign_1_and_2_data.R
# Simulate the two samples reproducibly: 40 draws each from a t distribution
# with 39 degrees of freedom, rescaled and shifted per campaign.
set.seed(567)
campaign.1 <- 310 + 60 * rt(n = 40, df = 39)
campaign.2 <- 270 + 58 * rt(n = 40, df = 39)

## t_test_plot.R
# Plot the t density together with the two-sided 0.05 critical values and the
# observed test statistic.
# Relies on lci, uci, t.value, col1 and col2 already being defined.

# library() fails fast when the package is missing; require() would only
# return FALSE.
library(ggplot2)

line1 <- data.frame(Values = "Critical values for 0.05 significance",
                    vals = c(lci, uci))
line2 <- data.frame(Values = "T-value", vals = t.value)
lines <- rbind(line1, line2)

# The density uses 78 degrees of freedom, matching the qt(.975, 78) calls that
# produce lci and uci in this file (it was previously drawn with df = 28).
# show.legend replaces the deprecated show_guide argument.
ggplot(data.frame(x = c(-4, 4)), aes(x)) +
    stat_function(fun = dt, args = list(df = 78)) +
    xlab("Standardised difference in mean income") +
    ylab("Density") +
    theme_bw() +
    geom_vline(data = lines, aes(xintercept = vals, linetype = Values,
                        colour = Values), size = 1, show.legend = TRUE) +
    scale_color_manual(values = c("Critical values for 0.05 significance" = col1,
                                  "T-value" = col2))

## t_value_manual_calculation.R
# Manual two-sample t statistic under the equal-variance assumption.
# Relies on campaign.1, campaign.2 and diff.means already being defined.
# NOTE(review): diff.means is not assigned in this section; presumably it is
# mean(campaign.1) - mean(campaign.2) -- confirm where it is defined.

# Sample sizes of the two groups (each group contributes its own n).
n.1 <- length(campaign.1)
n.2 <- length(campaign.2)

# First calculate the pooled standard deviation (assuming equal variances).
# Each sample variance is weighted by its degrees of freedom; with equal
# sample sizes this reduces to sqrt((var1 + var2) / 2).
sp <- sqrt(((n.1 - 1) * var(campaign.1) + (n.2 - 1) * var(campaign.2)) /
             (n.1 + n.2 - 2))

# Then calculate the standard error of the mean difference
se <- sp * sqrt(1 / n.1 + 1 / n.2)

# The t-value is the difference in means (minus the hypothesised difference
# of 0) divided by the standard error
t.value <- (diff.means - 0) / se
	# Critical t-values enclosing the central 95% of a t distribution with 78
	# degrees of freedom: the upper bound is the 97.5th percentile and the lower
	# bound is its mirror image about zero.
	uci <- qt(0.975, df = 78)
	lci <- -uci
	# Draw side-by-side density histograms of the two campaign samples.
	# Relies on the numeric vectors campaign.1 and campaign.2 already existing.

	# Attach the plotting packages. library() stops immediately when a package
	# is missing, whereas require() only returns FALSE and lets the script fail
	# later with a less helpful "could not find function" error.
	library(ggplot2)
	library(gridExtra)

	# Set the colours for the graphs
	barfill <- "#4271AE"
	barlines <- "#1F3552"
	line1 <- "black"    # not referenced by the histograms in this section
	line2 <- "#FF3721"  # not referenced by the histograms in this section

	# Plotting histogram of sample 1 (density scale, bins 25 units wide).
	# as.data.frame() names the single column "campaign.1", which aes() maps to x.
	g1 <- ggplot(data = as.data.frame(campaign.1), aes(campaign.1)) +
	  geom_histogram(aes(y = ..density..),
	                 binwidth = 25, fill = barfill, colour = barlines) +
	  xlab("Amount spent per site visit ($)") +
	  ylab("Density") +
	  theme_bw() +
	  ggtitle("Campaign 1") +
	  theme(plot.title = element_text(lineheight = 1.1, face = "bold"))

	# Plotting histogram of sample 2 (density scale, bins 20 units wide)
	g2 <- ggplot(data = as.data.frame(campaign.2), aes(campaign.2)) +
	  geom_histogram(aes(y = ..density..),
	                 binwidth = 20, fill = barfill, colour = barlines) +
	  xlab("Amount spent per site visit ($)") +
	  ylab("Density") +
	  theme_bw() +
	  ggtitle("Campaign 2") +
	  theme(plot.title = element_text(lineheight = 1.1, face = "bold"))

	# Printing histograms next to each other (one row, two columns)
	grid.arrange(g1, g2, nrow = 1, ncol = 2)
	# Plot the t density with its one-sided (top panel) and two-sided (bottom
	# panel) rejection boundaries at the 0.05 significance level.
	# library() fails fast when a package is missing; require() would only
	# return FALSE.
	library(ggplot2)
	library(gridExtra)

	col1 <- "black"
	col2 <- "#FF3721"

	# Degrees of freedom shared by the critical values and the plotted density,
	# so the vertical lines belong to the same distribution as the curve. The
	# density was previously drawn with df = 28 while the critical values used 78.
	rr.df <- 78

	# One-sided test: a single upper critical value (95th percentile).
	one.sided.rr <- qt(.95, rr.df)
	line1 <- data.frame(Values = "Critical values for 0.05 significance",
	                    vals = one.sided.rr)
	line2 <- data.frame(Values = "Mean", vals = 0)
	lines <- rbind(line1, line2)

	# show.legend replaces the deprecated show_guide argument.
	g1 <- ggplot(data.frame(x = c(-4, 4)), aes(x)) +
	  stat_function(fun = dt, args = list(df = rr.df)) +
	  xlab("Standardised difference in mean income") +
	  ylab("Density") +
	  theme_bw() +
	  geom_vline(data = lines, aes(xintercept = vals, linetype = Values,
	                               colour = Values), size = 1, show.legend = TRUE) +
	  scale_color_manual(values = c("Critical values for 0.05 significance" = col1,
	                                "Mean" = col2))

	# Two-sided test: lower and upper critical values (2.5th and 97.5th
	# percentiles).
	two.sided.rr <- qt(c(.025, .975), rr.df)
	line1 <- data.frame(Values = "Critical values for 0.05 significance",
	                    vals = two.sided.rr)
	line2 <- data.frame(Values = "Mean", vals = 0)
	lines <- rbind(line1, line2)

	g2 <- ggplot(data.frame(x = c(-4, 4)), aes(x)) +
	  stat_function(fun = dt, args = list(df = rr.df)) +
	  xlab("Standardised difference in mean income") +
	  ylab("Density") +
	  theme_bw() +
	  geom_vline(data = lines, aes(xintercept = vals, linetype = Values,
	                               colour = Values), size = 1, show.legend = TRUE) +
	  scale_color_manual(values = c("Critical values for 0.05 significance" = col1,
	                                "Mean" = col2))

	# Stack the one-sided plot above the two-sided plot.
	grid.arrange(g1, g2, nrow = 2, ncol = 1)
	# Simulate the two samples reproducibly: 40 draws each from a t distribution
	# with 39 degrees of freedom, rescaled and shifted per campaign.
	set.seed(567)
	campaign.1 <- 310 + 60 * rt(n = 40, df = 39)
	campaign.2 <- 270 + 58 * rt(n = 40, df = 39)
	# Plot the t density together with the two-sided 0.05 critical values and
	# the observed test statistic.
	# Relies on lci, uci, t.value, col1 and col2 already being defined.
	# library() fails fast when the package is missing; require() would only
	# return FALSE.
	library(ggplot2)

	line1 <- data.frame(Values = "Critical values for 0.05 significance",
	                    vals = c(lci, uci))
	line2 <- data.frame(Values = "T-value", vals = t.value)
	lines <- rbind(line1, line2)

	# The density uses 78 degrees of freedom, matching the qt(.975, 78) calls
	# that produce lci and uci in this file (it was previously drawn with
	# df = 28). show.legend replaces the deprecated show_guide argument.
	ggplot(data.frame(x = c(-4, 4)), aes(x)) +
	  stat_function(fun = dt, args = list(df = 78)) +
	  xlab("Standardised difference in mean income") +
	  ylab("Density") +
	  theme_bw() +
	  geom_vline(data = lines, aes(xintercept = vals, linetype = Values,
	                               colour = Values), size = 1, show.legend = TRUE) +
	  scale_color_manual(values = c("Critical values for 0.05 significance" = col1,
	                                "T-value" = col2))
	# Manual two-sample t statistic under the equal-variance assumption.
	# Relies on campaign.1, campaign.2 and diff.means already being defined.
	# NOTE(review): diff.means is not assigned in this section; presumably it is
	# mean(campaign.1) - mean(campaign.2) -- confirm where it is defined.

	# Sample sizes of the two groups (each group contributes its own n).
	n.1 <- length(campaign.1)
	n.2 <- length(campaign.2)

	# First calculate the pooled standard deviation (assuming equal variances).
	# Each sample variance is weighted by its degrees of freedom; with equal
	# sample sizes this reduces to sqrt((var1 + var2) / 2).
	sp <- sqrt(((n.1 - 1) * var(campaign.1) + (n.2 - 1) * var(campaign.2)) /
	             (n.1 + n.2 - 2))

	# Then calculate the standard error of the mean difference
	se <- sp * sqrt(1 / n.1 + 1 / n.2)

	# The t-value is the difference in means (minus the hypothesised difference
	# of 0) divided by the standard error
	t.value <- (diff.means - 0) / se