lawsofthought/power_analysis.R

## power_analysis.R
library(dplyr)
library(ggplot2)
library(tidyr)
library(pwr)

sample_sizes <- seq(5, 100, by=5)
effect_sizes <- c(0.1, 0.3, 0.5)

sample_one <- function(sequence){
  sample(size=1, sequence)
}

# Function to simulate a t-test.
# A sample size is chosen randomly
# An effect size is chosen randomly
sim_t_test <- function() {

  sample_size <- sample_one(sample_sizes)
  effect_size <- sample_one(effect_sizes)

  # Note that because true mean of x is 0
  # and sd is 1, then
  # effect_size = abs(true_mean_y - true_mean_x)/true_sd
  # And so effect size is the true effect size
  x <- rnorm(n=sample_size, mean=0, sd=1)
  y <- rnorm(n=sample_size, mean=effect_size, sd=1)

  model <- t.test(x, y, var.equal = T)

  c(sample_size,
    effect_size,
    model$p.value,
    diff(model$conf.int) # CI interval width
    )

}

# Simulate the t-test repeatedly
# More repetitions = more time = more precision
simulations <- replicate(1e5,
                         sim_t_test())

Df <- as.data.frame(t(simulations))

names(Df) <- c('sample_size', 'effect_size', 'p_value', 'interval')

# Plot power curves
Df %>% mutate(alpha_0.05 = p_value <= 0.05,
              alpha_0.01 = p_value <= 0.01,
              alpha_0.001 = p_value <= 0.001) %>%
  mutate(effect_size = factor(effect_size)) %>%
  group_by(sample_size, effect_size) %>%
  summarise(alpha_0.05 = mean(alpha_0.05),
            alpha_0.01 = mean(alpha_0.01),
            alpha_0.001 = mean(alpha_0.001)) %>%
  gather(alpha, power, alpha_0.05:alpha_0.001) %>%
  ggplot(mapping = aes(x = sample_size,
                       y = power,
                       group = effect_size,
                       col = effect_size)) +
  geom_point() +
  geom_line() +
  geom_hline(aes(yintercept=0.8), colour="red", linetype="dashed") +
  facet_wrap(~ alpha, nrow = 3)

# Use this for plotting the CI interval
mutate(Df, effect_size = factor(effect_size)) %>%
  group_by(sample_size, effect_size) %>%
  summarise(interval = mean(interval)) %>%
  ggplot(mapping=aes(x = sample_size, y = interval, group=effect_size, col=effect_size)) +
  geom_point() +
  geom_line()

# Here's pwr's t-test power calculator.
pwr.t2n.test(n1 = 50, n2= 50, d = 0.3, sig.level = 0.05)

## power_curve.R
library(pwr)

sample.size <- 5:75


power.1 <- pwr.t.test(d=1, n=sample.size, type='two.sample', alternative='greater')$power
power.2 <- pwr.t.test(d=.667, n=sample.size , type='two.sample', alternative='greater')$power

par(mar=c(4,4,.5,.5))
plot(sample.size, power.1, ylim=c(0.035,1), pch=NA, ylab='Statistical power', xlab ='Sample size per group', cex=.8)
grid()
points(sample.size, power.1, pch=1, cex =.8)
points(sample.size, power.2, pch=2, cex =.8)
lines(sample.size, power.1, lty=3, cex =.8)
lines(sample.size, power.2, lty=3, cex =.8)

legend(35, .2, legend=c(expression(paste(delta, ' = 1.000')),expression(paste(delta, ' = 0.667'))), lty=3, pch=c(1,2), bty='n', cex=.9)
	library(dplyr)
	library(ggplot2)
	library(tidyr)
	library(pwr)

	sample_sizes <- seq(5, 100, by=5)
	effect_sizes <- c(0.1, 0.3, 0.5)

	sample_one <- function(sequence){
	sample(size=1, sequence)
	}

	# Function to simulate a t-test.
	# A sample size is chosen randomly
	# An effect size is chosen randomly
	sim_t_test <- function() {

	sample_size <- sample_one(sample_sizes)
	effect_size <- sample_one(effect_sizes)

	# Note that because true mean of x is 0
	# and sd is 1, then
	# effect_size = abs(true_mean_y - true_mean_x)/true_sd
	# And so effect size is the true effect size
	x <- rnorm(n=sample_size, mean=0, sd=1)
	y <- rnorm(n=sample_size, mean=effect_size, sd=1)

	model <- t.test(x, y, var.equal = T)

	c(sample_size,
	effect_size,
	model$p.value,
	diff(model$conf.int) # CI interval width
	)

	}

	# Simulate the t-test repeatedly
	# More repetitions = more time = more precision
	simulations <- replicate(1e5,
	sim_t_test())

	Df <- as.data.frame(t(simulations))

	names(Df) <- c('sample_size', 'effect_size', 'p_value', 'interval')

	# Plot power curves
	Df %>% mutate(alpha_0.05 = p_value <= 0.05,
	alpha_0.01 = p_value <= 0.01,
	alpha_0.001 = p_value <= 0.001) %>%
	mutate(effect_size = factor(effect_size)) %>%
	group_by(sample_size, effect_size) %>%
	summarise(alpha_0.05 = mean(alpha_0.05),
	alpha_0.01 = mean(alpha_0.01),
	alpha_0.001 = mean(alpha_0.001)) %>%
	gather(alpha, power, alpha_0.05:alpha_0.001) %>%
	ggplot(mapping = aes(x = sample_size,
	y = power,
	group = effect_size,
	col = effect_size)) +
	geom_point() +
	geom_line() +
	geom_hline(aes(yintercept=0.8), colour="red", linetype="dashed") +
	facet_wrap(~ alpha, nrow = 3)

	# Use this for plotting the CI interval
	mutate(Df, effect_size = factor(effect_size)) %>%
	group_by(sample_size, effect_size) %>%
	summarise(interval = mean(interval)) %>%
	ggplot(mapping=aes(x = sample_size, y = interval, group=effect_size, col=effect_size)) +
	geom_point() +
	geom_line()

	# Here's pwr's t-test power calculator.
	pwr.t2n.test(n1 = 50, n2= 50, d = 0.3, sig.level = 0.05)
	library(pwr)

	sample.size <- 5:75


	power.1 <- pwr.t.test(d=1, n=sample.size, type='two.sample', alternative='greater')$power
	power.2 <- pwr.t.test(d=.667, n=sample.size , type='two.sample', alternative='greater')$power

	par(mar=c(4,4,.5,.5))
	plot(sample.size, power.1, ylim=c(0.035,1), pch=NA, ylab='Statistical power', xlab ='Sample size per group', cex=.8)
	grid()
	points(sample.size, power.1, pch=1, cex =.8)
	points(sample.size, power.2, pch=2, cex =.8)
	lines(sample.size, power.1, lty=3, cex =.8)
	lines(sample.size, power.2, lty=3, cex =.8)

	legend(35, .2, legend=c(expression(paste(delta, ' = 1.000')),expression(paste(delta, ' = 0.667'))), lty=3, pch=c(1,2), bty='n', cex=.9)