eddjberry/prop_test_power_curves.R

## prop_test_power_curves.R
#========================================================#
# Setup
#========================================================#

library(dplyr)
library(ggplot2)
library(here)
library(pwr)
library(scales)
library(stringr)

#========================================================#
# Functions
#========================================================#

#============================
# power_prop_test()
#============================
# a simplification of one of
# the function from pwr
# for reuse later
power_prop_test <- function(h, n_group) {
  pwr.2p.test(
    h = h,
    n = n_group,
    sig.level = 0.01,
    alternative = 'two.sided'
  )$power
}

#============================
# power_prop_test_grid()
#============================
# function to calculate power for
# prop.test from:
#   n_group: sample size in each group
#   prop_control: proportion in the control group
#   prop_effect: effect in the treatment group
power_prop_test_grid <- function(n_group, prop_control, prop_effect) {
  # get all possible combinations of
  # n_group, prop_control, and prop_effect
  expand.grid(list(
    n_group = n_group,
    prop_control = prop_control,
    prop_effect = prop_effect
  )) %>%
    mutate(
      prop_treatment = prop_control + prop_effect,
      # standardised effect size (see pwr docs)
      h = pwr::ES.h(prop_control, prop_treatment),
      # calculate the power
      power = power_prop_test(h, n_group)
    )
}

#========================================================#
# Power curve
#========================================================#

#============================
# power curve data
#============================

# proportions to try for the control group
prop_control <- seq(0.05, 0.5, 0.025)

# effect sizes to try
prop_effect <- seq(0, 0.15, by = 0.001)

# samples sizes for each group to try
n_group <- c(500, 1000, 2500, 5000)

# get the power for all combinations
df_prop_test_power <- power_prop_test_grid(n_group, prop_control, prop_effect)

#============================
# power curve plot
#============================

# create a plot theme
theme_datasci <-
  theme_minimal(
    base_size = 12,
    base_family = "mono") +
  theme(
      panel.grid.minor = element_blank(),
      panel.grid.major.x = element_blank(),
      legend.title = element_text(size = 8),
      legend.text = element_text(size = 7))

# set the theme
theme_set(theme_datasci)

# plot
ggplot(df_prop_test_power) +
  aes(
    x = prop_effect,
    y = power,
    color = prop_control,
    group = prop_control) +
  geom_line(show.legend = T) +
  # coordinates and scales ------------
  coord_cartesian(xlim = c(0, 0.15)) +
  scale_x_continuous(
    breaks = seq(0, 0.15, 0.025),
    labels = percent_format()) +
  scale_color_viridis_c(
    option = 'plasma',
    end = 0.9,
    labels = percent_format()) +
  # facet -----------------------------
  facet_wrap(
    ~ n_group,
    ncol = 2,
    # add 'N per group:' at the start of the facet labels
    labeller = as_labeller(
      function(x) { str_c('N per group: ', x)})) +
  # labels & theme --------------------
  theme(legend.position = c(0.9, 0.2)) +
  labs(
    x = 'Difference between treatment and control groups',
    y = 'Probability of detecting an effect',
    color = 'Baseline % \nin control group',
    title = 'Power curves for a prop.test',
    subtitle = 'By sample size, base-rate and treatment effect'
  )

# save
ggsave(here('figures', 'power_curve.png'), width = 8, height = 6, dpi = 'retina')
	#========================================================#
	# Setup
	#========================================================#

	library(dplyr)
	library(ggplot2)
	library(here)
	library(pwr)
	library(scales)
	library(stringr)

	#========================================================#
	# Functions
	#========================================================#

	#============================
	# power_prop_test()
	#============================
	# a simplification of one of
	# the function from pwr
	# for reuse later
	power_prop_test <- function(h, n_group) {
	pwr.2p.test(
	h = h,
	n = n_group,
	sig.level = 0.01,
	alternative = 'two.sided'
	)$power
	}

	#============================
	# power_prop_test_grid()
	#============================
	# function to calculate power for
	# prop.test from:
	# n_group: sample size in each group
	# prop_control: proportion in the control group
	# prop_effect: effect in the treatment group
	power_prop_test_grid <- function(n_group, prop_control, prop_effect) {
	# get all possible combinations of
	# n_group, prop_control, and prop_effect
	expand.grid(list(
	n_group = n_group,
	prop_control = prop_control,
	prop_effect = prop_effect
	)) %>%
	mutate(
	prop_treatment = prop_control + prop_effect,
	# standardised effect size (see pwr docs)
	h = pwr::ES.h(prop_control, prop_treatment),
	# calculate the power
	power = power_prop_test(h, n_group)
	)
	}

	#========================================================#
	# Power curve
	#========================================================#

	#============================
	# power curve data
	#============================

	# proportions to try for the control group
	prop_control <- seq(0.05, 0.5, 0.025)

	# effect sizes to try
	prop_effect <- seq(0, 0.15, by = 0.001)

	# samples sizes for each group to try
	n_group <- c(500, 1000, 2500, 5000)

	# get the power for all combinations
	df_prop_test_power <- power_prop_test_grid(n_group, prop_control, prop_effect)

	#============================
	# power curve plot
	#============================

	# create a plot theme
	theme_datasci <-
	theme_minimal(
	base_size = 12,
	base_family = "mono") +
	theme(
	panel.grid.minor = element_blank(),
	panel.grid.major.x = element_blank(),
	legend.title = element_text(size = 8),
	legend.text = element_text(size = 7))

	# set the theme
	theme_set(theme_datasci)

	# plot
	ggplot(df_prop_test_power) +
	aes(
	x = prop_effect,
	y = power,
	color = prop_control,
	group = prop_control) +
	geom_line(show.legend = T) +
	# coordinates and scales ------------
	coord_cartesian(xlim = c(0, 0.15)) +
	scale_x_continuous(
	breaks = seq(0, 0.15, 0.025),
	labels = percent_format()) +
	scale_color_viridis_c(
	option = 'plasma',
	end = 0.9,
	labels = percent_format()) +
	# facet -----------------------------
	facet_wrap(
	~ n_group,
	ncol = 2,
	# add 'N per group:' at the start of the facet labels
	labeller = as_labeller(
	function(x) { str_c('N per group: ', x)})) +
	# labels & theme --------------------
	theme(legend.position = c(0.9, 0.2)) +
	labs(
	x = 'Difference between treatment and control groups',
	y = 'Probability of detecting an effect',
	color = 'Baseline % \nin control group',
	title = 'Power curves for a prop.test',
	subtitle = 'By sample size, base-rate and treatment effect'
	)

	# save
	ggsave(here('figures', 'power_curve.png'), width = 8, height = 6, dpi = 'retina')