Skip to content

Instantly share code, notes, and snippets.

@ramnathv
Last active February 6, 2018 19:13
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ramnathv/baddb6a7a53da30a71a1 to your computer and use it in GitHub Desktop.
Save ramnathv/baddb6a7a53da30a71a1 to your computer and use it in GitHub Desktop.
# A/B Testing
# Fix required for R versions earlier than 3.1.2, where list2env() errors on
# an empty list and breaks ggplot2's ggproto().
# Compare the complete version number: R.Version()$minor on its own ignores
# the major version (R 4.0.x reports minor "0.x" and would be patched
# needlessly) and is compared as a string rather than as a version.
if (getRversion() < "3.1.2") {
  source("ggplot_fix.R")
}
## Hypothesis Testing and A/B Tests
# Scenario: baseline rate p1, variant rate p2 (half a point higher),
# sample size n, significance level alpha.
alpha <- 0.05
n <- 1e4
p1 <- 0.05
p2 <- p1 + 0.005

# Standard error of a proportion at rate p1 with n observations.
# (The variable shares its name with stats::sd(); kept for compatibility.)
sd <- sqrt(p1 * (1 - p1) / n)

# Evaluation grid: six standard errors either side of p1, in steps of 0.01 sd.
x <- p1 + sd * seq(from = -6, to = 6, by = 0.01)

# Upper-tail critical value at level alpha under H0.
xa <- p1 + sd * qnorm(1 - alpha)

# Normal densities centred on p1 (y1) and on p2 (y2), same spread.
dat1 <- data.frame(
  x = x,
  y1 = dnorm(x, mean = p1, sd = sd),
  y2 = dnorm(x, mean = p2, sd = sd)
)
# Density curves under H0 and H1, with the region beyond the critical value
# xa shaded under each curve and dotted verticals at p1 and p2.
# ggplot2 must be attached before the first ggplot() call; the only other
# library(ggplot2) in this script comes later, in the R < 3.1.2 fix.
library(ggplot2)
plot1 <- ggplot(dat1, aes(x = x)) +
  theme(legend.position = c(0.12, 0.88), legend.title = element_blank()) +
  geom_line(aes(y = y1, colour = "H0: No Effect"), size = 1.2) +
  # Shade only the rows beyond xa. Subsetting the data avoids feeding NA
  # x-values to the layer (which ggplot2 drops with a warning).
  geom_area(data = dat1[dat1$x > xa, ], aes(y = y1), fill = "darkred") +
  geom_line(aes(y = y2, colour = "H1: Positive Effect"), size = 1.2) +
  geom_area(
    data = dat1[dat1$x > xa, ],
    aes(y = y2),
    fill = "blue",
    alpha = 0.3
  ) +
  xlab("") + ylab("") +
  # Named values pin each colour to its legend label explicitly instead of
  # relying on the alphabetical order of the labels.
  scale_colour_manual(
    values = c(
      "H0: No Effect" = "steelblue",
      "H1: Positive Effect" = "darkred"
    )
  ) +
  geom_vline(xintercept = c(p1, p2), linetype = "dotted")
# Power of Test
# Power reached with n observations per group when the true rates are p1 and
# p2. Uses the script's `n` rather than repeating the literal, so this call
# cannot drift from the scenario defined earlier.
# Note: power.prop.test() defaults to a two-sided alternative.
power.prop.test(
  n = n,
  p1 = p1,
  p2 = p2,
  sig.level = alpha
)
# Sample Size Calculations
# With n left unset, power.prop.test() solves for the per-group sample size
# that achieves the requested power.
power.prop.test(
  p1 = p1,
  p2 = p2,
  sig.level = alpha,
  power = 0.80
)
# Sample Size Requirements at Different Levels of Detectable Difference ----
# Reference: http://multithreaded.stitchfix.com/blog/2015/05/26/significant-sample/
p1 <- 0.05 # base conversion rate
alpha <- 0.05 # significance level
# Desired power. Despite the name this is passed as `power`, not as the
# type II error rate; the variable name is kept for compatibility.
beta <- 0.80
# Candidate detectable effects, from 0.5 to 3 percentage points.
de <- seq(from = 0.005, to = 0.03, by = 0.0001)
# Required per-group sample size for each effect. vapply() guarantees exactly
# one numeric per effect (sapply()'s return type depends on its input), and
# the closure argument no longer shadows the outer `de`.
n <- vapply(de, function(effect) {
  power.prop.test(
    sig.level = alpha,
    p1 = p1,
    p2 = p1 - effect,
    power = beta
  )$n
}, numeric(1))
df <- data.frame(de = de, n = n)
# Required sample size against detectable effect. The enlarged point marks
# the rows whose n, rounded up to the next multiple of 10, equals 5020.
ggplot(df, aes(x = de, y = n)) +
  geom_line() +
  geom_point(
    data = subset(df, ceiling(n / 10) * 10 == 5020),
    color = "steelblue",
    size = 5
  ) +
  labs(x = "Detectable Effect", y = "Sample Size (per variation)")
## Live: Build an Interactive Sample Size Calculator
library(shiny)

# UI: four sliders feeding power.prop.test(); the printed result is shown
# verbatim in the main panel.
# `step` is set explicitly on every slider. The effect slider in particular
# needs a step finer than its 0.005 starting value, otherwise that value is
# not on the slider's grid.
ui <- pageWithSidebar(
  headerPanel("Sample Size Calculator"),
  sidebarPanel(
    sliderInput(
      "p1", "Baseline Conversion Rate",
      min = 0, max = 1, value = 0.5, step = 0.01
    ),
    sliderInput(
      "de", "Minimum Detected Effect",
      min = 0, max = 1, value = 0.005, step = 0.001
    ),
    sliderInput(
      "alpha", "Significance Level",
      min = 0, max = 1, value = 0.05, step = 0.01
    ),
    sliderInput(
      "power", "Desired Power",
      min = 0, max = 1, value = 0.80, step = 0.01
    )
  ),
  mainPanel(
    verbatimTextOutput("sample_size")
  )
)

# Server: recompute the required per-group sample size whenever a slider moves.
server <- function(input, output, session) {
  output$sample_size <- renderPrint({
    p2 <- input$p1 + input$de
    # The sliders allow combinations power.prop.test() cannot solve (no
    # effect, a variant rate above 1, alpha or power at 0 or 1). Show a
    # readable message instead of a raw error from the solver.
    validate(
      need(input$de > 0, "Minimum detected effect must be greater than 0."),
      need(
        p2 <= 1,
        "Baseline rate plus detected effect must not exceed 1."
      ),
      need(
        input$alpha > 0 && input$alpha < 1,
        "Significance level must be strictly between 0 and 1."
      ),
      need(
        input$power > 0 && input$power < 1,
        "Desired power must be strictly between 0 and 1."
      )
    )
    power.prop.test(
      p1 = input$p1,
      p2 = p2,
      sig.level = input$alpha,
      power = input$power
    )
  })
}

shinyApp(ui = ui, server = server)
# Use this fix only for R versions < 3.1.2
#
# Replacement ggproto constructor: builds an environment holding the named
# members in `...`, classed as `_class` followed by either the classes of
# `_inherit` (which is also stored as `super`) or plain "ggproto".
#
# `_class`    Class name(s) to prepend, or NULL.
# `_inherit`  Parent ggproto object, or NULL for a root object.
# ...         Named members to store in the object.
# Returns the new environment; errors if any member is unnamed or if
# `_inherit` is given but is not a ggproto object.
ggproto_ <- function(`_class` = NULL, `_inherit` = NULL, ...) {
  fields <- list(...)
  named_count <- sum(nzchar(names(fields)))
  if (named_count != length(fields)) {
    stop("All members of a ggproto object must be named.")
  }

  obj <- new.env(parent = emptyenv())
  # R <3.1.2 errors when list2env() receives an empty list, so only copy
  # members when there are some. https://github.com/hadley/ggplot2/issues/1444
  if (length(fields) > 0) {
    list2env(fields, envir = obj)
  }

  # Root object: no parent to record.
  if (is.null(`_inherit`)) {
    class(obj) <- c(`_class`, "ggproto")
    return(obj)
  }

  if (!is.ggproto(`_inherit`)) {
    stop("`_inherit` must be a ggproto object.")
  }
  obj$super <- `_inherit`
  class(obj) <- c(`_class`, class(`_inherit`))
  obj
}
# Attach ggplot2, then replace the `ggproto` binding inside its namespace
# with the patched constructor `ggproto_`. The namespace binding is unlocked
# first so that it can be overwritten.
library(ggplot2)
invisible(local({
  ggplot2_ns <- asNamespace("ggplot2")
  unlockBinding("ggproto", env = ggplot2_ns)
  assign("ggproto", ggproto_, envir = ggplot2_ns)
}))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment