eytan/gist:03cfaf99203b8b73e367

## gistfile1.txt
library(dplyr)
library(broom)
library(foreach)
library(ggplot2)

# Number of simulated units per call to sim.interactions(); it is the sample
# size passed to the rbinom() and rnorm() draws there.
N <- 1e4

# Simulates two concurrently running experiments, E1 and E2, and estimates
# each one's average treatment effect with its own single-regressor model.
#
# Every unit is independently assigned E1=1 with probability p0 and E2=1
# with probability p1. The outcome is generated as
#   y = 0.1 + E1*delta1 + E2*delta2 + E1*E2*delta1*interaction + noise
# where noise is drawn from a normal distribution with mean 0 and sd 0.05.
#
# Args:
#   interaction: size of the E1 x E2 treatment interaction, expressed as a
#                multiple of delta1.
#   p0:          probability that a unit is treated in E1.
#   p1:          probability that a unit is treated in E2.
#   delta2:      treatment effect of E2.
#   delta1:      treatment effect of E1 (formerly hard-coded to 0.01).
#   n.obs:       number of simulated units; defaults to the global N, which
#                is what the function always used before.
#
# Returns:
#   A data frame with two rows: the tidy() coefficient row for E1 from
#   lm(y ~ E1) and the one for E2 from lm(y ~ E2), each tagged with an
#   `experiment` column.
sim.interactions <- function(interaction, p0=0.5, p1=0.5, delta2=0.02,
                             delta1=0.01, n.obs=N) {
  # The draw order (E1, E2, noise) is kept as-is so that seeded runs
  # reproduce the same data as before.
  sim.data <- data.frame(
    E1=rbinom(n.obs, 1, p0),
    E2=rbinom(n.obs, 1, p1)
  ) %>%
    mutate(
      y=0.1 + E1*delta1 + E2*delta2 + E1*E2*delta1*interaction +
        rnorm(n.obs, 0, 0.05)
    )

  # Each experiment is analysed on its own, ignoring the other one; only the
  # treatment coefficients are kept (the intercept rows are dropped).
  rbind(
    cbind(tidy(lm(y ~ E1, data=sim.data)), experiment='E1'),
    cbind(tidy(lm(y ~ E2, data=sim.data)), experiment='E2')
  ) %>%
    filter(term %in% c('E1','E2'))
}


# Here, we have two 50/50 A/B tests, E1 and E2. We keep the treatment
# effect of E1 constant at 0.01, and look at how running E1 and E2 at
# the same time affects our estimate of the ATE for E1 as we consider
# different effect sizes for E2, ranging from 0 to 10x that of E1.
# One simulation per E2 effect size, with no interaction and 50/50
# assignment in both experiments; the per-run estimates are stacked row-wise
# and tagged with the delta2 that produced them.
d2 <- foreach(
  delta2 = seq(from = 0, to = 0.1, by = 0.01),
  .combine = rbind
) %do% {
  cbind(
    delta2 = delta2,
    sim.interactions(interaction = 0, p0 = 0.5, p1 = 0.5, delta2 = delta2)
  )
}

# You can see that this adds very little variance to our estimate of
# the effect of E1, even in cases when E2 is 10x stronger than E1.
# Plot every estimate against delta2 with a 95% interval (estimate +/- 1.96
# standard errors), one panel per coefficient. The horizontal line marks
# 0.01, the simulated effect of E1.
# ggplot() replaces qplot(), which is deprecated as of ggplot2 3.4.0; the
# layers below are the ones qplot() built implicitly.
ggplot(d2, aes(x=delta2, y=estimate)) +
  geom_point() +
  geom_pointrange(
    aes(ymin=estimate-1.96*std.error, ymax=estimate+1.96*std.error)
  ) +
  geom_hline(yintercept=0.01) +
  facet_grid(. ~ term)
	library(dplyr)
	library(broom)
	library(foreach)
	library(ggplot2)

	# Number of simulated units per call to sim.interactions(); it is the sample
	# size passed to the rbinom() and rnorm() draws there.
	N <- 1e4

	# Simulates two concurrently running experiments, E1 and E2, and estimates
	# each one's average treatment effect with its own single-regressor model.
	#
	# Every unit is independently assigned E1=1 with probability p0 and E2=1
	# with probability p1. The outcome is generated as
	#   y = 0.1 + E1*delta1 + E2*delta2 + E1*E2*delta1*interaction + noise
	# where noise is drawn from a normal distribution with mean 0 and sd 0.05.
	#
	# Args:
	#   interaction: size of the E1 x E2 treatment interaction, expressed as a
	#                multiple of delta1.
	#   p0:          probability that a unit is treated in E1.
	#   p1:          probability that a unit is treated in E2.
	#   delta2:      treatment effect of E2.
	#   delta1:      treatment effect of E1 (formerly hard-coded to 0.01).
	#   n.obs:       number of simulated units; defaults to the global N, which
	#                is what the function always used before.
	#
	# Returns:
	#   A data frame with two rows: the tidy() coefficient row for E1 from
	#   lm(y ~ E1) and the one for E2 from lm(y ~ E2), each tagged with an
	#   `experiment` column.
	sim.interactions <- function(interaction, p0=0.5, p1=0.5, delta2=0.02,
	                             delta1=0.01, n.obs=N) {
	  # The draw order (E1, E2, noise) is kept as-is so that seeded runs
	  # stay reproducible.
	  sim.data <- data.frame(
	    E1=rbinom(n.obs, 1, p0),
	    E2=rbinom(n.obs, 1, p1)
	  ) %>%
	    mutate(
	      # The multiplication operators were missing here (E1delta1, E2delta2,
	      # E1E2delta1), which made the terms undefined names.
	      y=0.1 + E1*delta1 + E2*delta2 + E1*E2*delta1*interaction +
	        rnorm(n.obs, 0, 0.05)
	    )

	  # Each experiment is analysed on its own, ignoring the other one; only the
	  # treatment coefficients are kept (the intercept rows are dropped).
	  rbind(
	    cbind(tidy(lm(y ~ E1, data=sim.data)), experiment='E1'),
	    cbind(tidy(lm(y ~ E2, data=sim.data)), experiment='E2')
	  ) %>%
	    filter(term %in% c('E1','E2'))
	}


	# Here, we have two 50/50 A/B tests, E1 and E2. We keep the treatment
	# effect of E1 constant at 0.01, and look at how running E1 and E2 at
	# the same time affects our estimate of the ATE for E1 as we consider
	# different effect sizes for E2, ranging from 0 to 10x that of E1.
	# One simulation per E2 effect size, with no interaction and 50/50
	# assignment in both experiments; the per-run estimates are stacked row-wise
	# and tagged with the delta2 that produced them.
	d2 <- foreach(
	  delta2 = seq(from = 0, to = 0.1, by = 0.01),
	  .combine = rbind
	) %do% {
	  cbind(
	    delta2 = delta2,
	    sim.interactions(interaction = 0, p0 = 0.5, p1 = 0.5, delta2 = delta2)
	  )
	}

	# You can see that this adds very little variance to our estimate of
	# the effect of E1, even in cases when E2 is 10x stronger than E1.
	# Plot every estimate against delta2 with a 95% interval (estimate +/- 1.96
	# standard errors), one panel per coefficient. The horizontal line marks
	# 0.01, the simulated effect of E1.
	# The interval bounds had lost their `*` (1.96std.error does not parse).
	# ggplot() replaces qplot(), which is deprecated as of ggplot2 3.4.0; the
	# layers below are the ones qplot() built implicitly.
	ggplot(d2, aes(x=delta2, y=estimate)) +
	  geom_point() +
	  geom_pointrange(
	    aes(ymin=estimate-1.96*std.error, ymax=estimate+1.96*std.error)
	  ) +
	  geom_hline(yintercept=0.01) +
	  facet_grid(. ~ term)