Skip to content

Instantly share code, notes, and snippets.

@tvladeck
Created August 21, 2018 16:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tvladeck/cc06831a9183ad2892f9705ad7d61d2d to your computer and use it in GitHub Desktop.
Save tvladeck/cc06831a9183ad2892f9705ad7d61d2d to your computer and use it in GitHub Desktop.
Computing power of covariate in max diff experience
library(foreach)
library(doParallel)
library(tidyverse)
library(magrittr)
registerDoParallel(40)
runs <- 100
N_test <- c(250, 300, 350, 400, 500, 600) # sample in each group
lift_test <- c(.2, .3, .4, .5)
# .2 ~ raising average prob from 20% to 22.5%
# .3 ~ to 24.4%
# .4 ~ to 26%
# .5 ~ to 27%
times_immigration_seen <- 3
power_grid <- foreach(N=N_test,
.combine = cbind) %:%
foreach(lift = lift_test,
.combine = c) %dopar% {
result <- replicate(
n = runs,
expr = {
# number of rows of data in each case
nn <- N * times_immigration_seen
#### BASE ####
# this is the immigration variable in the non-treatment case
immigration_base <- rnorm(nn)
# all other coefficients summed together will be
# sum of normally distributed variables
# then we have to take the sumexp of them to fit into the softmax function
other_covars_base <- 1:nn %>%
map(~ rnorm(4)) %>%
map(~ exp(.x)) %>%
map(~ reduce(.x, sum)) %>%
unlist
prob_base <- exp(immigration_base)/(exp(immigration_base) + other_covars_base)
#### TREATMENT ####
immigration_treatment <- rnorm(nn) + lift * rnorm(nn, mean = 1)
other_covars_treatment <- 1:nn %>%
map(~ rnorm(4)) %>%
map(~ exp(.x)) %>%
map(~ reduce(.x, sum)) %>%
unlist
prob_treatment <- exp(immigration_treatment)/(exp(immigration_treatment) + other_covars_treatment)
#### Diagnostics ####
mean(prob_treatment)
mean(prob_base)
mean(prob_treatment) / mean(prob_base)
#### concatenating the two cases ####
prob <- c(prob_base, prob_treatment)
# immigration_utils <- c(immigration_base, immigration_treatment)
# this is our indicator variable
xtest <- c(rep(0, nn), rep(1, nn))
# this gets us our dichotomous outcome variables
runis <- runif(nn*2,0,1)
ytest <- ifelse(runis < prob,1,0)
# build the model
model <- glm(ytest ~ xtest, family = "binomial")
# extract relevant coefficient and see its p-value
summary(model)$coefficients[2,4] < .05
}
)
power = sum(result) / runs
}
colnames(power_grid) <- N_test
rownames(power_grid) <- c(
"20 to 22%",
"to 24.5%",
"to 26%",
"to 27%"
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment