tvladeck/max-diff-power-analysis.R

## max-diff-power-analysis.R
library(foreach)
library(doParallel)
library(tidyverse)
library(magrittr)

# Parallel backend: request up to 40 workers, but never more than the
# machine reports, so smaller hosts are not oversubscribed. `na.rm = TRUE`
# falls back to 40 when detectCores() cannot determine the core count (NA).
registerDoParallel(min(40L, parallel::detectCores(), na.rm = TRUE))

# number of simulated experiments per (N, lift) grid cell
runs <- 100

# sample size in each group
N_test <- c(250, 300, 350, 400, 500, 600)

# lift applied to the immigration variable in the treatment case;
# approximate effect on the average choice probability:
# .2 ~ raising average prob from 20% to 22.5%
# .3 ~ to 24.4%
# .4 ~ to 26%
# .5 ~ to 27%
lift_test <- c(.2, .3, .4, .5)

# multiplier applied to N to get the number of data rows per case
times_immigration_seen <- 3

# Simulated power for every combination of group size (N_test) and lift
# (lift_test). The inner loop combines with `c` and the outer loop with
# `cbind`, so the result has one row per lift and one column per N.
# Each cell is the share of `runs` simulated experiments in which the
# treatment coefficient of a logistic regression has p < .05.
#
# The loop body uses base R only, so it does not rely on tidyverse packages
# being attached on the parallel workers.
power_grid <- foreach(N = N_test, .combine = cbind) %:%
  foreach(lift = lift_test, .combine = c) %dopar% {
    # number of rows of data in each case (base / treatment)
    nn <- N * times_immigration_seen

    # treatment indicator: 0 for the base rows, 1 for the treatment rows.
    # It does not depend on the random draws, so build it once per cell.
    xtest <- rep(c(0, 1), each = nn)

    # Softmax probability for the immigration variable against four other
    # standard-normal covariates. The four covariates are drawn for all
    # rows at once (one matrix row per data row) and reduced to their
    # sum of exponentials.
    choice_prob <- function(utility) {
      n_obs <- length(utility)
      other_covars <- rowSums(exp(matrix(rnorm(4 * n_obs), ncol = 4)))
      exp(utility) / (exp(utility) + other_covars)
    }

    result <- replicate(
      n = runs,
      expr = {
        #### BASE ####
        # immigration variable in the non-treatment case
        prob_base <- choice_prob(rnorm(nn))

        #### TREATMENT ####
        # same baseline plus a noisy lift centred on `lift`
        prob_treatment <- choice_prob(
          rnorm(nn) + lift * rnorm(nn, mean = 1)
        )

        # For ad-hoc diagnostics, inspect mean(prob_base),
        # mean(prob_treatment) and their ratio here.

        #### concatenating the two cases ####
        prob <- c(prob_base, prob_treatment)

        # dichotomous outcome: 1 when a uniform draw falls below prob
        ytest <- as.numeric(runif(2 * nn) < prob)

        # build the model
        model <- glm(ytest ~ xtest, family = binomial())

        # p-value of the treatment coefficient, selected by name rather
        # than by position
        coef(summary(model))["xtest", "Pr(>|z|)"] < .05
      }
    )

    # share of significant runs = estimated power for this cell
    sum(result) / runs
  }

# Label the grid in one step: rows are the lift scenarios (in the order of
# lift_test), columns are the per-group sample sizes.
lift_labels <- c("20 to 22%", "to 24.5%", "to 26%", "to 27%")
dimnames(power_grid) <- list(lift_labels, N_test)
	library(foreach)
	library(doParallel)
	library(tidyverse)
	library(magrittr)

	# Parallel backend: request up to 40 workers, but never more than the
	# machine reports, so smaller hosts are not oversubscribed. `na.rm = TRUE`
	# falls back to 40 when detectCores() cannot determine the core count (NA).
	registerDoParallel(min(40L, parallel::detectCores(), na.rm = TRUE))

	# number of simulated experiments per (N, lift) grid cell
	runs <- 100

	# sample size in each group
	N_test <- c(250, 300, 350, 400, 500, 600)

	# lift applied to the immigration variable in the treatment case;
	# approximate effect on the average choice probability:
	# .2 ~ raising average prob from 20% to 22.5%
	# .3 ~ to 24.4%
	# .4 ~ to 26%
	# .5 ~ to 27%
	lift_test <- c(.2, .3, .4, .5)

	# multiplier applied to N to get the number of data rows per case
	times_immigration_seen <- 3

	# NOTE(review): this statement repeats the power_grid simulation that
	# appears earlier in this file; running both re-simulates the grid and
	# overwrites power_grid. Confirm whether the duplicate is intentional.
	#
	# Simulated power for every combination of group size (N_test) and lift
	# (lift_test). The inner loop combines with `c` and the outer loop with
	# `cbind`, so the result has one row per lift and one column per N.
	# Each cell is the share of `runs` simulated experiments in which the
	# treatment coefficient of a logistic regression has p < .05.
	#
	# The loop body uses base R only, so it does not rely on tidyverse
	# packages being attached on the parallel workers.
	power_grid <- foreach(N = N_test, .combine = cbind) %:%
	  foreach(lift = lift_test, .combine = c) %dopar% {
	    # number of rows of data in each case (base / treatment)
	    nn <- N * times_immigration_seen

	    # treatment indicator: 0 for the base rows, 1 for the treatment rows.
	    # It does not depend on the random draws, so build it once per cell.
	    xtest <- rep(c(0, 1), each = nn)

	    # Softmax probability for the immigration variable against four other
	    # standard-normal covariates. The four covariates are drawn for all
	    # rows at once (one matrix row per data row) and reduced to their
	    # sum of exponentials.
	    choice_prob <- function(utility) {
	      n_obs <- length(utility)
	      other_covars <- rowSums(exp(matrix(rnorm(4 * n_obs), ncol = 4)))
	      exp(utility) / (exp(utility) + other_covars)
	    }

	    result <- replicate(
	      n = runs,
	      expr = {
	        #### BASE ####
	        # immigration variable in the non-treatment case
	        prob_base <- choice_prob(rnorm(nn))

	        #### TREATMENT ####
	        # same baseline plus a noisy lift centred on `lift`
	        prob_treatment <- choice_prob(
	          rnorm(nn) + lift * rnorm(nn, mean = 1)
	        )

	        # For ad-hoc diagnostics, inspect mean(prob_base),
	        # mean(prob_treatment) and their ratio here.

	        #### concatenating the two cases ####
	        prob <- c(prob_base, prob_treatment)

	        # dichotomous outcome: 1 when a uniform draw falls below prob
	        ytest <- as.numeric(runif(2 * nn) < prob)

	        # build the model
	        model <- glm(ytest ~ xtest, family = binomial())

	        # p-value of the treatment coefficient, selected by name rather
	        # than by position
	        coef(summary(model))["xtest", "Pr(>|z|)"] < .05
	      }
	    )

	    # share of significant runs = estimated power for this cell
	    sum(result) / runs
	  }

	# Label the grid in one step: rows are the lift scenarios (in the order of
	# lift_test), columns are the per-group sample sizes.
	lift_labels <- c("20 to 22%", "to 24.5%", "to 26%", "to 27%")
	dimnames(power_grid) <- list(lift_labels, N_test)