Skip to content

Instantly share code, notes, and snippets.

@macartan
Created September 15, 2018 19:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save macartan/61334630d1527babc4e7731c10bc70fd to your computer and use it in GitHub Desktop.
Save macartan/61334630d1527babc4e7731c10bc70fd to your computer and use it in GitHub Desktop.
library(DesignLibrary)
library(knitr)
# We are interested in a 5 arm trial with clusters. The 5 arm trial admits 5*4/2 comparisons of arms (10!). Plus one might be interested in various estimands that group arms together in different ways.
# To get going lets look at standard code for a 5 arm trial from the multiarm designer:
# get_design_code(multi_arm_designer(m_arms = 5))
# Now lets base a design off that, but add in clusters. Note that there is an individual level shock and a cluster shock: the ICC is determined by the relative size of these.
# These are arguments that you might want to modify to tailor your design:
N <- 30
outcome_means <- c(0, 0, 0, 1, 1)
sd_i <- 1
sd_cluster <- 0
outcome_sds <- c(0, 0, 0, 0, 0)
N_clusters <- 20
N_i_in_clusters <- 10
# Design
population <- declare_population(
clusters = add_level(N = N_clusters, u_c = rnorm(N)*sd_cluster),
individuals = add_level(N = N_i_in_clusters,
u_1 = rnorm(N, 0, outcome_sds[1L]),
u_2 = rnorm(N, 0, outcome_sds[2L]), u_3 = rnorm(N, 0, outcome_sds[3L]),
u_4 = rnorm(N, 0, outcome_sds[4L]), u_5 = rnorm(N, 0, outcome_sds[5L]),
u = rnorm(N) * sd_i))
potential_outcomes <- declare_potential_outcomes(formula = Y ~ (outcome_means[1] +
u_1) * (Z == "1") + (outcome_means[2] + u_2) * (Z == "2") +
(outcome_means[3] + u_3) * (Z == "3") + (outcome_means[4] +
u_4) * (Z == "4") + (outcome_means[5] + u_5) * (Z == "5") +
+ u_c + u, conditions = c("1", "2", "3", "4", "5"), assignment_variables = Z)
estimand <- declare_estimands(ate_Y_2_1 = mean(Y_Z_2 - Y_Z_1), ate_Y_3_1 = mean(Y_Z_3 -
Y_Z_1), ate_Y_4_1 = mean(Y_Z_4 - Y_Z_1), ate_Y_5_1 = mean(Y_Z_5 -
Y_Z_1), ate_Y_3_2 = mean(Y_Z_3 - Y_Z_2), ate_Y_4_2 = mean(Y_Z_4 -
Y_Z_2), ate_Y_5_2 = mean(Y_Z_5 - Y_Z_2), ate_Y_4_3 = mean(Y_Z_4 -
Y_Z_3), ate_Y_5_3 = mean(Y_Z_5 - Y_Z_3), ate_Y_5_4 = mean(Y_Z_5 -
Y_Z_4))
assignment <- declare_assignment(num_arms = 5, conditions = c("1", "2", "3",
"4", "5"), clusters = clusters, assignment_variable = Z)
reveal_Y <- declare_reveal(assignment_variables = Z)
estimator <- declare_estimator(handler = function(data) {
estimates <- rbind.data.frame(
ate_Y_2_1 = difference_in_means(formula = Y ~ Z, data = data, clusters = clusters,
condition1 = "1", condition2 = "2"),
ate_Y_3_1 = difference_in_means(formula = Y ~ Z, data = data, clusters = clusters,
condition1 = "1", condition2 = "3"),
ate_Y_4_1 = difference_in_means(formula = Y ~
Z, data = data, clusters = clusters, condition1 = "1", condition2 = "4"),
ate_Y_5_1 = difference_in_means(formula = Y ~ Z, data = data, clusters = clusters,
condition1 = "1", condition2 = "5"),
ate_Y_3_2 = difference_in_means(formula = Y ~
Z, data = data, clusters = clusters, condition1 = "2", condition2 = "3"),
ate_Y_4_2 = difference_in_means(formula = Y ~ Z, data = data, clusters = clusters,
condition1 = "2", condition2 = "4"),
ate_Y_5_2 = difference_in_means(formula = Y ~
Z, data = data, clusters = clusters, condition1 = "2", condition2 = "5"),
ate_Y_4_3 = difference_in_means(formula = Y ~ Z, data = data, clusters = clusters,
condition1 = "3", condition2 = "4"),
ate_Y_5_3 = difference_in_means(formula = Y ~
Z, data = data, clusters = clusters, condition1 = "3", condition2 = "5"),
ate_Y_5_4 = difference_in_means(formula = Y ~ Z, data = data, clusters = clusters,
condition1 = "4", condition2 = "5"))
estimates$estimator_label <- "DIM"
estimates$estimand_label <- rownames(estimates)
estimates$estimate <- estimates$coefficients
estimates$term <- NULL
return(estimates)
})
multi_arm__cluster_design <- population + potential_outcomes + assignment +
reveal_Y + estimand + estimator
# Simulated data for this design looks like this
kable(head(draw_data(multi_arm__cluster_design)))
# A diagnosis looks like this:
sims <- 50
diagnosis <- diagnose_design(multi_arm__cluster_design, sims = sims)
kable(reshape_diagnosis(diagnosis))
# A diagnosis for different numbers of clusters would look like this:
designs <- redesign(multi_arm__cluster_design, N_clusters = c(40, 50))
diagnoses <- diagnose_designs(designs, sims = sims)
kable(reshape_diagnosis(diagnoses))
# ===
# Notes
# This has been declared as a 5 arm trial but I think you are thinking of this as a 2*2 + 1 factorial. In that case all the above still holds, but you might want to rethink the estimands and estimators. The current estimands compare each cell to each other but you might instead be interested in comparing combinations of cells. To get a sense of factorial estimands like this have a look at the `simple_factorial_designer` code.
# The code above should work fine for the development version which will be patched on CRAN in a couple of days. WIth the current CRAN version you will notice an estimation bug. If you want to get going without using the developer version you can use the below less elegant code for estimation
estimator <- declare_estimator(handler = function(data) {
estimates <- rbind.data.frame(
ate_Y_2_1 = tidy(lm_robust(formula = Y ~ Z,
data = dplyr::filter(data, (Z == "1" | Z == "2")), clusters = clusters))[2,],
ate_Y_3_1 = tidy(lm_robust(formula = Y ~ Z,
data = dplyr::filter(data, (Z == "1" | Z == "3")), clusters = clusters))[2,],
ate_Y_4_1 = tidy(lm_robust(formula = Y ~ Z,
data = dplyr::filter(data, (Z == "1" | Z == "4")), clusters = clusters))[2,],
ate_Y_5_1 = tidy(lm_robust(formula = Y ~ Z,
data = dplyr::filter(data, (Z == "1" | Z == "5")), clusters = clusters))[2,],
ate_Y_3_2 = tidy(lm_robust(formula = Y ~ Z, ,
data = dplyr::filter(data, (Z == "2" | Z == "3")), clusters = clusters))[2,],
ate_Y_4_2 = tidy(lm_robust(formula = Y ~ Z,
data = dplyr::filter(data, (Z == "2" | Z == "4")), clusters = clusters))[2,],
ate_Y_5_2 = tidy(lm_robust(formula = Y ~Z, ,
data = dplyr::filter(data, (Z == "2" | Z == "5")), clusters = clusters))[2,],
ate_Y_4_3 = tidy(lm_robust(formula = Y ~ Z, ,
data = dplyr::filter(data, (Z == "3" | Z == "4")), clusters = clusters))[2,],
ate_Y_5_3 = tidy(lm_robust(formula = Y ~ Z,
data = dplyr::filter(data, (Z == "3" | Z == "5")), clusters = clusters))[2,],
ate_Y_5_4 = tidy(lm_robust(formula = Y ~ Z, ,
data = dplyr::filter(data, (Z == "4" | Z == "5")), clusters = clusters))[2,])
estimates$estimator_label <- "DIM"
estimates$estimand_label <- rownames(estimates)
estimates$term <- NULL
return(estimates)
})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment