macartan/5 arm cluster experiment

## 5 arm cluster experiment
library(DesignLibrary)
library(knitr)

# We are interested in a 5 arm trial with clusters. The 5 arm trial admits 5*4/2 comparisons of arms (10!). Plus one might be interested in various estimands that group arms together in different ways.

# To get going lets look at standard code for a 5 arm trial from the multiarm designer:

# get_design_code(multi_arm_designer(m_arms = 5))

# Now lets base a design off that, but add in clusters. Note that there is an individual level shock and a cluster shock: the ICC is determined by the relative size of these.


# These are arguments that you might want to modify to tailor your design:
N  <-  30
outcome_means  <-  c(0, 0, 0, 1, 1)
sd_i  <-  1
sd_cluster <- 0
outcome_sds  <-  c(0, 0, 0, 0, 0)
N_clusters <- 20
N_i_in_clusters <- 10


# Design

population <- declare_population(
  clusters = add_level(N = N_clusters, u_c = rnorm(N)*sd_cluster),
  individuals = add_level(N = N_i_in_clusters,
    u_1 = rnorm(N, 0, outcome_sds[1L]),
    u_2 = rnorm(N, 0, outcome_sds[2L]), u_3 = rnorm(N, 0, outcome_sds[3L]),
    u_4 = rnorm(N, 0, outcome_sds[4L]), u_5 = rnorm(N, 0, outcome_sds[5L]),
    u = rnorm(N) * sd_i))

potential_outcomes <- declare_potential_outcomes(formula = Y ~ (outcome_means[1] +
    u_1) * (Z == "1") + (outcome_means[2] + u_2) * (Z == "2") +
    (outcome_means[3] + u_3) * (Z == "3") + (outcome_means[4] +
    u_4) * (Z == "4") + (outcome_means[5] + u_5) * (Z == "5") +
    + u_c + u, conditions = c("1", "2", "3", "4", "5"), assignment_variables = Z)

estimand <- declare_estimands(ate_Y_2_1 = mean(Y_Z_2 - Y_Z_1), ate_Y_3_1 = mean(Y_Z_3 -
    Y_Z_1), ate_Y_4_1 = mean(Y_Z_4 - Y_Z_1), ate_Y_5_1 = mean(Y_Z_5 -
    Y_Z_1), ate_Y_3_2 = mean(Y_Z_3 - Y_Z_2), ate_Y_4_2 = mean(Y_Z_4 -
    Y_Z_2), ate_Y_5_2 = mean(Y_Z_5 - Y_Z_2), ate_Y_4_3 = mean(Y_Z_4 -
    Y_Z_3), ate_Y_5_3 = mean(Y_Z_5 - Y_Z_3), ate_Y_5_4 = mean(Y_Z_5 -
    Y_Z_4))

assignment <- declare_assignment(num_arms = 5, conditions = c("1", "2", "3",
"4", "5"), clusters = clusters, assignment_variable = Z)

reveal_Y <- declare_reveal(assignment_variables = Z)

estimator <- declare_estimator(handler = function(data) {
    estimates <- rbind.data.frame(
        ate_Y_2_1 = difference_in_means(formula = Y ~ Z, data = data, clusters = clusters,
            condition1 = "1", condition2 = "2"),
        ate_Y_3_1 = difference_in_means(formula = Y ~ Z, data = data, clusters = clusters,
            condition1 = "1", condition2 = "3"),
        ate_Y_4_1 = difference_in_means(formula = Y ~
            Z, data = data, clusters = clusters, condition1 = "1", condition2 = "4"),
        ate_Y_5_1 = difference_in_means(formula = Y ~ Z, data = data, clusters = clusters,
            condition1 = "1", condition2 = "5"),
        ate_Y_3_2 = difference_in_means(formula = Y ~
            Z, data = data, clusters = clusters, condition1 = "2", condition2 = "3"),
        ate_Y_4_2 = difference_in_means(formula = Y ~ Z, data = data, clusters = clusters,
            condition1 = "2", condition2 = "4"),
        ate_Y_5_2 = difference_in_means(formula = Y ~
            Z, data = data, clusters = clusters, condition1 = "2", condition2 = "5"),
        ate_Y_4_3 = difference_in_means(formula = Y ~ Z, data = data, clusters = clusters,
            condition1 = "3", condition2 = "4"),
        ate_Y_5_3 = difference_in_means(formula = Y ~
            Z, data = data, clusters = clusters, condition1 = "3", condition2 = "5"),
        ate_Y_5_4 = difference_in_means(formula = Y ~ Z, data = data, clusters = clusters,
            condition1 = "4", condition2 = "5"))
    estimates$estimator_label <- "DIM"
    estimates$estimand_label <- rownames(estimates)
    estimates$estimate <- estimates$coefficients
    estimates$term <- NULL
    return(estimates)
})

multi_arm__cluster_design <- population + potential_outcomes + assignment +
    reveal_Y + estimand + estimator

# Simulated data for this design  looks like this

kable(head(draw_data(multi_arm__cluster_design)))

# A diagnosis looks like this:

sims <- 50

diagnosis <- diagnose_design(multi_arm__cluster_design, sims = sims)

kable(reshape_diagnosis(diagnosis))

# A diagnosis for different numbers of clusters would look like this:

designs <- redesign(multi_arm__cluster_design, N_clusters = c(40, 50))
diagnoses <- diagnose_designs(designs, sims = sims)
kable(reshape_diagnosis(diagnoses))

# ===
# Notes

# This has been declared as a 5 arm trial but I think you are thinking of this as a 2*2 + 1 factorial. In that case all the above still holds, but you might want to rethink the estimands and estimators. The current estimands compare each cell to each other but you might instead be interested in comparing combinations of cells. To get a sense of factorial estimands like this have a look at the `simple_factorial_designer` code.

# The code above should work fine for the development version which will be patched on CRAN in a couple of days. WIth the current CRAN version you will notice an estimation bug. If you want to get going without using the developer version you can use the below less elegant code for estimation

estimator <- declare_estimator(handler = function(data) {
    estimates <- rbind.data.frame(
        ate_Y_2_1 = tidy(lm_robust(formula = Y ~ Z,
            data = dplyr::filter(data, (Z == "1" | Z == "2")), clusters = clusters))[2,],
        ate_Y_3_1 = tidy(lm_robust(formula = Y ~ Z,
            data = dplyr::filter(data, (Z == "1" | Z == "3")), clusters = clusters))[2,],
        ate_Y_4_1 = tidy(lm_robust(formula = Y ~ Z,
            data = dplyr::filter(data, (Z == "1" | Z == "4")), clusters = clusters))[2,],
        ate_Y_5_1 = tidy(lm_robust(formula = Y ~ Z,
            data = dplyr::filter(data, (Z == "1" | Z == "5")), clusters = clusters))[2,],
        ate_Y_3_2 = tidy(lm_robust(formula = Y ~ Z, ,
            data = dplyr::filter(data, (Z == "2" | Z == "3")), clusters = clusters))[2,],
        ate_Y_4_2 = tidy(lm_robust(formula = Y ~ Z,
            data = dplyr::filter(data, (Z == "2" | Z == "4")), clusters = clusters))[2,],
        ate_Y_5_2 = tidy(lm_robust(formula = Y ~Z, ,
            data = dplyr::filter(data, (Z == "2" | Z == "5")), clusters = clusters))[2,],
        ate_Y_4_3 = tidy(lm_robust(formula = Y ~ Z, ,
            data = dplyr::filter(data, (Z == "3" | Z == "4")), clusters = clusters))[2,],
        ate_Y_5_3 = tidy(lm_robust(formula = Y ~ Z,
            data = dplyr::filter(data, (Z == "3" | Z == "5")), clusters = clusters))[2,],
        ate_Y_5_4 = tidy(lm_robust(formula = Y ~ Z,                               ,
            data = dplyr::filter(data, (Z == "4" | Z == "5")), clusters = clusters))[2,])
    estimates$estimator_label <- "DIM"
    estimates$estimand_label <- rownames(estimates)
    estimates$term <- NULL
    return(estimates)
})
	library(DesignLibrary)
	library(knitr)

	# We are interested in a 5 arm trial with clusters. The 5 arm trial admits 5*4/2 comparisons of arms (10!). Plus one might be interested in various estimands that group arms together in different ways.

	# To get going lets look at standard code for a 5 arm trial from the multiarm designer:

	# get_design_code(multi_arm_designer(m_arms = 5))

	# Now lets base a design off that, but add in clusters. Note that there is an individual level shock and a cluster shock: the ICC is determined by the relative size of these.


	# These are arguments that you might want to modify to tailor your design:
	N <- 30
	outcome_means <- c(0, 0, 0, 1, 1)
	sd_i <- 1
	sd_cluster <- 0
	outcome_sds <- c(0, 0, 0, 0, 0)
	N_clusters <- 20
	N_i_in_clusters <- 10


	# Design

	population <- declare_population(
	clusters = add_level(N = N_clusters, u_c = rnorm(N)*sd_cluster),
	individuals = add_level(N = N_i_in_clusters,
	u_1 = rnorm(N, 0, outcome_sds[1L]),
	u_2 = rnorm(N, 0, outcome_sds[2L]), u_3 = rnorm(N, 0, outcome_sds[3L]),
	u_4 = rnorm(N, 0, outcome_sds[4L]), u_5 = rnorm(N, 0, outcome_sds[5L]),
	u = rnorm(N) * sd_i))

	potential_outcomes <- declare_potential_outcomes(formula = Y ~ (outcome_means[1] +
	u_1) * (Z == "1") + (outcome_means[2] + u_2) * (Z == "2") +
	(outcome_means[3] + u_3) * (Z == "3") + (outcome_means[4] +
	u_4) * (Z == "4") + (outcome_means[5] + u_5) * (Z == "5") +
	+ u_c + u, conditions = c("1", "2", "3", "4", "5"), assignment_variables = Z)

	estimand <- declare_estimands(ate_Y_2_1 = mean(Y_Z_2 - Y_Z_1), ate_Y_3_1 = mean(Y_Z_3 -
	Y_Z_1), ate_Y_4_1 = mean(Y_Z_4 - Y_Z_1), ate_Y_5_1 = mean(Y_Z_5 -
	Y_Z_1), ate_Y_3_2 = mean(Y_Z_3 - Y_Z_2), ate_Y_4_2 = mean(Y_Z_4 -
	Y_Z_2), ate_Y_5_2 = mean(Y_Z_5 - Y_Z_2), ate_Y_4_3 = mean(Y_Z_4 -
	Y_Z_3), ate_Y_5_3 = mean(Y_Z_5 - Y_Z_3), ate_Y_5_4 = mean(Y_Z_5 -
	Y_Z_4))

	assignment <- declare_assignment(num_arms = 5, conditions = c("1", "2", "3",
	"4", "5"), clusters = clusters, assignment_variable = Z)

	reveal_Y <- declare_reveal(assignment_variables = Z)

	estimator <- declare_estimator(handler = function(data) {
	estimates <- rbind.data.frame(
	ate_Y_2_1 = difference_in_means(formula = Y ~ Z, data = data, clusters = clusters,
	condition1 = "1", condition2 = "2"),
	ate_Y_3_1 = difference_in_means(formula = Y ~ Z, data = data, clusters = clusters,
	condition1 = "1", condition2 = "3"),
	ate_Y_4_1 = difference_in_means(formula = Y ~
	Z, data = data, clusters = clusters, condition1 = "1", condition2 = "4"),
	ate_Y_5_1 = difference_in_means(formula = Y ~ Z, data = data, clusters = clusters,
	condition1 = "1", condition2 = "5"),
	ate_Y_3_2 = difference_in_means(formula = Y ~
	Z, data = data, clusters = clusters, condition1 = "2", condition2 = "3"),
	ate_Y_4_2 = difference_in_means(formula = Y ~ Z, data = data, clusters = clusters,
	condition1 = "2", condition2 = "4"),
	ate_Y_5_2 = difference_in_means(formula = Y ~
	Z, data = data, clusters = clusters, condition1 = "2", condition2 = "5"),
	ate_Y_4_3 = difference_in_means(formula = Y ~ Z, data = data, clusters = clusters,
	condition1 = "3", condition2 = "4"),
	ate_Y_5_3 = difference_in_means(formula = Y ~
	Z, data = data, clusters = clusters, condition1 = "3", condition2 = "5"),
	ate_Y_5_4 = difference_in_means(formula = Y ~ Z, data = data, clusters = clusters,
	condition1 = "4", condition2 = "5"))
	estimates$estimator_label <- "DIM"
	estimates$estimand_label <- rownames(estimates)
	estimates$estimate <- estimates$coefficients
	estimates$term <- NULL
	return(estimates)
	})

	multi_arm__cluster_design <- population + potential_outcomes + assignment +
	reveal_Y + estimand + estimator

	# Simulated data for this design looks like this

	kable(head(draw_data(multi_arm__cluster_design)))

	# A diagnosis looks like this:

	sims <- 50

	diagnosis <- diagnose_design(multi_arm__cluster_design, sims = sims)

	kable(reshape_diagnosis(diagnosis))

	# A diagnosis for different numbers of clusters would look like this:

	designs <- redesign(multi_arm__cluster_design, N_clusters = c(40, 50))
	diagnoses <- diagnose_designs(designs, sims = sims)
	kable(reshape_diagnosis(diagnoses))

	# ===
	# Notes

	# This has been declared as a 5 arm trial but I think you are thinking of this as a 2*2 + 1 factorial. In that case all the above still holds, but you might want to rethink the estimands and estimators. The current estimands compare each cell to each other but you might instead be interested in comparing combinations of cells. To get a sense of factorial estimands like this have a look at the `simple_factorial_designer` code.

	# The code above should work fine for the development version which will be patched on CRAN in a couple of days. WIth the current CRAN version you will notice an estimation bug. If you want to get going without using the developer version you can use the below less elegant code for estimation

	estimator <- declare_estimator(handler = function(data) {
	estimates <- rbind.data.frame(
	ate_Y_2_1 = tidy(lm_robust(formula = Y ~ Z,
	data = dplyr::filter(data, (Z == "1" \| Z == "2")), clusters = clusters))[2,],
	ate_Y_3_1 = tidy(lm_robust(formula = Y ~ Z,
	data = dplyr::filter(data, (Z == "1" \| Z == "3")), clusters = clusters))[2,],
	ate_Y_4_1 = tidy(lm_robust(formula = Y ~ Z,
	data = dplyr::filter(data, (Z == "1" \| Z == "4")), clusters = clusters))[2,],
	ate_Y_5_1 = tidy(lm_robust(formula = Y ~ Z,
	data = dplyr::filter(data, (Z == "1" \| Z == "5")), clusters = clusters))[2,],
	ate_Y_3_2 = tidy(lm_robust(formula = Y ~ Z, ,
	data = dplyr::filter(data, (Z == "2" \| Z == "3")), clusters = clusters))[2,],
	ate_Y_4_2 = tidy(lm_robust(formula = Y ~ Z,
	data = dplyr::filter(data, (Z == "2" \| Z == "4")), clusters = clusters))[2,],
	ate_Y_5_2 = tidy(lm_robust(formula = Y ~Z, ,
	data = dplyr::filter(data, (Z == "2" \| Z == "5")), clusters = clusters))[2,],
	ate_Y_4_3 = tidy(lm_robust(formula = Y ~ Z, ,
	data = dplyr::filter(data, (Z == "3" \| Z == "4")), clusters = clusters))[2,],
	ate_Y_5_3 = tidy(lm_robust(formula = Y ~ Z,
	data = dplyr::filter(data, (Z == "3" \| Z == "5")), clusters = clusters))[2,],
	ate_Y_5_4 = tidy(lm_robust(formula = Y ~ Z, ,
	data = dplyr::filter(data, (Z == "4" \| Z == "5")), clusters = clusters))[2,])
	estimates$estimator_label <- "DIM"
	estimates$estimand_label <- rownames(estimates)
	estimates$term <- NULL
	return(estimates)
	})