Created
May 1, 2019 16:44
-
-
Save cddesja/3b32f61198f7fabd0d494af681bf95c9 to your computer and use it in GitHub Desktop.
R code used in the power analysis talk for RMCC
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## ----include = FALSE, message = FALSE, warning = FALSE------------------- | |
library(tidyverse) | |
cohen.d <- function(samp1, samp2){ | |
xbar1 <- mean(samp1) | |
xbar2 <- mean(samp2) | |
n1 <- length(samp1) | |
n2 <- length(samp2) | |
s1 <- var(samp1) | |
s2 <- var(samp2) | |
pooled.sd <- sqrt(((n1 - 1)*s1 + (n2 - 1)*s2) / (n1 + n2 - 2)) | |
(xbar1 - xbar2) / pooled.sd | |
} | |
set.seed(12351234) | |
dat.tmp <- data.frame(Trt = rnorm(100000, mean = 21.04, sd = 5.2), | |
Ctrl = rnorm(100000, mean = 20, sd = 5.2)) | |
dat.l <- gather(dat.tmp) | |
library(ggplot2) | |
## ----echo = FALSE-------------------------------------------------------- | |
ggplot(dat.l, aes(x = value, fill = key)) + | |
geom_density(alpha = .4) + | |
scale_fill_discrete("") + | |
xlab("ACT") + | |
ylab("Density") + | |
theme_bw() + | |
annotate("segment", x = 21.04, xend = 21.04, y = 0, yend = .001, colour = "blue")+ | |
annotate("segment", x = 20, xend = 20, y = 0, yend = .001, colour = "blue") | |
## ----echo = FALSE-------------------------------------------------------- | |
ggplot(dat.l, aes(y = value, x = key)) + | |
geom_boxplot() + | |
xlab("ACT") + | |
ylab("Act Score") + | |
theme_bw() | |
## ----include = FALSE, message = FALSE, warning = FALSE------------------- | |
set.seed(12351234) | |
dat.tmp <- data.frame(Trt = rnorm(100000, mean = 22.6, sd = 5.2), | |
Ctrl = rnorm(100000, mean = 20, sd = 5.2)) | |
dat.l <- gather(dat.tmp) | |
## ----echo = FALSE-------------------------------------------------------- | |
ggplot(dat.l, aes(x = value, fill = key)) + | |
geom_density(alpha = .4) + | |
scale_fill_discrete("") + | |
xlab("ACT") + | |
ylab("Density") + | |
theme_bw() + | |
annotate("segment", x = 22.6, xend = 22.6, y = 0, yend = .001, colour = "blue")+ | |
annotate("segment", x = 20, xend = 20, y = 0, yend = .001, colour = "blue") | |
## ----echo = FALSE-------------------------------------------------------- | |
ggplot(dat.l, aes(y = value, x = key)) + | |
geom_boxplot() + | |
xlab("ACT") + | |
ylab("Act Score") + | |
theme_bw() | |
## ----include = FALSE, message = FALSE, warning = FALSE------------------- | |
set.seed(12351234) | |
dat.tmp <- data.frame(Trt = rnorm(100000, mean = 24.16, sd = 5.2), | |
Ctrl = rnorm(100000, mean = 20, sd = 5.2)) | |
dat.l <- gather(dat.tmp) | |
## ----echo = FALSE-------------------------------------------------------- | |
ggplot(dat.l, aes(x = value, fill = key)) + | |
geom_density(alpha = .4) + | |
scale_fill_discrete("") + | |
xlab("ACT") + | |
ylab("Density") + | |
theme_bw() + | |
annotate("segment", x = 24.16, xend = 24.16, y = 0, yend = .001, colour = "blue")+ | |
annotate("segment", x = 20, xend = 20, y = 0, yend = .001, colour = "blue") | |
## ----echo = FALSE-------------------------------------------------------- | |
ggplot(dat.l, aes(y = value, x = key)) + | |
geom_boxplot() + | |
xlab("ACT") + | |
ylab("Act Score") + | |
theme_bw() | |
## ----echo = TRUE--------------------------------------------------------- | |
pwr::pwr.t.test(d = 0.2, sig.level = .05, power = .80, | |
type = "two.sample", alternative = "greater") | |
## ----cache = TRUE, echo = TRUE------------------------------------------- | |
set.seed(512019) | |
run_sim <- replicate(5000, expr = { | |
trt <- rnorm(310, mean = 21.04, sd = 5.2) | |
ctrl <- rnorm(310, mean = 20, sd = 5.2) | |
test <- t.test(trt, ctrl, var.equal = FALSE, alternative = "greater") | |
d <- cohen.d(trt, ctrl) | |
c(test$p.value, d) | |
}) | |
run_sim <- data.frame(t(run_sim)) | |
colnames(run_sim) <- c("p.value", "d") | |
mean(run_sim$d) # effect size | |
mean(run_sim$p.value < .05) # empirical power | |
## ----cache = TRUE, echo = TRUE------------------------------------------- | |
set.seed(512019) | |
run_sim <- replicate(5000, expr = { | |
trt <- rnorm(310, mean = 21.04, sd = 5.2) | |
ctrl <- rnorm(310, mean = 20, sd = 5.2) | |
y <- c(trt, ctrl) | |
x <- rep(c("Trt", "Ctrl"), each = 310) | |
mod <- lm(y ~ x) | |
pt(summary(mod)$coef[2, 3], df = 618, lower.tail = FALSE) | |
}) | |
mean(run_sim < .05) # empirical power | |
## ----cache = TRUE, echo = TRUE------------------------------------------- | |
n <- seq(50, 200, by = 10) | |
calc_p <- function(n){ | |
trt <- rnorm(n, mean = 22.6, sd = 5.2) | |
ctrl <- rnorm(n, mean = 20, sd = 5.2) | |
test <- t.test(trt, ctrl, mu = 1.04, | |
var.equal = FALSE, alternative = "greater") | |
test$p.value | |
} | |
set.seed(512019) | |
power <- NULL | |
for(samp in n){ | |
tmp <- replicate(5000, calc_p(samp)) | |
power <- c(power, mean(tmp < .05)) | |
} | |
## ------------------------------------------------------------------------ | |
plot(power ~ n, type = "b", ylab = "Power", xlab = "Sample Size Per Group", | |
ylim = c(0, 1)) | |
abline(h = 0.8, lty = 2) | |
## ------------------------------------------------------------------------ | |
dat <- data.frame(Trt = rgamma(100000, shape = 5.65, scale = 4), | |
Ctrl = rgamma(100000, shape = 2, scale = 10)) | |
dat_l <- tidyr::gather(dat) | |
ggplot(dat_l, aes(x = value, fill = key)) + | |
geom_density(alpha = .4) + | |
scale_fill_discrete("") + | |
xlab("Test score") | |
ylab("Density") + | |
theme_bw() | |
## ----cache = TRUE, echo = TRUE------------------------------------------- | |
n <- seq(50, 200, by = 10) | |
calc_p <- function(n){ | |
trt <- rgamma(n, shape = 5.65, scale = 4) | |
ctrl <- rgamma(n, shape = 2, scale = 10) | |
test <- t.test(trt, ctrl, mu = 1.04, | |
var.equal = FALSE, alternative = "greater") | |
test$p.value | |
} | |
set.seed(512019) | |
power <- NULL | |
for(samp in n){ | |
tmp <- replicate(5000, calc_p(samp)) | |
power <- c(power, mean(tmp < .05)) | |
} | |
## ------------------------------------------------------------------------ | |
plot(power ~ n, type = "b", ylab = "Power", xlab = "Sample Size Per Group", | |
ylim = c(0, 1)) | |
abline(h = 0.8, lty = 2) | |
## ----echo = TRUE--------------------------------------------------------- | |
set.seed(05012019) | |
library(lavaan) | |
# relationship between X and M, M and Y, and X and Y | |
a <- .562 | |
b <- .562 | |
c <- .14 | |
# Fit the model | |
mod <- ' | |
M ~ a*X # M regressed onto X | |
Y ~ b*M + c*X # Y regressed onto M and X | |
v := (a*b)*(a*b) # indirect effect squared | |
' | |
# How many observations should we generate. | |
n <- 100 | |
# Generate X to be a random normal variable, M, and Y | |
X <- rnorm(n = n, mean = 0, sd = 1) | |
M <- a*X + rnorm(n, mean = 0, sd = sqrt(1 - a^2)) | |
Y <- b*M + c*X + rnorm(n, mean = 0, sd = sqrt(1 - (b^2 + c^2 + 2*b*c*a))) | |
dat <- data.frame(Y, X, M) | |
# Fit the model | |
fit <- sem(model = mod, data = dat) | |
# Extract the relevant information | |
param <- parameterEstimates(fit) | |
subset(param, label == "v", pvalue) | |
## ----sim2, cache = TRUE, echo = TRUE------------------------------------- | |
sample_size <- seq(50, 120, by = 10) | |
alpha <- .05 | |
run_sim <- function(N){ | |
# How many observations should we generate. | |
n <- N | |
# Generate X to be a random normal variable, M, and Y | |
X <- rnorm(n = n, mean = 0, sd = 1) | |
M <- a*X + rnorm(n, mean = 0, sd = sqrt(1 - a^2)) | |
Y <- b*M + c*X + rnorm(n, mean = 0, sd = sqrt(1 - (b^2 + c^2 + 2*b*c*a))) | |
dat <- data.frame(Y, X, M) | |
# Fit the model | |
fit <- sem(model = mod, data = dat) | |
# Extract the relevant information | |
param <- parameterEstimates(fit) | |
subset(param, label == "v", pvalue, drop = TRUE) | |
} | |
set.seed(05012019) | |
power <- NULL | |
for(samp in sample_size){ | |
p_values <- replicate(2000, run_sim(samp)) | |
power <- c(power, mean(p_values < alpha)) | |
} | |
## ------------------------------------------------------------------------ | |
plot(power ~ sample_size, type = "b", xlab = "Sample size", ylab = "Power", | |
ylim = c(0, 1)) | |
abline(h = 0.8, lty = 2) | |
## ----eval = TRUE, include = TRUE, echo = TRUE---------------------------- | |
nobs <- 100 | |
ran_effs <- c(5, .5) | |
cor_mat <- matrix(c(1, -.2, -.2, 1), nrow = 2) | |
cov_mat <- cor2cov(cor_mat, ran_effs) | |
int_mean <- 20 | |
slope_mean <- 1.5 # this is our parameter of interest! | |
fact_scores <- MASS::mvrnorm(n = nobs, mu = c(int_mean, slope_mean), Sigma = cov_mat) | |
ach1 <- 1*fact_scores[, 1] + 0*fact_scores[, 2] + rnorm(nobs, sd = .5) | |
ach2 <- 1*fact_scores[, 1] + 1*fact_scores[, 2] + rnorm(nobs, sd = .5) | |
ach3 <- 1*fact_scores[, 1] + 2*fact_scores[, 2] + rnorm(nobs, sd = .5) | |
ach4 <- 1*fact_scores[, 1] + 3*fact_scores[, 2] + rnorm(nobs, sd = .5) | |
ach5 <- 1*fact_scores[, 1] + 4*fact_scores[, 2] + rnorm(nobs, sd = .5) | |
dat <- data.frame(id = 1:nobs, ach1, ach2, ach3, ach4, ach5) | |
dat_l <- reshape(dat, direction = "long", varying = 2:6, | |
timevar = "year", v.names = "score", times = 0:4) | |
## ----eval = TRUE, include = TRUE, echo = TRUE---------------------------- | |
mod <- " | |
int =~ 1*ach1 + 1*ach2 + 1*ach3 + 1*ach4 + 1*ach5 | |
slope =~ 0*ach1 + 1*ach2 + 2*ach3 + 3*ach4 + 4*ach5 | |
int ~ 1 | |
slope ~ eff*1 | |
int ~~ int + slope | |
slope ~~ slope | |
ach1 ~~ e*ach1 | |
ach2 ~~ e*ach2 | |
ach3 ~~ e*ach3 | |
ach4 ~~ e*ach4 | |
ach5 ~~ e*ach5 | |
" | |
## ----eval = FALSE, echo = TRUE, include = TRUE--------------------------- | |
## # as a latent growth curve model | |
## lgcm.fit <- growth(mod, data = dat) | |
## summary(lgcm.fit) | |
## | |
## # as a mixed effects model | |
## me.fit <- lme4::lmer(score ~ 1 + year + (1 + year | id), data = dat_l) | |
## summary(me.fit) | |
## ----sim3, eval = TRUE, include = TRUE, echo = TRUE, cache = TRUE------- | |
alpha <- .5 | |
nobs <- 100 | |
ran_effs <- c(5, .5) | |
cor_mat <- matrix(c(1, -.2, -.2, 1), nrow = 2) | |
cov_mat <- cor2cov(cor_mat, ran_effs) | |
int_mean <- 20 | |
eff_sizes <- seq(.02, .1, by = .01) | |
run_sim <- function(param){ | |
slope_mean <- param # this is our parameter of interest! | |
fact_scores <- MASS::mvrnorm(n = nobs, mu = c(int_mean, slope_mean), Sigma = cov_mat) | |
ach1 <- 1*fact_scores[, 1] + 0*fact_scores[, 2] + rnorm(nobs, sd = .5) | |
ach2 <- 1*fact_scores[, 1] + 1*fact_scores[, 2] + rnorm(nobs, sd = .5) | |
ach3 <- 1*fact_scores[, 1] + 2*fact_scores[, 2] + rnorm(nobs, sd = .5) | |
ach4 <- 1*fact_scores[, 1] + 3*fact_scores[, 2] + rnorm(nobs, sd = .5) | |
ach5 <- 1*fact_scores[, 1] + 4*fact_scores[, 2] + rnorm(nobs, sd = .5) | |
dat <- data.frame(id = 1:nobs, ach1, ach2, ach3, ach4, ach5) | |
lgcm.fit <- growth(mod, data = dat) | |
params <- parameterEstimates(lgcm.fit) | |
subset(params, label == "eff", pvalue, drop = TRUE) | |
} | |
set.seed(05012019) | |
power <- NULL | |
for(param in eff_sizes){ | |
p_values <- replicate(2000, run_sim(param)) | |
power <- c(power, mean(p_values < alpha)) | |
} | |
## ------------------------------------------------------------------------ | |
plot(power ~ eff_sizes, type = "b", xlab = "Unstandardized parameter", ylab = "Power", | |
ylim = c(0, 1)) | |
abline(h = 0.8, lty = 2) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment