Skip to content

Instantly share code, notes, and snippets.

@cddesja
Created May 1, 2019 16:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cddesja/3b32f61198f7fabd0d494af681bf95c9 to your computer and use it in GitHub Desktop.
Save cddesja/3b32f61198f7fabd0d494af681bf95c9 to your computer and use it in GitHub Desktop.
R code used in the power analysis talk for RMCC
## ----include = FALSE, message = FALSE, warning = FALSE-------------------
library(tidyverse)
cohen.d <- function(samp1, samp2){
xbar1 <- mean(samp1)
xbar2 <- mean(samp2)
n1 <- length(samp1)
n2 <- length(samp2)
s1 <- var(samp1)
s2 <- var(samp2)
pooled.sd <- sqrt(((n1 - 1)*s1 + (n2 - 1)*s2) / (n1 + n2 - 2))
(xbar1 - xbar2) / pooled.sd
}
set.seed(12351234)
dat.tmp <- data.frame(Trt = rnorm(100000, mean = 21.04, sd = 5.2),
Ctrl = rnorm(100000, mean = 20, sd = 5.2))
dat.l <- gather(dat.tmp)
library(ggplot2)
## ----echo = FALSE--------------------------------------------------------
ggplot(dat.l, aes(x = value, fill = key)) +
geom_density(alpha = .4) +
scale_fill_discrete("") +
xlab("ACT") +
ylab("Density") +
theme_bw() +
annotate("segment", x = 21.04, xend = 21.04, y = 0, yend = .001, colour = "blue")+
annotate("segment", x = 20, xend = 20, y = 0, yend = .001, colour = "blue")
## ----echo = FALSE--------------------------------------------------------
ggplot(dat.l, aes(y = value, x = key)) +
geom_boxplot() +
xlab("ACT") +
ylab("Act Score") +
theme_bw()
## ----include = FALSE, message = FALSE, warning = FALSE-------------------
set.seed(12351234)
dat.tmp <- data.frame(Trt = rnorm(100000, mean = 22.6, sd = 5.2),
Ctrl = rnorm(100000, mean = 20, sd = 5.2))
dat.l <- gather(dat.tmp)
## ----echo = FALSE--------------------------------------------------------
ggplot(dat.l, aes(x = value, fill = key)) +
geom_density(alpha = .4) +
scale_fill_discrete("") +
xlab("ACT") +
ylab("Density") +
theme_bw() +
annotate("segment", x = 22.6, xend = 22.6, y = 0, yend = .001, colour = "blue")+
annotate("segment", x = 20, xend = 20, y = 0, yend = .001, colour = "blue")
## ----echo = FALSE--------------------------------------------------------
ggplot(dat.l, aes(y = value, x = key)) +
geom_boxplot() +
xlab("ACT") +
ylab("Act Score") +
theme_bw()
## ----include = FALSE, message = FALSE, warning = FALSE-------------------
set.seed(12351234)
dat.tmp <- data.frame(Trt = rnorm(100000, mean = 24.16, sd = 5.2),
Ctrl = rnorm(100000, mean = 20, sd = 5.2))
dat.l <- gather(dat.tmp)
## ----echo = FALSE--------------------------------------------------------
ggplot(dat.l, aes(x = value, fill = key)) +
geom_density(alpha = .4) +
scale_fill_discrete("") +
xlab("ACT") +
ylab("Density") +
theme_bw() +
annotate("segment", x = 24.16, xend = 24.16, y = 0, yend = .001, colour = "blue")+
annotate("segment", x = 20, xend = 20, y = 0, yend = .001, colour = "blue")
## ----echo = FALSE--------------------------------------------------------
ggplot(dat.l, aes(y = value, x = key)) +
geom_boxplot() +
xlab("ACT") +
ylab("Act Score") +
theme_bw()
## ----echo = TRUE---------------------------------------------------------
pwr::pwr.t.test(d = 0.2, sig.level = .05, power = .80,
type = "two.sample", alternative = "greater")
## ----cache = TRUE, echo = TRUE-------------------------------------------
set.seed(512019)
run_sim <- replicate(5000, expr = {
trt <- rnorm(310, mean = 21.04, sd = 5.2)
ctrl <- rnorm(310, mean = 20, sd = 5.2)
test <- t.test(trt, ctrl, var.equal = FALSE, alternative = "greater")
d <- cohen.d(trt, ctrl)
c(test$p.value, d)
})
run_sim <- data.frame(t(run_sim))
colnames(run_sim) <- c("p.value", "d")
mean(run_sim$d) # effect size
mean(run_sim$p.value < .05) # empirical power
## ----cache = TRUE, echo = TRUE-------------------------------------------
set.seed(512019)
run_sim <- replicate(5000, expr = {
trt <- rnorm(310, mean = 21.04, sd = 5.2)
ctrl <- rnorm(310, mean = 20, sd = 5.2)
y <- c(trt, ctrl)
x <- rep(c("Trt", "Ctrl"), each = 310)
mod <- lm(y ~ x)
pt(summary(mod)$coef[2, 3], df = 618, lower.tail = FALSE)
})
mean(run_sim < .05) # empirical power
## ----cache = TRUE, echo = TRUE-------------------------------------------
n <- seq(50, 200, by = 10)
calc_p <- function(n){
trt <- rnorm(n, mean = 22.6, sd = 5.2)
ctrl <- rnorm(n, mean = 20, sd = 5.2)
test <- t.test(trt, ctrl, mu = 1.04,
var.equal = FALSE, alternative = "greater")
test$p.value
}
set.seed(512019)
power <- NULL
for(samp in n){
tmp <- replicate(5000, calc_p(samp))
power <- c(power, mean(tmp < .05))
}
## ------------------------------------------------------------------------
plot(power ~ n, type = "b", ylab = "Power", xlab = "Sample Size Per Group",
ylim = c(0, 1))
abline(h = 0.8, lty = 2)
## ------------------------------------------------------------------------
dat <- data.frame(Trt = rgamma(100000, shape = 5.65, scale = 4),
Ctrl = rgamma(100000, shape = 2, scale = 10))
dat_l <- tidyr::gather(dat)
ggplot(dat_l, aes(x = value, fill = key)) +
geom_density(alpha = .4) +
scale_fill_discrete("") +
xlab("Test score")
ylab("Density") +
theme_bw()
## ----cache = TRUE, echo = TRUE-------------------------------------------
n <- seq(50, 200, by = 10)
calc_p <- function(n){
trt <- rgamma(n, shape = 5.65, scale = 4)
ctrl <- rgamma(n, shape = 2, scale = 10)
test <- t.test(trt, ctrl, mu = 1.04,
var.equal = FALSE, alternative = "greater")
test$p.value
}
set.seed(512019)
power <- NULL
for(samp in n){
tmp <- replicate(5000, calc_p(samp))
power <- c(power, mean(tmp < .05))
}
## ------------------------------------------------------------------------
plot(power ~ n, type = "b", ylab = "Power", xlab = "Sample Size Per Group",
ylim = c(0, 1))
abline(h = 0.8, lty = 2)
## ----echo = TRUE---------------------------------------------------------
set.seed(05012019)
library(lavaan)
# relationship between X and M, M and Y, and X and Y
a <- .562
b <- .562
c <- .14
# Fit the model
mod <- '
M ~ a*X # M regressed onto X
Y ~ b*M + c*X # Y regressed onto M and X
v := (a*b)*(a*b) # indirect effect squared
'
# How many observations should we generate.
n <- 100
# Generate X to be a random normal variable, M, and Y
X <- rnorm(n = n, mean = 0, sd = 1)
M <- a*X + rnorm(n, mean = 0, sd = sqrt(1 - a^2))
Y <- b*M + c*X + rnorm(n, mean = 0, sd = sqrt(1 - (b^2 + c^2 + 2*b*c*a)))
dat <- data.frame(Y, X, M)
# Fit the model
fit <- sem(model = mod, data = dat)
# Extract the relevant information
param <- parameterEstimates(fit)
subset(param, label == "v", pvalue)
## ----sim2, cache = TRUE, echo = TRUE-------------------------------------
sample_size <- seq(50, 120, by = 10)
alpha <- .05
run_sim <- function(N){
# How many observations should we generate.
n <- N
# Generate X to be a random normal variable, M, and Y
X <- rnorm(n = n, mean = 0, sd = 1)
M <- a*X + rnorm(n, mean = 0, sd = sqrt(1 - a^2))
Y <- b*M + c*X + rnorm(n, mean = 0, sd = sqrt(1 - (b^2 + c^2 + 2*b*c*a)))
dat <- data.frame(Y, X, M)
# Fit the model
fit <- sem(model = mod, data = dat)
# Extract the relevant information
param <- parameterEstimates(fit)
subset(param, label == "v", pvalue, drop = TRUE)
}
set.seed(05012019)
power <- NULL
for(samp in sample_size){
p_values <- replicate(2000, run_sim(samp))
power <- c(power, mean(p_values < alpha))
}
## ------------------------------------------------------------------------
plot(power ~ sample_size, type = "b", xlab = "Sample size", ylab = "Power",
ylim = c(0, 1))
abline(h = 0.8, lty = 2)
## ----eval = TRUE, include = TRUE, echo = TRUE----------------------------
nobs <- 100
ran_effs <- c(5, .5)
cor_mat <- matrix(c(1, -.2, -.2, 1), nrow = 2)
cov_mat <- cor2cov(cor_mat, ran_effs)
int_mean <- 20
slope_mean <- 1.5 # this is our parameter of interest!
fact_scores <- MASS::mvrnorm(n = nobs, mu = c(int_mean, slope_mean), Sigma = cov_mat)
ach1 <- 1*fact_scores[, 1] + 0*fact_scores[, 2] + rnorm(nobs, sd = .5)
ach2 <- 1*fact_scores[, 1] + 1*fact_scores[, 2] + rnorm(nobs, sd = .5)
ach3 <- 1*fact_scores[, 1] + 2*fact_scores[, 2] + rnorm(nobs, sd = .5)
ach4 <- 1*fact_scores[, 1] + 3*fact_scores[, 2] + rnorm(nobs, sd = .5)
ach5 <- 1*fact_scores[, 1] + 4*fact_scores[, 2] + rnorm(nobs, sd = .5)
dat <- data.frame(id = 1:nobs, ach1, ach2, ach3, ach4, ach5)
dat_l <- reshape(dat, direction = "long", varying = 2:6,
timevar = "year", v.names = "score", times = 0:4)
## ----eval = TRUE, include = TRUE, echo = TRUE----------------------------
mod <- "
int =~ 1*ach1 + 1*ach2 + 1*ach3 + 1*ach4 + 1*ach5
slope =~ 0*ach1 + 1*ach2 + 2*ach3 + 3*ach4 + 4*ach5
int ~ 1
slope ~ eff*1
int ~~ int + slope
slope ~~ slope
ach1 ~~ e*ach1
ach2 ~~ e*ach2
ach3 ~~ e*ach3
ach4 ~~ e*ach4
ach5 ~~ e*ach5
"
## ----eval = FALSE, echo = TRUE, include = TRUE---------------------------
## # as a latent growth curve model
## lgcm.fit <- growth(mod, data = dat)
## summary(lgcm.fit)
##
## # as a mixed effects model
## me.fit <- lme4::lmer(score ~ 1 + year + (1 + year | id), data = dat_l)
## summary(me.fit)
## ----sim3, eval = TRUE, include = TRUE, echo = TRUE, cache = TRUE-------
alpha <- .5
nobs <- 100
ran_effs <- c(5, .5)
cor_mat <- matrix(c(1, -.2, -.2, 1), nrow = 2)
cov_mat <- cor2cov(cor_mat, ran_effs)
int_mean <- 20
eff_sizes <- seq(.02, .1, by = .01)
run_sim <- function(param){
slope_mean <- param # this is our parameter of interest!
fact_scores <- MASS::mvrnorm(n = nobs, mu = c(int_mean, slope_mean), Sigma = cov_mat)
ach1 <- 1*fact_scores[, 1] + 0*fact_scores[, 2] + rnorm(nobs, sd = .5)
ach2 <- 1*fact_scores[, 1] + 1*fact_scores[, 2] + rnorm(nobs, sd = .5)
ach3 <- 1*fact_scores[, 1] + 2*fact_scores[, 2] + rnorm(nobs, sd = .5)
ach4 <- 1*fact_scores[, 1] + 3*fact_scores[, 2] + rnorm(nobs, sd = .5)
ach5 <- 1*fact_scores[, 1] + 4*fact_scores[, 2] + rnorm(nobs, sd = .5)
dat <- data.frame(id = 1:nobs, ach1, ach2, ach3, ach4, ach5)
lgcm.fit <- growth(mod, data = dat)
params <- parameterEstimates(lgcm.fit)
subset(params, label == "eff", pvalue, drop = TRUE)
}
set.seed(05012019)
power <- NULL
for(param in eff_sizes){
p_values <- replicate(2000, run_sim(param))
power <- c(power, mean(p_values < alpha))
}
## ------------------------------------------------------------------------
plot(power ~ eff_sizes, type = "b", xlab = "Unstandardized parameter", ylab = "Power",
ylim = c(0, 1))
abline(h = 0.8, lty = 2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment