Skip to content

Instantly share code, notes, and snippets.

@mike-lawrence
Last active March 24, 2020 12:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mike-lawrence/40df0d4302f3dfbfccdd9172e8da9a1e to your computer and use it in GitHub Desktop.
Save mike-lawrence/40df0d4302f3dfbfccdd9172e8da9a1e to your computer and use it in GitHub Desktop.
LMER power simulations
library(tidyverse)
library(broom)
library(lme4)
#set default contrasts to halfsum (this matters!)
halfsum_contrasts = function (...) contr.sum(...) * 0.5
options(contrasts = c('halfsum_contrasts','contr.poly'))
run_sim = function(N,K,seed,do_checks=F){
set.seed(seed)
coefs = c(0,0,0,1)
sds = c(3,3,3,3)
noise = 4
subj_coefs = matrix(
rnorm(N*length(coefs),coefs,sds)
, nrow = N
, byrow = T
)
if(do_checks){
#check that matrix came out right
print(apply(subj_coefs,2,mean)) #should be close to coef
print(apply(subj_coefs,2,sd)) #should be close to sds
print(cor(subj_coefs)) #should be close to identity
}
#create the condition tibble and contrasts
conditions = tidyr::expand_grid(
a = factor(c('a_pre','a_post'))
, b = factor(c('b_pre','b_post'))
)
contrasts = model.matrix(
data = conditions
, object = ~ a*b
)
#compute each subject's value-per-cell
subj_vals = list()
for(i in 1:N){
subj_vals[[i]] = conditions
subj_vals[[i]]$subj = i
subj_vals[[i]]$value = as.numeric(as.matrix(contrasts) %*% subj_coefs[i,])
}
subj_vals = dplyr::bind_rows(subj_vals)
subj_vals$subj = factor(subj_vals$subj)
if(do_checks){
#compute difference between conditions
# (this is why we use halfsum contrasts)
subj_vals %>%
dplyr::group_by(
subj
) %>%
dplyr::summarise(
intercept = mean(value)
, ab = (
mean(value[(a=='a_post')&(b=='b_post')]) -
mean(value[(a=='a_pre')&(b=='b_post')])
) - (
mean(value[(a=='a_post')&(b=='b_pre')]) -
mean(value[(a=='a_pre')&(b=='b_pre')])
)
, a = mean(value[a=='a_post']) - mean(value[a=='a_pre'])
, b = mean(value[b=='b_post']) - mean(value[b=='b_pre'])
) %>%
tidyr::gather(
key = key
, value = value
, -subj
) %>%
dplyr::group_by(
key
) %>%
dplyr::summarise(
mean = mean(value)
, sd = sd(value)
) #should roughly match coefs and sds
}
if(do_checks){
#compute an lm per participant then check the mean/sd of resulting distributions
subj_vals %>%
dplyr::group_by(
subj
) %>%
dplyr::summarise(
broom::tidy(lm(value~a*b))
) %>%
dplyr::group_by(
term
) %>%
dplyr::summarise(
mean = mean(estimate)
, sd = sd(estimate)
) #should roughly match coefs and sds
}
#sample from each subject-by-condition mean with measurement noise
subj_vals %>%
dplyr::group_by(
subj
, a
, b
) %>%
dplyr::summarise(
value = rnorm(K,value,noise)
) ->
obs
if(do_checks){
#compute difference between conditions
# (this is why we use halfsum contrasts)
obs %>%
dplyr::group_by(
subj
) %>%
dplyr::summarise(
intercept = mean(value)
, ab = (
mean(value[(a=='a_post')&(b=='b_post')]) -
mean(value[(a=='a_pre')&(b=='b_post')])
) - (
mean(value[(a=='a_post')&(b=='b_pre')]) -
mean(value[(a=='a_pre')&(b=='b_pre')])
)
, a = mean(value[a=='a_post']) - mean(value[a=='a_pre'])
, b = mean(value[b=='b_post']) - mean(value[b=='b_pre'])
) %>%
tidyr::gather(
key = key
, value = value
, -subj
) %>%
dplyr::group_by(
key
) %>%
dplyr::summarise(
mean = mean(value)
, sd = sd(value)
) #should roughly match coefs and sds
}
if(do_checks){
#compute an lm per participant then check the mean/sd of resulting distributions
obs %>%
dplyr::group_by(
subj
) %>%
dplyr::summarise(
broom::tidy(lm(value~a*b))
) %>%
dplyr::group_by(
term
) %>%
dplyr::summarise(
mean = mean(estimate)
, sd = sd(estimate)
) #should roughly match coefs and sds
}
#fit and obtain intervals
fit = lme4::lmer(
formula = value ~ a*b + (a*b|subj)
, data = obs
)
ci = confint(fit,method='Wald')
if(do_checks){
print(
tibble(
N = N
, K = K
, seed = seed
, lo = ci[nrow(ci),1]
, hi = ci[nrow(ci),2]
)
)
}
return(tibble(
bound = c('lo','hi')
, value = ci[nrow(ci),]
))
}
#running once
run_sim(N=100,K=5,seed=3)
#running across a grid
tidyr::expand_grid(
N = c(,20,40,80,160)
, K = c(5,10,20)
, iteration = 1:1e3
) %>%
dplyr::mutate(
seed=1:n()
) %>%
dplyr::group_by(
N
, K
, iteration
) %>%
dplyr::summarize(
run_sim(
N = N
, K = K
, seed = seed
)
) ->
out
out %>%
tidyr::spread(
key = bound
, value = value
) %>%
dplyr::group_by(
N
, K
) %>%
dplyr::summarise(
hi_less_than_zero = mean(hi<0)
, hi_greater_than_zero = mean(hi>0)
, lo_less_than_zero = mean(lo<0)
, lo_greater_than_zero = mean(lo>0)
)
library(tidyverse)
library(broom)
library(lme4)
#set default contrasts to halfsum (this matters!)
halfsum_contrasts = function (...) contr.sum(...) * 0.5
options(contrasts = c('halfsum_contrasts','contr.poly'))
run_sim = function(N,K,seed,do_checks=F){
set.seed(seed)
coefs = c(0,1)
sds = c(1,3)
noise = 4
subj_coefs = matrix(
rnorm(N*length(coefs),coefs,sds)
, nrow = N
, byrow = T
)
if(do_checks){
#check that matrix came out right
print(apply(subj_coefs,2,mean)) #should be close to coef
print(apply(subj_coefs,2,sd)) #should be close to sds
print(cor(subj_coefs)) #should be close to identity
}
#create the condition tibble and contrasts
conditions = tidyr::expand_grid(
a = factor(c('a_pre','a_post'))
)
contrasts = model.matrix(
data = conditions
, object = ~ a
)
#compute each subject's value-per-cell
subj_vals = list()
for(i in 1:N){
subj_vals[[i]] = conditions
subj_vals[[i]]$subj = i
subj_vals[[i]]$value = as.numeric(as.matrix(contrasts) %*% subj_coefs[i,])
}
subj_vals = dplyr::bind_rows(subj_vals)
subj_vals$subj = factor(subj_vals$subj)
if(do_checks){
#compute difference between conditions
# (this is why we use halfsum contrasts)
subj_vals %>%
dplyr::group_by(
subj
) %>%
dplyr::summarise(
intercept = mean(value)
, a = mean(value[a=='a_post']) - mean(value[a=='a_pre'])
) %>%
tidyr::gather(
key = key
, value = value
, -subj
) %>%
dplyr::group_by(
key
) %>%
dplyr::summarise(
mean = mean(value)
, sd = sd(value)
) #should roughly match coefs and sds
}
if(do_checks){
#compute an lm per participant then check the mean/sd of resulting distributions
subj_vals %>%
dplyr::group_by(
subj
) %>%
dplyr::summarise(
broom::tidy(lm(value~a))
) %>%
dplyr::group_by(
term
) %>%
dplyr::summarise(
mean = mean(estimate)
, sd = sd(estimate)
) #should roughly match coefs and sds
}
#sample from each subject-by-condition mean with measurement noise
subj_vals %>%
dplyr::group_by(
subj
, a
) %>%
dplyr::summarise(
value = rnorm(K,value,noise)
) ->
obs
if(do_checks){
#compute difference between conditions
# (this is why we use halfsum contrasts)
obs %>%
dplyr::group_by(
subj
) %>%
dplyr::summarise(
intercept = mean(value)
, a = mean(value[a=='a_post']) - mean(value[a=='a_pre'])
) %>%
tidyr::gather(
key = key
, value = value
, -subj
) %>%
dplyr::group_by(
key
) %>%
dplyr::summarise(
mean = mean(value)
, sd = sd(value)
) #should roughly match coefs and sds
}
if(do_checks){
#compute an lm per participant then check the mean/sd of resulting distributions
obs %>%
dplyr::group_by(
subj
) %>%
dplyr::summarise(
broom::tidy(lm(value~a))
) %>%
dplyr::group_by(
term
) %>%
dplyr::summarise(
mean = mean(estimate)
, sd = sd(estimate)
) #should roughly match coefs and sds
}
#fit and obtain intervals
fit = lme4::lmer(
formula = value ~ a + (a|subj)
, data = obs
)
ci = confint(fit,method='Wald')
if(do_checks){
print(
tibble(
N = N
, K = K
, seed = seed
, lo = ci[nrow(ci),1]
, hi = ci[nrow(ci),2]
)
)
}
return(tibble(
bound = c('lo','hi')
, value = ci[nrow(ci),]
))
}
#running once
run_sim(N=100,K=5,seed=3)
#running across a grid
tidyr::expand_grid(
N = c(20,40,80,160)
, K = c(5,10,20)
, iteration = 1:1e3
) %>%
dplyr::mutate(
seed=1:n()
) %>%
dplyr::group_by(
N
, K
, iteration
) %>%
dplyr::summarize(
run_sim(
N = N
, K = K
, seed = seed
)
) ->
out
out %>%
tidyr::spread(
key = bound
, value = value
) %>%
dplyr::group_by(
N
, K
) %>%
dplyr::summarise(
hi_less_than_zero = mean(hi<0)
, hi_greater_than_zero = mean(hi>0)
, lo_less_than_zero = mean(lo<0)
, lo_greater_than_zero = mean(lo>0)
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment