06/02/2021
- two categorical predictors X1 and X2 (2 and 3 levels respectively)
- their interaction
- one continuous predictor Z (confounder) with range [-1, 1]
- Linear predictor ~ X1*X2 + Z
- balanced design for categorical predictors
- Poisson response
- inference focusses on significance of interaction
- Intercept = 3
- X1.B - X1.A = 2
- X2.b - X2.a = -1.5
- X2.c - X2.a = 0.5
- Z = 2
- X1B:X2b - X1A:X2a = -0.7
- X1B:X2c - X1A:X2a = -0.3
- type 1 error = 0.05
# assumed coef values (must correspond to columns of model matrix X)
# adjust as needed to reflect expected effect sizes
# (could be made random within the simulation loop
# to introduce uncertainty in effect sizes)
coef.values<- c(3, 2, -1.5, 0.5, 2, -0.7, -0.3)
alpha<- 0.05
This is a basic implementation with the model hard-written in the function. You will need to adjust the function to your model or data structure.
dat.sim.pois<- function(Nrep, coef.val){
# create all factor combinations
factor.comb<- expand.grid(X1= LETTERS[1:2], X2= letters[1:3])
# now, combine factor.comb with variable Z,
# and let R automatically recycle it Nrep times
# to achieve total sample size (given by the length of Z)
dat<- data.frame(factor.comb, Z= runif(nrow(factor.comb)*Nrep, -1, 1))
# create the predictor matrix corresponding to the desired model
X<- model.matrix( ~ X1*X2 + Z, data= dat)
# compute linear predictor
LP<- X %*% coef.val
# simulate Poisson observations
Y<- rpois(n= length(LP), lambda= exp(LP))
# and use as Poisson mean:
output<- data.frame(Y, dat)
output
}
# dat.sim.norm<- function(N, coef.val, sd.resp){
# fac.comb<- expand.grid(X1= LETTERS[1:2], X2= letters[1:3])
# dat<- data.frame(fac.comb, Z= runif(nrow(fac.comb)*N, -1, 1))
# X<- model.matrix( ~ X1*X2 + Z, data= dat)
# Y<- rnorm(n= nrow(X), mean= X %*% coef.val, sd= sd.resp)
# dat<- data.frame(Y, X)
# }
N.rep<- 2:30 # replication level per (X1, X2) combination
N.sim<- 1000 # the larger the more precise the simulation
sim.out<- matrix(NA, N.sim, length(N.rep))
# set a seed for reproducibility (optional)
set.seed(7)
# set a progress bar
pb<- txtProgressBar(min = 0, max = N.sim, width = 50, style = 3)
# run the simulations
for(i in 1:N.sim){
for(j in 1:length(N.rep)){
sim.dat<- dat.sim.pois(N= N.rep[j], coef.val= coef.values)
sim.GLM<- glm(Y ~ X1*X2 + Z, data= sim.dat)
sim.out[i, j]<- anova(sim.GLM, test= "Chisq")["X1:X2", "Pr(>Chi)"]
}
setTxtProgressBar(pb, value= i)
}
close(pb)
# compute proportion significant per replication level:
out.pow<- apply(sim.out < alpha, 2, mean)
plot(out.pow ~ N.rep, xlab= "Replication", ylab= "Proportion significant interactions", type= "l", lwd= 3, col= 2, ylim= c(0, 1))
abline(h= 0.8, lty= 3, col= grey(0.5), lwd= 2)
# what is the minimum replication achieving >= 0.8 power?
min.ok<- min(which(out.pow > 0.8))
N.rep[min.ok]
## [1] 10
abline(v= N.rep[min.ok])
10 or more replicates per factor combination yield a power over 80%