Last active
August 29, 2015 14:26
-
-
Save nrode/71722932408071ea4f62 to your computer and use it in GitHub Desktop.
R - lmer drops unused levels in a random effect
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Goal of this document: show that the first step of lmer (using 'lFormula') drops unused levels in a random effect | |
## 0/ Simulates some data | |
## 1/ Shows a way to trick lmer when the number of levels is lower than the number of observations | |
## 2/ Shows how this fail when the number of levels is higher than the number of observations | |
set.seed(3) | |
library(lme4) | |
################################# | |
## 0/ simulate the data ## | |
################################# | |
N2 <- 1200 # set the number of groups (higher level units) | |
N <- 1000 # number of observations | |
NM <- 4 # number of groups per observation | |
sdgroup=sqrt(16) # Standard deviation of the random effect | |
sdres=sqrt(2) # Residual standard deviation | |
GpValue <- rnorm(N2, 0, sdgroup) # Random sampling the value of each group | |
data <- data.frame(matrix(sample(x=1:N2,size= NM*N,replace=T),ncol= NM)) | |
data[,1] <- factor(data[,1],levels=1:N2) | |
data[,2] <- factor(data[,2],levels=1:N2) | |
data[,3] <- factor(data[,3],levels=1:N2) | |
data[,4] <- factor(data[,4],levels=1:N2) | |
## For some reason, lapply does not keep unused levels | |
## Zl <- lapply(colnames(data),function(nm) Matrix:::fac2sparse(data[[nm]], "d",drop.unused.levels=T)) | |
## ZZ <- Reduce("+", Zl[-1], Zl[[1]]) | |
## So compute the sum "by hand" | |
ZZ <- Matrix:::fac2sparse(data$X1, "d",drop=FALSE)+Matrix:::fac2sparse(data$X2, "d",drop=FALSE)+Matrix:::fac2sparse(data$X3, "d",drop=FALSE)+Matrix:::fac2sparse(data$X4, "d",drop=FALSE) | |
data$y <- apply(ZZ*GpValue,2,sum)+rnorm(N, 0, sdres) | |
################################################################################################# | |
## 1/ lmer can fit two different random effects for the for the two first and two last factors ## | |
################################################################################################# | |
## For clarity, defining new factors with different level names | |
data[,6] <- factor(paste("lev1",data[,1],sep="_")) | |
data[,7] <- factor(paste("lev1",data[,2],sep="_")) | |
data[,8] <- factor(paste("lev2",data[,3],sep="_")) | |
data[,9] <- factor(paste("lev2",data[,4],sep="_")) | |
## Get the names of the two levels corresponding to the two first and two last factors respectively | |
levels1 <- sort(unique(c(levels(data[,6]),levels(data[,7])))) | |
levels2 <- sort(unique(c(levels(data[,8]),levels(data[,9])))) | |
## Number of levels is lower than number of observations | |
length(levels1) | |
length(levels2) | |
## Specify the appropriate level names | |
data[,6] <- factor(data[,6],levels=levels1) | |
data[,7] <- factor(data[,7],levels=levels1) | |
data[,8] <- factor(data[,8],levels=levels2) | |
data[,9] <- factor(data[,9],levels=levels2) | |
## Dummy variable to trick lmer | |
data$dum1 <- factor(c(levels1, levels1[1:(nrow(data)-length(levels1))]),levels=levels1) | |
data$dum2 <- factor(c(levels2, levels2[1:(nrow(data)-length(levels2))]),levels=levels2) | |
## Random effect matrices | |
ZZ1 <- Matrix:::fac2sparse(data$V6, "d",drop=FALSE)+Matrix:::fac2sparse(data$V7, "d",drop=FALSE) | |
ZZ2 <- Matrix:::fac2sparse(data$V8, "d",drop=FALSE)+Matrix:::fac2sparse(data$V9, "d",drop=FALSE) | |
nrow(ZZ1) | |
nrow(ZZ2) | |
## 1. Parse the data and formula: | |
## For some reason, lmer gets confused when given a different order for the random effects [ie when y~1+(1 | dum1)+(1 |dum2)] | |
lmod <- lFormula(y ~1+(1 | dum2)+(1 |dum1),data=data,control=lmerControl(check.nobs.vs.nlev="ignore",check.nobs.vs.nRE="ignore",check.nobs.vs.rankZ="ignore")) | |
str(lmod$reTrms$Ztlist) | |
## Extraction of the design matrix of the random effect | |
lmod$reTrms$Zt[1:length(levels2),] <- ZZ2 | |
lmod$reTrms$Ztlist$`1 | dum2` <- ZZ2 | |
lmod$reTrms$Zt[(1+length(levels2)):nrow(lmod$reTrms$Zt),] <- ZZ1 | |
lmod$reTrms$Ztlist$`1 | dum1` <- ZZ1 | |
## 2. Create the deviance function to be optimized: | |
devfun <- do.call(mkLmerDevfun, lmod) | |
## 3. Optimize the deviance function: | |
optimizeroutput <- optimizeLmer(devfun) | |
## 4. Package up the results: | |
m0 <- mkMerMod(rho=environment(devfun), opt=optimizeroutput, reTrms=lmod$reTrms, fr = lmod$fr) | |
summary(m0) | |
################################################################################################ | |
## 2/ lmer cannot fit the corresponding multiple membership model with a single random effect ## | |
################################################################################################ | |
## Get the names of the four levels | |
levels <- sort(unique(c(levels(data[,1]),levels(data[,2]),levels(data[,3]),levels(data[,4])))) | |
## Number of levels is now higher than number of observations | |
length(levels) | |
## Dummy variable for the random effect | |
data$dum3 <- factor(data[,1],levels=levels) | |
## 1. Parse the data and formula: | |
lmod <- lFormula(y ~1+(1 | dum3),data=data,control=lmerControl(check.nobs.vs.nlev="ignore",check.nobs.vs.nRE="ignore",check.nobs.vs.rankZ="ignore")) | |
## The number of rows does not seem to correspond to the number of levels of the random effect but to the | |
lmod$reTrms$Ztlist$`1 | dum3`@Dim | |
length(levels(data$dum3)) | |
length(unique(data[,1])) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment