Skip to content

Instantly share code, notes, and snippets.

@nrode
Last active August 29, 2015 14:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nrode/71722932408071ea4f62 to your computer and use it in GitHub Desktop.
Save nrode/71722932408071ea4f62 to your computer and use it in GitHub Desktop.
R - lmer drops unused levels in a random effect
## Goal of this document: show that the first step of lmer (using 'lFormula') drops unused levels in a random effect
## 0/ Simulates some data
## 1/ Shows a way to trick lmer when the number of levels is lower than the number of observations
## 2/ Shows how this fail when the number of levels is higher than the number of observations
set.seed(3)
library(lme4)
#################################
## 0/ simulate the data ##
#################################
N2 <- 1200 # set the number of groups (higher level units)
N <- 1000 # number of observations
NM <- 4 # number of groups per observation
sdgroup=sqrt(16) # Standard deviation of the random effect
sdres=sqrt(2) # Residual standard deviation
GpValue <- rnorm(N2, 0, sdgroup) # Random sampling the value of each group
data <- data.frame(matrix(sample(x=1:N2,size= NM*N,replace=T),ncol= NM))
data[,1] <- factor(data[,1],levels=1:N2)
data[,2] <- factor(data[,2],levels=1:N2)
data[,3] <- factor(data[,3],levels=1:N2)
data[,4] <- factor(data[,4],levels=1:N2)
## For some reason, lapply does not keep unused levels
## Zl <- lapply(colnames(data),function(nm) Matrix:::fac2sparse(data[[nm]], "d",drop.unused.levels=T))
## ZZ <- Reduce("+", Zl[-1], Zl[[1]])
## So compute the sum "by hand"
ZZ <- Matrix:::fac2sparse(data$X1, "d",drop=FALSE)+Matrix:::fac2sparse(data$X2, "d",drop=FALSE)+Matrix:::fac2sparse(data$X3, "d",drop=FALSE)+Matrix:::fac2sparse(data$X4, "d",drop=FALSE)
data$y <- apply(ZZ*GpValue,2,sum)+rnorm(N, 0, sdres)
#################################################################################################
## 1/ lmer can fit two different random effects for the for the two first and two last factors ##
#################################################################################################
## For clarity, defining new factors with different level names
data[,6] <- factor(paste("lev1",data[,1],sep="_"))
data[,7] <- factor(paste("lev1",data[,2],sep="_"))
data[,8] <- factor(paste("lev2",data[,3],sep="_"))
data[,9] <- factor(paste("lev2",data[,4],sep="_"))
## Get the names of the two levels corresponding to the two first and two last factors respectively
levels1 <- sort(unique(c(levels(data[,6]),levels(data[,7]))))
levels2 <- sort(unique(c(levels(data[,8]),levels(data[,9]))))
## Number of levels is lower than number of observations
length(levels1)
length(levels2)
## Specify the appropriate level names
data[,6] <- factor(data[,6],levels=levels1)
data[,7] <- factor(data[,7],levels=levels1)
data[,8] <- factor(data[,8],levels=levels2)
data[,9] <- factor(data[,9],levels=levels2)
## Dummy variable to trick lmer
data$dum1 <- factor(c(levels1, levels1[1:(nrow(data)-length(levels1))]),levels=levels1)
data$dum2 <- factor(c(levels2, levels2[1:(nrow(data)-length(levels2))]),levels=levels2)
## Random effect matrices
ZZ1 <- Matrix:::fac2sparse(data$V6, "d",drop=FALSE)+Matrix:::fac2sparse(data$V7, "d",drop=FALSE)
ZZ2 <- Matrix:::fac2sparse(data$V8, "d",drop=FALSE)+Matrix:::fac2sparse(data$V9, "d",drop=FALSE)
nrow(ZZ1)
nrow(ZZ2)
## 1. Parse the data and formula:
## For some reason, lmer gets confused when given a different order for the random effects [ie when y~1+(1 | dum1)+(1 |dum2)]
lmod <- lFormula(y ~1+(1 | dum2)+(1 |dum1),data=data,control=lmerControl(check.nobs.vs.nlev="ignore",check.nobs.vs.nRE="ignore",check.nobs.vs.rankZ="ignore"))
str(lmod$reTrms$Ztlist)
## Extraction of the design matrix of the random effect
lmod$reTrms$Zt[1:length(levels2),] <- ZZ2
lmod$reTrms$Ztlist$`1 | dum2` <- ZZ2
lmod$reTrms$Zt[(1+length(levels2)):nrow(lmod$reTrms$Zt),] <- ZZ1
lmod$reTrms$Ztlist$`1 | dum1` <- ZZ1
## 2. Create the deviance function to be optimized:
devfun <- do.call(mkLmerDevfun, lmod)
## 3. Optimize the deviance function:
optimizeroutput <- optimizeLmer(devfun)
## 4. Package up the results:
m0 <- mkMerMod(rho=environment(devfun), opt=optimizeroutput, reTrms=lmod$reTrms, fr = lmod$fr)
summary(m0)
################################################################################################
## 2/ lmer cannot fit the corresponding multiple membership model with a single random effect ##
################################################################################################
## Get the names of the four levels
levels <- sort(unique(c(levels(data[,1]),levels(data[,2]),levels(data[,3]),levels(data[,4]))))
## Number of levels is now higher than number of observations
length(levels)
## Dummy variable for the random effect
data$dum3 <- factor(data[,1],levels=levels)
## 1. Parse the data and formula:
lmod <- lFormula(y ~1+(1 | dum3),data=data,control=lmerControl(check.nobs.vs.nlev="ignore",check.nobs.vs.nRE="ignore",check.nobs.vs.rankZ="ignore"))
## The number of rows does not seem to correspond to the number of levels of the random effect but to the
lmod$reTrms$Ztlist$`1 | dum3`@Dim
length(levels(data$dum3))
length(unique(data[,1]))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment