nrode/multiple-membership.R

## multiple-membership.R
## Goal of this document: show that the first step of lmer (using 'lFormula') drops unused levels in a random effect
## 0/ Simulates some data
## 1/ Shows a way to trick lmer when the number of levels is lower than the number of observations
## 2/ Shows how this fail when the number of levels is higher than the number of observations

set.seed(3)
library(lme4)

#################################
##     0/ simulate the data    ##
#################################

N2 <- 1200 # set the number of groups (higher level units)
N <- 1000 # number of observations
NM <- 4 # number of groups per observation
sdgroup=sqrt(16) # Standard deviation of the random effect
sdres=sqrt(2) # Residual standard deviation
GpValue <- rnorm(N2, 0, sdgroup) # Random sampling the value of each group

data <- data.frame(matrix(sample(x=1:N2,size= NM*N,replace=T),ncol= NM))

data[,1] <- factor(data[,1],levels=1:N2)
data[,2] <- factor(data[,2],levels=1:N2)
data[,3] <- factor(data[,3],levels=1:N2)
data[,4] <- factor(data[,4],levels=1:N2)

## For some reason, lapply does not keep unused levels
## Zl <- lapply(colnames(data),function(nm) Matrix:::fac2sparse(data[[nm]], "d",drop.unused.levels=T))
## ZZ <- Reduce("+", Zl[-1], Zl[[1]])

## So compute the sum "by hand"
ZZ <- Matrix:::fac2sparse(data$X1, "d",drop=FALSE)+Matrix:::fac2sparse(data$X2, "d",drop=FALSE)+Matrix:::fac2sparse(data$X3, "d",drop=FALSE)+Matrix:::fac2sparse(data$X4, "d",drop=FALSE)

data$y <- apply(ZZ*GpValue,2,sum)+rnorm(N, 0, sdres)

#################################################################################################
## 1/ lmer can fit two different random effects for the for the two first and two last factors ##
#################################################################################################


## For clarity, defining new factors with different level names
data[,6] <- factor(paste("lev1",data[,1],sep="_"))
data[,7] <- factor(paste("lev1",data[,2],sep="_"))
data[,8] <- factor(paste("lev2",data[,3],sep="_"))
data[,9] <- factor(paste("lev2",data[,4],sep="_"))

## Get the names of the two levels corresponding to the two first and two last factors respectively
levels1 <- sort(unique(c(levels(data[,6]),levels(data[,7]))))
levels2 <- sort(unique(c(levels(data[,8]),levels(data[,9]))))

## Number of levels is lower than number of observations
length(levels1)
length(levels2)

## Specify the appropriate level names
data[,6] <- factor(data[,6],levels=levels1)
data[,7] <- factor(data[,7],levels=levels1)
data[,8] <- factor(data[,8],levels=levels2)
data[,9] <- factor(data[,9],levels=levels2)

## Dummy variable to trick lmer
data$dum1 <- factor(c(levels1, levels1[1:(nrow(data)-length(levels1))]),levels=levels1)
data$dum2 <- factor(c(levels2, levels2[1:(nrow(data)-length(levels2))]),levels=levels2)

## Random effect matrices
ZZ1 <- Matrix:::fac2sparse(data$V6, "d",drop=FALSE)+Matrix:::fac2sparse(data$V7, "d",drop=FALSE)
ZZ2 <- Matrix:::fac2sparse(data$V8, "d",drop=FALSE)+Matrix:::fac2sparse(data$V9, "d",drop=FALSE)
nrow(ZZ1)
nrow(ZZ2)

## 1.  Parse the data and formula:
## For some reason, lmer gets confused when given a different order for the random effects [ie when y~1+(1 | dum1)+(1 |dum2)]
lmod <- lFormula(y ~1+(1 | dum2)+(1 |dum1),data=data,control=lmerControl(check.nobs.vs.nlev="ignore",check.nobs.vs.nRE="ignore",check.nobs.vs.rankZ="ignore"))
str(lmod$reTrms$Ztlist)

## Extraction of the design matrix of the random effect
lmod$reTrms$Zt[1:length(levels2),] <- ZZ2
lmod$reTrms$Ztlist$`1 | dum2` <- ZZ2

lmod$reTrms$Zt[(1+length(levels2)):nrow(lmod$reTrms$Zt),] <- ZZ1
lmod$reTrms$Ztlist$`1 | dum1` <- ZZ1

## 2.  Create the deviance function to be optimized:
devfun <- do.call(mkLmerDevfun, lmod)

## 3.  Optimize the deviance function:
optimizeroutput <- optimizeLmer(devfun)


## 4.  Package up the results:
m0 <- mkMerMod(rho=environment(devfun), opt=optimizeroutput, reTrms=lmod$reTrms, fr = lmod$fr)
summary(m0)

################################################################################################
## 2/ lmer cannot fit the corresponding multiple membership model with a single random effect ##
################################################################################################

## Get the names of the four levels
levels <- sort(unique(c(levels(data[,1]),levels(data[,2]),levels(data[,3]),levels(data[,4]))))

## Number of levels is now higher than number of observations
length(levels)

## Dummy variable for the random effect
data$dum3 <- factor(data[,1],levels=levels)

## 1.  Parse the data and formula:
lmod <- lFormula(y ~1+(1 | dum3),data=data,control=lmerControl(check.nobs.vs.nlev="ignore",check.nobs.vs.nRE="ignore",check.nobs.vs.rankZ="ignore"))

## The number of rows does not seem to correspond to the number of levels of the random effect but to the
lmod$reTrms$Ztlist$`1 | dum3`@Dim
length(levels(data$dum3))
length(unique(data[,1]))
	## Goal of this document: show that the first step of lmer (using 'lFormula') drops unused levels in a random effect
	## 0/ Simulates some data
	## 1/ Shows a way to trick lmer when the number of levels is lower than the number of observations
	## 2/ Shows how this fail when the number of levels is higher than the number of observations

	set.seed(3)
	library(lme4)

	#################################
	## 0/ simulate the data ##
	#################################

	N2 <- 1200 # set the number of groups (higher level units)
	N <- 1000 # number of observations
	NM <- 4 # number of groups per observation
	sdgroup=sqrt(16) # Standard deviation of the random effect
	sdres=sqrt(2) # Residual standard deviation
	GpValue <- rnorm(N2, 0, sdgroup) # Random sampling the value of each group

	data <- data.frame(matrix(sample(x=1:N2,size= NM*N,replace=T),ncol= NM))

	data[,1] <- factor(data[,1],levels=1:N2)
	data[,2] <- factor(data[,2],levels=1:N2)
	data[,3] <- factor(data[,3],levels=1:N2)
	data[,4] <- factor(data[,4],levels=1:N2)

	## For some reason, lapply does not keep unused levels
	## Zl <- lapply(colnames(data),function(nm) Matrix:::fac2sparse(data[[nm]], "d",drop.unused.levels=T))
	## ZZ <- Reduce("+", Zl[-1], Zl[[1]])

	## So compute the sum "by hand"
	ZZ <- Matrix:::fac2sparse(data$X1, "d",drop=FALSE)+Matrix:::fac2sparse(data$X2, "d",drop=FALSE)+Matrix:::fac2sparse(data$X3, "d",drop=FALSE)+Matrix:::fac2sparse(data$X4, "d",drop=FALSE)

	data$y <- apply(ZZ*GpValue,2,sum)+rnorm(N, 0, sdres)

	#################################################################################################
	## 1/ lmer can fit two different random effects for the for the two first and two last factors ##
	#################################################################################################


	## For clarity, defining new factors with different level names
	data[,6] <- factor(paste("lev1",data[,1],sep="_"))
	data[,7] <- factor(paste("lev1",data[,2],sep="_"))
	data[,8] <- factor(paste("lev2",data[,3],sep="_"))
	data[,9] <- factor(paste("lev2",data[,4],sep="_"))

	## Get the names of the two levels corresponding to the two first and two last factors respectively
	levels1 <- sort(unique(c(levels(data[,6]),levels(data[,7]))))
	levels2 <- sort(unique(c(levels(data[,8]),levels(data[,9]))))

	## Number of levels is lower than number of observations
	length(levels1)
	length(levels2)

	## Specify the appropriate level names
	data[,6] <- factor(data[,6],levels=levels1)
	data[,7] <- factor(data[,7],levels=levels1)
	data[,8] <- factor(data[,8],levels=levels2)
	data[,9] <- factor(data[,9],levels=levels2)

	## Dummy variable to trick lmer
	data$dum1 <- factor(c(levels1, levels1[1:(nrow(data)-length(levels1))]),levels=levels1)
	data$dum2 <- factor(c(levels2, levels2[1:(nrow(data)-length(levels2))]),levels=levels2)

	## Random effect matrices
	ZZ1 <- Matrix:::fac2sparse(data$V6, "d",drop=FALSE)+Matrix:::fac2sparse(data$V7, "d",drop=FALSE)
	ZZ2 <- Matrix:::fac2sparse(data$V8, "d",drop=FALSE)+Matrix:::fac2sparse(data$V9, "d",drop=FALSE)
	nrow(ZZ1)
	nrow(ZZ2)

	## 1. Parse the data and formula:
	## For some reason, lmer gets confused when given a different order for the random effects [ie when y~1+(1 \| dum1)+(1 \|dum2)]
	lmod <- lFormula(y ~1+(1 \| dum2)+(1 \|dum1),data=data,control=lmerControl(check.nobs.vs.nlev="ignore",check.nobs.vs.nRE="ignore",check.nobs.vs.rankZ="ignore"))
	str(lmod$reTrms$Ztlist)

	## Extraction of the design matrix of the random effect
	lmod$reTrms$Zt[1:length(levels2),] <- ZZ2
	lmod$reTrms$Ztlist$`1 \| dum2` <- ZZ2

	lmod$reTrms$Zt[(1+length(levels2)):nrow(lmod$reTrms$Zt),] <- ZZ1
	lmod$reTrms$Ztlist$`1 \| dum1` <- ZZ1

	## 2. Create the deviance function to be optimized:
	devfun <- do.call(mkLmerDevfun, lmod)

	## 3. Optimize the deviance function:
	optimizeroutput <- optimizeLmer(devfun)


	## 4. Package up the results:
	m0 <- mkMerMod(rho=environment(devfun), opt=optimizeroutput, reTrms=lmod$reTrms, fr = lmod$fr)
	summary(m0)

	################################################################################################
	## 2/ lmer cannot fit the corresponding multiple membership model with a single random effect ##
	################################################################################################

	## Get the names of the four levels
	levels <- sort(unique(c(levels(data[,1]),levels(data[,2]),levels(data[,3]),levels(data[,4]))))

	## Number of levels is now higher than number of observations
	length(levels)

	## Dummy variable for the random effect
	data$dum3 <- factor(data[,1],levels=levels)

	## 1. Parse the data and formula:
	lmod <- lFormula(y ~1+(1 \| dum3),data=data,control=lmerControl(check.nobs.vs.nlev="ignore",check.nobs.vs.nRE="ignore",check.nobs.vs.rankZ="ignore"))

	## The number of rows does not seem to correspond to the number of levels of the random effect but to the
	lmod$reTrms$Ztlist$`1 \| dum3`@Dim
	length(levels(data$dum3))
	length(unique(data[,1]))