rmaia/lmer_pbmc.R

## lmer_pbmc.R
# PARAMETRIC BOOTSTRAP NULL HYPOTHESIS TESTING OF FIXED-EFFECT IN A MIXED MODEL
# Same rationale can be used to test random effects
# adapted from Faraway 2006

require(lme4)

# build the models to be tested
# note that to test fixed effects, one should use ML and not REML estimation

fm1 <- lmer(Reaction ~ Days + (Days|Subject), sleepstudy, REML=F)
fm2 <- lmer(Reaction ~ 1 + (Days|Subject), sleepstudy, REML=F)

# obtain the empirical test statistic (in this case, the 2 x Likelihood Ratio)

obslr <- as.numeric(2*(logLik(fm1) - logLik(fm2)))

# generate data based on the null model

set.seed(1)
simdat <- simulate(fm2, 1000)

# Calculate the distribution of the test statistic under the null model
lrstat <- numeric(1000)

for(i in 1:1000){

  # fit both models...
  sfm1 <- refit(fm1, simdat[,i])
  sfm2 <- refit(fm2, simdat[,i])

  # ...calculate the test statistic
  lrstat[i] <- 2*(logLik(sfm1) - logLik(sfm2))

}

# Finally, calculate the probability of obtaining a test statistic as high as
# or higher than the empirically observed test statistic
#
# (This is a frequentist null hypothesis test - the probability of obtaining
# a test statistic as extreme as or more extreme than the one calculated, given
# that the null hypothesis is true)

mean(lrstat > obslr)

# We can plot the null distribution and see how it compares to the observed value

hist(lrstat, xlim=range(c(lrstat, obslr)))
abline(v=obslr, lty=3)

# We can also plot the distribution we generated and compare it to the distribution
# that would have been assumed (in this case, a Chi-square distribution with degrees
# of freedom equal to the difference in the number of parameters)

# difference in degrees of freedom

ddf <- diff(anova(fm1,fm2)$Df)

plot(qchisq((1:1000)/1001, df=ddf), sort(lrstat), xlab='Chi-square distribution',
  ylab='parametric bootstrap LRT', main='Q-Q plot')
abline(0,1)
	# PARAMETRIC BOOTSTRAP NULL HYPOTHESIS TESTING OF FIXED-EFFECT IN A MIXED MODEL
	# Same rationale can be used to test random effects
	# adapted from Faraway 2006

	require(lme4)

	# build the models to be tested
	# note that to test fixed effects, one should use ML and not REML estimation

	fm1 <- lmer(Reaction ~ Days + (Days\|Subject), sleepstudy, REML=F)
	fm2 <- lmer(Reaction ~ 1 + (Days\|Subject), sleepstudy, REML=F)

	# obtain the empirical test statistic (in this case, the 2 x Likelihood Ratio)

	obslr <- as.numeric(2*(logLik(fm1) - logLik(fm2)))

	# generate data based on the null model

	set.seed(1)
	simdat <- simulate(fm2, 1000)

	# Calculate the distribution of the test statistic under the null model
	lrstat <- numeric(1000)

	for(i in 1:1000){

	# fit both models...
	sfm1 <- refit(fm1, simdat[,i])
	sfm2 <- refit(fm2, simdat[,i])

	# ...calculate the test statistic
	lrstat[i] <- 2*(logLik(sfm1) - logLik(sfm2))

	}

	# Finally, calculate the probability of obtaining a test statistic as high as
	# or higher than the empirically observed test statistic
	#
	# (This is a frequentist null hypothesis test - the probability of obtaining
	# a test statistic as extreme as or more extreme than the one calculated, given
	# that the null hypothesis is true)

	mean(lrstat > obslr)

	# We can plot the null distribution and see how it compares to the observed value

	hist(lrstat, xlim=range(c(lrstat, obslr)))
	abline(v=obslr, lty=3)

	# We can also plot the distribution we generated and compare it to the distribution
	# that would have been assumed (in this case, a Chi-square distribution with degrees
	# of freedom equal to the difference in the number of parameters)

	# difference in degrees of freedom

	ddf <- diff(anova(fm1,fm2)$Df)

	plot(qchisq((1:1000)/1001, df=ddf), sort(lrstat), xlab='Chi-square distribution',
	ylab='parametric bootstrap LRT', main='Q-Q plot')
	abline(0,1)