famuvie/multilevel_inla_lmer_stan.R

## multilevel_inla_lmer_stan.R
# Comparison estimations of multi-level models between lme4 and INLA
# inspired by question in the r-inla group:
# https://groups.google.com/forum/#!topic/r-inla-discussion-group/tmNdcYQ6KHk

pacman::p_load(INLA, lme4, tidyverse, broom, GGally)

sleepstudy %>%
  ggplot(aes(Days, Reaction, color = Subject)) +
  geom_point()

mod1 <- lmer(Reaction ~ 1 + Days + (1 | Subject), sleepstudy)
print(mod1)

mod2 <- inla(
  Reaction ~ 1 + Days + f(Subject, model = 'iid'),
  data = sleepstudy,
  control.predictor = list(compute = TRUE)  # computes fitted values
)
summary(mod2)

## predictions
sleepstudy %>%
  mutate(
    pred_lme4 = unname(predict(mod1)),
    pred_inla = mod2$summary.fitted.values$mean
  ) %>%
  gather(package, prediction, starts_with("pred_")) %>%
  mutate(
    package = factor(gsub("pred_", "", package))
  ) %>%
  ggplot(aes(Reaction, prediction)) +
  geom_abline(intercept = 0, slope = 1, col = "lightgray") +
  geom_point() +
  facet_wrap("package")

## INLA is not doing a great job in fitting these data
## The difference is in the estimation of the variance components:

## Extended inverse square-root function
## Defined as 0 for x <= 0 to avoid error in numerical derivative next
ext_invsqrt <- Vectorize(function(x) if(x < 0) 0 else 1/sqrt(x))
tidy(mod1)[, 1:2] %>%
  rename(estimate_lme4 = estimate) %>%
  mutate(
    estimate_inla = signif(
      c(mod2$summary.fixed$mean,
        rev(
          sapply(mod2$marginals.hyperpar,
               ## compute the posterior mean of the standard deviations
               function(.) inla.emarginal(ext_invsqrt, marginal = .)))
      ),
      3
    )
  )

## INLA estimated nearly zero variability for the Subject effect, and
## compensated a little by overestimating the residual variability.
## Why did this happen? Priors, of course.
## Let's examine the default priors used implicitly
str(inla.models()$likelihood$normal$hyper)
str(inla.models()$latent$iid$hyper)

## In both cases, we have used a LogGamma(1, 5e-05) on the log-precision,
## or equivalently, a Gamma^{-1/2} on the standard deviation, with the same
## parameters of shape and rate
## Personally, I need to see it.
sd_prior <-
  data.frame(x = seq(1, 1e5, length = 101)) %>%
  mutate(y = dgamma(x, shape = 1, rate = 5e-05)) %>%
  inla.tmarginal(ext_invsqrt, marginal = .) %>%
  as.data.frame() %>%
  na.omit()

plot(sd_prior, type = "l")
signif(inla.qmarginal(p = c(.01, .99), marginal = sd_prior), 2)

## We are using a prior on the standard deviations of the Subject and residual
## effects that puts 98% of its mass between .0034 and .069.
## Yet, the observations have a sd of
sd(sleepstudy$Reaction)

## I'm not saying that you put your priors based on your data. Still, you need
## to honor the magnitudes of variation you are handling. For example, your
## measures vary in the hundreds. It would be reasonable to put a prior on the
## sd with most of its mass (e.g. 98th central quantile) between 10 and 200. You
## can compute what it means in terms of a gamma precision:
q_target <- rev(1/c(10, 200)**2)

discrepancy <- function(logparams) {
  ## given parameters of shape and rate, measure the discrepancy between
  ## the distribution function at the target quantiles and the expected
  ## probabilities, in the logit scale
  params <- exp(logparams)
  as.numeric(
    dist(
      rbind(
        qlogis(pgamma(q_target, params[1], rate = params[2])),
        qlogis(c(.01, .99))
      )
    )
  )
}
fit_pars <- optim(log(c(1, .001)), discrepancy)  # c(1,1) are just initial guesses
prior_pars <- exp(fit_pars$par)

## Check:
pgamma(q_target, shape = prior_pars[1], rate = prior_pars[2])  # ~ c(.01, .99)

## Let's fit the INLA model with this prior for the precisions
hyper_prior <- list(theta = list(param = prior_pars))
fm_inla <-
  inla(
    Reaction ~ 1 + Days + f(Subject, model = 'iid', hyper = hyper_prior),
    data = sleepstudy,
    control.family = list(hyper = hyper_prior),
    control.predictor = list(compute = TRUE)  # computes fitted values
  )


## Bonus: fit with rstanarm (and default priors)
library(rstanarm)
fm_stan <-
  stan_lmer(Reaction ~ 1 + Days + (1 | Subject),
          data = sleepstudy)
summary(fm_stan)


## estimates

tidy(mod1)[, 1:2] %>%
  rename(lme4 = estimate) %>%
  mutate(
    inla_defp = c(
      mod2$summary.fixed$mean,
      rev(
        sapply(mod2$marginals.hyperpar,
               ## compute the posterior mean of the standard deviations
               function(.) inla.emarginal(ext_invsqrt, marginal = .)))
    ),
    inla_prior = c(
      fm_inla$summary.fixed$mean,
      rev(
        sapply(fm_inla$marginals.hyperpar,
               ## compute the posterior mean of the standard deviations
               function(.) inla.emarginal(ext_invsqrt, marginal = .)))
    ),
    stan_defp = tidy(
      fm_stan, parameters = c("non-varying", "hierarchical"))$estimate
  )


## predictions
sleepstudy <-
  sleepstudy %>%
  mutate(
    pred_lme4 = unname(predict(mod1)),
    pred_inla_default = mod2$summary.fitted.values$mean,
    pred_inla_priors = fm_inla$summary.fitted.values$mean,
    pred_stan = unname(apply(posterior_predict(fm_stan), 2, mean))
  )

sleepstudy %>%
  gather(package, prediction, starts_with("pred_")) %>%
  mutate(
    package = factor(gsub("pred_", "", package))
  ) %>%
  ggplot(aes(Reaction, prediction)) +
  geom_abline(intercept = 0, slope = 1, col = "lightgray") +
  geom_point() +
  facet_wrap("package")

ggpairs(sleepstudy, columns = c(1, 4:7))

#### response from thierry.onkelinx@inbo.be
#### no good estimates of variance components, though.
mod3 <- inla(
  Reaction ~ 1 + Days +
    f(Subject, model = 'iid', hyper = list(theta = list(param = c(1, 1e-3)))),
  data = sleepstudy,
  control.compute = list(waic = TRUE)
)
summary(mod3)

par(mfrow = c(2, 1))
plot(
  ranef(mod1)$Subject[, 1],
  mod2$summary.random$Subject$mean
)
abline(a = 0, b = 1)
plot(
  ranef(mod1)$Subject[, 1],
  mod3$summary.random$Subject$mean
)
abline(a = 0, b = 1)


## Still, estimates are not good:
tidy(mod1)[, 1:2] %>%
  rename(lme4 = estimate) %>%
  mutate(
    inla_defp = c(
      mod2$summary.fixed$mean,
      rev(
        sapply(mod2$marginals.hyperpar,
               ## compute the posterior mean of the standard deviations
               function(.) inla.emarginal(ext_invsqrt, marginal = .)))
    ),
    inla_mod3 = c(
      mod3$summary.fixed$mean,
      rev(
        sapply(mod3$marginals.hyperpar,
               ## compute the posterior mean of the standard deviations
               function(.) inla.emarginal(ext_invsqrt, marginal = .)))
    ),
    inla_prior = c(
      fm_inla$summary.fixed$mean,
      rev(
        sapply(fm_inla$marginals.hyperpar,
               ## compute the posterior mean of the standard deviations
               function(.) inla.emarginal(ext_invsqrt, marginal = .)))
    ),
    stan_defp = tidy(
      fm_stan, parameters = c("non-varying", "hierarchical"))$estimate
  )
	# Comparison estimations of multi-level models between lme4 and INLA
	# inspired by question in the r-inla group:
	# https://groups.google.com/forum/#!topic/r-inla-discussion-group/tmNdcYQ6KHk

	pacman::p_load(INLA, lme4, tidyverse, broom, GGally)

	sleepstudy %>%
	ggplot(aes(Days, Reaction, color = Subject)) +
	geom_point()

	mod1 <- lmer(Reaction ~ 1 + Days + (1 \| Subject), sleepstudy)
	print(mod1)

	mod2 <- inla(
	Reaction ~ 1 + Days + f(Subject, model = 'iid'),
	data = sleepstudy,
	control.predictor = list(compute = TRUE) # computes fitted values
	)
	summary(mod2)

	## predictions
	sleepstudy %>%
	mutate(
	pred_lme4 = unname(predict(mod1)),
	pred_inla = mod2$summary.fitted.values$mean
	) %>%
	gather(package, prediction, starts_with("pred_")) %>%
	mutate(
	package = factor(gsub("pred_", "", package))
	) %>%
	ggplot(aes(Reaction, prediction)) +
	geom_abline(intercept = 0, slope = 1, col = "lightgray") +
	geom_point() +
	facet_wrap("package")

	## INLA is not doing a great job in fitting these data
	## The difference is in the estimation of the variance components:

	## Extended inverse square-root function
	## Defined as 0 for x <= 0 to avoid error in numerical derivative next
	ext_invsqrt <- Vectorize(function(x) if(x < 0) 0 else 1/sqrt(x))
	tidy(mod1)[, 1:2] %>%
	rename(estimate_lme4 = estimate) %>%
	mutate(
	estimate_inla = signif(
	c(mod2$summary.fixed$mean,
	rev(
	sapply(mod2$marginals.hyperpar,
	## compute the posterior mean of the standard deviations
	function(.) inla.emarginal(ext_invsqrt, marginal = .)))
	),
	3
	)
	)

	## INLA estimated nearly zero variability for the Subject effect, and
	## compensated a little by overestimating the residual variability.
	## Why did this happen? Priors, of course.
	## Let's examine the default priors used implicitly
	str(inla.models()$likelihood$normal$hyper)
	str(inla.models()$latent$iid$hyper)

	## In both cases, we have used a LogGamma(1, 5e-05) on the log-precision,
	## or equivalently, a Gamma^{-1/2} on the standard deviation, with the same
	## parameters of shape and rate
	## Personally, I need to see it.
	sd_prior <-
	data.frame(x = seq(1, 1e5, length = 101)) %>%
	mutate(y = dgamma(x, shape = 1, rate = 5e-05)) %>%
	inla.tmarginal(ext_invsqrt, marginal = .) %>%
	as.data.frame() %>%
	na.omit()

	plot(sd_prior, type = "l")
	signif(inla.qmarginal(p = c(.01, .99), marginal = sd_prior), 2)

	## We are using a prior on the standard deviations of the Subject and residual
	## effects that puts 98% of its mass between .0034 and .069.
	## Yet, the observations have a sd of
	sd(sleepstudy$Reaction)

	## I'm not saying that you put your priors based on your data. Still, you need
	## to honor the magnitudes of variation you are handling. For example, your
	## measures vary in the hundreds. It would be reasonable to put a prior on the
	## sd with most of its mass (e.g. 98th central quantile) between 10 and 200. You
	## can compute what it means in terms of a gamma precision:
	q_target <- rev(1/c(10, 200)**2)

	discrepancy <- function(logparams) {
	## given parameters of shape and rate, measure the discrepancy between
	## the distribution function at the target quantiles and the expected
	## probabilities, in the logit scale
	params <- exp(logparams)
	as.numeric(
	dist(
	rbind(
	qlogis(pgamma(q_target, params[1], rate = params[2])),
	qlogis(c(.01, .99))
	)
	)
	)
	}
	fit_pars <- optim(log(c(1, .001)), discrepancy) # c(1,1) are just initial guesses
	prior_pars <- exp(fit_pars$par)

	## Check:
	pgamma(q_target, shape = prior_pars[1], rate = prior_pars[2]) # ~ c(.01, .99)

	## Let's fit the INLA model with this prior for the precisions
	hyper_prior <- list(theta = list(param = prior_pars))
	fm_inla <-
	inla(
	Reaction ~ 1 + Days + f(Subject, model = 'iid', hyper = hyper_prior),
	data = sleepstudy,
	control.family = list(hyper = hyper_prior),
	control.predictor = list(compute = TRUE) # computes fitted values
	)



	## Bonus: fit with rstanarm (and default priors)
	library(rstanarm)
	fm_stan <-
	stan_lmer(Reaction ~ 1 + Days + (1 \| Subject),
	data = sleepstudy)
	summary(fm_stan)


	## estimates

	tidy(mod1)[, 1:2] %>%
	rename(lme4 = estimate) %>%
	mutate(
	inla_defp = c(
	mod2$summary.fixed$mean,
	rev(
	sapply(mod2$marginals.hyperpar,
	## compute the posterior mean of the standard deviations
	function(.) inla.emarginal(ext_invsqrt, marginal = .)))
	),
	inla_prior = c(
	fm_inla$summary.fixed$mean,
	rev(
	sapply(fm_inla$marginals.hyperpar,
	## compute the posterior mean of the standard deviations
	function(.) inla.emarginal(ext_invsqrt, marginal = .)))
	),
	stan_defp = tidy(
	fm_stan, parameters = c("non-varying", "hierarchical"))$estimate
	)


	## predictions
	sleepstudy <-
	sleepstudy %>%
	mutate(
	pred_lme4 = unname(predict(mod1)),
	pred_inla_default = mod2$summary.fitted.values$mean,
	pred_inla_priors = fm_inla$summary.fitted.values$mean,
	pred_stan = unname(apply(posterior_predict(fm_stan), 2, mean))
	)

	sleepstudy %>%
	gather(package, prediction, starts_with("pred_")) %>%
	mutate(
	package = factor(gsub("pred_", "", package))
	) %>%
	ggplot(aes(Reaction, prediction)) +
	geom_abline(intercept = 0, slope = 1, col = "lightgray") +
	geom_point() +
	facet_wrap("package")

	ggpairs(sleepstudy, columns = c(1, 4:7))

	#### response from thierry.onkelinx@inbo.be
	#### no good estimates of variance components, though.
	mod3 <- inla(
	Reaction ~ 1 + Days +
	f(Subject, model = 'iid', hyper = list(theta = list(param = c(1, 1e-3)))),
	data = sleepstudy,
	control.compute = list(waic = TRUE)
	)
	summary(mod3)

	par(mfrow = c(2, 1))
	plot(
	ranef(mod1)$Subject[, 1],
	mod2$summary.random$Subject$mean
	)
	abline(a = 0, b = 1)
	plot(
	ranef(mod1)$Subject[, 1],
	mod3$summary.random$Subject$mean
	)
	abline(a = 0, b = 1)


	## Still, estimates are not good:
	tidy(mod1)[, 1:2] %>%
	rename(lme4 = estimate) %>%
	mutate(
	inla_defp = c(
	mod2$summary.fixed$mean,
	rev(
	sapply(mod2$marginals.hyperpar,
	## compute the posterior mean of the standard deviations
	function(.) inla.emarginal(ext_invsqrt, marginal = .)))
	),
	inla_mod3 = c(
	mod3$summary.fixed$mean,
	rev(
	sapply(mod3$marginals.hyperpar,
	## compute the posterior mean of the standard deviations
	function(.) inla.emarginal(ext_invsqrt, marginal = .)))
	),
	inla_prior = c(
	fm_inla$summary.fixed$mean,
	rev(
	sapply(fm_inla$marginals.hyperpar,
	## compute the posterior mean of the standard deviations
	function(.) inla.emarginal(ext_invsqrt, marginal = .)))
	),
	stan_defp = tidy(
	fm_stan, parameters = c("non-varying", "hierarchical"))$estimate
	)