Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
A quick R script I knocked up to compare the glmmTMB and mgcv packages for fitting zero-inflated GLMMs to the Salamander and Owls data sets from Brooks et al (2017)
## Compare Brooks et al glmmTMB paper with mgcv
## Packages
library("glmmTMB")
library("mgcv")
library("ggplot2")
theme_set(theme_bw())
library("ggstance")
## Salamander
data(Salamanders, package = "glmmTMB")
## Poisson Models
pgam0 <- gam(count ~ spp + s(site, bs = "re"), data = Salamanders, family = poisson, method = "ML")
pgam1 <- gam(count ~ spp + mined + s(site, bs = "re"), data = Salamanders, family = poisson, method = "ML")
pgam2 <- gam(count ~ spp * mined + s(site, bs = "re"), data = Salamanders, family = poisson, method = "ML")
pm0 <- glmmTMB(count ~ spp + (1 | site), data = Salamanders, family = poisson)
pm1 <- glmmTMB(count ~ spp + mined + (1 | site), data = Salamanders, family = poisson)
pm2 <- glmmTMB(count ~ spp * mined + (1 | site), data = Salamanders, family = poisson)
AIC(pgam0, pgam1, pgam2)
AIC(pm0, pm1, pm2)
## Negative binomial models
nbgam0 <- gam(count ~ spp + s(site, bs = "re"), data = Salamanders, family = nb, method = "ML")
nbgam1 <- gam(count ~ spp + mined + s(site, bs = "re"), data = Salamanders, family = nb, method = "ML")
nbgam2 <- gam(count ~ spp * mined + s(site, bs = "re"), data = Salamanders, family = nb, method = "ML")
nbm0 <- glmmTMB(count ~ spp + (1 | site), data = Salamanders, family = nbinom2)
nbm1 <- glmmTMB(count ~ spp + mined + (1 | site), data = Salamanders, family = nbinom2)
nbm2 <- glmmTMB(count ~ spp * mined + (1 | site), data = Salamanders, family = nbinom2)
AIC(nbgam0, nbgam1, nbgam2)
AIC(nbm0, nbm1, nbm2)
## Zero-inflated Poisson
## mgcv's ziplss can only fit using REML
zipgam0 <- gam(list(count ~ spp + s(site, bs = "re"), ~ spp),
data = Salamanders, family = ziplss, method = "REML")
zipgam1 <- gam(list(count ~ spp + mined + s(site, bs = "re"), ~ spp),
data = Salamanders, family = ziplss, method = "REML")
zipgam2 <- gam(list(count ~ spp + mined + s(site, bs = "re"), ~ spp + mined),
data = Salamanders, family = ziplss, method = "REML")
zipgam3 <- gam(list(count ~ spp * mined + s(site, bs = "re"), ~ spp * mined),
data = Salamanders, family = ziplss, method = "REML")
## check the things converged
zipgam0$outer.info
zipgam1$outer.info
zipgam2$outer.info
zipgam3$outer.info
zipm0 <- glmmTMB(count ~ spp + (1 | site), zi = ~ spp, data = Salamanders, family = poisson)
zipm1 <- glmmTMB(count ~ spp + mined + (1 | site), zi = ~ spp, data = Salamanders, family = poisson)
zipm2 <- glmmTMB(count ~ spp + mined + (1 | site), zi = ~ spp + mined, data = Salamanders, family = poisson)
zipm3 <- glmmTMB(count ~ spp * mined + (1 | site), zi = ~ spp * mined, data = Salamanders, family = poisson)
AIC(zipgam0, zipgam1, zipgam2, zipgam3)
AIC(zipm0, zipm1, zipm2, zipm3)
## Newdata
newd0 <- newd <- as.data.frame(cbind(unique(Salamanders[, c("mined","spp")]), site = "R -1"))
rownames(newd0) <- rownames(newd) <- NULL
pred <- predict(zipgam3, newd, exclude = "s(site)", type = "link")
beta <- coef(zipgam3)
consts <- beta[grep("Intercept", names(beta))]
ilink <- function(eta) {
## from stats::binomial(link = cloglog)$linkinv
pmax(pmin(-expm1(-exp(eta)), 1 - .Machine$double.eps), .Machine$double.eps)
}
newd <- transform(newd, fitted = exp(pred[,1]) * ilink(pred[,2]))
ggplot(newd, aes(x = spp, y = fitted, colour = mined)) +
geom_point()
## Owls
data(Owls, package = "glmmTMB")
names(Owls) <- sub("SiblingNegotiation", "NCalls", names(Owls))
Owls <- transform(Owls, cArrivalTime = ArrivalTime - mean(ArrivalTime))
### constant zero-inflation
system.time({m1.tmb <- glmmTMB(NCalls ~ (FoodTreatment + cArrivalTime) * SexParent + offset(logBroodSize) + (1 | Nest),
ziformula = ~ 1, data = Owls, family = poisson)})
## mgcv's ziplss can only fit using REML
system.time({m1.gam <- gam(list(NCalls ~ (FoodTreatment + cArrivalTime) * SexParent + offset(logBroodSize) + s(Nest, bs = "re"),
~ 1), data = Owls, family = ziplss(), method = "REML")})
createCoeftab <- function(TMB, GAM) {
bTMB <- fixef(TMB)$cond[-1]
bGAM <- coef(GAM)[2:6]
seTMB <- diag(vcov(TMB)$cond)[-1]
seGAM <- diag(vcov(GAM))[2:6]
nms <- names(bTMB)
nms <- sub("FoodTreatment", "FT", nms)
nms <- sub("cArrivalTime", "ArrivalTime", nms)
df <- data.frame(model = rep(c("glmmTMB", "mgcv::gam"), each = 5),
term = rep(nms, 2),
estimate = unname(c(bTMB, bGAM)))
df <- transform(df,
upper = estimate + sqrt(c(seTMB, seGAM)),
lower = estimate - sqrt(c(seTMB, seGAM)))
df
}
m1.coefs <- createCoeftab(m1.tmb, m1.gam)
p1 <- ggplot(m1.coefs, aes(x = estimate, y = term, colour = model, shape = model, xmax = upper, xmin = lower)) +
geom_pointrangeh(position = position_dodgev(height = 0.3)) +
labs(y = NULL,
x = "Regression estimate",
title = "Comparing mgcv with glmmTMB",
subtitle = "Owls: ZIP with constant zero-inflation",
caption = "Bars are ±1 SE")
### Complex zero-inflation
system.time({m2.tmb <- glmmTMB(NCalls ~ (FoodTreatment + cArrivalTime) * SexParent + offset(logBroodSize) + (1 | Nest),
ziformula = ~ FoodTreatment + (1 | Nest), data = Owls, family = poisson)})
## mgcv's ziplss can only fit using REML
system.time({m2.gam <- gam(list(NCalls ~ (FoodTreatment + cArrivalTime) * SexParent + offset(logBroodSize) + s(Nest, bs = "re"),
~ FoodTreatment + s(Nest, bs = "re")), data = Owls, family = ziplss(), method = "REML")})
m2.coefs <- createCoeftab(m2.tmb, m2.gam)
p2 <- ggplot(m2.coefs, aes(x = estimate, y = term, colour = model, shape = model, xmax = upper, xmin = lower)) +
geom_pointrangeh(position = position_dodgev(height = 0.3)) +
labs(y = NULL,
x = "Regression estimate",
title = "Comparing mgcv with glmmTMB",
subtitle = "Owls: ZIP with complex zero-inflation",
caption = "Bars are ±1 SE")
ggsave("~/Downloads/owls-comparison-simple-zip.png", p1, width = 7, height = 7, dpi = 150)
ggsave("~/Downloads/owls-comparison-complex-zip.png", p2, width = 7, height = 7, dpi = 150)

ashander commented May 3, 2017

here's an idea for how to try out bigger datasets:

Salamanders$original_site = as.character(Salamanders$site)                                                                                                                                                         
                                                                                                                                                                                                                   
big_sals <- lapply(1:100, function(i) {                                                                                                                                                                            
               Salamanders$site = paste0(Salamanders$original_site, i)                                                                                                                                             
               Salamanders})                                                                                                                                                                                       
Salamanders <- do.call(rbind, big_sals)                                                                                                                                                                            
Salamanders$site <- factor(Salamanders$site)            

mebrooks commented May 3, 2017

Here's the code for the benchmarking in the glmmTMB manuscript. I simulated the datasets for figures A.1 and A.3 using simulate.glmmTMB. By default, the function resamples from the random effect distribution, but we might make that optional in the future and allow conditioning on the estimated site effects.

bbolker commented May 3, 2017

Just to clarify: as far as I can tell from ?ziplss, mgcv can do ZIP but not ZINB ... right?

Owner

gavinsimpson commented May 3, 2017

@bbolker Yes. mgcv currently has:

  1. nb() for negative binomial with theta estimated (like lme4::glmm.nb())
  2. Zip() for zero-inflated Poisson with simple intercept-only model for zero-inflation
  3. ziplss() for zero-inflated Poisson with linear predictors for the Poisson and zero-inflation parts.

NB + dispersion, ZINB, and hurdle models are not supported.

Also, I think for ziplss(), method = "ML" is ignored and hence for those models fitting is via Laplace approximate REML only (see ?mgcv::family.mgcv`).

mebrooks commented May 5, 2017

Thanks for letting us know about this. I'll add mgcv to the next draft of the manuscript.

I added mgcv to the negative binomial timing comparisons. It's definitely faster for the original data structure, but not with many more random effect levels. I'm guessing that TMB's automatic sparseness detection gives it the advantage.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment