## Code sketching out a `refit()` workflow for TMB objects
## example of updated refit, now incorporated in the glmmTMB code
## (installed from the smart_refit branch below)
## * NO convergence testing/warnings/etc.
## * won't work on weird binomial input (i.e. anything other than 0/1)
## * results are identical only up to floating-point accuracy
## * doesn't yet do other clever things like resetting starting values
remotes::install_github("glmmTMB/glmmTMB/glmmTMB@smart_refit")
library(glmmTMB)
library(rbenchmark)
data("sleepstudy", package = "lme4")
m2 <- glmmTMB(Reaction ~ Days + (Days | Subject), sleepstudy)
set.seed(101)
y2 <- rnorm(nobs(m2))
r1 <- refit(m2, y2)
r2 <- glmmTMB:::fast_refit.glmmTMB(m2, y2)
## not quite sure why these are not perfectly identical ... ?
all.equal(fixef(r1), fixef(r2), tolerance = 0)
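## (assumption) at a realistic tolerance the two sets of fixed effects
## should agree; all.equal() defaults to tolerance ~ 1.5e-8
all.equal(fixef(r1), fixef(r2))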
bb <- benchmark(old = refit(m2, y2),
                new = glmmTMB:::fast_refit.glmmTMB(m2, y2))
bb
##   test replications elapsed relative user.self sys.self user.child sys.child
## 2  new          100  12.348    1.000    54.307   17.757          0         0
## 1  old          100  29.063    2.354    34.089   18.056          0         0

## UNTESTED/conceptual machinery for refitting a glmmTMB
## model quickly, given a new response variable (e.g. when
## doing a parametric bootstrap, or fitting parallel models
## for gene-expression results from a large number of genes)
## use at your own risk!

## get data, fit model
library(glmmTMB)
data(sleepstudy, package = "lme4")
m1 <- glmmTMB(Reaction ~ Days + (Days | Subject), sleepstudy)
## this is just making up a new response
## (for the purposes of the example);
## not relevant to the machinery
set.seed(101)
new_y <- rnorm(nobs(m1))
## now the fun begins:
## find the 'y' variable in the environment of the
## TMB object, which is where it lives
m1$obj           ## this is the TMB object
ee <- m1$obj$env ## its environment
names(ee$data)
## once we have new_y ...
## replace it within the data of the original fitted model;
## environments are *mutable* (unlike most things in R),
## so we can just reach in and change it (at our own risk)
ee$data$yobs <- new_y
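## sanity check (a minimal sketch): confirm the swap took effect
stopifnot(identical(ee$data$yobs, new_y))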
## refit the model with the new response values
with(m1$obj, nlminb(start = par, objective = fn, gradient = gr))
## this could be any optimizer you want; it might be the default
## that glmmTMB uses.
## definitely want to use a gradient-based optimizer
## (not the default Nelder-Mead of optim(), which wastes all the cool
## stuff that glmmTMB does)
## ideally we would retrieve the info about the original
## optimizer used, control values, etc.
## this could be done via $call if we can rely on the
## original environment being available (i.e. not using some
## obfuscated set of nested functions in the workflow)
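## hedged sketch (assumption): the original call is stored in the fitted
## object, so optimizer/control settings could in principle be recovered
## from it (NULL here, since no control argument was supplied)
orig_call <- getCall(m1)
orig_control <- eval(orig_call$control)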
## alternatively, with a different gradient-based optimizer:
with(m1$obj, optim(par = par, fn = fn, gr = gr,
                   method = "BFGS"))
## hypothetically, this is all we need to do. We might
## also want to wrap the results back up as a proper 'glmmTMB'
## object so we can use all the machinery
## (we don't really use the Hessian until later)
## line 1473 of R/glmmTMB.R starts with the machinery we
## need to wrap;
## we might be able to re-build everything just with this:
## obj is the $obj element from the original fit,
## sdr is the result of calling sdreport(),
## fit is the result of the optimization.
## I think we can get modelInfo out of the original object?
## anyway, this is the part that would take 1-2 hours to
## figure out and test ...
## it's always the last little bit of wrapping that's a nuisance.
## this is *not* functional (it's just code copied from
## glmmTMB to show what components we will need to create a
## new glmmTMB object)
ret <- structure(namedList(obj, fit, sdr, call = TMBStruc$call,
                           frame = TMBStruc$fr, modelInfo,
                           fitted),
                 class = "glmmTMB")
## there's also a new experimental AD engine that might be
## faster.
## lme4 might also be faster, if you can hack the diagonal
## variance structure (not actually *too* hard ...)
## I can send an example ...
## lme4 is faster for LMMs with a relatively low-dimensional
## 'top-level' structure (a small number of RE /
## variance-covariance parameters)
## (it might even be fastest for a *large* number of levels or a
## large number of observations)
##
## glmmTMB definitely dominates for GLMMs with a large number
## of fixed-effect + variance-covariance parameters
## (especially NBinom!)
## Julia's MixedModels.jl is where to go if you really need
## speed ...
## https://dm13450.github.io/2022/01/06/Mixed-Models-Benchmarking.html