Skip to content

Instantly share code, notes, and snippets.

@eric-pedersen
Created September 25, 2020 16:21
Show Gist options
  • Save eric-pedersen/69181a972de93db3657489c9d356f01f to your computer and use it in GitHub Desktop.
Save eric-pedersen/69181a972de93db3657489c9d356f01f to your computer and use it in GitHub Desktop.
library(mgcv)
set.seed(3);n <- 400
############################################
## First example: simulated Tweedie model...
############################################
dat <- gamSim(1,n=n,dist="poisson",scale=.2)
dat$y <- rTweedie(exp(dat$f),p=1.3,phi=.5) ## Tweedie response
b <- gam(y~s(x0)+s(x1)+s(x2)+s(x3),family=tw(),
data=dat,method="REML")
## simulate directly from Gaussian approximate posterior...
br <- rmvn(1000,coef(b),vcov(b))
## Alternatively use MH sampling...
br <- gam.mh(b,thin=2,ns=2000,rw.scale=.15)$bs
## If 'coda' installed, can check effective sample size
## require(coda);effectiveSize(as.mcmc(br))
## Now compare simulation results and Gaussian approximation for
## smooth term confidence intervals...
x <- seq(0,1,length=100)
pd <- data.frame(x0=x,x1=x,x2=x,x3=x)
X <- predict(b,newdata=pd,type="lpmatrix")
par(mfrow=c(2,2))
for(i in 1:4) {
plot(b,select=i,scale=0,scheme=1)
ii <- b$smooth[[i]]$first.para:b$smooth[[i]]$last.para
ff <- X[,ii]%*%t(br[,ii]) #not multipled by br here previously
fq <- apply(ff,1,quantile,probs=c(.025,.16,.84,.975))
lines(x,fq[1,],col=2,lty=2);lines(x,fq[4,],col=2,lty=2)
lines(x,fq[2,],col=2);lines(x,fq[3,],col=2)
}
###############################################################
## Second example, where Gaussian approximation is a failure...
###############################################################
y <- c(rep(0, 89), 1, 0, 1, 0, 0, 1, rep(0, 13), 1, 0, 0, 1,
rep(0, 10), 1, 0, 0, 1, 1, 0, 1, rep(0,4), 1, rep(0,3),
1, rep(0, 3), 1, rep(0, 10), 1, rep(0, 4), 1, 0, 1, 0, 0,
rep(1, 4), 0, rep(1, 5), rep(0, 4), 1, 1, rep(0, 46))
set.seed(3);x <- sort(c(0:10*5,rnorm(length(y)-11)*20+100))
b <- gam(y ~ s(x, k = 15),method = 'REML', family = binomial)
br <- gam.mh(b,thin=2,ns=2000,rw.scale=.4)$bs
X <- model.matrix(b)
par(mfrow=c(1,1))
plot(x, y, col = rgb(0,0,0,0.25), ylim = c(0,1))
ff <- X%*%t(br) #not multipled by br here previously
linv <- b$family$linkinv
## Get intervals for the curve on the response scale...
fq <- linv(apply(ff,1,quantile,probs=c(.025,.16,.5,.84,.975)))
lines(x,fq[1,],col=2,lty=2);lines(x,fq[5,],col=2,lty=2)
lines(x,fq[2,],col=2);lines(x,fq[4,],col=2)
lines(x,fq[3,],col=4)
## Compare to the Gaussian posterior approximation
fv <- predict(b,se=TRUE)
lines(x,linv(fv$fit))
lines(x,linv(fv$fit-2*fv$se.fit),lty=3)
lines(x,linv(fv$fit+2*fv$se.fit),lty=3)
## ... Notice the useless 95% CI (black dotted) based on the
## Gaussian approximation!
@dill
Copy link

dill commented Oct 12, 2020

Only just realised looking through this that in the first example the comparison is not between MVN draws and M-H, but rather between the Nychka-type intervals and M-H (at 2 different levels), so line 14 is redundant?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment