eric-pedersen/gam.mh fixed examples.R

## gam.mh fixed examples.R
library(mgcv)
set.seed(3);n <- 400

############################################
## First example: simulated Tweedie model...
############################################

dat <- gamSim(1,n=n,dist="poisson",scale=.2)
dat$y <- rTweedie(exp(dat$f),p=1.3,phi=.5) ## Tweedie response
b <- gam(y~s(x0)+s(x1)+s(x2)+s(x3),family=tw(),
         data=dat,method="REML")

## simulate directly from Gaussian approximate posterior...
br <- rmvn(1000,coef(b),vcov(b))

## Alternatively use MH sampling...
br <- gam.mh(b,thin=2,ns=2000,rw.scale=.15)$bs
## If 'coda' installed, can check effective sample size
## require(coda);effectiveSize(as.mcmc(br))

## Now compare simulation results and Gaussian approximation for
## smooth term confidence intervals...
x <- seq(0,1,length=100)
pd <- data.frame(x0=x,x1=x,x2=x,x3=x)
X <- predict(b,newdata=pd,type="lpmatrix")
par(mfrow=c(2,2))
for(i in 1:4) {
  plot(b,select=i,scale=0,scheme=1)
  ii <- b$smooth[[i]]$first.para:b$smooth[[i]]$last.para
  ff <- X[,ii]%*%t(br[,ii]) #not multipled by br here previously
  fq <- apply(ff,1,quantile,probs=c(.025,.16,.84,.975))
  lines(x,fq[1,],col=2,lty=2);lines(x,fq[4,],col=2,lty=2)
  lines(x,fq[2,],col=2);lines(x,fq[3,],col=2)
}

###############################################################
## Second example, where Gaussian approximation is a failure...
###############################################################

y <- c(rep(0, 89), 1, 0, 1, 0, 0, 1, rep(0, 13), 1, 0, 0, 1,
       rep(0, 10), 1, 0, 0, 1, 1, 0, 1, rep(0,4), 1, rep(0,3),
       1, rep(0, 3), 1, rep(0, 10), 1, rep(0, 4), 1, 0, 1, 0, 0,
       rep(1, 4), 0, rep(1, 5), rep(0, 4), 1, 1, rep(0, 46))
set.seed(3);x <- sort(c(0:10*5,rnorm(length(y)-11)*20+100))
b <- gam(y ~ s(x, k = 15),method = 'REML', family = binomial)
br <- gam.mh(b,thin=2,ns=2000,rw.scale=.4)$bs
X <- model.matrix(b)
par(mfrow=c(1,1))
plot(x, y, col = rgb(0,0,0,0.25), ylim = c(0,1))
ff <- X%*%t(br) #not multipled by br here previously
linv <- b$family$linkinv
## Get intervals for the curve on the response scale...
fq <- linv(apply(ff,1,quantile,probs=c(.025,.16,.5,.84,.975)))
lines(x,fq[1,],col=2,lty=2);lines(x,fq[5,],col=2,lty=2)
lines(x,fq[2,],col=2);lines(x,fq[4,],col=2)
lines(x,fq[3,],col=4)
## Compare to the Gaussian posterior approximation
fv <- predict(b,se=TRUE)
lines(x,linv(fv$fit))
lines(x,linv(fv$fit-2*fv$se.fit),lty=3)
lines(x,linv(fv$fit+2*fv$se.fit),lty=3)
## ... Notice the useless 95% CI (black dotted) based on the
## Gaussian approximation!
	library(mgcv)
	set.seed(3);n <- 400

	############################################
	## First example: simulated Tweedie model...
	############################################

	dat <- gamSim(1,n=n,dist="poisson",scale=.2)
	dat$y <- rTweedie(exp(dat$f),p=1.3,phi=.5) ## Tweedie response
	b <- gam(y~s(x0)+s(x1)+s(x2)+s(x3),family=tw(),
	data=dat,method="REML")

	## simulate directly from Gaussian approximate posterior...
	br <- rmvn(1000,coef(b),vcov(b))

	## Alternatively use MH sampling...
	br <- gam.mh(b,thin=2,ns=2000,rw.scale=.15)$bs
	## If 'coda' installed, can check effective sample size
	## require(coda);effectiveSize(as.mcmc(br))

	## Now compare simulation results and Gaussian approximation for
	## smooth term confidence intervals...
	x <- seq(0,1,length=100)
	pd <- data.frame(x0=x,x1=x,x2=x,x3=x)
	X <- predict(b,newdata=pd,type="lpmatrix")
	par(mfrow=c(2,2))
	for(i in 1:4) {
	plot(b,select=i,scale=0,scheme=1)
	ii <- b$smooth[[i]]$first.para:b$smooth[[i]]$last.para
	ff <- X[,ii]%*%t(br[,ii]) #not multipled by br here previously
	fq <- apply(ff,1,quantile,probs=c(.025,.16,.84,.975))
	lines(x,fq[1,],col=2,lty=2);lines(x,fq[4,],col=2,lty=2)
	lines(x,fq[2,],col=2);lines(x,fq[3,],col=2)
	}

	###############################################################
	## Second example, where Gaussian approximation is a failure...
	###############################################################

	y <- c(rep(0, 89), 1, 0, 1, 0, 0, 1, rep(0, 13), 1, 0, 0, 1,
	rep(0, 10), 1, 0, 0, 1, 1, 0, 1, rep(0,4), 1, rep(0,3),
	1, rep(0, 3), 1, rep(0, 10), 1, rep(0, 4), 1, 0, 1, 0, 0,
	rep(1, 4), 0, rep(1, 5), rep(0, 4), 1, 1, rep(0, 46))
	set.seed(3);x <- sort(c(0:105,rnorm(length(y)-11)20+100))
	b <- gam(y ~ s(x, k = 15),method = 'REML', family = binomial)
	br <- gam.mh(b,thin=2,ns=2000,rw.scale=.4)$bs
	X <- model.matrix(b)
	par(mfrow=c(1,1))
	plot(x, y, col = rgb(0,0,0,0.25), ylim = c(0,1))
	ff <- X%*%t(br) #not multipled by br here previously
	linv <- b$family$linkinv
	## Get intervals for the curve on the response scale...
	fq <- linv(apply(ff,1,quantile,probs=c(.025,.16,.5,.84,.975)))
	lines(x,fq[1,],col=2,lty=2);lines(x,fq[5,],col=2,lty=2)
	lines(x,fq[2,],col=2);lines(x,fq[4,],col=2)
	lines(x,fq[3,],col=4)
	## Compare to the Gaussian posterior approximation
	fv <- predict(b,se=TRUE)
	lines(x,linv(fv$fit))
	lines(x,linv(fv$fit-2*fv$se.fit),lty=3)
	lines(x,linv(fv$fit+2*fv$se.fit),lty=3)
	## ... Notice the useless 95% CI (black dotted) based on the
	## Gaussian approximation!