Created
October 26, 2015 22:04
-
-
Save dill/dd556b5f20f957d75a47 to your computer and use it in GitHub Desktop.
Code to to gIVH with mgcv see http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0141416
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### analysis of ribbon seal data using a frequentist | |
### approach to a GAM | |
### David L Miller dave@ninepointeightone.net | |
### License: GNU GPL v3 | |
# load data from | |
# https://github.com/pconn/SpatPred/blob/master/SpatPred/data/Ribbon_data.rda | |
load("Ribbon_data.rda") | |
library(plyr) | |
library(ggplot2) | |
library(mgcv) | |
library(numDeriv) | |
# working from models defined in: | |
# https://github.com/pconn/SpatPred/blob/master/SpatPred/inst/run_ribbon_models.R | |
### 1. data processing | |
# get the covariates | |
rib <- Cur.grid@data | |
rib$count <- Ribbon.count | |
# get areas | |
rib$area <- laply(Cur.grid@polygons, function(x) x@Polygons[[1]]@area) | |
# multiply areas by proportions of available habitat (for predictions) | |
rib$pred.area <- rib$area*Area.hab | |
# why not do height and width while we're at it? | |
rib$height <- laply(Cur.grid@polygons, function(x) abs(diff(range(x@Polygons[[1]]@coords[,2])))) | |
rib$width <- laply(Cur.grid@polygons, function(x) abs(diff(range(x@Polygons[[1]]@coords[,1])))) | |
# and locations | |
xy <- laply(Cur.grid@polygons, function(x) x@Polygons[[1]]@labpt) | |
rib$x <- xy[,1] | |
rib$y <- xy[,2] | |
# remove the points without samples | |
not.sampled <- c(1:S) | |
not.sampled <- not.sampled[-which(not.sampled %in% Mapping)] | |
# setting to NA will make sure mgcv ignores these observations | |
rib$count[not.sampled] <- NA | |
# plot check | |
#p <- ggplot(aes(x=x,y=y,fill=count), data=rib) + | |
# geom_tile(aes(width=rib$width, height=rib$height)) | |
# add in pseudo-absences | |
pseudo <- (rib$ice_conc<0.001) & is.na(rib$count) | |
rib$count[pseudo] <- 0 | |
# effort | |
rib$effort <- NA | |
rib$effort[!is.na(rib$count)] <- rib$area[!is.na(rib$count)] * | |
c(Area.photo, rep(0.1, sum(pseudo))) | |
### 2. model fitting | |
# model seems sensitive to basis size, see comments | |
k1 <- 5 | |
# model as in paper | |
b <- gam(count ~ offset(log(effort)) + | |
s(ice_conc, k=k1) + s(dist_mainland, k=k1) + | |
s(dist_edge, k=k1) + s(dist_shelf, k=k1), | |
data=rib, family=quasipoisson(), method="REML") | |
# Q-Q plot produced by gam.check seems to indicate quasipoisson | |
# is not a great choice; could try Tweedie or negative binomial. | |
#b.tw <- gam(count ~ offset(log(effort)) + | |
# s(ice_conc, k=10) + s(dist_mainland, k=10) + | |
# s(dist_edge, k=10) + s(dist_shelf, k=10), | |
# data=rib, family=tw(), method="REML") | |
#b.nb <- gam(count ~ offset(log(effort)) + | |
# s(ice_conc, k=10) + s(dist_mainland, k=10) + | |
# s(dist_edge, k=10) + s(dist_shelf, k=10), | |
# data=rib, family=nb(), method="REML") | |
### 3. Variance for observations | |
# calculate L_p matrix (see Wood 2006, p245) | |
lpmat.obs <- predict(b, type="lpmatrix") | |
# shortcuts to link inverse and (numerical) derivatives | |
tmfn <- b$family$linkinv | |
dtmfn <- function(eta){sapply(eta, grad, f=tmfn)} | |
# delta method | |
bread <- rib$pred.area[!is.na(rib$count)] * | |
(c(dtmfn(lpmat.obs%*%coef(b)))*lpmat.obs) | |
varmu <- bread %*% (vcov(b) %*% t(bread)) | |
# get the variance-covariance matrix of the predictions in the data | |
# 375x375 matrix w. diagonals the variances | |
# what's the maximum value? | |
max.var.obs <- max(diag(varmu)) | |
### 4. Variance for predictions | |
# need to replace the effort in the dataset with areas | |
rib$effort <- rib$area | |
# L_p matrix for predictions | |
lpmat.pred <- predict(b, newdata=rib, type="lpmatrix") | |
# delta method | |
breadpred <- rib$pred.area * (c(dtmfn(lpmat.pred%*%coef(b)))*lpmat.pred) | |
varpred <- breadpred %*% (vcov(b) %*% t(breadpred)) | |
### 5. make predictions | |
pred <- predict(b, newdata=rib, type="response") | |
rib$N <- pred | |
# gIVH prediction, removing those outside the hull | |
rib$out <- diag(varpred) > max.var.obs | |
rib$N.givh <- rib$N | |
rib$N.givh[rib$out] <- NA | |
### 6. plot and report results | |
# plot all predictions | |
#p <- ggplot(rib, aes(x=x,y=y)) | |
#p <- p + geom_tile(aes(fill=N), width=plot.dat$width, height=plot.dat$height) | |
#print(p) | |
# plot gIVH predictions | |
p <- ggplot(rib, aes(x=x,y=y)) | |
p <- p + geom_tile(aes(fill=N.givh), width=rib$width, height=rib$height) | |
print(p) | |
# report N with all prediction cells included | |
cat("N :", sum(rib$N),"\n") | |
# with only those inside the gIVH | |
cat("N gIVH:", sum(rib$N.givh, na.rm=TRUE),"\n") | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
You so cool!