Skip to content

Instantly share code, notes, and snippets.

@dill
Created October 26, 2015 22:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dill/dd556b5f20f957d75a47 to your computer and use it in GitHub Desktop.
Save dill/dd556b5f20f957d75a47 to your computer and use it in GitHub Desktop.
### analysis of ribbon seal data using a frequentist
### approach to a GAM
### David L Miller dave@ninepointeightone.net
### License: GNU GPL v3
# load data from
# https://github.com/pconn/SpatPred/blob/master/SpatPred/data/Ribbon_data.rda
load("Ribbon_data.rda")
library(plyr)
library(ggplot2)
library(mgcv)
library(numDeriv)
# working from models defined in:
# https://github.com/pconn/SpatPred/blob/master/SpatPred/inst/run_ribbon_models.R
### 1. data processing
# get the covariates
rib <- Cur.grid@data
rib$count <- Ribbon.count
# get areas
rib$area <- laply(Cur.grid@polygons, function(x) x@Polygons[[1]]@area)
# multiply areas by proportions of available habitat (for predictions)
rib$pred.area <- rib$area*Area.hab
# why not do height and width while we're at it?
rib$height <- laply(Cur.grid@polygons, function(x) abs(diff(range(x@Polygons[[1]]@coords[,2]))))
rib$width <- laply(Cur.grid@polygons, function(x) abs(diff(range(x@Polygons[[1]]@coords[,1]))))
# and locations
xy <- laply(Cur.grid@polygons, function(x) x@Polygons[[1]]@labpt)
rib$x <- xy[,1]
rib$y <- xy[,2]
# remove the points without samples
not.sampled <- c(1:S)
not.sampled <- not.sampled[-which(not.sampled %in% Mapping)]
# setting to NA will make sure mgcv ignores these observations
rib$count[not.sampled] <- NA
# plot check
#p <- ggplot(aes(x=x,y=y,fill=count), data=rib) +
# geom_tile(aes(width=rib$width, height=rib$height))
# add in pseudo-absences
pseudo <- (rib$ice_conc<0.001) & is.na(rib$count)
rib$count[pseudo] <- 0
# effort
rib$effort <- NA
rib$effort[!is.na(rib$count)] <- rib$area[!is.na(rib$count)] *
c(Area.photo, rep(0.1, sum(pseudo)))
### 2. model fitting
# model seems sensitive to basis size, see comments
k1 <- 5
# model as in paper
b <- gam(count ~ offset(log(effort)) +
s(ice_conc, k=k1) + s(dist_mainland, k=k1) +
s(dist_edge, k=k1) + s(dist_shelf, k=k1),
data=rib, family=quasipoisson(), method="REML")
# Q-Q plot produced by gam.check seems to indicate quasipoisson
# is not a great choice; could try Tweedie or negative binomial.
#b.tw <- gam(count ~ offset(log(effort)) +
# s(ice_conc, k=10) + s(dist_mainland, k=10) +
# s(dist_edge, k=10) + s(dist_shelf, k=10),
# data=rib, family=tw(), method="REML")
#b.nb <- gam(count ~ offset(log(effort)) +
# s(ice_conc, k=10) + s(dist_mainland, k=10) +
# s(dist_edge, k=10) + s(dist_shelf, k=10),
# data=rib, family=nb(), method="REML")
### 3. Variance for observations
# calculate L_p matrix (see Wood 2006, p245)
lpmat.obs <- predict(b, type="lpmatrix")
# shortcuts to link inverse and (numerical) derivatives
tmfn <- b$family$linkinv
dtmfn <- function(eta){sapply(eta, grad, f=tmfn)}
# delta method
bread <- rib$pred.area[!is.na(rib$count)] *
(c(dtmfn(lpmat.obs%*%coef(b)))*lpmat.obs)
varmu <- bread %*% (vcov(b) %*% t(bread))
# get the variance-covariance matrix of the predictions in the data
# 375x375 matrix w. diagonals the variances
# what's the maximum value?
max.var.obs <- max(diag(varmu))
### 4. Variance for predictions
# need to replace the effort in the dataset with areas
rib$effort <- rib$area
# L_p matrix for predictions
lpmat.pred <- predict(b, newdata=rib, type="lpmatrix")
# delta method
breadpred <- rib$pred.area * (c(dtmfn(lpmat.pred%*%coef(b)))*lpmat.pred)
varpred <- breadpred %*% (vcov(b) %*% t(breadpred))
### 5. make predictions
pred <- predict(b, newdata=rib, type="response")
rib$N <- pred
# gIVH prediction, removing those outside the hull
rib$out <- diag(varpred) > max.var.obs
rib$N.givh <- rib$N
rib$N.givh[rib$out] <- NA
### 6. plot and report results
# plot all predictions
#p <- ggplot(rib, aes(x=x,y=y))
#p <- p + geom_tile(aes(fill=N), width=plot.dat$width, height=plot.dat$height)
#print(p)
# plot gIVH predictions
p <- ggplot(rib, aes(x=x,y=y))
p <- p + geom_tile(aes(fill=N.givh), width=rib$width, height=rib$height)
print(p)
# report N with all prediction cells included
cat("N :", sum(rib$N),"\n")
# with only those inside the gIVH
cat("N gIVH:", sum(rib$N.givh, na.rm=TRUE),"\n")
@lenthomas
Copy link

You so cool!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment