Last active
February 14, 2018 04:57
-
-
Save alharry/4181004 to your computer and use it in GitHub Desktop.
Multi-model analysis of shark growth
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
############################################################################### | |
# 20/01/2013 | |
# Author: Alastair V Harry - alastair.harry@gmail.com | |
# Centre For Sustainable Tropical Fisheries & Aquaculture | |
# James Cook University, Townsville | |
# | |
# From Harry et al (2013) Age, growth, and reproductive biology of the spot-tail | |
# shark, Carcharhinus sorrah, and the Australian blacktip shark, C. tilstoni, | |
# from the Great Barrier Reef World Heritage Area, north-eastern Australia. | |
# Marine and Freshwater Research | |
# | |
# Description: An R-script for doing a multi-model analysis of fish growth | |
# and plotting with confidence intervals. This version includes six | |
# deterministic growth model. Obviously more can be included | |
# although the section that does bootstrapping and confidence intervals | |
# is only set up to plot models that have 2 or 3 parameters at the moment, so a | |
# model with more parameters probably isn't going to work. Style of outputs | |
# follows Walker (2005) Reproduction in fisheries science. | |
# | |
# NB this was originally written as a function that I could run in other | |
# scripts but I had some problems when the nlstools functions were | |
# included within loops, so the code is quite repetitive. | |
################################################################################ | |
# Specify libraries required for analysis | |
library(nlstools) | |
library(RCurl) | |
# Load data from github | |
url <- "https://raw.githubusercontent.com/alharry/spot-tail/master/SSS.csv" | |
data <- getURL(url) | |
# If this step doesnt work use this, but read this first | |
# http://ademar.name/blog/2006/04/curl-ssl-certificate-problem-v.html | |
#data <- getURL(url,ssl.verifypeer = FALSE) | |
data <- read.csv(textConnection(data)) | |
# Specify sex: "m", "f" or "both" | |
sex="f" | |
# Prepare data for non-linear model fitting and plotting. Choose starting values | |
# and define extremes of plotting areas | |
# Specify an appropriate length at birth, L0 | |
Mini<-L0<-525 | |
# Specify dimensions for plotting (used at the end of this script) | |
Max.Overall<-ceiling(max(na.omit(data$STL))) | |
Max.Plot<-ceiling(max(na.omit(data$AgeAgree))) | |
# Specify subset of data to be used (i.e. males or females or both) and keep | |
# an extra data frame with all the data (data1) | |
data1<-data | |
data<-if(sex=="both"){data}else{data[data$Sex==sex,]} | |
Max<-max(na.omit(data$STL)) | |
Max.Age<-max(na.omit(data$AgeAgree)) | |
# Define starting values for asymptotic growth models using Ford-Walford method | |
# for Linf and K. To estimate L0, a quadratic is fit to the mean length at | |
# age data, and the intercept extracted. This section I got from Derek Ogle's | |
# Fish R. | |
mean.age<-tapply(data$STL,round(data$AgeAgree),mean,na.rm=T) | |
Lt1<-mean.age[2:length(mean.age)] | |
Lt<-mean.age[1:length(mean.age)-1] | |
model<-lm(Lt1~Lt) | |
start.k<- abs(-log(model$coef[2])) | |
start.linf<-abs(model$coef[1]/(1-model$coef[2])) | |
start.L0<-lm(mean.age~poly(as.numeric(names(mean.age)), 2, raw=TRUE))$coef[1] | |
# Remove all non-essential data from the data frame | |
data<-na.omit(data.frame(data$STL,data$AgeAgree)) | |
names(data)<-c("STL","AgeAgree") | |
# Create a set of candidate models for MMI comparison. | |
# Three commonly used non-linear growth models are listed below. | |
# The functions have each been solved (by someone else) in such a way that | |
# they explicitly include the y-intercept (L0, length at time zero) as a | |
# fitted model parameter, and differ from the more common form that explicitly | |
# includes the x-intercept (t0, hypothetical age at zero length). The L0 | |
# parameterisation makes more biological sense for sharks since they are born | |
# live and fully formed. | |
# In instances where you don't have a good data for very young individuals, | |
# it is probably best to use the 2 parameter versions, otherwise the size | |
# at birth will be overestimated by the model. | |
# Constraining the model to have only 2 parameters instead of 3 causes a bias | |
# in the other parameters, so if you have good data for all sizes its best to | |
# use the 3 parameter version. | |
# It doesn't make much sense to include both 2 and 3 parameter versions of the | |
# same model in your set of candidate models. | |
# G1 - 3 parameter VB | |
g1<- STL~ abc2+(abc1-abc2)*(1-exp(-abc3*AgeAgree)) | |
g1.Start<-list(abc1=start.linf,abc2=start.L0,abc3=start.k) | |
g1.name<-"VB3" | |
# G2 - 2 parameter VB | |
g2<- STL~ L0+(ab1-L0)*(1-exp(-ab2*AgeAgree)) | |
g2.Start<-list(ab1=start.linf,ab2=start.k) | |
g2.name<-"VB2" | |
# G3 - 3 parameter Gompertz | |
g3<- STL~ abc2*exp(log(abc1/abc2)*(1-exp(-abc3*AgeAgree))) | |
g3.Start<-list(abc1=start.linf,abc2=start.L0,abc3=start.k) | |
g3.name<-"GOM3" | |
# G4 - 2 parameter Gompertz | |
g4<- STL~L0*exp(log(ab1/L0)*(1-exp(-ab2*AgeAgree))) | |
g4.Start<-list(ab1=start.linf,ab2=start.k) | |
g4.name<-"GOM2" | |
# G5 - 3 parameter Logistic model | |
g5<- STL~ (abc1*abc2*exp(abc3*AgeAgree))/(abc1+abc2*((exp(abc3*AgeAgree))-1)) | |
g5.Start<-list(abc1=start.linf, abc2=start.L0, abc3=start.k) | |
g5.name<-"LOGI3" | |
# G6 - 2 parameter Logistic model | |
g6<- STL~ (ab1*L0*exp(ab2*AgeAgree))/(ab1+L0*((exp(ab2*AgeAgree))-1)) | |
g6.Start<-list(ab1=start.linf,ab2=start.k) | |
g6.name<-"LOGI2" | |
# Select the list of candidate models to compare. NB only 3 parameter | |
# versions are included here | |
Models<-list(g1,g3,g5) | |
Start<-list(g1.Start,g3.Start,g5.Start) | |
Mod.names<-c(g1.name,g3.name,g5.name) | |
Results<-list() | |
Mods<-length(Models) | |
# Fit growth models, outputs are stored as a list object called Results. Models | |
# are fit using nonlinear least squares | |
Results[[1]]<-nls(Models[[1]],data=data,start=Start[[1]],algorithm="port") | |
Results[[2]]<-nls(Models[[2]],data=data,start=Start[[2]],algorithm="port") | |
Results[[3]]<-nls(Models[[3]],data=data,start=Start[[3]],algorithm="port") | |
# Put best fit paramters into a matrix | |
par.matrix<-rbind(coef(Results[[1]]),coef(Results[[2]]),coef(Results[[3]])) | |
# Look at diagnostic plots | |
# plot(nlsResiduals(Results[[1]])) | |
# plot(nlsResiduals(Results[[2]])) | |
# plot(nlsResiduals(Results[[3]])) | |
# Run the multi-model inference comparison. The following section calculates | |
# AIC values, AIC differences, AIC weights and residual standard errors | |
AIC.vals<-rep(NA,Mods) | |
Residual.Standard.Error<-rep(NA,Mods) | |
for(s in 1:Mods){AIC.vals[s]<-AIC(Results[[s]])} | |
AIC.dif<-AIC.vals-min(AIC.vals) | |
AIC.weight= round((exp(-0.5*AIC.dif))/(sum(exp(-0.5*AIC.dif))),4)*100 | |
for(t in 1:Mods){Residual.Standard.Error[t]<-summary(Results[[t]])$sigma} | |
# Store output of MMI analysis. This is the important bit. Beyond this section | |
# its all plotting and confidence intervals, which may have a tendency not | |
# to work so well depending on the sparsity of your data. It's somewhat | |
# 'optional' beyond here anyway, since many people don't present | |
# confidence intervals with their growth models anyway. | |
MMI.Analysis<-data.frame(AIC.vals,AIC.dif,AIC.weight,Residual.Standard.Error, | |
par.matrix, row.names=Mod.names) | |
names(MMI.Analysis)[1:4]<-c("AIC", "AIC dif", "w","RSE") | |
# Calculate bootstrap confidence intervals. First step is to bootstrap the data. | |
# The nlsBoot function takes the data and randomly re-samples it a certain | |
# number of times. 'Iter' is the number of interations bootstraps to be run. | |
# I usually do 10000 in the final analysis but this will take a while. | |
# The actual confidence intervals can be extracted from the 'Bootstrapping' | |
# list e.g. to extract CI for LOGI3 type Bootstrapping[[3]]$bootCI | |
Iter=300 | |
Bootstrapping<-list() | |
Bootstrapping[[1]]<-nlsBoot(Results[[1]],Iter) | |
Bootstrapping[[2]]<-nlsBoot(Results[[2]],Iter) | |
Bootstrapping[[3]]<-nlsBoot(Results[[3]],Iter) | |
# Create a matrix with 95% confidence intervals | |
Conf.Ints<-data.frame(rbind(c(Bootstrapping[[1]]$bootCI[,-1]), | |
c(Bootstrapping[[2]]$bootCI[,-1]), | |
c(Bootstrapping[[3]]$bootCI[,-1])),row.names=Mod.names) | |
Conf.Ints<-Conf.Ints[order(Conf.Ints[1,],decreasing=TRUE)] | |
names(Conf.Ints)[1:length(Conf.Ints[1,])]<-c("95%","5%") | |
# Store all necessary outputs from analysis together | |
MMI.Analysis<-cbind(MMI.Analysis,Conf.Ints) | |
# Round to four significant figures | |
MMI.Analyis<-signif(MMI.Analysis,4) | |
# Specify values of age that you want to predict length over | |
x.deter<-seq(0,Max.Age,0.1) | |
# Number of iterations is now the number of successful bootstraps (i.e. | |
# original Iter minus those bootstraps that didn't converge) | |
Iter<-nrow(Bootstrapping[[which(AIC.dif==0)]]$coefboot) | |
# This bit calculates the actual confidence intervals and prediction intervals | |
# for the 'best fit' model | |
for(k in which(AIC.dif==0)){ | |
plotmatrix<-matrix(NA,ncol=length(x.deter),nrow=Iter) | |
Npars<-length(Start[[which(AIC.dif==0)]]) | |
for(l in (1:Iter)){ | |
abc1<-as.numeric(Bootstrapping[[k]]$coef[l,1]) | |
abc2<-if(Npars==3){as.numeric(Bootstrapping[[k]]$coef[l,2])}else{L0} | |
abc3<-if(Npars==3){as.numeric(Bootstrapping[[k]]$coef[l,3])} | |
else{as.numeric(Bootstrapping[[k]]$coef[l,2])} | |
# Quick add on to do confidence intervals in 2 parameter models | |
ab1<-abc1 | |
ab2<-abc3 | |
AgeAgree=x.deter | |
plotmatrix[l,]<-formula(formula(Results[[k]])[[3]]) | |
} | |
RSE<-Bootstrapping[[k]]$rse | |
boundsmatrix<-matrix(NA,nrow=4,ncol=length(x.deter)) | |
for(m in 1:length(x.deter)){ | |
boundsmatrix[1,m]<-quantile(plotmatrix[,m],0.025) | |
boundsmatrix[2,m]<-quantile(plotmatrix[,m],0.975) | |
boundsmatrix[3,m]<-quantile((plotmatrix-RSE)[,m],0.025) | |
boundsmatrix[4,m]<-quantile((plotmatrix+RSE)[,m],0.975) | |
} | |
} | |
# Plot data and 'best fit' growth model | |
plot(data$STL~data$AgeAgree,las=1,pch=19,col="black",cex=1, | |
xaxp=c(0,Max.Plot,Max.Plot/2),bty="l",xlim=c(0,Max.Plot), | |
ylim=c(0,Max.Overall),cex.axis=1,xlab="Age(years)",ylab="TL(mm)") | |
lines(x.deter,predict(Results[[which(AIC.dif==0)]],list(AgeAgree=x.deter),type="response")) | |
y.deter<-predict(Results[[which(AIC.dif==0)]],list(AgeAgree=x.deter),type="response") | |
# Plot all growth models | |
#for(n in 1:length(Models)){ | |
# lines(x.deter,predict(Results[[n]],list(AgeAgree=x.deter),type="response") | |
# ,col=n)} | |
# Plot confidence intervals for the 'best fit' model | |
lines(x.deter,boundsmatrix[1,],lty=2) | |
lines(x.deter,boundsmatrix[2,],lty=2) | |
# Plot prediction intervals for the 'best fit' model | |
lines(x.deter,boundsmatrix[3,],lty=3) | |
lines(x.deter,boundsmatrix[4,],lty=3) | |
# Save analysis | |
# write.csv(MMI.Analysis, "MMI.Analysis.csv") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment