alharry/Maturity.ogive.R

## Maturity.ogive.R
###############################################################################
# 20/01/2013
# Author: Alastair V Harry - alastair.harry@gmail.com
# Centre For Sustainable Tropical Fisheries & Aquaculture
# James Cook University, Townsville
#
# From Harry et al (2013) Age, growth, and reproductive biology of the spot-tail
# shark, Carcharhinus sorrah, and the Australian blacktip shark, C. tilstoni,
# from the Great Barrier Reef World Heritage Area, north-eastern Australia.
# Marine and Freshwater Research
#
# Description: An R-script used to analyse length at maturity and/or
# maternity in sharks and plot confidence intervals. Generally following the
# style described in Walker (2005) Reproduction in fisheries science.
# The only real difference is the use of a logit link function in the glm
# rather than a probit link. I don't know what the practical differences are
# between the two, however a probit link can be specified in the glm if
# desired.
###############################################################################

# Specify libraries required for analysis
library(MASS)
library(psyphy)
library(boot)
library(RCurl)

# Load data from github
url <- "https://raw.githubusercontent.com/alharry/spot-tail/master/SSS.csv"
data <- getURL(url)
# If this step doesnt work use this, but read this first
# http://ademar.name/blog/2006/04/curl-ssl-certificate-problem-v.html
#data <- getURL(url,ssl.verifypeer = FALSE)

data <- read.csv(textConnection(data))

# Specify sex: "m", "f" or "both"
sex="m"

# Specify subset of data to be used (i.e. males or females or both)
# Remove missing values and attach data frame. Save a copy of the
# original data (data1)
data1<-data
data<-if(sex=="both"){data}else{data[data$Sex==sex,]}
data<-data[-which(is.na(data$Sex)),]
attach(data)

# Fit a generalised linear model with a logit link function. STL is
# stretched total length in mm. Maturity stage is the binomial response
# variable; 1 for mature and 0 for immature
Maturity.Model<-glm(Maturity.Stage~STL,family=binomial(logit))

# A quick way for obtaining the length at which a specified proportion of
# the population is mature is using the dose.p function, where p = proportion
# mature
L50<- dose.p(Maturity.Model,p=c(0.5))
L95<- dose.p(Maturity.Model,p=c(0.95))

# Alternatively, the fitted parameters of the model can be re-arranged
# so that the model can be parameterised in terms of the key parameters
# of interest (the length at which 50% and 95% of population is mature)

L50<-(Maturity.Model$coef[1]*Maturity.Model$coef[2]^-1)
L95<-(1/Maturity.Model$coef[2])*log(1/0.05-1)-
		Maturity.Model$coef[1]/Maturity.Model$coef[2]


# Tablulate the raw maturity data into some length inverval (Lint), say 50mm
Lint=50
min.x<-min(data$STL[data1$Sex==sex],na.rm=T)
max.x<-max(data$STL[data1$Sex==sex],na.rm=T)
max.x.plot<-ceiling(max.x/Lint)*Lint

Maturity.Table<-table(Maturity.Stage,cut(STL,seq(0,max.x.plot,Lint)))
# Save tabulated data
# write.csv(t(Maturity.Table),"Maturity.Table.csv")
Observed.Proportions<-Maturity.Table[2,]/(Maturity.Table[1,]+Maturity.Table[2,])

# Everything below here is just bootstrap-resampling and plotting

# Set up a vector of x-values over which to predict length at maturity
new <- data.frame(STL = seq(min.x,max.x,0.01))

plot(new$STL,predict(Maturity.Model, data.frame(STL=new$STL),type="response"),
		type="l",las=1,ylab="Proportion mature",xlab="STL(mm)",bty="l" )

# Add observed proportions calculated above if desired
# points(seq(Lint/2,max.x.plot,Lint),Observed.Proportions,pch=19)

# Function for bootstrapping the data. This simply repeats the above analysis.
# The parameters are also expressed in terms of L50 and L95
boot.shark<-function(data, i){
	data <- data[i, ]
	mod <- glm(formula = Maturity.Stage ~ STL, family = binomial(link="logit"),data= data)
	L50<-dose.p(mod,p=c(0.5))
	L95<-dose.p(mod,p=c(0.95))
	Bootstats<-c(coefficients(mod),L50,L95)
}

# Run bootstrap
# "R" is the number of bootstraps to be run, typically set at 10000 for the final
# run, must be a minimum of 300 for the plotting below to work
R=1000

Maturity.Bootstraps<- boot(data,statistic=boot.shark,R)

# This gives 95% confidence intervals on your paramters and L50 and L95
# estimates
Confidence.Intervals<-envelope(Maturity.Bootstraps)
Confidence.Intervals$point

# Function for plotting confidence intervals around curves using
# bootstrap outputs

Confints<-function(data,boot.out,mod,num.curves){
	Boot.results<-boot.out
	Length<-seq(min(data$STL,na.rm=T),max(data$STL,na.rm=T),1)
	Num.curves<- num.curves
	Model<-mod
	par1<-Boot.results$t[1:Num.curves,1]
	par2<-Boot.results$t[1:Num.curves,2]
	par1.vec = rep(par1,each=length(Length))
	par2.vec = rep(par2,each=length(Length))
	predicted.maturity<- 1/(1+exp(-(par1.vec+par2.vec*Length)))
	plotmatrix <- matrix(predicted.maturity,ncol=length(Length),byrow=T)

	# Plot a subsample of the models fitted to bootstrapped data to get an
	# idea of the uncertainty in the data
	#for(i in 1:50){
	#	lines(plotmatrix[i,]~Length)
	#}

	fitted.par.1<-coefficients(Model)[[1]]
	fitted.par.2<-coefficients(Model)[[2]]
	fitted.maturity<- 1/(1+exp(-(fitted.par.1+fitted.par.2*Length)))

	bounds <- quantile(plotmatrix[,1],c(.025,.975))
	boundsmatrix <- matrix(bounds,nrow=2)
	for (i in 2:length(Length)){
		bounds <- quantile(plotmatrix[,i],c(.025,.975))
		boundsmatrix <- cbind(boundsmatrix,bounds)
	}

	return(list(Length=Length,boundsmatrix=boundsmatrix,
					fitted.maturity=fitted.maturity))
}

# Run the above function
CI.Plotting<-Confints(data,Maturity.Bootstraps,Maturity.Model,R)

# Add 95% confidence intervals to graph
for(i in 1:2){lines(CI.Plotting$Length, CI.Plotting$boundsmatrix[i,],lty=2)}

# Detach data
detach(data)
	###############################################################################
	# 20/01/2013
	# Author: Alastair V Harry - alastair.harry@gmail.com
	# Centre For Sustainable Tropical Fisheries & Aquaculture
	# James Cook University, Townsville
	#
	# From Harry et al (2013) Age, growth, and reproductive biology of the spot-tail
	# shark, Carcharhinus sorrah, and the Australian blacktip shark, C. tilstoni,
	# from the Great Barrier Reef World Heritage Area, north-eastern Australia.
	# Marine and Freshwater Research
	#
	# Description: An R-script used to analyse length at maturity and/or
	# maternity in sharks and plot confidence intervals. Generally following the
	# style described in Walker (2005) Reproduction in fisheries science.
	# The only real difference is the use of a logit link function in the glm
	# rather than a probit link. I don't know what the practical differences are
	# between the two, however a probit link can be specified in the glm if
	# desired.
	###############################################################################

	# Specify libraries required for analysis
	library(MASS)
	library(psyphy)
	library(boot)
	library(RCurl)

	# Load data from github
	url <- "https://raw.githubusercontent.com/alharry/spot-tail/master/SSS.csv"
	data <- getURL(url)
	# If this step doesnt work use this, but read this first
	# http://ademar.name/blog/2006/04/curl-ssl-certificate-problem-v.html
	#data <- getURL(url,ssl.verifypeer = FALSE)

	data <- read.csv(textConnection(data))

	# Specify sex: "m", "f" or "both"
	sex="m"

	# Specify subset of data to be used (i.e. males or females or both)
	# Remove missing values and attach data frame. Save a copy of the
	# original data (data1)
	data1<-data
	data<-if(sex=="both"){data}else{data[data$Sex==sex,]}
	data<-data[-which(is.na(data$Sex)),]
	attach(data)

	# Fit a generalised linear model with a logit link function. STL is
	# stretched total length in mm. Maturity stage is the binomial response
	# variable; 1 for mature and 0 for immature
	Maturity.Model<-glm(Maturity.Stage~STL,family=binomial(logit))

	# A quick way for obtaining the length at which a specified proportion of
	# the population is mature is using the dose.p function, where p = proportion
	# mature
	L50<- dose.p(Maturity.Model,p=c(0.5))
	L95<- dose.p(Maturity.Model,p=c(0.95))

	# Alternatively, the fitted parameters of the model can be re-arranged
	# so that the model can be parameterised in terms of the key parameters
	# of interest (the length at which 50% and 95% of population is mature)

	L50<-(Maturity.Model$coef[1]*Maturity.Model$coef[2]^-1)
	L95<-(1/Maturity.Model$coef[2])*log(1/0.05-1)-
	Maturity.Model$coef[1]/Maturity.Model$coef[2]


	# Tablulate the raw maturity data into some length inverval (Lint), say 50mm
	Lint=50
	min.x<-min(data$STL[data1$Sex==sex],na.rm=T)
	max.x<-max(data$STL[data1$Sex==sex],na.rm=T)
	max.x.plot<-ceiling(max.x/Lint)*Lint

	Maturity.Table<-table(Maturity.Stage,cut(STL,seq(0,max.x.plot,Lint)))
	# Save tabulated data
	# write.csv(t(Maturity.Table),"Maturity.Table.csv")
	Observed.Proportions<-Maturity.Table[2,]/(Maturity.Table[1,]+Maturity.Table[2,])

	# Everything below here is just bootstrap-resampling and plotting

	# Set up a vector of x-values over which to predict length at maturity
	new <- data.frame(STL = seq(min.x,max.x,0.01))

	plot(new$STL,predict(Maturity.Model, data.frame(STL=new$STL),type="response"),
	type="l",las=1,ylab="Proportion mature",xlab="STL(mm)",bty="l" )

	# Add observed proportions calculated above if desired
	# points(seq(Lint/2,max.x.plot,Lint),Observed.Proportions,pch=19)

	# Function for bootstrapping the data. This simply repeats the above analysis.
	# The parameters are also expressed in terms of L50 and L95
	boot.shark<-function(data, i){
	data <- data[i, ]
	mod <- glm(formula = Maturity.Stage ~ STL, family = binomial(link="logit"),data= data)
	L50<-dose.p(mod,p=c(0.5))
	L95<-dose.p(mod,p=c(0.95))
	Bootstats<-c(coefficients(mod),L50,L95)
	}

	# Run bootstrap
	# "R" is the number of bootstraps to be run, typically set at 10000 for the final
	# run, must be a minimum of 300 for the plotting below to work
	R=1000

	Maturity.Bootstraps<- boot(data,statistic=boot.shark,R)

	# This gives 95% confidence intervals on your paramters and L50 and L95
	# estimates
	Confidence.Intervals<-envelope(Maturity.Bootstraps)
	Confidence.Intervals$point

	# Function for plotting confidence intervals around curves using
	# bootstrap outputs

	Confints<-function(data,boot.out,mod,num.curves){
	Boot.results<-boot.out
	Length<-seq(min(data$STL,na.rm=T),max(data$STL,na.rm=T),1)
	Num.curves<- num.curves
	Model<-mod
	par1<-Boot.results$t[1:Num.curves,1]
	par2<-Boot.results$t[1:Num.curves,2]
	par1.vec = rep(par1,each=length(Length))
	par2.vec = rep(par2,each=length(Length))
	predicted.maturity<- 1/(1+exp(-(par1.vec+par2.vec*Length)))
	plotmatrix <- matrix(predicted.maturity,ncol=length(Length),byrow=T)

	# Plot a subsample of the models fitted to bootstrapped data to get an
	# idea of the uncertainty in the data
	#for(i in 1:50){
	# lines(plotmatrix[i,]~Length)
	#}

	fitted.par.1<-coefficients(Model)[[1]]
	fitted.par.2<-coefficients(Model)[[2]]
	fitted.maturity<- 1/(1+exp(-(fitted.par.1+fitted.par.2*Length)))

	bounds <- quantile(plotmatrix[,1],c(.025,.975))
	boundsmatrix <- matrix(bounds,nrow=2)
	for (i in 2:length(Length)){
	bounds <- quantile(plotmatrix[,i],c(.025,.975))
	boundsmatrix <- cbind(boundsmatrix,bounds)
	}

	return(list(Length=Length,boundsmatrix=boundsmatrix,
	fitted.maturity=fitted.maturity))
	}

	# Run the above function
	CI.Plotting<-Confints(data,Maturity.Bootstraps,Maturity.Model,R)

	# Add 95% confidence intervals to graph
	for(i in 1:2){lines(CI.Plotting$Length, CI.Plotting$boundsmatrix[i,],lty=2)}

	# Detach data
	detach(data)