TonyLadson/NonLinear.R

## NonLinear.R
###########################################################################################
#
#  Fitting non-linear models in R
#  See tonyladson
#
#  5 June 2016
###############################################################################################
#
# Example data come from Figure 5 from Ian Cordery's thesis
# Also Figure 3 from Cordery, 1970
# Cordery, I. (1968). Improved techniques in flood estimation and forecasting.
# PhD, The University of New South Wales.
#
# Cordery, I. (1970). "Initial loss for flood estimation and forecasting."
# Journal of the Hydraulics Division, American Society of Civil Engineers 96(12): 2447-2466.

#remove(list = objects())

library(ggplot2)
library(dplyr)
library(propagate)
library(nlstools)
library(nls2) # this loads the proto package which is used to calcualte confidence intervals
library(fpp) # for the Acf function
library(car)
library(ggExtra)


# Data from digitizing Figure 5 from Cordery, 1968
# See http://www.unsworks.unsw.edu.au/primo_library/libweb/action/dlDisplay.do?vid=UNSWORKS&docId=unsworks_36997

Fig5 <-
  structure(list(x = c(9.64021986421365, 37.5464569595027, 25.7273447779685,
                       46.7390997673626, 104.193117316487, 108.132821376998, 86.1592245759561,
                       81.2589611595452, 76.0060224121967, 93.4063820127887, 79.6033107877613,
                       90.7658056258769, 120.32769309295, 111.463358956799, 125.908940512008,
                       120.656001764659, 151.188708233622, 147.57731284482, 139.041287380379,
                       143.3093001126, 147.591419858058, 148.576345873186, 174.855146623166,
                       155.79913665079, 208.328524124275, 203.732202720345, 184.361991089498,
                       188.60563716249, 180.726229041467, 197.812386983587, 201.09547370068,
                       236.895225930229, 247.729412096636, 273.994105833378, 253.967276859112,
                       251.340807485438, 269.055368744501, 306.48255731936, 318.301669500894,
                       320.27152153115, 342.92481987909, 336.030337773195, 335.702029101486,
                       333.089666741049, 318.315776514131, 322.912097918061, 410.242204592731,
                       395.782516024285, 411.54133226633, 432.224778584015),
                 y = c(2.54132343897037, 2.50762114884014, 2.38923518377857, 2.41501285415006, 2.30416887155265,
                       2.23724762261166, 2.18569228186868, 2.17557253236737, 2.18588360051597,
                       2.00543990791553, 1.93068466383821, 1.86624048790948, 2.03906164282585,
                       2.10866135282887, 2.162794460609, 2.09835028468028, 2.19888319912909,
                       2.17288400190285, 2.03626234893394, 1.88406333031477, 1.76033051253161,
                       1.73197507512297, 1.99748507994933, 2.01552944920937, 2.18308430662406,
                       1.94850750624349, 1.84024129068323, 1.86601896105472, 1.88148556327762,
                       1.75259721142016, 1.75259721142016, 1.801574785126, 1.77321934771736,
                       1.78868594994025, 1.4973982747424, 1.47162060437091, 1.22684349926912,
                       1.43287354359376, 1.85047180361192, 1.61073946915705, 1.64167267360284,
                       1.56433966248837, 1.48937296095878, 1.25479616057821, 1.20066305279808,
                       1.04341926353198, 1.56670597207325, 1.41461771688145, 1.07693023501492,
                       1.07950800205207)),
            .Names = c("x", "y"),
            row.names = c(NA, -50L),
            class = c("tbl_df", "tbl", "data.frame"))


# Create data frame from digitised data

bobo.dat <- Fig5 %>% # Bobo River
  mutate(API = x) %>%
  mutate(ILs = 10^y) %>% # ILs is on a log scale
  mutate(ILs_mm = ILs/100*25.4) %>% # convert from points (inches/100) to mm
  mutate(API_mm = API/100*25.4)


# Make a plot the initial loss, API relationship

API_ILplot <- bobo.dat  %>%
  ggplot(aes(x = API_mm, y = ILs_mm)) + geom_point() +
  theme_bw() +
  scale_x_continuous(name = 'API (mm)') +
  scale_y_continuous(name = 'Initial loss (mm)') +
  theme(
    panel.background = element_rect(fill="gray98"),
    axis.title.x = element_text(colour="grey20", size=16, margin=margin(20,0,0,0)),
    axis.text.x = element_text(colour="grey20",size=14),
    axis.title.y = element_text(colour="grey20",size=16, margin = margin(0,20,0,0)),
    axis.text.y = element_text(colour="grey20",size=14),
    legend.title = element_text(colour="grey20",size=12),
    plot.margin = unit(c(0.5, 0.5, 1, 0.5), "cm")) # top, right, bottom, left

# Draw the plot

API_ILplot

# Check marginal histograms
#ggExtra::ggMarginal(API_ILplot, type = "histogram")


# Model
# From Cordery

# IL = ILmax(N)^API

# Preview model to check starting values are reasonable

nlstools::preview(ILs_mm ~ ILmax*N^API_mm, data = bobo.dat, start = list(ILmax = 80, N = 0.98))


# fit the model using NLS
bobo.mod <- nls(ILs_mm ~ ILmax*N^API_mm, data = bobo.dat, start = list(ILmax = 80, N = 0.98), trace = TRUE)


# add fitted model to plot
# geom_smooth can use nls and add the fitted model or use nlstools::plotfit

API_ILplot +
  geom_smooth(method = "nls", formula = y ~ ILmax*a^x, method.args = list(start = list(ILmax = 84, a = 0.97)), se = FALSE)


# Check model

summary(bobo.mod)

nlstools::overview(bobo.mod)
#
# ------
#   Formula: ILs_mm ~ ILmax * N^API_mm
#
# Parameters:
#   Estimate Std. Error t value Pr(>|t|)
# ILmax 84.80480    6.13395   13.82   <2e-16 ***
#   N      0.96886    0.00268  361.49   <2e-16 ***
#   ---
#   Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#
# Residual standard error: 9.023 on 48 degrees of freedom
#
# Number of iterations to convergence: 7
# Achieved convergence tolerance: 1.921e-06
#
# ------
#   Residual sum of squares: 3910
#
# ------
#   t-based confidence interval:
#   2.5%      97.5%
#   ILmax 72.4716594 97.1379372
# N      0.9634766  0.9742544
#
# ------
#   Correlation matrix:
#   ILmax          N
# ILmax  1.0000000 -0.8074648
# N     -0.8074648  1.0000000
#

# R-square
RSS <- sum(residuals(bobo.mod)^2)
TSS <- sum((bobo.dat$ILs_mm - mean(bobo.dat$ILs_mm))^2)
R.square <- 1 - (RSS/TSS)
R.square
#[1] 0.7934562


deviance(bobo.mod)
# [1] 3907.827

logLik(bobo.mod)
# 'log Lik.' -179.9148 (df=3)

coef(bobo.mod)

# Confidence intervals for parameters

confint(bobo.mod)

nlstools::confint2(bobo.mod, method = "asymptotic")
#             2.5 %     97.5 %
#   ILmax 72.4716594 97.1379372
#   N      0.9634766  0.9742544


nlstools::confint2(bobo.mod, method = "profile")
#              2.5%      97.5%
#   ILmax  72.4065771 98.3413675
#    N      0.9627527  0.9743995


# Bootstrap confidence intervals for parameters
boot.cl <- nlstools::nlsBoot(bobo.mod)
boot.cl$bootCI
#            Median      2.5%      97.5%
#   ILmax 84.7308584 73.086423 97.0838838
#   N      0.9691428  0.963597  0.9741001

# Confidence region
plot(nlstools::nlsConfRegions(bobo.mod))


nlstools::nlsContourRSS(bobo.mod)
plot(nlsContourRSS(bobo.mod), nlev = 4)


# Check assumptions

test.nlsResiduals(nlsResiduals(bobo.mod))

# residuals
# 1. Constant variance
plot(nlsResiduals(bobo.mod)) # not too bad


# 2. Normal errors
#
plot(nlsResiduals(bobo.mod),  which = 6)
# perhaps heavy tailed, could consider a transformation but passes normality test
plot(nlsResiduals(bobo.mod),  which = 5) # looks ok

# 3. Independence
nlstools::test.nlsResiduals(nlsResiduals(bobo.mod)) # autocorrelated
plot(nlsResiduals(bobo.mod), which = 4)
forecast::Acf(resid(bobo.mod))

# This autocorrelation is just an artifact of the way that I've collected the data
# i.e. by digitizing it so its sorted from high IL to low IL

# We can test by randomising the input data and re-running the model

set.seed(2016)
bobo.dat.rand <- bobo.dat[sample(1:50) ,]
bobo.mod.rand <- nls(ILs_mm ~ ILmax*N^API_mm, data = bobo.dat.rand, start = list(ILmax = 80, N = 0.98), trace = TRUE)
test.nlsResiduals(nlsResiduals(bobo.mod.rand))
# passes teh runs test

# influential observations
plot(nlstools::nlsJack(bobo.mod))
summary(nlstools::nlsJack(bobo.mod))

# Bootstrap
plot(nlsBoot(bobo.mod))
plot(nlsBoot(bobo.mod), type = "boxplot")


#_________________________________________________________________________________________________________________
# Confidence intervals for predicted values
# It seems this is not straight forward
# https://rmazing.wordpress.com/2013/08/14/predictnls-part-1-monte-carlo-simulation-confidence-intervals-for-nls-models/
# http://www.inside-r.org/packages/cran/propagate/docs/predictNLS
# https://quantitativeconservationbiology.wordpress.com/2013/07/02/confidence-interval-for-a-model-fitted-with-nls-in-r/
# see http://www.leg.ufpr.br/~walmes/cursoR/ciaeear/as.lm.R
# https://quantitativeconservationbiology.wordpress.com/2013/07/02/confidence-interval-for-a-model-fitted-with-nls-in-r/
# http://thr3ads.net/r-help/2011/05/1053390-plotting-confidence-bands-from-predict.nls

# Using the as.lm approach
# from http://www.leg.ufpr.br/~walmes/cursoR/ciaeear/as.lm.R


# if object is an "nls" object then its often used like this:
# predict(as.lm(object), ...) where ... are any predict.lm args

# as.lm.nls effectively just does this:
# lm(lhs ~ gradient - 1, offset = fitted(object),
#   list(gradient = object$m$gradient(), lhs = object$m$lhs()))
# so most of the code is just to get the names right.


as.lm <- function(object, ...) UseMethod("as.lm")

as.lm.nls <- function(object, ...) {
  if (!inherits(object, "nls")) {
    w <- paste("expected object of class nls but got object of class:",
               paste(class(object), collapse = " "))
    warning(w)
  }

  gradient <- object$m$gradient()
  if (is.null(colnames(gradient))) {
    colnames(gradient) <- names(object$m$getPars())
  }

  response.name <- if (length(formula(object)) == 2) "0" else
    as.character(formula(object)[[2]])

  lhs <- object$m$lhs()
  L <- data.frame(lhs, gradient)
  names(L)[1] <- response.name

  fo <- sprintf("%s ~ %s - 1", response.name,
                paste(colnames(gradient), collapse = "+"))
  fo <- as.formula(fo, env = as.proto.list(L))

  do.call("lm", list(fo, offset = substitute(fitted(object))))

}


# Calculate and add confidence intervals to plot


# Calculate prediction intervals for all the given data points
predCI <- predict(as.lm.nls(bobo.mod), interval = 'prediction', level = 0.95)
predCI

# Then we need to interpolate to make smooth contour intervals

x.seq <- seq(min(bobo.dat$API_mm),max(bobo.dat$API_mm), length.out = 100) # generate 100 points between the max and min of the x values

lwr <- approx(bobo.dat$API_mm, predCI [, 2], xout = x.seq)$y ## lower CI
upr <- approx(bobo.dat$API_mm, predCI[, 3], xout = x.seq)$y ## upper CI

# tidy into data frame
CL.as.lm <- data_frame(x.seq = x.seq,  lwr = lwr, upr = upr)


# Add to plot

API_ILplot +
  geom_smooth(method = "nls", formula = y ~ ILmax*a^x, method.args = list(start = list(ILmax = 80, a = 0.98)), se = FALSE) +
  geom_line(data = CL.as.lm, aes(x = x.seq, y = upr), linetype = 2) +
  geom_line(data = CL.as.lm, aes(x = x.seq, y = lwr), linetype = 2)

#_________________________________________________________________________________________
# Using the predictNLS function from the propagate package.

#help(predictNLS)

# Test for a particular value of API

my.newdata <- data.frame(API_mm = 30)
x <- propagate::predictNLS(bobo.mod, interval = "prediction", newdata = my.newdata, alpha = 0.05)
x

# $summary
#       Prop.Mean.1 Prop.Mean.2 Prop.sd.1 Prop.sd.2 Prop.2.5% Prop.97.5% Sim.Mean   Sim.sd Sim.Median  Sim.MAD Sim.2.5% Sim.97.5%
#   1     32.8341    32.78426  1.616907  1.622614  14.35145   51.21707   32.784 1.621922   32.73954 1.613052 29.72054  36.07428
#
# $prop
# $prop[[1]]
# Results from error propagation:
#   Mean.1    Mean.2      sd.1      sd.2      2.5%     97.5%
#   32.834103 32.784258  1.616907  1.622614 29.616476 35.967704
#
# Results from Monte Carlo simulation:
#   Mean        sd    Median       MAD      2.5%     97.5%
#   32.783996  1.621922 32.739536  1.613052 29.720536 36.074284

# As before, generate a sequence of x values for plotting
x.seq <- seq(min(bobo.dat$API_mm),max(bobo.dat$API_mm), length.out = 100) # generate 100 points between the max and min of the x values
my.newdata <- data.frame(API_mm = x.seq) # 100 API values where CL will be calculated

# calculate confidence limits as each x.seq

CL.predictNLS <- propagate::predictNLS(bobo.mod, interval = "prediction", newdata = my.newdata, alpha = 0.05)$summary

head(CL.predictNLS)

# add to plot
API_ILplot +
  geom_smooth(method = "nls", formula = y ~ ILmax*a^x, method.args = list(start = list(ILmax = 80, a = 0.98)), se = FALSE) +
  geom_line(data = CL.as.lm, aes(x = x.seq, y = upr), linetype = 1) +
  geom_line(data = CL.as.lm, aes(x = x.seq, y = lwr), linetype = 1) +
  geom_line(data = CL.predictNLS, aes(x = x.seq, y = `Prop.2.5%`), linetype = 2, colour = 'Blue') +
  geom_line(data = CL.predictNLS, aes(x = x.seq, y = `Prop.97.5%`), linetype = 2, colour = 'Blue') +
  geom_line(data = CL.predictNLS, aes(x = x.seq, y = `Sim.2.5%`), linetype = 2, colour = 'Green') +
  geom_line(data = CL.predictNLS, aes(x = x.seq, y = `Sim.97.5%`), linetype = 2, colour = 'Green')

# in this case, the prediction intervals are pretty much the same
# The Sim intervals are confidence intervals not prediction intervals

# Plot for blog

API_ILplot +
  geom_smooth(method = "nls", formula = y ~ ILmax*a^x, method.args = list(start = list(ILmax = 80, a = 0.98)), se = FALSE) +
  geom_line(data = CL.predictNLS, aes(x = x.seq, y = `Prop.2.5%`), linetype = 2, colour = 'Blue') +
  geom_line(data = CL.predictNLS, aes(x = x.seq, y = `Prop.97.5%`), linetype = 2, colour = 'Blue') +
  geom_hline(yintercept = 0)

# When does the lower prediction interval become negative

x.seq[which.min(abs(CL.predictNLS$`Prop.2.5%`))]
# [1] 47.9853

#_________________________________________________________________________
# Other models
# Log-Log model
bobo.mod.log <- lm(log(ILs_mm) ~ log(API_mm), data = bobo.dat)

bobo.mod.log
summary(bobo.mod.log)


# Check residuals
plot(bobo.mod.log)
# Comparison of Log-Log model and Cordery's model

resid(bobo.mod.log)
Acf(resid(bobo.mod.log)) # some autocorrelation


# Compare models with AIC
AIC(bobo.mod.log)
# [1] 77.61382

AIC(bobo.mod)
# [1] 365.8295

# So the log-log model is better (at least according to the AIC)

# Calculate prediction intervals and plot

x.seq <- seq(min(bobo.dat$API_mm),max(bobo.dat$API_mm), length.out = 100) # generate 100 points between the max and min of the x values
my.newdata <- data.frame(API_mm = x.seq) # 100 API values where CL will be calculated

CL.log_log <- as.data.frame(predict(bobo.mod.log, newdata = my.newdata, interval = 'prediction' ))


API_ILplot +
  geom_line(data = CL.log_log, aes(x = x.seq, y = exp(fit)), linetype = 1, colour = 'Blue') +
  geom_line(data = CL.log_log, aes(x = x.seq, y = exp(lwr)), linetype = 2, colour = 'Blue') +
  geom_line(data = CL.log_log, aes(x = x.seq, y = exp(upr)), linetype = 2, colour = 'Blue') +
  scale_y_continuous(name = 'Initial loss (mm)', limits = c(1, 100))
	###########################################################################################
	#
	# Fitting non-linear models in R
	# See tonyladson
	#
	# 5 June 2016
	###############################################################################################
	#
	# Example data come from Figure 5 from Ian Cordery's thesis
	# Also Figure 3 from Cordery, 1970
	# Cordery, I. (1968). Improved techniques in flood estimation and forecasting.
	# PhD, The University of New South Wales.
	#
	# Cordery, I. (1970). "Initial loss for flood estimation and forecasting."
	# Journal of the Hydraulics Division, American Society of Civil Engineers 96(12): 2447-2466.

	#remove(list = objects())

	library(ggplot2)
	library(dplyr)
	library(propagate)
	library(nlstools)
	library(nls2) # this loads the proto package which is used to calcualte confidence intervals
	library(fpp) # for the Acf function
	library(car)
	library(ggExtra)



	# Data from digitizing Figure 5 from Cordery, 1968
	# See http://www.unsworks.unsw.edu.au/primo_library/libweb/action/dlDisplay.do?vid=UNSWORKS&docId=unsworks_36997

	Fig5 <-
	structure(list(x = c(9.64021986421365, 37.5464569595027, 25.7273447779685,
	46.7390997673626, 104.193117316487, 108.132821376998, 86.1592245759561,
	81.2589611595452, 76.0060224121967, 93.4063820127887, 79.6033107877613,
	90.7658056258769, 120.32769309295, 111.463358956799, 125.908940512008,
	120.656001764659, 151.188708233622, 147.57731284482, 139.041287380379,
	143.3093001126, 147.591419858058, 148.576345873186, 174.855146623166,
	155.79913665079, 208.328524124275, 203.732202720345, 184.361991089498,
	188.60563716249, 180.726229041467, 197.812386983587, 201.09547370068,
	236.895225930229, 247.729412096636, 273.994105833378, 253.967276859112,
	251.340807485438, 269.055368744501, 306.48255731936, 318.301669500894,
	320.27152153115, 342.92481987909, 336.030337773195, 335.702029101486,
	333.089666741049, 318.315776514131, 322.912097918061, 410.242204592731,
	395.782516024285, 411.54133226633, 432.224778584015),
	y = c(2.54132343897037, 2.50762114884014, 2.38923518377857, 2.41501285415006, 2.30416887155265,
	2.23724762261166, 2.18569228186868, 2.17557253236737, 2.18588360051597,
	2.00543990791553, 1.93068466383821, 1.86624048790948, 2.03906164282585,
	2.10866135282887, 2.162794460609, 2.09835028468028, 2.19888319912909,
	2.17288400190285, 2.03626234893394, 1.88406333031477, 1.76033051253161,
	1.73197507512297, 1.99748507994933, 2.01552944920937, 2.18308430662406,
	1.94850750624349, 1.84024129068323, 1.86601896105472, 1.88148556327762,
	1.75259721142016, 1.75259721142016, 1.801574785126, 1.77321934771736,
	1.78868594994025, 1.4973982747424, 1.47162060437091, 1.22684349926912,
	1.43287354359376, 1.85047180361192, 1.61073946915705, 1.64167267360284,
	1.56433966248837, 1.48937296095878, 1.25479616057821, 1.20066305279808,
	1.04341926353198, 1.56670597207325, 1.41461771688145, 1.07693023501492,
	1.07950800205207)),
	.Names = c("x", "y"),
	row.names = c(NA, -50L),
	class = c("tbl_df", "tbl", "data.frame"))



	# Create data frame from digitised data

	bobo.dat <- Fig5 %>% # Bobo River
	mutate(API = x) %>%
	mutate(ILs = 10^y) %>% # ILs is on a log scale
	mutate(ILs_mm = ILs/100*25.4) %>% # convert from points (inches/100) to mm
	mutate(API_mm = API/100*25.4)


	# Make a plot the initial loss, API relationship

	API_ILplot <- bobo.dat %>%
	ggplot(aes(x = API_mm, y = ILs_mm)) + geom_point() +
	theme_bw() +
	scale_x_continuous(name = 'API (mm)') +
	scale_y_continuous(name = 'Initial loss (mm)') +
	theme(
	panel.background = element_rect(fill="gray98"),
	axis.title.x = element_text(colour="grey20", size=16, margin=margin(20,0,0,0)),
	axis.text.x = element_text(colour="grey20",size=14),
	axis.title.y = element_text(colour="grey20",size=16, margin = margin(0,20,0,0)),
	axis.text.y = element_text(colour="grey20",size=14),
	legend.title = element_text(colour="grey20",size=12),
	plot.margin = unit(c(0.5, 0.5, 1, 0.5), "cm")) # top, right, bottom, left

	# Draw the plot

	API_ILplot

	# Check marginal histograms
	#ggExtra::ggMarginal(API_ILplot, type = "histogram")



	# Model
	# From Cordery

	# IL = ILmax(N)^API

	# Preview model to check starting values are reasonable

	nlstools::preview(ILs_mm ~ ILmax*N^API_mm, data = bobo.dat, start = list(ILmax = 80, N = 0.98))


	# fit the model using NLS
	bobo.mod <- nls(ILs_mm ~ ILmax*N^API_mm, data = bobo.dat, start = list(ILmax = 80, N = 0.98), trace = TRUE)


	# add fitted model to plot
	# geom_smooth can use nls and add the fitted model or use nlstools::plotfit

	API_ILplot +
	geom_smooth(method = "nls", formula = y ~ ILmax*a^x, method.args = list(start = list(ILmax = 84, a = 0.97)), se = FALSE)


	# Check model

	summary(bobo.mod)

	nlstools::overview(bobo.mod)
	#
	# ------
	# Formula: ILs_mm ~ ILmax * N^API_mm
	#
	# Parameters:
	# Estimate Std. Error t value Pr(>\|t\|)
	# ILmax 84.80480 6.13395 13.82 <2e-16 ***
	# N 0.96886 0.00268 361.49 <2e-16 ***
	# ---
	# Signif. codes: 0 ‘*’ 0.001 ‘’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
	#
	# Residual standard error: 9.023 on 48 degrees of freedom
	#
	# Number of iterations to convergence: 7
	# Achieved convergence tolerance: 1.921e-06
	#
	# ------
	# Residual sum of squares: 3910
	#
	# ------
	# t-based confidence interval:
	# 2.5% 97.5%
	# ILmax 72.4716594 97.1379372
	# N 0.9634766 0.9742544
	#
	# ------
	# Correlation matrix:
	# ILmax N
	# ILmax 1.0000000 -0.8074648
	# N -0.8074648 1.0000000
	#

	# R-square
	RSS <- sum(residuals(bobo.mod)^2)
	TSS <- sum((bobo.dat$ILs_mm - mean(bobo.dat$ILs_mm))^2)
	R.square <- 1 - (RSS/TSS)
	R.square
	#[1] 0.7934562


	deviance(bobo.mod)
	# [1] 3907.827

	logLik(bobo.mod)
	# 'log Lik.' -179.9148 (df=3)

	coef(bobo.mod)

	# Confidence intervals for parameters

	confint(bobo.mod)

	nlstools::confint2(bobo.mod, method = "asymptotic")
	# 2.5 % 97.5 %
	# ILmax 72.4716594 97.1379372
	# N 0.9634766 0.9742544


	nlstools::confint2(bobo.mod, method = "profile")
	# 2.5% 97.5%
	# ILmax 72.4065771 98.3413675
	# N 0.9627527 0.9743995


	# Bootstrap confidence intervals for parameters
	boot.cl <- nlstools::nlsBoot(bobo.mod)
	boot.cl$bootCI
	# Median 2.5% 97.5%
	# ILmax 84.7308584 73.086423 97.0838838
	# N 0.9691428 0.963597 0.9741001

	# Confidence region
	plot(nlstools::nlsConfRegions(bobo.mod))


	nlstools::nlsContourRSS(bobo.mod)
	plot(nlsContourRSS(bobo.mod), nlev = 4)


	# Check assumptions

	test.nlsResiduals(nlsResiduals(bobo.mod))

	# residuals
	# 1. Constant variance
	plot(nlsResiduals(bobo.mod)) # not too bad


	# 2. Normal errors
	#
	plot(nlsResiduals(bobo.mod), which = 6)
	# perhaps heavy tailed, could consider a transformation but passes normality test
	plot(nlsResiduals(bobo.mod), which = 5) # looks ok

	# 3. Independence
	nlstools::test.nlsResiduals(nlsResiduals(bobo.mod)) # autocorrelated
	plot(nlsResiduals(bobo.mod), which = 4)
	forecast::Acf(resid(bobo.mod))

	# This autocorrelation is just an artifact of the way that I've collected the data
	# i.e. by digitizing it so its sorted from high IL to low IL

	# We can test by randomising the input data and re-running the model

	set.seed(2016)
	bobo.dat.rand <- bobo.dat[sample(1:50) ,]
	bobo.mod.rand <- nls(ILs_mm ~ ILmax*N^API_mm, data = bobo.dat.rand, start = list(ILmax = 80, N = 0.98), trace = TRUE)
	test.nlsResiduals(nlsResiduals(bobo.mod.rand))
	# passes teh runs test

	# influential observations
	plot(nlstools::nlsJack(bobo.mod))
	summary(nlstools::nlsJack(bobo.mod))

	# Bootstrap
	plot(nlsBoot(bobo.mod))
	plot(nlsBoot(bobo.mod), type = "boxplot")



	#_________________________________________________________________________________________________________________
	# Confidence intervals for predicted values
	# It seems this is not straight forward
	# https://rmazing.wordpress.com/2013/08/14/predictnls-part-1-monte-carlo-simulation-confidence-intervals-for-nls-models/
	# http://www.inside-r.org/packages/cran/propagate/docs/predictNLS
	# https://quantitativeconservationbiology.wordpress.com/2013/07/02/confidence-interval-for-a-model-fitted-with-nls-in-r/
	# see http://www.leg.ufpr.br/~walmes/cursoR/ciaeear/as.lm.R
	# https://quantitativeconservationbiology.wordpress.com/2013/07/02/confidence-interval-for-a-model-fitted-with-nls-in-r/
	# http://thr3ads.net/r-help/2011/05/1053390-plotting-confidence-bands-from-predict.nls

	# Using the as.lm approach
	# from http://www.leg.ufpr.br/~walmes/cursoR/ciaeear/as.lm.R


	# if object is an "nls" object then its often used like this:
	# predict(as.lm(object), ...) where ... are any predict.lm args

	# as.lm.nls effectively just does this:
	# lm(lhs ~ gradient - 1, offset = fitted(object),
	# list(gradient = object$m$gradient(), lhs = object$m$lhs()))
	# so most of the code is just to get the names right.


	as.lm <- function(object, ...) UseMethod("as.lm")

	as.lm.nls <- function(object, ...) {
	if (!inherits(object, "nls")) {
	w <- paste("expected object of class nls but got object of class:",
	paste(class(object), collapse = " "))
	warning(w)
	}

	gradient <- object$m$gradient()
	if (is.null(colnames(gradient))) {
	colnames(gradient) <- names(object$m$getPars())
	}

	response.name <- if (length(formula(object)) == 2) "0" else
	as.character(formula(object)[[2]])

	lhs <- object$m$lhs()
	L <- data.frame(lhs, gradient)
	names(L)[1] <- response.name

	fo <- sprintf("%s ~ %s - 1", response.name,
	paste(colnames(gradient), collapse = "+"))
	fo <- as.formula(fo, env = as.proto.list(L))

	do.call("lm", list(fo, offset = substitute(fitted(object))))

	}




	# Calculate and add confidence intervals to plot


	# Calculate prediction intervals for all the given data points
	predCI <- predict(as.lm.nls(bobo.mod), interval = 'prediction', level = 0.95)
	predCI

	# Then we need to interpolate to make smooth contour intervals

	x.seq <- seq(min(bobo.dat$API_mm),max(bobo.dat$API_mm), length.out = 100) # generate 100 points between the max and min of the x values

	lwr <- approx(bobo.dat$API_mm, predCI [, 2], xout = x.seq)$y ## lower CI
	upr <- approx(bobo.dat$API_mm, predCI[, 3], xout = x.seq)$y ## upper CI

	# tidy into data frame
	CL.as.lm <- data_frame(x.seq = x.seq, lwr = lwr, upr = upr)


	# Add to plot

	API_ILplot +
	geom_smooth(method = "nls", formula = y ~ ILmax*a^x, method.args = list(start = list(ILmax = 80, a = 0.98)), se = FALSE) +
	geom_line(data = CL.as.lm, aes(x = x.seq, y = upr), linetype = 2) +
	geom_line(data = CL.as.lm, aes(x = x.seq, y = lwr), linetype = 2)

	#_________________________________________________________________________________________
	# Using the predictNLS function from the propagate package.

	#help(predictNLS)

	# Test for a particular value of API

	my.newdata <- data.frame(API_mm = 30)
	x <- propagate::predictNLS(bobo.mod, interval = "prediction", newdata = my.newdata, alpha = 0.05)
	x

	# $summary
	# Prop.Mean.1 Prop.Mean.2 Prop.sd.1 Prop.sd.2 Prop.2.5% Prop.97.5% Sim.Mean Sim.sd Sim.Median Sim.MAD Sim.2.5% Sim.97.5%
	# 1 32.8341 32.78426 1.616907 1.622614 14.35145 51.21707 32.784 1.621922 32.73954 1.613052 29.72054 36.07428
	#
	# $prop
	# $prop[[1]]
	# Results from error propagation:
	# Mean.1 Mean.2 sd.1 sd.2 2.5% 97.5%
	# 32.834103 32.784258 1.616907 1.622614 29.616476 35.967704
	#
	# Results from Monte Carlo simulation:
	# Mean sd Median MAD 2.5% 97.5%
	# 32.783996 1.621922 32.739536 1.613052 29.720536 36.074284

	# As before, generate a sequence of x values for plotting
	x.seq <- seq(min(bobo.dat$API_mm),max(bobo.dat$API_mm), length.out = 100) # generate 100 points between the max and min of the x values
	my.newdata <- data.frame(API_mm = x.seq) # 100 API values where CL will be calculated

	# calculate confidence limits as each x.seq

	CL.predictNLS <- propagate::predictNLS(bobo.mod, interval = "prediction", newdata = my.newdata, alpha = 0.05)$summary

	head(CL.predictNLS)

	# add to plot
	API_ILplot +
	geom_smooth(method = "nls", formula = y ~ ILmax*a^x, method.args = list(start = list(ILmax = 80, a = 0.98)), se = FALSE) +
	geom_line(data = CL.as.lm, aes(x = x.seq, y = upr), linetype = 1) +
	geom_line(data = CL.as.lm, aes(x = x.seq, y = lwr), linetype = 1) +
	geom_line(data = CL.predictNLS, aes(x = x.seq, y = `Prop.2.5%`), linetype = 2, colour = 'Blue') +
	geom_line(data = CL.predictNLS, aes(x = x.seq, y = `Prop.97.5%`), linetype = 2, colour = 'Blue') +
	geom_line(data = CL.predictNLS, aes(x = x.seq, y = `Sim.2.5%`), linetype = 2, colour = 'Green') +
	geom_line(data = CL.predictNLS, aes(x = x.seq, y = `Sim.97.5%`), linetype = 2, colour = 'Green')

	# in this case, the prediction intervals are pretty much the same
	# The Sim intervals are confidence intervals not prediction intervals

	# Plot for blog

	API_ILplot +
	geom_smooth(method = "nls", formula = y ~ ILmax*a^x, method.args = list(start = list(ILmax = 80, a = 0.98)), se = FALSE) +
	geom_line(data = CL.predictNLS, aes(x = x.seq, y = `Prop.2.5%`), linetype = 2, colour = 'Blue') +
	geom_line(data = CL.predictNLS, aes(x = x.seq, y = `Prop.97.5%`), linetype = 2, colour = 'Blue') +
	geom_hline(yintercept = 0)

	# When does the lower prediction interval become negative

	x.seq[which.min(abs(CL.predictNLS$`Prop.2.5%`))]
	# [1] 47.9853

	#_________________________________________________________________________
	# Other models
	# Log-Log model
	bobo.mod.log <- lm(log(ILs_mm) ~ log(API_mm), data = bobo.dat)

	bobo.mod.log
	summary(bobo.mod.log)


	# Check residuals
	plot(bobo.mod.log)
	# Comparison of Log-Log model and Cordery's model

	resid(bobo.mod.log)
	Acf(resid(bobo.mod.log)) # some autocorrelation


	# Compare models with AIC
	AIC(bobo.mod.log)
	# [1] 77.61382

	AIC(bobo.mod)
	# [1] 365.8295

	# So the log-log model is better (at least according to the AIC)

	# Calculate prediction intervals and plot

	x.seq <- seq(min(bobo.dat$API_mm),max(bobo.dat$API_mm), length.out = 100) # generate 100 points between the max and min of the x values
	my.newdata <- data.frame(API_mm = x.seq) # 100 API values where CL will be calculated

	CL.log_log <- as.data.frame(predict(bobo.mod.log, newdata = my.newdata, interval = 'prediction' ))


	API_ILplot +
	geom_line(data = CL.log_log, aes(x = x.seq, y = exp(fit)), linetype = 1, colour = 'Blue') +
	geom_line(data = CL.log_log, aes(x = x.seq, y = exp(lwr)), linetype = 2, colour = 'Blue') +
	geom_line(data = CL.log_log, aes(x = x.seq, y = exp(upr)), linetype = 2, colour = 'Blue') +
	scale_y_continuous(name = 'Initial loss (mm)', limits = c(1, 100))