Last active
June 9, 2016 01:54
-
-
Save TonyLadson/2d63ca70eef92583001dece607127759 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
########################################################################################### | |
# | |
# Fitting non-linear models in R | |
# See tonyladson | |
# | |
# 5 June 2016 | |
############################################################################################### | |
# | |
# Example data come from Figure 5 from Ian Cordery's thesis | |
# Also Figure 3 from Cordery, 1970 | |
# Cordery, I. (1968). Improved techniques in flood estimation and forecasting. | |
# PhD, The University of New South Wales. | |
# | |
# Cordery, I. (1970). "Initial loss for flood estimation and forecasting." | |
# Journal of the Hydraulics Division, American Society of Civil Engineers 96(12): 2447-2466. | |
#remove(list = objects()) | |
library(ggplot2) | |
library(dplyr) | |
library(propagate) | |
library(nlstools) | |
library(nls2) # this loads the proto package which is used to calcualte confidence intervals | |
library(fpp) # for the Acf function | |
library(car) | |
library(ggExtra) | |
# Data from digitizing Figure 5 from Cordery, 1968 | |
# See http://www.unsworks.unsw.edu.au/primo_library/libweb/action/dlDisplay.do?vid=UNSWORKS&docId=unsworks_36997 | |
Fig5 <- | |
structure(list(x = c(9.64021986421365, 37.5464569595027, 25.7273447779685, | |
46.7390997673626, 104.193117316487, 108.132821376998, 86.1592245759561, | |
81.2589611595452, 76.0060224121967, 93.4063820127887, 79.6033107877613, | |
90.7658056258769, 120.32769309295, 111.463358956799, 125.908940512008, | |
120.656001764659, 151.188708233622, 147.57731284482, 139.041287380379, | |
143.3093001126, 147.591419858058, 148.576345873186, 174.855146623166, | |
155.79913665079, 208.328524124275, 203.732202720345, 184.361991089498, | |
188.60563716249, 180.726229041467, 197.812386983587, 201.09547370068, | |
236.895225930229, 247.729412096636, 273.994105833378, 253.967276859112, | |
251.340807485438, 269.055368744501, 306.48255731936, 318.301669500894, | |
320.27152153115, 342.92481987909, 336.030337773195, 335.702029101486, | |
333.089666741049, 318.315776514131, 322.912097918061, 410.242204592731, | |
395.782516024285, 411.54133226633, 432.224778584015), | |
y = c(2.54132343897037, 2.50762114884014, 2.38923518377857, 2.41501285415006, 2.30416887155265, | |
2.23724762261166, 2.18569228186868, 2.17557253236737, 2.18588360051597, | |
2.00543990791553, 1.93068466383821, 1.86624048790948, 2.03906164282585, | |
2.10866135282887, 2.162794460609, 2.09835028468028, 2.19888319912909, | |
2.17288400190285, 2.03626234893394, 1.88406333031477, 1.76033051253161, | |
1.73197507512297, 1.99748507994933, 2.01552944920937, 2.18308430662406, | |
1.94850750624349, 1.84024129068323, 1.86601896105472, 1.88148556327762, | |
1.75259721142016, 1.75259721142016, 1.801574785126, 1.77321934771736, | |
1.78868594994025, 1.4973982747424, 1.47162060437091, 1.22684349926912, | |
1.43287354359376, 1.85047180361192, 1.61073946915705, 1.64167267360284, | |
1.56433966248837, 1.48937296095878, 1.25479616057821, 1.20066305279808, | |
1.04341926353198, 1.56670597207325, 1.41461771688145, 1.07693023501492, | |
1.07950800205207)), | |
.Names = c("x", "y"), | |
row.names = c(NA, -50L), | |
class = c("tbl_df", "tbl", "data.frame")) | |
# Create data frame from digitised data | |
bobo.dat <- Fig5 %>% # Bobo River | |
mutate(API = x) %>% | |
mutate(ILs = 10^y) %>% # ILs is on a log scale | |
mutate(ILs_mm = ILs/100*25.4) %>% # convert from points (inches/100) to mm | |
mutate(API_mm = API/100*25.4) | |
# Make a plot the initial loss, API relationship | |
API_ILplot <- bobo.dat %>% | |
ggplot(aes(x = API_mm, y = ILs_mm)) + geom_point() + | |
theme_bw() + | |
scale_x_continuous(name = 'API (mm)') + | |
scale_y_continuous(name = 'Initial loss (mm)') + | |
theme( | |
panel.background = element_rect(fill="gray98"), | |
axis.title.x = element_text(colour="grey20", size=16, margin=margin(20,0,0,0)), | |
axis.text.x = element_text(colour="grey20",size=14), | |
axis.title.y = element_text(colour="grey20",size=16, margin = margin(0,20,0,0)), | |
axis.text.y = element_text(colour="grey20",size=14), | |
legend.title = element_text(colour="grey20",size=12), | |
plot.margin = unit(c(0.5, 0.5, 1, 0.5), "cm")) # top, right, bottom, left | |
# Draw the plot | |
API_ILplot | |
# Check marginal histograms | |
#ggExtra::ggMarginal(API_ILplot, type = "histogram") | |
# Model | |
# From Cordery | |
# IL = ILmax(N)^API | |
# Preview model to check starting values are reasonable | |
nlstools::preview(ILs_mm ~ ILmax*N^API_mm, data = bobo.dat, start = list(ILmax = 80, N = 0.98)) | |
# fit the model using NLS | |
bobo.mod <- nls(ILs_mm ~ ILmax*N^API_mm, data = bobo.dat, start = list(ILmax = 80, N = 0.98), trace = TRUE) | |
# add fitted model to plot | |
# geom_smooth can use nls and add the fitted model or use nlstools::plotfit | |
API_ILplot + | |
geom_smooth(method = "nls", formula = y ~ ILmax*a^x, method.args = list(start = list(ILmax = 84, a = 0.97)), se = FALSE) | |
# Check model | |
summary(bobo.mod) | |
nlstools::overview(bobo.mod) | |
# | |
# ------ | |
# Formula: ILs_mm ~ ILmax * N^API_mm | |
# | |
# Parameters: | |
# Estimate Std. Error t value Pr(>|t|) | |
# ILmax 84.80480 6.13395 13.82 <2e-16 *** | |
# N 0.96886 0.00268 361.49 <2e-16 *** | |
# --- | |
# Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 | |
# | |
# Residual standard error: 9.023 on 48 degrees of freedom | |
# | |
# Number of iterations to convergence: 7 | |
# Achieved convergence tolerance: 1.921e-06 | |
# | |
# ------ | |
# Residual sum of squares: 3910 | |
# | |
# ------ | |
# t-based confidence interval: | |
# 2.5% 97.5% | |
# ILmax 72.4716594 97.1379372 | |
# N 0.9634766 0.9742544 | |
# | |
# ------ | |
# Correlation matrix: | |
# ILmax N | |
# ILmax 1.0000000 -0.8074648 | |
# N -0.8074648 1.0000000 | |
# | |
# R-square | |
RSS <- sum(residuals(bobo.mod)^2) | |
TSS <- sum((bobo.dat$ILs_mm - mean(bobo.dat$ILs_mm))^2) | |
R.square <- 1 - (RSS/TSS) | |
R.square | |
#[1] 0.7934562 | |
deviance(bobo.mod) | |
# [1] 3907.827 | |
logLik(bobo.mod) | |
# 'log Lik.' -179.9148 (df=3) | |
coef(bobo.mod) | |
# Confidence intervals for parameters | |
confint(bobo.mod) | |
nlstools::confint2(bobo.mod, method = "asymptotic") | |
# 2.5 % 97.5 % | |
# ILmax 72.4716594 97.1379372 | |
# N 0.9634766 0.9742544 | |
nlstools::confint2(bobo.mod, method = "profile") | |
# 2.5% 97.5% | |
# ILmax 72.4065771 98.3413675 | |
# N 0.9627527 0.9743995 | |
# Bootstrap confidence intervals for parameters | |
boot.cl <- nlstools::nlsBoot(bobo.mod) | |
boot.cl$bootCI | |
# Median 2.5% 97.5% | |
# ILmax 84.7308584 73.086423 97.0838838 | |
# N 0.9691428 0.963597 0.9741001 | |
# Confidence region | |
plot(nlstools::nlsConfRegions(bobo.mod)) | |
nlstools::nlsContourRSS(bobo.mod) | |
plot(nlsContourRSS(bobo.mod), nlev = 4) | |
# Check assumptions | |
test.nlsResiduals(nlsResiduals(bobo.mod)) | |
# residuals | |
# 1. Constant variance | |
plot(nlsResiduals(bobo.mod)) # not too bad | |
# 2. Normal errors | |
# | |
plot(nlsResiduals(bobo.mod), which = 6) | |
# perhaps heavy tailed, could consider a transformation but passes normality test | |
plot(nlsResiduals(bobo.mod), which = 5) # looks ok | |
# 3. Independence | |
nlstools::test.nlsResiduals(nlsResiduals(bobo.mod)) # autocorrelated | |
plot(nlsResiduals(bobo.mod), which = 4) | |
forecast::Acf(resid(bobo.mod)) | |
# This autocorrelation is just an artifact of the way that I've collected the data | |
# i.e. by digitizing it so its sorted from high IL to low IL | |
# We can test by randomising the input data and re-running the model | |
set.seed(2016) | |
bobo.dat.rand <- bobo.dat[sample(1:50) ,] | |
bobo.mod.rand <- nls(ILs_mm ~ ILmax*N^API_mm, data = bobo.dat.rand, start = list(ILmax = 80, N = 0.98), trace = TRUE) | |
test.nlsResiduals(nlsResiduals(bobo.mod.rand)) | |
# passes teh runs test | |
# influential observations | |
plot(nlstools::nlsJack(bobo.mod)) | |
summary(nlstools::nlsJack(bobo.mod)) | |
# Bootstrap | |
plot(nlsBoot(bobo.mod)) | |
plot(nlsBoot(bobo.mod), type = "boxplot") | |
#_________________________________________________________________________________________________________________ | |
# Confidence intervals for predicted values | |
# It seems this is not straight forward | |
# https://rmazing.wordpress.com/2013/08/14/predictnls-part-1-monte-carlo-simulation-confidence-intervals-for-nls-models/ | |
# http://www.inside-r.org/packages/cran/propagate/docs/predictNLS | |
# https://quantitativeconservationbiology.wordpress.com/2013/07/02/confidence-interval-for-a-model-fitted-with-nls-in-r/ | |
# see http://www.leg.ufpr.br/~walmes/cursoR/ciaeear/as.lm.R | |
# https://quantitativeconservationbiology.wordpress.com/2013/07/02/confidence-interval-for-a-model-fitted-with-nls-in-r/ | |
# http://thr3ads.net/r-help/2011/05/1053390-plotting-confidence-bands-from-predict.nls | |
# Using the as.lm approach | |
# from http://www.leg.ufpr.br/~walmes/cursoR/ciaeear/as.lm.R | |
# if object is an "nls" object then its often used like this: | |
# predict(as.lm(object), ...) where ... are any predict.lm args | |
# as.lm.nls effectively just does this: | |
# lm(lhs ~ gradient - 1, offset = fitted(object), | |
# list(gradient = object$m$gradient(), lhs = object$m$lhs())) | |
# so most of the code is just to get the names right. | |
as.lm <- function(object, ...) UseMethod("as.lm") | |
as.lm.nls <- function(object, ...) { | |
if (!inherits(object, "nls")) { | |
w <- paste("expected object of class nls but got object of class:", | |
paste(class(object), collapse = " ")) | |
warning(w) | |
} | |
gradient <- object$m$gradient() | |
if (is.null(colnames(gradient))) { | |
colnames(gradient) <- names(object$m$getPars()) | |
} | |
response.name <- if (length(formula(object)) == 2) "0" else | |
as.character(formula(object)[[2]]) | |
lhs <- object$m$lhs() | |
L <- data.frame(lhs, gradient) | |
names(L)[1] <- response.name | |
fo <- sprintf("%s ~ %s - 1", response.name, | |
paste(colnames(gradient), collapse = "+")) | |
fo <- as.formula(fo, env = as.proto.list(L)) | |
do.call("lm", list(fo, offset = substitute(fitted(object)))) | |
} | |
# Calculate and add confidence intervals to plot | |
# Calculate prediction intervals for all the given data points | |
predCI <- predict(as.lm.nls(bobo.mod), interval = 'prediction', level = 0.95) | |
predCI | |
# Then we need to interpolate to make smooth contour intervals | |
x.seq <- seq(min(bobo.dat$API_mm),max(bobo.dat$API_mm), length.out = 100) # generate 100 points between the max and min of the x values | |
lwr <- approx(bobo.dat$API_mm, predCI [, 2], xout = x.seq)$y ## lower CI | |
upr <- approx(bobo.dat$API_mm, predCI[, 3], xout = x.seq)$y ## upper CI | |
# tidy into data frame | |
CL.as.lm <- data_frame(x.seq = x.seq, lwr = lwr, upr = upr) | |
# Add to plot | |
API_ILplot + | |
geom_smooth(method = "nls", formula = y ~ ILmax*a^x, method.args = list(start = list(ILmax = 80, a = 0.98)), se = FALSE) + | |
geom_line(data = CL.as.lm, aes(x = x.seq, y = upr), linetype = 2) + | |
geom_line(data = CL.as.lm, aes(x = x.seq, y = lwr), linetype = 2) | |
#_________________________________________________________________________________________ | |
# Using the predictNLS function from the propagate package. | |
#help(predictNLS) | |
# Test for a particular value of API | |
my.newdata <- data.frame(API_mm = 30) | |
x <- propagate::predictNLS(bobo.mod, interval = "prediction", newdata = my.newdata, alpha = 0.05) | |
x | |
# $summary | |
# Prop.Mean.1 Prop.Mean.2 Prop.sd.1 Prop.sd.2 Prop.2.5% Prop.97.5% Sim.Mean Sim.sd Sim.Median Sim.MAD Sim.2.5% Sim.97.5% | |
# 1 32.8341 32.78426 1.616907 1.622614 14.35145 51.21707 32.784 1.621922 32.73954 1.613052 29.72054 36.07428 | |
# | |
# $prop | |
# $prop[[1]] | |
# Results from error propagation: | |
# Mean.1 Mean.2 sd.1 sd.2 2.5% 97.5% | |
# 32.834103 32.784258 1.616907 1.622614 29.616476 35.967704 | |
# | |
# Results from Monte Carlo simulation: | |
# Mean sd Median MAD 2.5% 97.5% | |
# 32.783996 1.621922 32.739536 1.613052 29.720536 36.074284 | |
# As before, generate a sequence of x values for plotting | |
x.seq <- seq(min(bobo.dat$API_mm),max(bobo.dat$API_mm), length.out = 100) # generate 100 points between the max and min of the x values | |
my.newdata <- data.frame(API_mm = x.seq) # 100 API values where CL will be calculated | |
# calculate confidence limits as each x.seq | |
CL.predictNLS <- propagate::predictNLS(bobo.mod, interval = "prediction", newdata = my.newdata, alpha = 0.05)$summary | |
head(CL.predictNLS) | |
# add to plot | |
API_ILplot + | |
geom_smooth(method = "nls", formula = y ~ ILmax*a^x, method.args = list(start = list(ILmax = 80, a = 0.98)), se = FALSE) + | |
geom_line(data = CL.as.lm, aes(x = x.seq, y = upr), linetype = 1) + | |
geom_line(data = CL.as.lm, aes(x = x.seq, y = lwr), linetype = 1) + | |
geom_line(data = CL.predictNLS, aes(x = x.seq, y = `Prop.2.5%`), linetype = 2, colour = 'Blue') + | |
geom_line(data = CL.predictNLS, aes(x = x.seq, y = `Prop.97.5%`), linetype = 2, colour = 'Blue') + | |
geom_line(data = CL.predictNLS, aes(x = x.seq, y = `Sim.2.5%`), linetype = 2, colour = 'Green') + | |
geom_line(data = CL.predictNLS, aes(x = x.seq, y = `Sim.97.5%`), linetype = 2, colour = 'Green') | |
# in this case, the prediction intervals are pretty much the same | |
# The Sim intervals are confidence intervals not prediction intervals | |
# Plot for blog | |
API_ILplot + | |
geom_smooth(method = "nls", formula = y ~ ILmax*a^x, method.args = list(start = list(ILmax = 80, a = 0.98)), se = FALSE) + | |
geom_line(data = CL.predictNLS, aes(x = x.seq, y = `Prop.2.5%`), linetype = 2, colour = 'Blue') + | |
geom_line(data = CL.predictNLS, aes(x = x.seq, y = `Prop.97.5%`), linetype = 2, colour = 'Blue') + | |
geom_hline(yintercept = 0) | |
# When does the lower prediction interval become negative | |
x.seq[which.min(abs(CL.predictNLS$`Prop.2.5%`))] | |
# [1] 47.9853 | |
#_________________________________________________________________________ | |
# Other models | |
# Log-Log model | |
bobo.mod.log <- lm(log(ILs_mm) ~ log(API_mm), data = bobo.dat) | |
bobo.mod.log | |
summary(bobo.mod.log) | |
# Check residuals | |
plot(bobo.mod.log) | |
# Comparison of Log-Log model and Cordery's model | |
resid(bobo.mod.log) | |
Acf(resid(bobo.mod.log)) # some autocorrelation | |
# Compare models with AIC | |
AIC(bobo.mod.log) | |
# [1] 77.61382 | |
AIC(bobo.mod) | |
# [1] 365.8295 | |
# So the log-log model is better (at least according to the AIC) | |
# Calculate prediction intervals and plot | |
x.seq <- seq(min(bobo.dat$API_mm),max(bobo.dat$API_mm), length.out = 100) # generate 100 points between the max and min of the x values | |
my.newdata <- data.frame(API_mm = x.seq) # 100 API values where CL will be calculated | |
CL.log_log <- as.data.frame(predict(bobo.mod.log, newdata = my.newdata, interval = 'prediction' )) | |
API_ILplot + | |
geom_line(data = CL.log_log, aes(x = x.seq, y = exp(fit)), linetype = 1, colour = 'Blue') + | |
geom_line(data = CL.log_log, aes(x = x.seq, y = exp(lwr)), linetype = 2, colour = 'Blue') + | |
geom_line(data = CL.log_log, aes(x = x.seq, y = exp(upr)), linetype = 2, colour = 'Blue') + | |
scale_y_continuous(name = 'Initial loss (mm)', limits = c(1, 100)) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment