Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
# Markus Gesmann, January 2013
# See also: http://lamages.blogspot.co.uk/2013/01/reserving-based-on-log-incremental_22.html
log.incr.predict <- function(
model, # lm output
newdata, # same argument as in predict
claims.inflation=0, # Assumed inflation (scalar)
volume.index=NULL, # name of the v.i. column in newdata
origin.var="origin", # name of the origin column in newdata
dev.var="dev", # name of the dev column in newdata
cal.var="cal" # name of the cal. period col. in newdata
){
origin <- newdata[[origin.var]]
dev <- newdata[[dev.var]]
cal <- newdata[[cal.var]]
if(is.null(volume.index)){
index <- 1
}else{
index <- newdata[[volume.index]]
}
Pred <- predict(model, newdata=newdata, se.fit=TRUE)
Y <- Pred$fit
VarY <- Pred$se.fit^2 + Pred$residual.scale^2
P <- exp(Y + VarY/2)
P <- P*index*(1 + claims.inflation)^(cal - min(cal) + 1)
VarP <- P^2*(exp(VarY)-1)
seP <- sqrt(VarP)
## Recreate formula to derive the model.frame and future design matrix
model.formula <- as.formula(paste("~", as.character(formula(model)[3])))
## See also package formula.tool
mframe <- model.frame(model.formula, data=newdata)
fdm <- model.matrix(model.formula, data=newdata)
varcovar <- fdm %*% vcov(model) %*% t(fdm)
Total.SE <- sqrt( t(P) %*% (exp(varcovar)-1) %*% P )
Total.Reserve <- sum(P)
# Prepare output
Incr=data.frame(origin, dev, Y, VarY, P, seP, CV=seP/P)
out <- list(Forecast=Incr[order(newdata[[origin.var]]),],
Totals=data.frame(Total.Reserve,
Total.SE=Total.SE,
CV=Total.SE/Total.Reserve))
return(out)
}
# Resdiual plots
resPlot <- function(model,
data,
rhs=c("origin", "dev", "cal")){
op <- par(mfrow=c(2,2),oma = c(0, 0, 3, 0))
dt <- na.omit(data)
plot.default(rstandard(model) ~ dt[[rhs[1]]],
main=paste("Residuals vs.", rhs[1], "years"),
xlab=rhs[1], ylab="Standardised Residuals")
panel.smooth(y=rstandard(model), x=dt[[rhs[1]]])
abline(h=0, lty=2)
plot.default(rstandard(model) ~ dt[[rhs[2]]],
main=paste("Residuals vs.", rhs[2], "years"),
xlab=rhs[2], ylab="Standardised Residuals")
panel.smooth(y=rstandard(model), x=dt[[rhs[2]]])
abline(h=0, lty=2)
plot.default(rstandard(model) ~ dt[[rhs[3]]],
main=paste("Residuals vs.", rhs[3], "years"),
xlab=rhs[3], ylab="Standardised Residuals")
panel.smooth(y=rstandard(model), x=dt[[rhs[3]]])
abline(h=0, lty=2)
yname <- names(model.frame(model))[1]
plot.default(rstandard(model) ~ model.frame(model)[[yname]],
main="Residuals vs. fitted",
xlab=yname, ylab="Standardised Residuals")
panel.smooth(y=rstandard(model), x=fitted(model))
abline(h=0, lty=2)
mtext(as.character(model$call)[2], outer = TRUE, cex = 1.2)
par(op)
}
# Page D5.17
tri <- t(matrix(
c(3511, 3215, 2266, 1712, 1059, 587, 340,
4001, 3702, 2278, 1180, 956, 629, NA,
4355, 3932, 1946, 1522, 1238, NA, NA,
4295, 3455, 2023, 1320, NA, NA, NA,
4150, 3747, 2320, NA, NA, NA, NA,
5102, 4548, NA, NA, NA, NA, NA,
6283, NA, NA, NA, NA, NA, NA), nc=7))
m <- dim(tri)[1]; n <- dim(tri)[2]
dat <- data.frame(
origin=rep(0:(m-1), n),
dev=rep(0:(n-1), each=m),
value=as.vector(tri))
## Add dimensions as factors
dat <- within(dat, {
originf <- factor(origin)
devf <- as.factor(dev)
calf <- as.factor(origin+dev)
a6 <- ifelse(origin == 6, 1, 0)
a5 <- ifelse(origin == 5, 1, 0)
d <- ifelse(dev < 1, 1, 0)
s <- ifelse(dev < 1, 0, dev)
cal <- origin+dev
logvalue <- log(value)
})
dat <- dat[order(dat$origin),]
# Page D5.36
ClaimsVolume <- data.frame(
origin=0:6,
volume.index=c(1.43, 1.45, 1.52, 1.35, 1.29, 1.47, 1.91))
# Page D5.36
EarningIndex <- data.frame(
cal=0:6,
earning.index=c(1.55, 1.41, 1.3, 1.23, 1.13, 1.05, 1))
dat <- merge(merge(dat, ClaimsVolume), EarningIndex)
# Normalise data for volume and earnings
dat$logvalue.ind.inf <- with(dat, log(value/volume.index*earning.index))
with(dat,
interaction.plot(dev, origin, logvalue.ind.inf))
points(1+dat$dev, dat$logvalue.ind.inf, pch=16, cex=0.8)
summary(Fit4 <- lm(logvalue.ind.inf ~ d + s, data=na.omit(dat)))
resPlot(Fit4, dat)
op <- par(mfrow=c(2,2),oma = c(0, 0, 3, 0))
plot(Fit4)
par(op)
summary(Fit5 <- lm(logvalue.ind.inf ~ a6 + d + s, data=na.omit(dat)))
resPlot(Fit5, dat)
op <- par(mfrow=c(2,2),oma = c(0, 0, 3, 0))
plot(Fit5)
par(op)
# Tail
tail.years <- 6
# Create a data frame for the future periods
fdat <- data.frame(
origin=rep(0:(m-1), n+tail.years),
dev=rep(0:(n+tail.years-1), each=m)
)
fdat <- within(fdat,{
a6 <- ifelse(origin == 6, 1, 0)
d <- ifelse(dev < 1, 1, 0)
s <- ifelse(dev < 1, 0, dev)
cal <- origin + dev
})
ND <- subset(fdat, cal>6)
ND <- merge(ND, ClaimsVolume)
FM4 <- log.incr.predict(Fit4, ND,
claims.inflation=0.075,
volume.index="volume.index")
# Page D5.41
round(xtabs(P ~ origin + dev, data=FM4$Forecast),0)
round(xtabs(seP ~ origin + dev, data=FM4$Forecast),0)
FM4$Totals
# Page D5.42
FM5 <- log.incr.predict(Fit5, ND,
claims.inflation=0.075,
volume.index="volume.index")
round(xtabs(P ~ origin + dev, data=FM5$Forecast),0)
FM5$Totals
###############################################################################
# Glen Barnett and Ben Zehnwirth.
# Best estimates for reserves.
# Proceedings of the CAS, LXXXVII(167), November 2000.
library(ChainLadder) ## for the example data
dat <- as.data.frame(M3IR5)
# Example triangle from:
# Appendix A7 in B. Zehnwirth. Probabilistic Development
# Factor Models with Applications to Loss Reserve Variability,
# Prediction Intervals, and Risk Based Capital.
# Casualty Actuarial Science Forum. Spring 1994. Vol. 2.
# http://www.casact.org/pubs/forum/94spforum/94spf447.pdf
dat <- within(dat, {
cal <- origin + dev -min(origin) - 1
dev <- dev - 1
})
with(dat,
interaction.plot(x.factor=dev, trace.factor=origin,
response=log(value))
)
# Table 3.1 page 274 of Barnett & Zehnwirth, 2000
summary(m4 <- lm(log(value) ~ dev + cal, data=dat))
resPlot(m4, dat)
op <- par(mfrow=c(2,2),oma = c(0, 0, 3, 0))
plot(m4)
par(op)
# Table 3.2, page 277 of Barnett & Zehnwirth, 2000
summary(
m6 <- lm(log(value)
~ dev
+ ifelse(cal > 4, 4, cal)
+ ifelse(cal <= 4, 0, 1)
+ ifelse(cal <= 5, 0, cal - 5),
data=dat ))
# More formally with new columns for the dummy variables
dat <- within(dat, {
cal_78_82 <- ifelse(cal > 4, 4, cal)
cal_82_83 <- ifelse(cal <= 4, 0, 1)
cal_83_91 <- ifelse(cal <= 5, 0, cal - 5)
})
# Table 3.2, page 277
summary(
m6 <- lm(log(value) ~ dev + cal_78_82 + cal_82_83 + cal_83_91, data=dat )
)
resPlot(m6, dat)
op <- par(mfrow=c(2,2),oma = c(0, 0, 3, 0))
plot(m6)
par(op)
# Prediction
FF <- log.incr.predict(m6, dat[is.na(dat$value),])
FF$Totals
# Test stability by taking out most recent calendar yeards
# Table 3.3, page 277
M <- sapply(0:4, function(i){
model <- lm(log(value) ~ dev + cal_78_82 + cal_82_83 + cal_83_91,
data=subset(dat, cal < (14-i)))
return(
c(coef(model),
log.incr.predict(model, dat[is.na(dat$value),])$Totals)
)
})
M <- as.data.frame(t(M))
rownames(M) <- paste(78, 91:87, sep="-")
M
# (Intercept) dev cal_78_82 cal_82_83 cal_83_91 Total.Reserve Total.SE CV
# 78-91 11.53213 -0.2062011 0.08731473 0.3927321 0.1446351 23432162 930057.2 0.03969148
# 78-90 11.53213 -0.2074902 0.08795925 0.3726737 0.1526735 25339572 1193249 0.04709035
# 78-89 11.53213 -0.2085994 0.08851386 0.3779634 0.1512344 24857942 1528294 0.0614811
# 78-88 11.53213 -0.2118672 0.09014776 0.3706285 0.1574523 26303546 1998992 0.07599705
# 78-87 11.53213 -0.2130568 0.09074254 0.3739596 0.1562765 25903783 2871096 0.1108369
# The output above is slightly different from the paper,
# but I wonder if this is caused by roundings, or the fact that
# I typed the data from a paper copy into R and lost precision.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment