mages/Reserving_based_on_log-incremental_payments3.R

## Reserving_based_on_log-incremental_payments3.R
# Markus Gesmann, January 2013
# See also: http://lamages.blogspot.co.uk/2013/01/reserving-based-on-log-incremental_22.html
log.incr.predict <- function(
  model, # lm output
  newdata, # same argument as in predict
  claims.inflation=0, # Assumed inflation (scalar)
  volume.index=NULL, # name of the v.i. column in newdata
  origin.var="origin", # name of the origin column in newdata
  dev.var="dev", # name of the dev column in newdata
  cal.var="cal" # name of the cal. period col. in newdata
){
  origin <- newdata[[origin.var]]
  dev <- newdata[[dev.var]]
  cal <- newdata[[cal.var]]
  if(is.null(volume.index)){
    index <- 1
  }else{
    index <- newdata[[volume.index]]
  }
  Pred <- predict(model, newdata=newdata, se.fit=TRUE)

  Y <- Pred$fit
  VarY <- Pred$se.fit^2 + Pred$residual.scale^2
  P <- exp(Y + VarY/2)
  P <- P*index*(1 + claims.inflation)^(cal - min(cal) + 1)
  VarP <-  P^2*(exp(VarY)-1)
  seP <- sqrt(VarP)

  ## Recreate formula to derive the model.frame and future design matrix
  model.formula <- as.formula(paste("~", as.character(formula(model)[3])))
  ## See also package formula.tool

  mframe <- model.frame(model.formula, data=newdata)
  fdm <- model.matrix(model.formula, data=newdata)
  varcovar <- fdm %*% vcov(model) %*% t(fdm)
  Total.SE <- sqrt( t(P) %*% (exp(varcovar)-1) %*% P )
  Total.Reserve <- sum(P)

  # Prepare output
  Incr=data.frame(origin, dev, Y, VarY, P, seP, CV=seP/P)
  out <- list(Forecast=Incr[order(newdata[[origin.var]]),],
              Totals=data.frame(Total.Reserve,
                                Total.SE=Total.SE,
                                CV=Total.SE/Total.Reserve))
  return(out)
}


# Resdiual plots
resPlot <- function(model,
                    data,
                    rhs=c("origin", "dev", "cal")){

  op <- par(mfrow=c(2,2),oma = c(0, 0, 3, 0))

  dt <- na.omit(data)
  plot.default(rstandard(model) ~ dt[[rhs[1]]],
               main=paste("Residuals vs.", rhs[1], "years"),
               xlab=rhs[1], ylab="Standardised Residuals")
  panel.smooth(y=rstandard(model), x=dt[[rhs[1]]])
  abline(h=0, lty=2)

  plot.default(rstandard(model) ~ dt[[rhs[2]]],
               main=paste("Residuals vs.", rhs[2], "years"),
               xlab=rhs[2], ylab="Standardised Residuals")
  panel.smooth(y=rstandard(model), x=dt[[rhs[2]]])
  abline(h=0, lty=2)
  plot.default(rstandard(model) ~ dt[[rhs[3]]],
               main=paste("Residuals vs.", rhs[3], "years"),
               xlab=rhs[3], ylab="Standardised Residuals")
  panel.smooth(y=rstandard(model), x=dt[[rhs[3]]])
  abline(h=0, lty=2)
  yname <- names(model.frame(model))[1]
  plot.default(rstandard(model) ~ model.frame(model)[[yname]],
               main="Residuals vs. fitted",
               xlab=yname, ylab="Standardised Residuals")
  panel.smooth(y=rstandard(model), x=fitted(model))
  abline(h=0, lty=2)
  mtext(as.character(model$call)[2], outer = TRUE, cex = 1.2)
  par(op)
}

# Page D5.17
tri <- t(matrix(
  c(3511,       3215,   2266,  1712,  1059,	587,	340,
    4001,	3702,	2278,	1180,	 956,	629,	 NA,
    4355,	3932,	1946,	1522,	1238,	 NA,	 NA,
    4295,	3455,	2023,	1320,	  NA,	 NA,	 NA,
    4150,	3747,	2320,	  NA,	  NA,	 NA,	 NA,
    5102,	4548,	  NA,	  NA,	  NA,	 NA,	 NA,
    6283,	  NA,	  NA,	  NA,	  NA,	 NA,	 NA), nc=7))


m <- dim(tri)[1]; n <- dim(tri)[2]
dat <- data.frame(
  origin=rep(0:(m-1), n),
  dev=rep(0:(n-1), each=m),
  value=as.vector(tri))

## Add dimensions as factors
dat <- within(dat, {
  originf <- factor(origin)
  devf <- as.factor(dev)
  calf <- as.factor(origin+dev)
  a6 <- ifelse(origin == 6, 1, 0)
  a5 <- ifelse(origin == 5, 1, 0)
  d <- ifelse(dev < 1, 1, 0)
  s <- ifelse(dev < 1, 0, dev)
  cal <- origin+dev
  logvalue <- log(value)
  })

dat <- dat[order(dat$origin),]

# Page D5.36
ClaimsVolume <- data.frame(
  origin=0:6,
  volume.index=c(1.43, 1.45, 1.52, 1.35, 1.29, 1.47, 1.91))
# Page D5.36
EarningIndex <- data.frame(
  cal=0:6,
  earning.index=c(1.55, 1.41, 1.3, 1.23, 1.13, 1.05, 1))

dat <- merge(merge(dat, ClaimsVolume), EarningIndex)

# Normalise data for volume and earnings
dat$logvalue.ind.inf <- with(dat, log(value/volume.index*earning.index))

with(dat,
     interaction.plot(dev, origin, logvalue.ind.inf))
points(1+dat$dev, dat$logvalue.ind.inf, pch=16, cex=0.8)

summary(Fit4 <- lm(logvalue.ind.inf ~  d + s, data=na.omit(dat)))
resPlot(Fit4, dat)
op <- par(mfrow=c(2,2),oma = c(0, 0, 3, 0))
plot(Fit4)
par(op)


summary(Fit5 <- lm(logvalue.ind.inf ~ a6 + d + s, data=na.omit(dat)))
resPlot(Fit5, dat)
op <- par(mfrow=c(2,2),oma = c(0, 0, 3, 0))
plot(Fit5)
par(op)

# Tail
tail.years <- 6
# Create a data frame for the future periods
fdat <- data.frame(
  origin=rep(0:(m-1), n+tail.years),
  dev=rep(0:(n+tail.years-1), each=m)
)

fdat <- within(fdat,{
  a6 <- ifelse(origin == 6, 1, 0)
  d <- ifelse(dev < 1, 1, 0)
  s <- ifelse(dev < 1, 0, dev)
  cal <- origin + dev
  })

ND <- subset(fdat, cal>6)
ND <- merge(ND, ClaimsVolume)

FM4 <- log.incr.predict(Fit4, ND,
                        claims.inflation=0.075,
                        volume.index="volume.index")
# Page D5.41
round(xtabs(P ~ origin + dev, data=FM4$Forecast),0)
round(xtabs(seP ~ origin + dev, data=FM4$Forecast),0)

FM4$Totals

# Page D5.42
FM5 <- log.incr.predict(Fit5, ND,
                        claims.inflation=0.075,
                        volume.index="volume.index")

round(xtabs(P ~ origin + dev, data=FM5$Forecast),0)
FM5$Totals

###############################################################################
# Glen Barnett and Ben Zehnwirth.
# Best estimates for reserves.
# Proceedings of the CAS, LXXXVII(167), November 2000.

library(ChainLadder) ## for the example data

dat <- as.data.frame(M3IR5)
# Example triangle from:
# Appendix A7 in B. Zehnwirth. Probabilistic Development
# Factor Models with Applications to Loss Reserve Variability,
# Prediction Intervals, and Risk Based Capital.
# Casualty Actuarial Science Forum. Spring 1994. Vol. 2.
# http://www.casact.org/pubs/forum/94spforum/94spf447.pdf

dat <- within(dat, {
  cal <- origin + dev -min(origin) - 1
  dev <- dev - 1
})

with(dat,
     interaction.plot(x.factor=dev, trace.factor=origin,
                      response=log(value))
)

# Table 3.1 page 274 of Barnett & Zehnwirth, 2000
summary(m4 <- lm(log(value) ~  dev + cal, data=dat))
resPlot(m4, dat)
op <- par(mfrow=c(2,2),oma = c(0, 0, 3, 0))
plot(m4)
par(op)

# Table 3.2, page 277 of Barnett & Zehnwirth, 2000
summary(
  m6 <- lm(log(value)
           ~ dev
           + ifelse(cal > 4, 4, cal)
           + ifelse(cal <= 4, 0, 1)
           + ifelse(cal <= 5, 0, cal - 5),
           data=dat ))

# More formally with new columns for the dummy variables
dat <-  within(dat, {
  cal_78_82 <- ifelse(cal > 4, 4, cal)
  cal_82_83 <- ifelse(cal <= 4, 0, 1)
  cal_83_91 <- ifelse(cal <= 5, 0, cal - 5)
})
# Table 3.2, page 277
summary(
  m6 <- lm(log(value) ~ dev + cal_78_82 + cal_82_83 + cal_83_91, data=dat )
  )

resPlot(m6, dat)
op <- par(mfrow=c(2,2),oma = c(0, 0, 3, 0))
plot(m6)
par(op)

# Prediction
FF <- log.incr.predict(m6, dat[is.na(dat$value),])
FF$Totals

# Test stability by taking out most recent calendar yeards
# Table 3.3, page 277
M <- sapply(0:4, function(i){
  model <- lm(log(value) ~ dev + cal_78_82 + cal_82_83 + cal_83_91,
              data=subset(dat, cal < (14-i)))
  return(
  c(coef(model),
    log.incr.predict(model, dat[is.na(dat$value),])$Totals)
  )
})
M <- as.data.frame(t(M))
rownames(M) <- paste(78, 91:87, sep="-")
M

# (Intercept)        dev  cal_78_82 cal_82_83 cal_83_91 Total.Reserve Total.SE         CV
# 78-91    11.53213 -0.2062011 0.08731473 0.3927321 0.1446351      23432162 930057.2 0.03969148
# 78-90    11.53213 -0.2074902 0.08795925 0.3726737 0.1526735      25339572  1193249 0.04709035
# 78-89    11.53213 -0.2085994 0.08851386 0.3779634 0.1512344      24857942  1528294  0.0614811
# 78-88    11.53213 -0.2118672 0.09014776 0.3706285 0.1574523      26303546  1998992 0.07599705
# 78-87    11.53213 -0.2130568 0.09074254 0.3739596 0.1562765      25903783  2871096  0.1108369

# The output above is slightly different from the paper,
# but I wonder if this is caused by roundings, or the fact that
# I typed the data from a paper copy into R and lost precision.
	# Markus Gesmann, January 2013
	# See also: http://lamages.blogspot.co.uk/2013/01/reserving-based-on-log-incremental_22.html
	log.incr.predict <- function(
	model, # lm output
	newdata, # same argument as in predict
	claims.inflation=0, # Assumed inflation (scalar)
	volume.index=NULL, # name of the v.i. column in newdata
	origin.var="origin", # name of the origin column in newdata
	dev.var="dev", # name of the dev column in newdata
	cal.var="cal" # name of the cal. period col. in newdata
	){
	origin <- newdata[[origin.var]]
	dev <- newdata[[dev.var]]
	cal <- newdata[[cal.var]]
	if(is.null(volume.index)){
	index <- 1
	}else{
	index <- newdata[[volume.index]]
	}
	Pred <- predict(model, newdata=newdata, se.fit=TRUE)

	Y <- Pred$fit
	VarY <- Pred$se.fit^2 + Pred$residual.scale^2
	P <- exp(Y + VarY/2)
	P <- Pindex(1 + claims.inflation)^(cal - min(cal) + 1)
	VarP <- P^2*(exp(VarY)-1)
	seP <- sqrt(VarP)

	## Recreate formula to derive the model.frame and future design matrix
	model.formula <- as.formula(paste("~", as.character(formula(model)[3])))
	## See also package formula.tool

	mframe <- model.frame(model.formula, data=newdata)
	fdm <- model.matrix(model.formula, data=newdata)
	varcovar <- fdm %% vcov(model) %% t(fdm)
	Total.SE <- sqrt( t(P) %% (exp(varcovar)-1) %% P )
	Total.Reserve <- sum(P)

	# Prepare output
	Incr=data.frame(origin, dev, Y, VarY, P, seP, CV=seP/P)
	out <- list(Forecast=Incr[order(newdata[[origin.var]]),],
	Totals=data.frame(Total.Reserve,
	Total.SE=Total.SE,
	CV=Total.SE/Total.Reserve))
	return(out)
	}


	# Resdiual plots
	resPlot <- function(model,
	data,
	rhs=c("origin", "dev", "cal")){

	op <- par(mfrow=c(2,2),oma = c(0, 0, 3, 0))

	dt <- na.omit(data)
	plot.default(rstandard(model) ~ dt[[rhs[1]]],
	main=paste("Residuals vs.", rhs[1], "years"),
	xlab=rhs[1], ylab="Standardised Residuals")
	panel.smooth(y=rstandard(model), x=dt[[rhs[1]]])
	abline(h=0, lty=2)

	plot.default(rstandard(model) ~ dt[[rhs[2]]],
	main=paste("Residuals vs.", rhs[2], "years"),
	xlab=rhs[2], ylab="Standardised Residuals")
	panel.smooth(y=rstandard(model), x=dt[[rhs[2]]])
	abline(h=0, lty=2)
	plot.default(rstandard(model) ~ dt[[rhs[3]]],
	main=paste("Residuals vs.", rhs[3], "years"),
	xlab=rhs[3], ylab="Standardised Residuals")
	panel.smooth(y=rstandard(model), x=dt[[rhs[3]]])
	abline(h=0, lty=2)
	yname <- names(model.frame(model))[1]
	plot.default(rstandard(model) ~ model.frame(model)[[yname]],
	main="Residuals vs. fitted",
	xlab=yname, ylab="Standardised Residuals")
	panel.smooth(y=rstandard(model), x=fitted(model))
	abline(h=0, lty=2)
	mtext(as.character(model$call)[2], outer = TRUE, cex = 1.2)
	par(op)
	}

	# Page D5.17
	tri <- t(matrix(
	c(3511, 3215, 2266, 1712, 1059, 587, 340,
	4001, 3702, 2278, 1180, 956, 629, NA,
	4355, 3932, 1946, 1522, 1238, NA, NA,
	4295, 3455, 2023, 1320, NA, NA, NA,
	4150, 3747, 2320, NA, NA, NA, NA,
	5102, 4548, NA, NA, NA, NA, NA,
	6283, NA, NA, NA, NA, NA, NA), nc=7))


	m <- dim(tri)[1]; n <- dim(tri)[2]
	dat <- data.frame(
	origin=rep(0:(m-1), n),
	dev=rep(0:(n-1), each=m),
	value=as.vector(tri))

	## Add dimensions as factors
	dat <- within(dat, {
	originf <- factor(origin)
	devf <- as.factor(dev)
	calf <- as.factor(origin+dev)
	a6 <- ifelse(origin == 6, 1, 0)
	a5 <- ifelse(origin == 5, 1, 0)
	d <- ifelse(dev < 1, 1, 0)
	s <- ifelse(dev < 1, 0, dev)
	cal <- origin+dev
	logvalue <- log(value)
	})

	dat <- dat[order(dat$origin),]

	# Page D5.36
	ClaimsVolume <- data.frame(
	origin=0:6,
	volume.index=c(1.43, 1.45, 1.52, 1.35, 1.29, 1.47, 1.91))
	# Page D5.36
	EarningIndex <- data.frame(
	cal=0:6,
	earning.index=c(1.55, 1.41, 1.3, 1.23, 1.13, 1.05, 1))

	dat <- merge(merge(dat, ClaimsVolume), EarningIndex)

	# Normalise data for volume and earnings
	dat$logvalue.ind.inf <- with(dat, log(value/volume.index*earning.index))

	with(dat,
	interaction.plot(dev, origin, logvalue.ind.inf))
	points(1+dat$dev, dat$logvalue.ind.inf, pch=16, cex=0.8)

	summary(Fit4 <- lm(logvalue.ind.inf ~ d + s, data=na.omit(dat)))
	resPlot(Fit4, dat)
	op <- par(mfrow=c(2,2),oma = c(0, 0, 3, 0))
	plot(Fit4)
	par(op)


	summary(Fit5 <- lm(logvalue.ind.inf ~ a6 + d + s, data=na.omit(dat)))
	resPlot(Fit5, dat)
	op <- par(mfrow=c(2,2),oma = c(0, 0, 3, 0))
	plot(Fit5)
	par(op)

	# Tail
	tail.years <- 6
	# Create a data frame for the future periods
	fdat <- data.frame(
	origin=rep(0:(m-1), n+tail.years),
	dev=rep(0:(n+tail.years-1), each=m)
	)

	fdat <- within(fdat,{
	a6 <- ifelse(origin == 6, 1, 0)
	d <- ifelse(dev < 1, 1, 0)
	s <- ifelse(dev < 1, 0, dev)
	cal <- origin + dev
	})

	ND <- subset(fdat, cal>6)
	ND <- merge(ND, ClaimsVolume)

	FM4 <- log.incr.predict(Fit4, ND,
	claims.inflation=0.075,
	volume.index="volume.index")
	# Page D5.41
	round(xtabs(P ~ origin + dev, data=FM4$Forecast),0)
	round(xtabs(seP ~ origin + dev, data=FM4$Forecast),0)

	FM4$Totals

	# Page D5.42
	FM5 <- log.incr.predict(Fit5, ND,
	claims.inflation=0.075,
	volume.index="volume.index")

	round(xtabs(P ~ origin + dev, data=FM5$Forecast),0)
	FM5$Totals

	###############################################################################
	# Glen Barnett and Ben Zehnwirth.
	# Best estimates for reserves.
	# Proceedings of the CAS, LXXXVII(167), November 2000.

	library(ChainLadder) ## for the example data

	dat <- as.data.frame(M3IR5)
	# Example triangle from:
	# Appendix A7 in B. Zehnwirth. Probabilistic Development
	# Factor Models with Applications to Loss Reserve Variability,
	# Prediction Intervals, and Risk Based Capital.
	# Casualty Actuarial Science Forum. Spring 1994. Vol. 2.
	# http://www.casact.org/pubs/forum/94spforum/94spf447.pdf

	dat <- within(dat, {
	cal <- origin + dev -min(origin) - 1
	dev <- dev - 1
	})

	with(dat,
	interaction.plot(x.factor=dev, trace.factor=origin,
	response=log(value))
	)

	# Table 3.1 page 274 of Barnett & Zehnwirth, 2000
	summary(m4 <- lm(log(value) ~ dev + cal, data=dat))
	resPlot(m4, dat)
	op <- par(mfrow=c(2,2),oma = c(0, 0, 3, 0))
	plot(m4)
	par(op)

	# Table 3.2, page 277 of Barnett & Zehnwirth, 2000
	summary(
	m6 <- lm(log(value)
	~ dev
	+ ifelse(cal > 4, 4, cal)
	+ ifelse(cal <= 4, 0, 1)
	+ ifelse(cal <= 5, 0, cal - 5),
	data=dat ))

	# More formally with new columns for the dummy variables
	dat <- within(dat, {
	cal_78_82 <- ifelse(cal > 4, 4, cal)
	cal_82_83 <- ifelse(cal <= 4, 0, 1)
	cal_83_91 <- ifelse(cal <= 5, 0, cal - 5)
	})
	# Table 3.2, page 277
	summary(
	m6 <- lm(log(value) ~ dev + cal_78_82 + cal_82_83 + cal_83_91, data=dat )
	)

	resPlot(m6, dat)
	op <- par(mfrow=c(2,2),oma = c(0, 0, 3, 0))
	plot(m6)
	par(op)

	# Prediction
	FF <- log.incr.predict(m6, dat[is.na(dat$value),])
	FF$Totals

	# Test stability by taking out most recent calendar yeards
	# Table 3.3, page 277
	M <- sapply(0:4, function(i){
	model <- lm(log(value) ~ dev + cal_78_82 + cal_82_83 + cal_83_91,
	data=subset(dat, cal < (14-i)))
	return(
	c(coef(model),
	log.incr.predict(model, dat[is.na(dat$value),])$Totals)
	)
	})
	M <- as.data.frame(t(M))
	rownames(M) <- paste(78, 91:87, sep="-")
	M

	# (Intercept) dev cal_78_82 cal_82_83 cal_83_91 Total.Reserve Total.SE CV
	# 78-91 11.53213 -0.2062011 0.08731473 0.3927321 0.1446351 23432162 930057.2 0.03969148
	# 78-90 11.53213 -0.2074902 0.08795925 0.3726737 0.1526735 25339572 1193249 0.04709035
	# 78-89 11.53213 -0.2085994 0.08851386 0.3779634 0.1512344 24857942 1528294 0.0614811
	# 78-88 11.53213 -0.2118672 0.09014776 0.3706285 0.1574523 26303546 1998992 0.07599705
	# 78-87 11.53213 -0.2130568 0.09074254 0.3739596 0.1562765 25903783 2871096 0.1108369

	# The output above is slightly different from the paper,
	# but I wonder if this is caused by roundings, or the fact that
	# I typed the data from a paper copy into R and lost precision.