Skip to content

Instantly share code, notes, and snippets.

@timriffe
Last active August 29, 2015 13:57
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save timriffe/9748534 to your computer and use it in GitHub Desktop.
Save timriffe/9748534 to your computer and use it in GitHub Desktop.
a code chunk demonstrated in the 2014 Stanford formal demography workshop
# author: tim riffe
# March 24, 2014
# do with this as you will. If you sell it, I'l hunt you down.
# Thanks to Vladimir Canudas Romo for the idea!
# --------------------------------------------
# start
# This package is under development! please test!
# installation is via github. See the README for instructions
#library(devtools)
#install_github("DemogBerkeley", subdir = "DemogBerkeley", username = "UCBdemography")
library(DemogBerkeley)
library(parallel)
library(reshape2)
# define a nifty function to get CAL. it's tricky!!!
getCAL <- function(CTRY, years){
# watch out! HMD cohort data used the word 'Year' for the cohort column
colnames(CTRY)[colnames(CTRY) == "Year"] <- "Cohort"
# get the year
CTRY$Year <- CTRY$Cohort + CTRY$Age
# we need a matric Age by Cohort
# see formula 1 in Vladimir's second linked paper by Guillot (2002)
ACfem <- acast(CTRY, Age~Cohort, value.var = "Female")
# not all cohort values are observed, keep track of NAs
isNA <- is.na(ACfem)
# we need to take cumulative sum, so zero-out NAs
ACfem[isNA] <- 0
# this is the nifty formula
ACfemCS <- exp(-apply(ACfem, 2, cumsum))
# put the NAs back in as 0s
ACfemCS[isNA] <- 0
# only use cohorts observed since birth
keepers <- !isNA[1, ]
ACfemCS <- ACfemCS[, keepers]
# flip back to AP perspective to grab a year!
APfemCS <- AC2AP(ACfemCS)
# grab the desired years. This could be more elegant!
colSums(APfemCS[, as.character(years)], na.rm = TRUE)
}
# ---------------------------------------------
# Demonstration:
# what are all the HMD Countries?
#CNTRIES <- getHMDcountries()
# let's check out Scandinavia... cuz they have good cohort data
CNTRIES <- c("FIN","DNK","NOR","SWE","ISL")
# do this live so that your password isn't saved in the script!
user <- userInput()
password <- userInput()
# grab the needed data item
ALLcMx <- mclapply(CNTRIES, function(CTRY, pw, us){
try(readHMDweb(CTRY, "cMx_1x1", password = pw, username = us))
}, pw = password, us = user)
# use demonstration!
Call <- lapply(ALLcMx, getCAL, years = 1950:1970)
Cal <- do.call(cbind, Call)
colnames(Cal) <- CNTRIES
matplot(years, Cal, type = 'l', col = gray(.5), lty= 1)
# EDIT: April 2, 2014
# ---------------------------------------------------
# Vladimir wrote:
# I have a question related to the improvised presentation during the workshop.
# You mentioned that you had calculated the CALs by using the cohort death rates
# cMx_1x1 from HMD instead of period death rates Mx_1x1. I went just now to look
# for those cohort Mx and it seems the HMD only has them if there is a minimum
# of 30 ages or so of information, for example Denmark only has them from 1764
# until 1981 even when information is available until 2011 for the period death
# rates. The death counts are there and I could of course calculate the exposure
# (cExposures_1x1 ) by using the population counts which are also there. But did
# you do that for your presentation? Thank you.
# ---------------------------------------------------
# I responded:
# You're right about the cMx thing. I had 30 min notice and took the easy route,
# only showing CAL for earlier years. I'll update today to make it go to the most
# recent year. Gotta look up the left truncation thing...
# ---------------------------------------------------
# now a bit better--------------
# Need to calculate cohort exposures from population counts and deaths
# classified by Lexis triangles:
# this function is much more involved because it uses lifetable methodology rather than
# assuming continuous stuff. Grabs data from web on the fly and then calculates
# CAL for males and females.
#' @param CTRY HMD country code
#' @param CTRY pw HMD password
#' @param CTRY us HMD uername
#' @param Sex \code{"m"} or \code{"f"}
#' @param N minimum number of years of data. Don't do this for countries with less than \code{N} years of data
#' @return a vetor of CAL values. Names are years.
getCAL <- function(CTRY, pw, us, sex = "m", N = 75){
stopifnot(sex %in% c("m","f"))
# an internal function to estimate a0,
# similar to that proposed by Andreev-Kingkade (2011)
# don't worry about this- you could use Coale-Demeny rule of thumb instead...
AKq02a0 <- function(q0, sex = "m"){
sex <- rep(sex, length(q0))
ifelse(sex == "m",
ifelse(q0 < .0226, {0.1493 - 2.0367 * q0},
ifelse(q0 < 0.0785, {0.0244 + 3.4994 * q0},.2991)),
ifelse(q0 < 0.0170, {0.1490 - 2.0867 * q0},
ifelse(q0 < 0.0658, {0.0438 + 4.1075 * q0}, 0.3141))
)
}
Pop <- readHMDweb(CTRY, "Population", password = pw, username = us)
Deaths <- readHMDweb(CTRY, "Deaths_lexis", password = pw, username = us)
LY <- max(Deaths$Year)
FY <- min(Deaths$Year)
if (LY - FY < N){
stop("sorry, not enough years of data to have fun with this!")
}
# ---------------------------------------------------
# notice that lexis deaths open age group cohort is NA.
# tail(ALLD[[1]]) # notice that open age group cohort is NA.
# let's just split it 60/40 between a lower and upper triangle
# it's ad hoc, but then so is this script!
# ------------------------------------------------
# this does that:
Deaths <- do.call(rbind,lapply(split(Deaths, Deaths$Year), function(.DAT){
# copy last row
OPEN <- .DAT[nrow(.DAT), ]
# remove last row
.DAT <- .DAT[-nrow(.DAT), ]
# make two rows:
OPEN <- rbind(OPEN,OPEN)
# for selection
MFT <- c("Male","Female","Total")
OPEN[1,MFT] <- OPEN[1,MFT] * .6
OPEN[2,MFT] <- OPEN[2,MFT] * .4
OPEN$Cohort <- .DAT$Cohort[nrow(.DAT)] - c(0,1)
rbind(.DAT, OPEN)
}))
# lower triangle (LT) is when Year - Cohort - Age == 0
# upper triangle (UT) is when Year - Cohort - Age == 1
Deaths$Lexis <- with(Deaths, ifelse(Year - Cohort - Age == 0, "LT",
ifelse(Year - Cohort - Age == 1, "UT", NA )))
# need Cohort for reshaping
Pop$Cohort <- Pop$Year - Pop$Age
# --------------------------------------------------------
# Be aware, we are working with the AC shape.
# This ignores lower triangles in the most recent year.
# call will be based on a vertical stack of AC parallelograms
# rather than a perfectly tessalating stack of PC parallelograms...
# if you prefer the later, you'll need to rethink the following somewhat.
# --------------------------------------------------------
# make Age x Cohort matrices:
if (sex == "m"){
pop <- acast(Pop[Pop$Year < LY, ], Age ~ Cohort, value.var = "Male2")
du <- acast(Deaths[Deaths$Lexis == "UT" & Deaths$Year > FY, ], Age ~ Cohort, value.var = "Male")
dl <- acast(Deaths[Deaths$Lexis == "LT" & Deaths$Year < LY, ], Age ~ Cohort, value.var = "Male")
}
if (sex == "f"){
pop <- acast(Pop[Pop$Year < LY, ], Age ~ Cohort, value.var = "Female2")
du <- acast(Deaths[Deaths$Lexis == "UT" & Deaths$Year > FY, ], Age ~ Cohort, value.var = "Female")
dl <- acast(Deaths[Deaths$Lexis == "LT" & Deaths$Year < LY, ], Age ~ Cohort, value.var = "Female")
}
# --------------------------------------------------------
# cohort range not the same: dimension to be equal
cohs.sclice <- sort(intersect(intersect(colnames(pop), colnames(dl)), colnames(du)))
dl <- dl[, cohs.sclice]
du <- du[, cohs.sclice]
pop <- pop[, cohs.sclice]
# ---------------------------------------------------------
# now we have the 3 items necessary:
years <- as.integer(colnames(dl))
ages <- as.integer(rownames(dl))
#
mx <- (dl + du) / (pop + (1 / 3) * (dl - du))
qx <- (dl + du) / (pop + dl) # Eq 70 MPv5
ax <- ((mx + 1) * qx - mx) / (mx * qx) # Eq 71 MPv5
ax[1, ] <- AKq02a0(qx[1,], sex = sex)
# original formula only holds for continuous hazards. We instead
# will do some lifetable foo. Only need to get as far as Lx:
# this is a cheap way to deal with left truncation
qxtemp <- qx[,1:(nrow(qx)+4)]
NAind <- is.na(qxtemp)
qxtemp <- t(apply(qxtemp, 1, function(qa){
# qa <- qxtemp[111,]
if (any(is.na(qa))){
# arithmetic mean of first 5 non-NA cohort Lx values...
qa[is.na(qa)] <- mean(qa[!is.na(qa)][1:5], na.rm = TRUE)
}
qa
}))
qx[,1:(nrow(qx)+4)][NAind] <- qxtemp[NAind]
#qx[NAind] <- NA
ax[is.nan(ax)] <- .5
px <- 1 - qx # Eq 64 MPv5
px[is.nan(px)] <- 0
# lx needs to be done columnwise over px, argument 2 refers to the margin.
lx <- apply(px, 2, function(px.){ # Eq 65 MPv5
c(1, cumprod(px.[-length(px.)]))
})
rownames(lx) <- 0:110 # these got thrown off because l0 imputed.
lx[is.na(lx)] <- 0
dx <- lx * qx # Eq 66 MPv5
dx[is.na(dx)] <- 0
Lx <- lx - (1 - ax) * dx # Eq 67 MPv5
# now transform back to period.
LxAP <- AC2AP(Lx)
# This could be more elegant!
colSums(LxAP, na.rm = TRUE)
}
# demonstration for same countries
CAL <- do.call(rbind,mclapply(CNTRIES, function(CTRY){
CALm <- getCAL(CTRY=CTRY,pw=pw,us=us,"m")
CALf <- getCAL(CTRY=CTRY,pw=pw,us=us,"f")
years <- as.integer(CALm)
cbind(CTRY = CTRY, Year = years, Male = CALm, Female = CALf)
}))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment