Last active
August 29, 2015 13:57
-
-
Save timriffe/9748534 to your computer and use it in GitHub Desktop.
a code chunk demonstrated in the 2014 Stanford formal demography workshop
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# author: tim riffe | |
# March 24, 2014 | |
# do with this as you will. If you sell it, I'l hunt you down. | |
# Thanks to Vladimir Canudas Romo for the idea! | |
# -------------------------------------------- | |
# start | |
# This package is under development! please test! | |
# installation is via github. See the README for instructions | |
#library(devtools) | |
#install_github("DemogBerkeley", subdir = "DemogBerkeley", username = "UCBdemography") | |
library(DemogBerkeley) | |
library(parallel) | |
library(reshape2) | |
# define a nifty function to get CAL. it's tricky!!! | |
getCAL <- function(CTRY, years){ | |
# watch out! HMD cohort data used the word 'Year' for the cohort column | |
colnames(CTRY)[colnames(CTRY) == "Year"] <- "Cohort" | |
# get the year | |
CTRY$Year <- CTRY$Cohort + CTRY$Age | |
# we need a matric Age by Cohort | |
# see formula 1 in Vladimir's second linked paper by Guillot (2002) | |
ACfem <- acast(CTRY, Age~Cohort, value.var = "Female") | |
# not all cohort values are observed, keep track of NAs | |
isNA <- is.na(ACfem) | |
# we need to take cumulative sum, so zero-out NAs | |
ACfem[isNA] <- 0 | |
# this is the nifty formula | |
ACfemCS <- exp(-apply(ACfem, 2, cumsum)) | |
# put the NAs back in as 0s | |
ACfemCS[isNA] <- 0 | |
# only use cohorts observed since birth | |
keepers <- !isNA[1, ] | |
ACfemCS <- ACfemCS[, keepers] | |
# flip back to AP perspective to grab a year! | |
APfemCS <- AC2AP(ACfemCS) | |
# grab the desired years. This could be more elegant! | |
colSums(APfemCS[, as.character(years)], na.rm = TRUE) | |
} | |
# --------------------------------------------- | |
# Demonstration: | |
# what are all the HMD Countries? | |
#CNTRIES <- getHMDcountries() | |
# let's check out Scandinavia... cuz they have good cohort data | |
CNTRIES <- c("FIN","DNK","NOR","SWE","ISL") | |
# do this live so that your password isn't saved in the script! | |
user <- userInput() | |
password <- userInput() | |
# grab the needed data item | |
ALLcMx <- mclapply(CNTRIES, function(CTRY, pw, us){ | |
try(readHMDweb(CTRY, "cMx_1x1", password = pw, username = us)) | |
}, pw = password, us = user) | |
# use demonstration! | |
Call <- lapply(ALLcMx, getCAL, years = 1950:1970) | |
Cal <- do.call(cbind, Call) | |
colnames(Cal) <- CNTRIES | |
matplot(years, Cal, type = 'l', col = gray(.5), lty= 1) | |
# EDIT: April 2, 2014 | |
# --------------------------------------------------- | |
# Vladimir wrote: | |
# I have a question related to the improvised presentation during the workshop. | |
# You mentioned that you had calculated the CALs by using the cohort death rates | |
# cMx_1x1 from HMD instead of period death rates Mx_1x1. I went just now to look | |
# for those cohort Mx and it seems the HMD only has them if there is a minimum | |
# of 30 ages or so of information, for example Denmark only has them from 1764 | |
# until 1981 even when information is available until 2011 for the period death | |
# rates. The death counts are there and I could of course calculate the exposure | |
# (cExposures_1x1 ) by using the population counts which are also there. But did | |
# you do that for your presentation? Thank you. | |
# --------------------------------------------------- | |
# I responded: | |
# You're right about the cMx thing. I had 30 min notice and took the easy route, | |
# only showing CAL for earlier years. I'll update today to make it go to the most | |
# recent year. Gotta look up the left truncation thing... | |
# --------------------------------------------------- | |
# now a bit better-------------- | |
# Need to calculate cohort exposures from population counts and deaths | |
# classified by Lexis triangles: | |
# this function is much more involved because it uses lifetable methodology rather than | |
# assuming continuous stuff. Grabs data from web on the fly and then calculates | |
# CAL for males and females. | |
#' @param CTRY HMD country code | |
#' @param CTRY pw HMD password | |
#' @param CTRY us HMD uername | |
#' @param Sex \code{"m"} or \code{"f"} | |
#' @param N minimum number of years of data. Don't do this for countries with less than \code{N} years of data | |
#' @return a vetor of CAL values. Names are years. | |
getCAL <- function(CTRY, pw, us, sex = "m", N = 75){ | |
stopifnot(sex %in% c("m","f")) | |
# an internal function to estimate a0, | |
# similar to that proposed by Andreev-Kingkade (2011) | |
# don't worry about this- you could use Coale-Demeny rule of thumb instead... | |
AKq02a0 <- function(q0, sex = "m"){ | |
sex <- rep(sex, length(q0)) | |
ifelse(sex == "m", | |
ifelse(q0 < .0226, {0.1493 - 2.0367 * q0}, | |
ifelse(q0 < 0.0785, {0.0244 + 3.4994 * q0},.2991)), | |
ifelse(q0 < 0.0170, {0.1490 - 2.0867 * q0}, | |
ifelse(q0 < 0.0658, {0.0438 + 4.1075 * q0}, 0.3141)) | |
) | |
} | |
Pop <- readHMDweb(CTRY, "Population", password = pw, username = us) | |
Deaths <- readHMDweb(CTRY, "Deaths_lexis", password = pw, username = us) | |
LY <- max(Deaths$Year) | |
FY <- min(Deaths$Year) | |
if (LY - FY < N){ | |
stop("sorry, not enough years of data to have fun with this!") | |
} | |
# --------------------------------------------------- | |
# notice that lexis deaths open age group cohort is NA. | |
# tail(ALLD[[1]]) # notice that open age group cohort is NA. | |
# let's just split it 60/40 between a lower and upper triangle | |
# it's ad hoc, but then so is this script! | |
# ------------------------------------------------ | |
# this does that: | |
Deaths <- do.call(rbind,lapply(split(Deaths, Deaths$Year), function(.DAT){ | |
# copy last row | |
OPEN <- .DAT[nrow(.DAT), ] | |
# remove last row | |
.DAT <- .DAT[-nrow(.DAT), ] | |
# make two rows: | |
OPEN <- rbind(OPEN,OPEN) | |
# for selection | |
MFT <- c("Male","Female","Total") | |
OPEN[1,MFT] <- OPEN[1,MFT] * .6 | |
OPEN[2,MFT] <- OPEN[2,MFT] * .4 | |
OPEN$Cohort <- .DAT$Cohort[nrow(.DAT)] - c(0,1) | |
rbind(.DAT, OPEN) | |
})) | |
# lower triangle (LT) is when Year - Cohort - Age == 0 | |
# upper triangle (UT) is when Year - Cohort - Age == 1 | |
Deaths$Lexis <- with(Deaths, ifelse(Year - Cohort - Age == 0, "LT", | |
ifelse(Year - Cohort - Age == 1, "UT", NA ))) | |
# need Cohort for reshaping | |
Pop$Cohort <- Pop$Year - Pop$Age | |
# -------------------------------------------------------- | |
# Be aware, we are working with the AC shape. | |
# This ignores lower triangles in the most recent year. | |
# call will be based on a vertical stack of AC parallelograms | |
# rather than a perfectly tessalating stack of PC parallelograms... | |
# if you prefer the later, you'll need to rethink the following somewhat. | |
# -------------------------------------------------------- | |
# make Age x Cohort matrices: | |
if (sex == "m"){ | |
pop <- acast(Pop[Pop$Year < LY, ], Age ~ Cohort, value.var = "Male2") | |
du <- acast(Deaths[Deaths$Lexis == "UT" & Deaths$Year > FY, ], Age ~ Cohort, value.var = "Male") | |
dl <- acast(Deaths[Deaths$Lexis == "LT" & Deaths$Year < LY, ], Age ~ Cohort, value.var = "Male") | |
} | |
if (sex == "f"){ | |
pop <- acast(Pop[Pop$Year < LY, ], Age ~ Cohort, value.var = "Female2") | |
du <- acast(Deaths[Deaths$Lexis == "UT" & Deaths$Year > FY, ], Age ~ Cohort, value.var = "Female") | |
dl <- acast(Deaths[Deaths$Lexis == "LT" & Deaths$Year < LY, ], Age ~ Cohort, value.var = "Female") | |
} | |
# -------------------------------------------------------- | |
# cohort range not the same: dimension to be equal | |
cohs.sclice <- sort(intersect(intersect(colnames(pop), colnames(dl)), colnames(du))) | |
dl <- dl[, cohs.sclice] | |
du <- du[, cohs.sclice] | |
pop <- pop[, cohs.sclice] | |
# --------------------------------------------------------- | |
# now we have the 3 items necessary: | |
years <- as.integer(colnames(dl)) | |
ages <- as.integer(rownames(dl)) | |
# | |
mx <- (dl + du) / (pop + (1 / 3) * (dl - du)) | |
qx <- (dl + du) / (pop + dl) # Eq 70 MPv5 | |
ax <- ((mx + 1) * qx - mx) / (mx * qx) # Eq 71 MPv5 | |
ax[1, ] <- AKq02a0(qx[1,], sex = sex) | |
# original formula only holds for continuous hazards. We instead | |
# will do some lifetable foo. Only need to get as far as Lx: | |
# this is a cheap way to deal with left truncation | |
qxtemp <- qx[,1:(nrow(qx)+4)] | |
NAind <- is.na(qxtemp) | |
qxtemp <- t(apply(qxtemp, 1, function(qa){ | |
# qa <- qxtemp[111,] | |
if (any(is.na(qa))){ | |
# arithmetic mean of first 5 non-NA cohort Lx values... | |
qa[is.na(qa)] <- mean(qa[!is.na(qa)][1:5], na.rm = TRUE) | |
} | |
qa | |
})) | |
qx[,1:(nrow(qx)+4)][NAind] <- qxtemp[NAind] | |
#qx[NAind] <- NA | |
ax[is.nan(ax)] <- .5 | |
px <- 1 - qx # Eq 64 MPv5 | |
px[is.nan(px)] <- 0 | |
# lx needs to be done columnwise over px, argument 2 refers to the margin. | |
lx <- apply(px, 2, function(px.){ # Eq 65 MPv5 | |
c(1, cumprod(px.[-length(px.)])) | |
}) | |
rownames(lx) <- 0:110 # these got thrown off because l0 imputed. | |
lx[is.na(lx)] <- 0 | |
dx <- lx * qx # Eq 66 MPv5 | |
dx[is.na(dx)] <- 0 | |
Lx <- lx - (1 - ax) * dx # Eq 67 MPv5 | |
# now transform back to period. | |
LxAP <- AC2AP(Lx) | |
# This could be more elegant! | |
colSums(LxAP, na.rm = TRUE) | |
} | |
# demonstration for same countries | |
CAL <- do.call(rbind,mclapply(CNTRIES, function(CTRY){ | |
CALm <- getCAL(CTRY=CTRY,pw=pw,us=us,"m") | |
CALf <- getCAL(CTRY=CTRY,pw=pw,us=us,"f") | |
years <- as.integer(CALm) | |
cbind(CTRY = CTRY, Year = years, Male = CALm, Female = CALf) | |
})) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment