Skip to content

Instantly share code, notes, and snippets.

@timriffe
Created July 19, 2014 21:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save timriffe/ef2b745c8d374c3aaca6 to your computer and use it in GitHub Desktop.
Save timriffe/ef2b745c8d374c3aaca6 to your computer and use it in GitHub Desktop.
Deriving chronological and thanatological fertility rates from Croatian linked historical demographic data
# download linked data from here:
# data from http://www.demog.berkeley.edu/croatia/datadir/Linkfiles/linkfiles.htm
# add path to where data is, or set working directory
CRO <- read.table("croatdata4.txt.gz",
stringsAsFactors = FALSE, sep = "\t", col.names = c('bid','bdate','sex','motherid','fatherid','mid1','mid2','mid3','mid4','mid5',
'sid1','sid2','sid3','sid4','sid5','mdate1','mdate2','mdate3','mdate4','mdate5','did',
'ddate','idk1','idk2','idk3','idk4','idk5','idk6','idk7','idk8','idk9','idk10','idk11',
'idk12','idk13','idk14','dobk1','dobk2','dobk3','dobk4','dobk5','dobk6','dobk7','dobk8','dobk9',
'dobk10','dobk11','dobk12','dobk13','dobk14','sidk1','sidk2','sidk3','sidk4','sidk5','sidk6','sidk7',
'sidk8','sidk9','sidk10','sidk11','sidk12','sidk13','sidk14','remark1','remark2','remark3','remark4','remark5',
'remark6','remark7','remark8','remark9','remark10','remark11','remark12','remark13',
'remark14','remarok1','remarok2','remarok3','remarok4','remarok5','remarok6','remarok7',
'remarok8','remarok9','remarok10','remarok11','remarok12','remarok13','remarok14','park1',
'park2','park3','park4','park5','park6','park7','park8','park9','park10','park11','park12','park13','park14','lastdate')
)
# add origin to decimal dates:
DateIDS <- which(grepl("date", colnames(CRO)) | grepl("dob", colnames(CRO)))
CRO[DateIDS] <- lapply(CRO[,DateIDS], function(x){
as.numeric(x) + 1700
})
# only known dates of death (this will condition both results, but is necessary for now)
CRO <- CRO[!is.na(CRO$ddate), ]
dobkcols <- paste0("dobk",1:14)
# tricky. Make ever-had a child column, anyk:
CRO$anyk <- apply(CRO,1,function(x){
any(!is.na(unlist(x[dobkcols])))
})
# lifespan
CRO$LS <- CRO$ddate - CRO$bdate
# take just necessary columns for thano fertility
# put in long format
CROL <- do.call(rbind,lapply(1:14, function(cc, CRO){
DAT <- CRO[,c("bdate","ddate", "sex", "anyk","LS", paste0("dobk", cc))]
colnames(DAT)[5] <- "dobk"
DAT$bo <- cc
DAT
}, CRO = CRO))
# remove NAs of higher-than 1 order births if
# dim(CROL)
CROL <- CROL[!with(CROL, bo > 1 & is.na(dobk)),]
# hist(CROL$bo[CROL$sex == "m" & !is.na(CROL$dobk)])
# calculate thanatological age at birth of child and chronological age at birth of child
CROL$tak <- CROL$ddate - CROL$dobk
CROL$cak <- CROL$dobk - CROL$bdate
# uh oh. maybe linkage in wrong direction somehow?
#hist(CROL$tak[CROL$tak < 0 & !is.na(CROL$tak)])
# should remove cases less than -2:
CROL <- CROL[(!CROL$tak < -2) | is.na(CROL$dobk), ]
# now any remaining negatives get set to zero, assume it's a precision issue:
CROL$tak[CROL$tak < 0] <- 0
# round tak
CROL$takfloor <- floor(CROL$tak)
CROL$cakfloor <- floor(CROL$cak)
#matplot(t(table(CROL$sex,CROL$takfloor)),type='l',lty=1)
# make exposure table, based on LS
# when calculating cohort exposures by aggregating element-wise over
# lifelines, chronological and thanatological exposures are
# IDENTICAL
# because in this pooled case we have a de facto closed cohort, which by definition is stationary
# !wow!
# note: exposures don't come from the long file, otherwise they get
# included redundantly for each birth...
ME <- rowSums(sapply(CRO$LS[CRO$sex=="m"], function(x){
c(rep(1,floor(x)),x-floor(x),rep(0,101-floor(x)-1))
}))
FE <- rowSums(sapply(CRO$LS[CRO$sex=="f"], function(x){
c(rep(1,floor(x)),x-floor(x),rep(0,101-floor(x)-1))
}))
E <- cbind(Female = FE, male = ME)
rownames(E) <- 0:100
#library(Pyramid)
#Pyramid(ME,FE)
# tabulate births
TBirths <- table(CROL$takfloor,CROL$sex)
CBirths <- table(CROL$cakfloor,CROL$sex)
CB <- TB <- E * NA
TB[rownames(TBirths),] <- TBirths
CB[rownames(CBirths),] <- CBirths
# plot using my quirks
yticks <- pretty(CB/E)
png("Figures/CROfert.png")
par(xpd=TRUE,xaxs="i",yaxs="i",mai=c(1,.4,.4,.4))
plot(NULL,type="n",xlim=c(0,80),ylim=range(yticks),axes=FALSE,
xlab="",ylab="",panel.first=list(
rect(0,0,80,max(yticks),col=gray(.95),border=NA),
segments(0,yticks,80,yticks,col="white"),
segments(seq(10,70,by=10),0,seq(10,70,by=10),max(yticks),col="white"),
text(0,yticks,yticks,pos=2,cex=.8),
text(seq(0,80,by=20),0,seq(0,80,by=20),pos=1,cex=.8)))
matplot(0:100,CB/E,type = 'l',add=TRUE,lty=1,col=c("red","blue"))
matplot(0:100,TB/E,type = 'l',add=TRUE,lty=2,col=c("red","blue"))
arrows(40,-.025,60,-.025,length=.1)
arrows(40,-.018,20,-.018,length=.1,lty=2)
text(40,-.018,"years left",pos=4)
text(40,-.025,"years lived",pos=2)
legend("topright",bty="n",col=c("red","red","blue","blue"),lty=c(1,2,1,2),legend=c("females chronological","females thanatological", "males chronological", "males thanatological"))
text(2,max(yticks)*1.02,"Fertility Rate",pos=3)
dev.off()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment