Skip to content

Instantly share code, notes, and snippets.

@timriffe
Last active July 10, 2017 19:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save timriffe/b4eba40a52fe1dd62451c323183aefe9 to your computer and use it in GitHub Desktop.
Save timriffe/b4eba40a52fe1dd62451c323183aefe9 to your computer and use it in GitHub Desktop.
Code to make joyplot of HFC data
Name Code
Albania ALB
American Samoa ASM
Argentina ARG
Armenia ARM
Aruba ABW
Australia AUS
Austria AUT
Azerbaijan AZE
Bahamas BHS
Bahrain BHR
Bangladesh BGD
Belarus BLR
Belgium BEL
Bosnia and Herzegovina BIH
Brasil BRA
Bulgaria BGR
Canada CAN
Chile CHL
China CHN
Costa Rica CRI
Croatia HRV
Cuba CUB
Cyprus CYP
Czech Republic CZE
Czechoslovakia CSK
Denmark DNK
Egypt EGY
Estonia EST
Faroe Islands FRO
Fiji FJI
Finland FIN
France FRA
Georgia GEO
Germany DEUTNP
Germany, East DEUTE
Germany, West DEUTW
Greece GRC
Greenland GRL
Hong Kong HKG
Hungary HUN
Iceland ISL
India IND
Indonesia IDN
Iran IRN
Ireland IRL
Israel ISR
Italy ITA
Japan JPN
Jordan JOR
Kazakhstan KAZ
Kenya KEN
Kosovo RKS
Latvia LVA
Liechtenstein LIE
Lithuania LTU
Luxembourg LUX
Macedonia MKD
Malaysia MYS
Malta MLT
Mauritius MUS
Mexico MEX
Micronesia FSM
Moldova MDA
Mongolia MNG
Montenegro MNE
Netherlands NLD
New Zealand NZL
Norway NOR
Pakistan PAK
Palau PLW
Panama PAN
Philippines PHL
Poland POL
Portugal PRT
Qatar QAT
Republic of Korea KOR
Romania ROU
Russia RUS
San Marino SMR
Serbia SRB
Serbia and Montenegro SCG
Seychelles SYC
Singapore SGP
Slovakia SVK
Slovenia SVN
Spain ESP
Sri Lanka LKA
Sweden SWE
Switzerland CHE
Taiwan TWN
Thailand THA
Tunisia TUN
Turkey TUR
UK GBR_NP
UK, England and Wales GBRTENW
UK, Northern Ireland GBR_NIR
UK, Scotland GBR_SCO
Ukraine UKR
United States of America USA
Uruguay URY
Yugoslavia YUG
# ---------------------------------------------
# This data is from http://www.fertilitydata.org
# the "ASFR and CPFR, standardized age scale, All birth orders combined, All HFC Data" zip file.
# this is the filename as given in that zipped file:
HFC <- read.table("HFC_ASFRstand_TOT.txt",
sep = ",",
header = TRUE,
stringsAsFactors = FALSE,
strip.white = TRUE,
na.strings = ".")
# This selection sequence is hackish. The HFC has potentially different
# versions of the data for each country in the year 2013, so I wanted to
# select down to just a single series per country. Ugly ugly code here:
# let's take 2013 data. pretty recent, lots of countries
HFC <- HFC[HFC$Year1 == 2013 & HFC$AgeDef == "ACY" & !is.na(HFC$Vitality), ]
HFC <- HFC[order(HFC$Country, HFC$Split, HFC$Collection, HFC$Age), ]
# archaic way of solving the 'multiple splits problem' here:
# there must be a better way of doing this
HFCL1 <- split(HFC, list(HFC$Country))
# make nested list, now cutting out 'Splits'
HFCL2 <- lapply(HFCL1, function(X){ split(X,X$Split)})
# then select the first one only
HFCL3 <- lapply(HFCL2, "[[", 1)
# and glue back together into a long data.frame
HFC <- do.call(rbind, HFCL3)
# repeat for Collection
HFCL1 <- split(HFC, list(HFC$Country))
HFCL2 <- lapply(HFCL1, function(X){ split(X,X$Collection)})
HFCL3 <- lapply(HFCL2, "[[", 1)
HFC <- do.call(rbind, HFCL3)
#head(HFC)
# reworked version of joypoly(), flexible to different age ranges
joypoly2 <- function(Fx, Age, y, col = "#194f46", border = "#38f0ac"){
# a more robust way to handle differing age ranges
this.df <- data.frame(Age = 10:59, Fx = 0)
this.df$Fx[this.df$Age %in% Age] <- Fx
polygon(this.df$Age, this.df$Fx + y, # note, the arguments have generic names
col = col, # since the actual data we pass in will be
# different each time
border = border, # this is how we pass in args, can have same names!
lwd = 2)
}
# how many countries?
countries <- unique(HFC$Country)
Ncurves <- length(countries) # how many?
# vertical spacing between new curves:
space <- .06 # just a guess. too crowded?
# baselines are therefore:
baselines <- seq(0, space * Ncurves, length = Ncurves)
# determine y range of plot programmatically
ylim <- c(0, space * Ncurves + .15)
#country names ripped from website, cleaned up into csv, attached to this gist.
country.codes <- read.csv("countries.csv", stringsAsFactors = FALSE)
# this is a recode vector.
country.names <- country.codes$Name
names(country.names) <- country.codes$Code
#graphics.off()
#dev.new(height = 12, width = 5)
HFC$ASFR[is.na(HFC$ASFR)] <- 0
# sort countries by TFR
TFR <- tapply(HFC$ASFR, HFC$Country, sum)
TFR <- sort(TFR)
countries <- names(TFR)
pdf("joyHFC.pdf", height = 12, width = 5.6)
par(xaxs = "i", yaxs = "i", mai = c(.1, 1.6, .1, .1))
plot(NULL, type = "n",
xlim = c(13,48),
ylim = ylim,
axes = FALSE,
xlab = "",
ylab = "")
# draw polygons in loop
for (i in 1:length(countries)){
Fx <- HFC$ASFR[HFC$Country == countries[i]]
Age <- HFC$Age[HFC$Country == countries[i]]
# toggle polygon color
col <- ifelse(i %% 2 == 0, "#194f46", adjustcolor("#194f46", offset = c(.1, .1, .1, 0)))
joypoly2(Fx, Age, rev(baselines)[i], col = col)
}
# label with full country names
text(12, rev(baselines), country.names[countries], xpd = TRUE, pos = 2)
dev.off()
# end. The labels were worked up in Inkscape.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment