Last active
July 10, 2017 19:59
-
-
Save timriffe/b4eba40a52fe1dd62451c323183aefe9 to your computer and use it in GitHub Desktop.
Code to make joyplot of HFC data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Name | Code | |
---|---|---|
Albania | ALB | |
American Samoa | ASM | |
Argentina | ARG | |
Armenia | ARM | |
Aruba | ABW | |
Australia | AUS | |
Austria | AUT | |
Azerbaijan | AZE | |
Bahamas | BHS | |
Bahrain | BHR | |
Bangladesh | BGD | |
Belarus | BLR | |
Belgium | BEL | |
Bosnia and Herzegovina | BIH | |
Brasil | BRA | |
Bulgaria | BGR | |
Canada | CAN | |
Chile | CHL | |
China | CHN | |
Costa Rica | CRI | |
Croatia | HRV | |
Cuba | CUB | |
Cyprus | CYP | |
Czech Republic | CZE | |
Czechoslovakia | CSK | |
Denmark | DNK | |
Egypt | EGY | |
Estonia | EST | |
Faroe Islands | FRO | |
Fiji | FJI | |
Finland | FIN | |
France | FRA | |
Georgia | GEO | |
Germany | DEUTNP | |
Germany, East | DEUTE | |
Germany, West | DEUTW | |
Greece | GRC | |
Greenland | GRL | |
Hong Kong | HKG | |
Hungary | HUN | |
Iceland | ISL | |
India | IND | |
Indonesia | IDN | |
Iran | IRN | |
Ireland | IRL | |
Israel | ISR | |
Italy | ITA | |
Japan | JPN | |
Jordan | JOR | |
Kazakhstan | KAZ | |
Kenya | KEN | |
Kosovo | RKS | |
Latvia | LVA | |
Liechtenstein | LIE | |
Lithuania | LTU | |
Luxembourg | LUX | |
Macedonia | MKD | |
Malaysia | MYS | |
Malta | MLT | |
Mauritius | MUS | |
Mexico | MEX | |
Micronesia | FSM | |
Moldova | MDA | |
Mongolia | MNG | |
Montenegro | MNE | |
Netherlands | NLD | |
New Zealand | NZL | |
Norway | NOR | |
Pakistan | PAK | |
Palau | PLW | |
Panama | PAN | |
Philippines | PHL | |
Poland | POL | |
Portugal | PRT | |
Qatar | QAT | |
Republic of Korea | KOR | |
Romania | ROU | |
Russia | RUS | |
San Marino | SMR | |
Serbia | SRB | |
Serbia and Montenegro | SCG | |
Seychelles | SYC | |
Singapore | SGP | |
Slovakia | SVK | |
Slovenia | SVN | |
Spain | ESP | |
Sri Lanka | LKA | |
Sweden | SWE | |
Switzerland | CHE | |
Taiwan | TWN | |
Thailand | THA | |
Tunisia | TUN | |
Turkey | TUR | |
UK | GBR_NP | |
UK, England and Wales | GBRTENW | |
UK, Northern Ireland | GBR_NIR | |
UK, Scotland | GBR_SCO | |
Ukraine | UKR | |
United States of America | USA | |
Uruguay | URY | |
Yugoslavia | YUG |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# --------------------------------------------- | |
# This data is from http://www.fertilitydata.org | |
# the "ASFR and CPFR, standardized age scale, All birth orders combined, All HFC Data" zip file. | |
# this is the filename as given in that zipped file: | |
HFC <- read.table("HFC_ASFRstand_TOT.txt", | |
sep = ",", | |
header = TRUE, | |
stringsAsFactors = FALSE, | |
strip.white = TRUE, | |
na.strings = ".") | |
# This selection sequence is hackish. The HFC has potentially different | |
# versions of the data for each country in the year 2013, so I wanted to | |
# select down to just a single series per country. Ugly ugly code here: | |
# let's take 2013 data. pretty recent, lots of countries | |
HFC <- HFC[HFC$Year1 == 2013 & HFC$AgeDef == "ACY" & !is.na(HFC$Vitality), ] | |
HFC <- HFC[order(HFC$Country, HFC$Split, HFC$Collection, HFC$Age), ] | |
# archaic way of solving the 'multiple splits problem' here: | |
# there must be a better way of doing this | |
HFCL1 <- split(HFC, list(HFC$Country)) | |
# make nested list, now cutting out 'Splits' | |
HFCL2 <- lapply(HFCL1, function(X){ split(X,X$Split)}) | |
# then select the first one only | |
HFCL3 <- lapply(HFCL2, "[[", 1) | |
# and glue back together into a long data.frame | |
HFC <- do.call(rbind, HFCL3) | |
# repeat for Collection | |
HFCL1 <- split(HFC, list(HFC$Country)) | |
HFCL2 <- lapply(HFCL1, function(X){ split(X,X$Collection)}) | |
HFCL3 <- lapply(HFCL2, "[[", 1) | |
HFC <- do.call(rbind, HFCL3) | |
#head(HFC) | |
# reworked version of joypoly(), flexible to different age ranges | |
joypoly2 <- function(Fx, Age, y, col = "#194f46", border = "#38f0ac"){ | |
# a more robust way to handle differing age ranges | |
this.df <- data.frame(Age = 10:59, Fx = 0) | |
this.df$Fx[this.df$Age %in% Age] <- Fx | |
polygon(this.df$Age, this.df$Fx + y, # note, the arguments have generic names | |
col = col, # since the actual data we pass in will be | |
# different each time | |
border = border, # this is how we pass in args, can have same names! | |
lwd = 2) | |
} | |
# how many countries? | |
countries <- unique(HFC$Country) | |
Ncurves <- length(countries) # how many? | |
# vertical spacing between new curves: | |
space <- .06 # just a guess. too crowded? | |
# baselines are therefore: | |
baselines <- seq(0, space * Ncurves, length = Ncurves) | |
# determine y range of plot programmatically | |
ylim <- c(0, space * Ncurves + .15) | |
#country names ripped from website, cleaned up into csv, attached to this gist. | |
country.codes <- read.csv("countries.csv", stringsAsFactors = FALSE) | |
# this is a recode vector. | |
country.names <- country.codes$Name | |
names(country.names) <- country.codes$Code | |
#graphics.off() | |
#dev.new(height = 12, width = 5) | |
HFC$ASFR[is.na(HFC$ASFR)] <- 0 | |
# sort countries by TFR | |
TFR <- tapply(HFC$ASFR, HFC$Country, sum) | |
TFR <- sort(TFR) | |
countries <- names(TFR) | |
pdf("joyHFC.pdf", height = 12, width = 5.6) | |
par(xaxs = "i", yaxs = "i", mai = c(.1, 1.6, .1, .1)) | |
plot(NULL, type = "n", | |
xlim = c(13,48), | |
ylim = ylim, | |
axes = FALSE, | |
xlab = "", | |
ylab = "") | |
# draw polygons in loop | |
for (i in 1:length(countries)){ | |
Fx <- HFC$ASFR[HFC$Country == countries[i]] | |
Age <- HFC$Age[HFC$Country == countries[i]] | |
# toggle polygon color | |
col <- ifelse(i %% 2 == 0, "#194f46", adjustcolor("#194f46", offset = c(.1, .1, .1, 0))) | |
joypoly2(Fx, Age, rev(baselines)[i], col = col) | |
} | |
# label with full country names | |
text(12, rev(baselines), country.names[countries], xpd = TRUE, pos = 2) | |
dev.off() | |
# end. The labels were worked up in Inkscape. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment