Skip to content

Instantly share code, notes, and snippets.

@timriffe
Created December 4, 2011 13:17
Show Gist options
  • Save timriffe/1430171 to your computer and use it in GitHub Desktop.
Save timriffe/1430171 to your computer and use it in GitHub Desktop.
HFDget(), a function for getting HFD data into R
# instructions:
#1) Register for the Human Fertility Database (http://www.humanfertility.org)
#2) Download the complete zip file from this page: http://www.humanfertility.org/cgi-bin/zipfiles.php (it's the #one labeled "All types of HFD data"
#3) make a folder whereever you keep your data, and call it something memorable, like, 'HFD'
#4) unpack the HFD zip file, which will have a couple nested folders, the last of which contains the text files #for each variable/series.- move these into your HFD directory.
# then the following function ought to work just fine for now:
HFDget <- function(abbrev = TRUE, countries = FALSE, years = FALSE, NoCohortNAs = TRUE, path) {
# where the files at?
if (missing(path)){
path <- choose.dir()
}
# remove '+' and '-'
parseages1 <- function(x){
gsub(pattern="\\-",replacement="",x)
}
parseages2 <- function(x){
as.integer(gsub(pattern="\\+",replacement="",x))
}
# descriptions
getdescription <- function(x){
readLines(paste(path,x,sep=""),n=1)
}
HFDfiles <- list.files(path)
HFDshorts <- gsub(pattern = ".txt", replacement = "",HFDfiles)
if (abbrev==FALSE){
fullnames <- unlist(sapply(HFDfiles, getdescription))
selection <- HFDfiles[fullnames == select.list(fullnames)]
} else {
selection <- HFDfiles[HFDshorts == select.list(HFDshorts)]
}
DATA <- read.table(paste(path, "\\", selection, sep = ""), skip = 2, na.strings = ".", as.is = TRUE, header = TRUE)
# remove "+" and "-" from open ages
if ("Age" %in% colnames(DATA)){
DATA$Age <- parseages2(parseages1(DATA$Age))
}
# ------- select countries -----------
# get a vector of country codes
if ("Code" %in% colnames(DATA)){
ctries <- unique(DATA$Code)
# optional countries
if (is.logical(countries)){
if (countries == TRUE){
ctry <- select.list(ctries, multiple = TRUE)
ind <- DATA$Code %in% ctry
DATA <- DATA[ind, ]
}
} else {
if (is.character(countries)){
ind <- DATA$Code %in% countries
DATA <- DATA[ind,]
} else {
cat("\n\'countries\' must be either a logical, if TRUE, a selection menu will pop up,\nor a character vector of HFD country codes\nNo special country selection will be made.")
}
}
}
# ------- select years -----------
# get a vector of country codes
if ("Year" %in% colnames(DATA)){
yrs <- unique(DATA$Year)
# optional countries
if (is.logical(years)){
if (years==TRUE){
yr <- select.list(yrs, multiple = TRUE)
ind <- DATA$Year %in% yr
DATA <- DATA[ind, ]
}
} else {
if (is.integer(years)){
ind <- DATA$Year %in% years
DATA <- DATA[ind, ]
} else {
cat("\n\'years\' must be either a logical, if TRUE, a selection menu will pop up,\nor a numeric vector of years\nNo special country selection will be made.")
}
}
}
# in case user wants to impute values for cohorts in open age groups:
if (NoCohortNAs == TRUE){
if ("Cohort" %in% colnames(DATA)){
if (any(is.na(DATA$Cohort))){
# maybe not the clearest code here:
ind <- DATA$Age == 12 & is.na(DATA$Cohort)
DATA$Cohort[ind] <- DATA$Cohort[c(FALSE,ind[-length(ind)])]
ind <- DATA$Age == 55 & is.na(DATA$Cohort)
DATA$Cohort[ind] <- DATA$Cohort[c(ind[-1],FALSE)]
}
}
}
return(DATA)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment