timriffe/HFDget.R

## HFDget.R
# instructions:

#1) Register for the Human Fertility Database (http://www.humanfertility.org)
#2) Download the complete zip file from this page: http://www.humanfertility.org/cgi-bin/zipfiles.php (it's the #one labeled "All types of HFD data"
#3) make a folder whereever you keep your data, and call it something memorable, like, 'HFD'
#4) unpack the HFD zip file, which will have a couple nested folders, the last of which contains the text files #for each variable/series.- move these into your HFD directory.

# then the following function ought to work just fine for now:

HFDget <- function(abbrev = TRUE, countries = FALSE, years = FALSE, NoCohortNAs = TRUE, path) {
	# where the files at?
	if (missing(path)){
		path <- choose.dir()
	}

	# remove '+' and '-'
	parseages1 <- function(x){
		gsub(pattern="\\-",replacement="",x)
	}
	parseages2 <- function(x){
		as.integer(gsub(pattern="\\+",replacement="",x))
	}

	# descriptions
	getdescription <- function(x){
		readLines(paste(path,x,sep=""),n=1)
	}

	HFDfiles <- list.files(path)
	HFDshorts <- gsub(pattern = ".txt", replacement = "",HFDfiles)

	if (abbrev==FALSE){
		fullnames <- unlist(sapply(HFDfiles, getdescription))
		selection <- HFDfiles[fullnames == select.list(fullnames)]
	} else {
		selection <- HFDfiles[HFDshorts == select.list(HFDshorts)]
	}

	DATA <- read.table(paste(path, "\\", selection, sep = ""), skip = 2, na.strings = ".", as.is = TRUE, header = TRUE)

	# remove "+" and "-" from open ages
	if ("Age" %in% colnames(DATA)){
		DATA$Age <- parseages2(parseages1(DATA$Age))
	}

	# ------- select countries -----------
	# get a vector of country codes
	if ("Code" %in% colnames(DATA)){
		ctries <- unique(DATA$Code)

		# optional countries
		if (is.logical(countries)){
			if (countries == TRUE){
				ctry <- select.list(ctries, multiple = TRUE)
				ind <- DATA$Code %in% ctry
				DATA <- DATA[ind, ]
			}
		} else {
			if (is.character(countries)){
				ind <- DATA$Code %in% countries
				DATA <- DATA[ind,]
			} else {
				cat("\n\'countries\' must be either a logical, if TRUE, a selection menu will pop up,\nor a character vector of HFD country codes\nNo special country selection will be made.")
			}
		}
	}
	# ------- select years -----------
	# get a vector of country codes

	if ("Year" %in% colnames(DATA)){
		yrs <- unique(DATA$Year)

		# optional countries
		if (is.logical(years)){
			if (years==TRUE){
				yr <- select.list(yrs, multiple = TRUE)
				ind <- DATA$Year %in% yr
				DATA <- DATA[ind, ]
			}
		} else {
			if (is.integer(years)){
				ind <- DATA$Year %in% years
				DATA <- DATA[ind, ]
			} else {
				cat("\n\'years\' must be either a logical, if TRUE, a selection menu will pop up,\nor a numeric vector of years\nNo special country selection will be made.")
			}
		}
	}

	# in case user wants to impute values for cohorts in open age groups:
	if (NoCohortNAs == TRUE){
		if ("Cohort" %in% colnames(DATA)){
			if (any(is.na(DATA$Cohort))){
				# maybe not the clearest code here:
				ind <- DATA$Age == 12 & is.na(DATA$Cohort)
				DATA$Cohort[ind] <- DATA$Cohort[c(FALSE,ind[-length(ind)])]

				ind <- DATA$Age == 55 & is.na(DATA$Cohort)
				DATA$Cohort[ind] <- DATA$Cohort[c(ind[-1],FALSE)]
			}
		}
	}
	return(DATA)
}
	# instructions:

	#1) Register for the Human Fertility Database (http://www.humanfertility.org)
	#2) Download the complete zip file from this page: http://www.humanfertility.org/cgi-bin/zipfiles.php (it's the #one labeled "All types of HFD data"
	#3) make a folder whereever you keep your data, and call it something memorable, like, 'HFD'
	#4) unpack the HFD zip file, which will have a couple nested folders, the last of which contains the text files #for each variable/series.- move these into your HFD directory.

	# then the following function ought to work just fine for now:

	HFDget <- function(abbrev = TRUE, countries = FALSE, years = FALSE, NoCohortNAs = TRUE, path) {
	# where the files at?
	if (missing(path)){
	path <- choose.dir()
	}

	# remove '+' and '-'
	parseages1 <- function(x){
	gsub(pattern="\\-",replacement="",x)
	}
	parseages2 <- function(x){
	as.integer(gsub(pattern="\\+",replacement="",x))
	}

	# descriptions
	getdescription <- function(x){
	readLines(paste(path,x,sep=""),n=1)
	}

	HFDfiles <- list.files(path)
	HFDshorts <- gsub(pattern = ".txt", replacement = "",HFDfiles)

	if (abbrev==FALSE){
	fullnames <- unlist(sapply(HFDfiles, getdescription))
	selection <- HFDfiles[fullnames == select.list(fullnames)]
	} else {
	selection <- HFDfiles[HFDshorts == select.list(HFDshorts)]
	}

	DATA <- read.table(paste(path, "\\", selection, sep = ""), skip = 2, na.strings = ".", as.is = TRUE, header = TRUE)

	# remove "+" and "-" from open ages
	if ("Age" %in% colnames(DATA)){
	DATA$Age <- parseages2(parseages1(DATA$Age))
	}

	# ------- select countries -----------
	# get a vector of country codes
	if ("Code" %in% colnames(DATA)){
	ctries <- unique(DATA$Code)

	# optional countries
	if (is.logical(countries)){
	if (countries == TRUE){
	ctry <- select.list(ctries, multiple = TRUE)
	ind <- DATA$Code %in% ctry
	DATA <- DATA[ind, ]
	}
	} else {
	if (is.character(countries)){
	ind <- DATA$Code %in% countries
	DATA <- DATA[ind,]
	} else {
	cat("\n\'countries\' must be either a logical, if TRUE, a selection menu will pop up,\nor a character vector of HFD country codes\nNo special country selection will be made.")
	}
	}
	}
	# ------- select years -----------
	# get a vector of country codes

	if ("Year" %in% colnames(DATA)){
	yrs <- unique(DATA$Year)

	# optional countries
	if (is.logical(years)){
	if (years==TRUE){
	yr <- select.list(yrs, multiple = TRUE)
	ind <- DATA$Year %in% yr
	DATA <- DATA[ind, ]
	}
	} else {
	if (is.integer(years)){
	ind <- DATA$Year %in% years
	DATA <- DATA[ind, ]
	} else {
	cat("\n\'years\' must be either a logical, if TRUE, a selection menu will pop up,\nor a numeric vector of years\nNo special country selection will be made.")
	}
	}
	}

	# in case user wants to impute values for cohorts in open age groups:
	if (NoCohortNAs == TRUE){
	if ("Cohort" %in% colnames(DATA)){
	if (any(is.na(DATA$Cohort))){
	# maybe not the clearest code here:
	ind <- DATA$Age == 12 & is.na(DATA$Cohort)
	DATA$Cohort[ind] <- DATA$Cohort[c(FALSE,ind[-length(ind)])]

	ind <- DATA$Age == 55 & is.na(DATA$Cohort)
	DATA$Cohort[ind] <- DATA$Cohort[c(ind[-1],FALSE)]
	}
	}
	}
	return(DATA)
	}