ulfelder/freedomintheworld.converter

## freedomintheworld.converter
# This script converts Freedom House's annual Freedom in the World data from
# the ugly, table-like Excel format in which it's posted into a data frame in R.
# It is set up to work on future updates, too, and should automatically adjust
# as years or countries are added.
#
# Before running this script:
#
# 1. Open a browser & go to http://www.freedomhouse.org/report-types/freedom-world
#
# 2. Download the file "Country ratings and status, FIW 1973-2013(EXCEL)"
#
# 3. Open that file.
#
# 4. Under the File menu, click on "Save As."
#
# 5. Select your R working directory as the destination, name the file "fiw2013",
#    and set "Save as type" to "Text (Tab delimited)".  Click "Yes" in response
#    to the warning that follows.
#
# Then run the following in R.

library(reshape)

fiw <- read.delim("fiw2013.txt", header = FALSE)  # Assumes file is in working directory
fiw2 <- fiw[9:dim(fiw)[1],]  # Lops off junk rows at the top
row.names(fiw2) <- NULL   # Removes bogus variable names
today <- Sys.Date()
thisyear <- as.numeric(substr(today,1,4))
ncountries <- nrow(fiw2[nchar(as.character(fiw2$V1)) > 1,])  # Counts countries
nyears <- thisyear - 1972 - 1  # Counts years, adjusts for no data for 1981
fiw3 <- fiw2[1:ncountries,1:((3 * nyears) + 1)]  # Trims the rest of the fat
fiwvars <- c("name", rep(c("civlibs", "polrights", "status"),
             times = thisyear - 1972 - 1) )  # Again, -1 bc no data for 1981
names(fiw3) <- fiwvars
fiw4 <- t(fiw3)
countries <- fiw4[1,]
fiw5 <- fiw4[-1,]
fiw6 <- melt(fiw5)
fiwyears <- c(rep(1972:1980, each = 3), rep(1982:(thisyear - 1), each = 3))
name <- rep(countries, each = length(fiwyears))
year <- rep(fiwyears, times = length(countries))
fiw7 <- cbind(name, year, fiw6)
fiw7[,4] <- NULL
names(fiw7) <- c("name", "year", "var", "value")

civlibs <- subset(fiw7, var=="civlibs")
civlibs[,3] <- NULL
names(civlibs) <- c("name", "year", "civlibs")
polrights <- subset(fiw7, var=="polrights")
polrights[,3] <- NULL
names(polrights) <- c("name", "year", "polrights")
status <- subset(fiw7, var=="status")
status[,3] <- NULL
names(status) <- c("name", "year", "status")
fiw8 <- merge(civlibs, polrights)
fiw9 <- merge(fiw8, status)

fiw9$name <- as.character(fiw9$name)
fiw9$civlibs <- as.numeric(as.character(fiw9$civlibs))
fiw9$polrights <- as.numeric(as.character(fiw9$polrights))
fiw9$status <- as.character(fiw9$status)
fiw10 <- subset(fiw9, is.na(civlibs)==FALSE)
fiw <- fiw10

rm(fiw2,fiw3,fiw4,fiw5,fiw6,fiw7,fiw8,fiw9,fiw10)

# After running this script, you'll probably want to add ID codes
# for countries to facilitate merging with other TSCS data. The
# 'countrycode' package is useful here. For example, to get
# Correlates of War numeric codes, you could run:

library(countrycode)
fiw$ccode <- countrycode(fiw$name, "country.name", "cown", warn = TRUE)

# Note that this process usually requires some massaging to get
# matches for all cases. One way to do this is to open countrycode_data,
# find the names it's using for the cases that don't match, replace
# the relevant country names in 'fiw' with the ones countrycode wants,
# and iterate this until all names match properly.
	# This script converts Freedom House's annual Freedom in the World data from
	# the ugly, table-like Excel format in which it's posted into a data frame in R.
	# It is set up to work on future updates, too, and should automatically adjust
	# as years or countries are added.
	#
	# Before running this script:
	#
	# 1. Open a browser & go to http://www.freedomhouse.org/report-types/freedom-world
	#
	# 2. Download the file "Country ratings and status, FIW 1973-2013(EXCEL)"
	#
	# 3. Open that file.
	#
	# 4. Under the File menu, click on "Save As."
	#
	# 5. Select your R working directory as the destination, name the file "fiw2013",
	# and set "Save as type" to "Text (Tab delimited)". Click "Yes" in response
	# to the warning that follows.
	#
	# Then run the following in R.

	library(reshape)

	fiw <- read.delim("fiw2013.txt", header = FALSE) # Assumes file is in working directory
	fiw2 <- fiw[9:dim(fiw)[1],] # Lops off junk rows at the top
	row.names(fiw2) <- NULL # Removes bogus variable names
	today <- Sys.Date()
	thisyear <- as.numeric(substr(today,1,4))
	ncountries <- nrow(fiw2[nchar(as.character(fiw2$V1)) > 1,]) # Counts countries
	nyears <- thisyear - 1972 - 1 # Counts years, adjusts for no data for 1981
	fiw3 <- fiw2[1:ncountries,1:((3 * nyears) + 1)] # Trims the rest of the fat
	fiwvars <- c("name", rep(c("civlibs", "polrights", "status"),
	times = thisyear - 1972 - 1) ) # Again, -1 bc no data for 1981
	names(fiw3) <- fiwvars
	fiw4 <- t(fiw3)
	countries <- fiw4[1,]
	fiw5 <- fiw4[-1,]
	fiw6 <- melt(fiw5)
	fiwyears <- c(rep(1972:1980, each = 3), rep(1982:(thisyear - 1), each = 3))
	name <- rep(countries, each = length(fiwyears))
	year <- rep(fiwyears, times = length(countries))
	fiw7 <- cbind(name, year, fiw6)
	fiw7[,4] <- NULL
	names(fiw7) <- c("name", "year", "var", "value")

	civlibs <- subset(fiw7, var=="civlibs")
	civlibs[,3] <- NULL
	names(civlibs) <- c("name", "year", "civlibs")
	polrights <- subset(fiw7, var=="polrights")
	polrights[,3] <- NULL
	names(polrights) <- c("name", "year", "polrights")
	status <- subset(fiw7, var=="status")
	status[,3] <- NULL
	names(status) <- c("name", "year", "status")
	fiw8 <- merge(civlibs, polrights)
	fiw9 <- merge(fiw8, status)

	fiw9$name <- as.character(fiw9$name)
	fiw9$civlibs <- as.numeric(as.character(fiw9$civlibs))
	fiw9$polrights <- as.numeric(as.character(fiw9$polrights))
	fiw9$status <- as.character(fiw9$status)
	fiw10 <- subset(fiw9, is.na(civlibs)==FALSE)
	fiw <- fiw10

	rm(fiw2,fiw3,fiw4,fiw5,fiw6,fiw7,fiw8,fiw9,fiw10)

	# After running this script, you'll probably want to add ID codes
	# for countries to facilitate merging with other TSCS data. The
	# 'countrycode' package is useful here. For example, to get
	# Correlates of War numeric codes, you could run:

	library(countrycode)
	fiw$ccode <- countrycode(fiw$name, "country.name", "cown", warn = TRUE)

	# Note that this process usually requires some massaging to get
	# matches for all cases. One way to do this is to open countrycode_data,
	# find the names it's using for the cases that don't match, replace
	# the relevant country names in 'fiw' with the ones countrycode wants,
	# and iterate this until all names match properly.