# twedl/download_world_trade_db.R

## download_world_trade_db.R
# Download the World Trade Database from http://cid.econ.ucdavis.edu/ucdstats.html
# From documentation: value is in thousands of current USD.
library(reshape2)
library(dplyr)
library(readr)

process_file <- function(f_name) {
  # Download the zip archive at URL `f_name`, extract it into the current
  # working directory, and return the path of the first extracted file.
  #
  # Args:
  #   f_name: URL of a zip archive.
  # Returns:
  #   Path (character scalar) of the first file extracted from the archive.
  # Errors:
  #   Stops if the download reports a non-zero status or the archive
  #   yields no files.

  # temporary location for the archive; fileext needs the leading dot
  tf <- tempfile(pattern = "wtdb", fileext = ".zip")

  # always clean up the archive, even when the download or unzip fails
  on.exit(unlink(tf), add = TRUE)

  # download file at url `f_name` to tempfile `tf`
  status <- download.file(url = f_name, destfile = tf, method = "curl")
  if (status != 0) {
    stop("download failed (status ", status, "): ", f_name, call. = FALSE)
  }

  # file is zipped; unzip it. there's two files inside, take the first one
  # (second one is a readme)
  extracted <- unzip(tf)
  if (length(extracted) == 0) {
    stop("no files extracted from archive: ", f_name, call. = FALSE)
  }

  # return the filename of the extracted file
  extracted[[1]]
}

# Downloads the data and returns a data frame with years 80-97 (stored as
# 1980-1997 in the `year` column) and all SITC together.
#
# Returns a data frame with columns `sitc`, `value`, `year`, `imp_name`
# and `exp_name`; the numeric country codes are replaced by country names.
main <- function() {
  base_url <- "http://cid.econ.ucdavis.edu/data/ucdstatcan/"

  country_file <- process_file(paste0(base_url, "COUNTRY.TXT.zip"))

  # save country names and codes (the first 7 lines of the file are skipped)
  countries <- read_table(
    country_file,
    skip = 7,
    col_names = c("code", "name"),
    col_types = cols(code = col_character(), name = col_character())
  )

  # download/unzip/read one data file
  # year = 80,...,97; index = 1,2,3
  load_wtdb <- function(year, index) {
    data_file <- process_file(
      paste0(base_url, "Wtdb", year, "_", index, ".asc.txt.zip")
    )

    trade <- read_table(
      data_file,
      col_names = c("imp_code", "sitc", "exp_code", "value"),
      col_types = cols(.default = "c", value = "d")
    )

    # the file itself carries no year, so record it on every row
    trade %>% mutate(year = 1900 + year)
  }

  # every year/index pair needed for the full dataset
  # (expand.grid varies the year fastest, so files are fetched index by index)
  grid <- expand.grid(year = 80:97, index = 1:3)

  # one data frame per downloaded file
  parts <- Map(load_wtdb, grid$year, grid$index)

  # stack all pieces in a single pass; repeated rbind() re-copies the
  # accumulated rows for every file (original note: 6.3 million rows)
  combined <- bind_rows(parts)

  # merge on the country names using the country code list;
  # rename so we keep track of which name is the importer / exporter
  combined <- combined %>%
    left_join(countries, by = c("imp_code" = "code")) %>%
    rename(imp_name = name) %>%
    left_join(countries, by = c("exp_code" = "code")) %>%
    rename(exp_name = name)

  # the codes are no longer needed once the names are attached
  combined %>% select(-imp_code, -exp_code)
}

# Kick off the full download-and-merge pipeline.
w <- main()

# Persist the merged result to disk as CSV.
readr::write_csv(w, "wtdb_clean.csv")


## graph_exports.R
library(dplyr)
library(readr)
# load ggplot2
library(ggplot2)
# and cowplot---required to switch the x-axis to the top so you can read it where it matters.
library(cowplot)

# if you've already run main() and saved the data, do this instead
# so you don't need to re-download the data
w <- read_csv("wtdb_clean.csv")

# Total 1997 export value per exporting country.
# `value` is in thousands of USD (see the download script header), so
# dividing by 10^6 switches to billions.
# harder if you have data that includes internal trade.
exports <- w %>%
  filter(year == 1997) %>%
  group_by(exp_name) %>%
  summarize(value = sum(value / 10^6))

# re-order country names by value so the largest exporter ends up at the
# top once the coordinates are flipped
p <- ggplot(exports, aes(reorder(exp_name, value), value)) +
  geom_point() +
  # put the value axis on the opposite side; after coord_flip() that is the
  # top of the plot, where it can be read next to the largest exporters.
  # This replaces cowplot's switch_axis_position().
  scale_y_continuous(position = "right") +
  coord_flip() +
  labs(y = "Value (Billions of current USD)",
       x = "Country", title = "Total exports, 1997")

# explicit theme_gray() so the result does not depend on whatever default
# theme is active; tiny y-axis text so all country names fit
exports_plot <- p +
  theme_gray() +
  theme(axis.text.y = element_text(size = 4))

exports_plot

# pass the plot explicitly instead of relying on last_plot(), which is
# unset when nothing has been printed (e.g. when the script is source()d)
ggsave("exports-1997.png", plot = exports_plot, width = 5, height = 16)
	# Download the World Trade Database from http://cid.econ.ucdavis.edu/ucdstats.html
	# From documentation: value is in thousands of current USD.
	library(reshape2)
	library(dplyr)
	library(readr)

	process_file <- function(f_name) {
	  # Download the zip archive at URL `f_name`, extract it into the current
	  # working directory, and return the path of the first extracted file.
	  #
	  # Args:
	  #   f_name: URL of a zip archive.
	  # Returns:
	  #   Path (character scalar) of the first file extracted from the archive.
	  # Errors:
	  #   Stops if the download reports a non-zero status or the archive
	  #   yields no files.

	  # temporary location for the archive; fileext needs the leading dot
	  tf <- tempfile(pattern = "wtdb", fileext = ".zip")

	  # always clean up the archive, even when the download or unzip fails
	  on.exit(unlink(tf), add = TRUE)

	  # download file at url `f_name` to tempfile `tf`
	  status <- download.file(url = f_name, destfile = tf, method = "curl")
	  if (status != 0) {
	    stop("download failed (status ", status, "): ", f_name, call. = FALSE)
	  }

	  # file is zipped; unzip it. there's two files inside, take the first one
	  # (second one is a readme)
	  extracted <- unzip(tf)
	  if (length(extracted) == 0) {
	    stop("no files extracted from archive: ", f_name, call. = FALSE)
	  }

	  # return the filename of the extracted file
	  extracted[[1]]
	}

	# Downloads the data and returns a data frame with years 80-97 (stored as
	# 1980-1997 in the `year` column) and all SITC together.
	#
	# Returns a data frame with columns `sitc`, `value`, `year`, `imp_name`
	# and `exp_name`; the numeric country codes are replaced by country names.
	main <- function() {
	  base_url <- "http://cid.econ.ucdavis.edu/data/ucdstatcan/"

	  country_file <- process_file(paste0(base_url, "COUNTRY.TXT.zip"))

	  # save country names and codes (the first 7 lines of the file are skipped)
	  countries <- read_table(
	    country_file,
	    skip = 7,
	    col_names = c("code", "name"),
	    col_types = cols(code = col_character(), name = col_character())
	  )

	  # download/unzip/read one data file
	  # year = 80,...,97; index = 1,2,3
	  load_wtdb <- function(year, index) {
	    data_file <- process_file(
	      paste0(base_url, "Wtdb", year, "_", index, ".asc.txt.zip")
	    )

	    trade <- read_table(
	      data_file,
	      col_names = c("imp_code", "sitc", "exp_code", "value"),
	      col_types = cols(.default = "c", value = "d")
	    )

	    # the file itself carries no year, so record it on every row
	    trade %>% mutate(year = 1900 + year)
	  }

	  # every year/index pair needed for the full dataset
	  # (expand.grid varies the year fastest, so files are fetched index by index)
	  grid <- expand.grid(year = 80:97, index = 1:3)

	  # one data frame per downloaded file
	  parts <- Map(load_wtdb, grid$year, grid$index)

	  # stack all pieces in a single pass; repeated rbind() re-copies the
	  # accumulated rows for every file (original note: 6.3 million rows)
	  combined <- bind_rows(parts)

	  # merge on the country names using the country code list;
	  # rename so we keep track of which name is the importer / exporter
	  combined <- combined %>%
	    left_join(countries, by = c("imp_code" = "code")) %>%
	    rename(imp_name = name) %>%
	    left_join(countries, by = c("exp_code" = "code")) %>%
	    rename(exp_name = name)

	  # the codes are no longer needed once the names are attached
	  combined %>% select(-imp_code, -exp_code)
	}

	# Kick off the full download-and-merge pipeline.
	w <- main()

	# Persist the merged result to disk as CSV.
	readr::write_csv(w, "wtdb_clean.csv")
	library(dplyr)
	library(readr)
	# load ggplot2
	library(ggplot2)
	# and cowplot---required to switch the x-axis to the top so you can read it where it matters.
	library(cowplot)

	# if you've already run main() and saved the data, do this instead
	# so you don't need to re-download the data
	w <- read_csv("wtdb_clean.csv")

	# Total 1997 export value per exporting country.
	# `value` is in thousands of USD (see the download script header), so
	# dividing by 10^6 switches to billions.
	# harder if you have data that includes internal trade.
	exports <- w %>%
	  filter(year == 1997) %>%
	  group_by(exp_name) %>%
	  summarize(value = sum(value / 10^6))

	# re-order country names by value so the largest exporter ends up at the
	# top once the coordinates are flipped
	p <- ggplot(exports, aes(reorder(exp_name, value), value)) +
	  geom_point() +
	  # put the value axis on the opposite side; after coord_flip() that is the
	  # top of the plot, where it can be read next to the largest exporters.
	  # This replaces cowplot's switch_axis_position().
	  scale_y_continuous(position = "right") +
	  coord_flip() +
	  labs(y = "Value (Billions of current USD)",
	       x = "Country", title = "Total exports, 1997")

	# explicit theme_gray() so the result does not depend on whatever default
	# theme is active; tiny y-axis text so all country names fit
	exports_plot <- p +
	  theme_gray() +
	  theme(axis.text.y = element_text(size = 4))

	exports_plot

	# pass the plot explicitly instead of relying on last_plot(), which is
	# unset when nothing has been printed (e.g. when the script is source()d)
	ggsave("exports-1997.png", plot = exports_plot, width = 5, height = 16)