Skip to content

Instantly share code, notes, and snippets.

@twedl
Created August 29, 2016 11:48
Show Gist options
  • Save twedl/73d2e84246deefed77757f8c5a30a68d to your computer and use it in GitHub Desktop.
Save twedl/73d2e84246deefed77757f8c5a30a68d to your computer and use it in GitHub Desktop.
Download World Trade Database (R)
# Download the World Trade Database from http://cid.econ.ucdavis.edu/ucdstats.html
# From documentation: value is in thousands of current USD.
library(reshape2)
library(dplyr)
library(readr)
process_file <- function(f_name) {
  # Download the zip archive at URL `f_name`, extract it into the current
  # working directory, and return the path of the first extracted file.
  #
  # Args:
  #   f_name: URL of a zip archive.
  # Returns:
  #   Path (character scalar) of the first file extracted from the archive.
  #   According to the original author's note, each archive holds two files
  #   and the second one is a readme, so the first is the data file.
  # Raises:
  #   An error if the download reports a non-zero status or if nothing
  #   could be extracted from the archive.

  # The archive itself is only needed until it has been unzipped; register
  # the cleanup up front so the temp file is removed even when a later step
  # fails.
  tf <- tempfile(pattern = "wtdb", fileext = ".zip")
  on.exit(unlink(tf), add = TRUE)

  # With method = "curl" a failed transfer is reported through the returned
  # status code rather than an R error, so it has to be checked explicitly.
  status <- download.file(url = f_name, destfile = tf, method = "curl")
  if (status != 0) {
    stop("download failed (status ", status, "): ", f_name, call. = FALSE)
  }

  # unzip() returns the paths of the extracted files.
  extracted <- unzip(tf)
  if (length(extracted) == 0) {
    stop("no files could be extracted from ", f_name, call. = FALSE)
  }

  extracted[[1]]
}
# Downloads the World Trade Database files for years 80-97 (stored as
# 1980-1997), three part-files per year, and returns them as one data frame
# with the importer/exporter country codes replaced by country names.
#
# Returns:
#   A data frame with columns sitc, value, year, imp_name, exp_name.
main <- function() {
  country_file <- process_file(
    "http://cid.econ.ucdavis.edu/data/ucdstatcan/COUNTRY.TXT.zip"
  )
  # Country code -> country name lookup table. The first 7 lines of the
  # file are skipped before the code/name rows start.
  country_names <- read_table(
    country_file,
    skip = 7,
    col_names = c("code", "name"),
    col_types = cols(code = col_character(), name = col_character())
  )

  # Download, unzip and parse one data file.
  #   yr:    two-digit year, 80..97
  #   index: part-file number within that year, 1..3
  load_wtdb <- function(yr, index) {
    url <- paste0(
      "http://cid.econ.ucdavis.edu/data/ucdstatcan/Wtdb",
      yr, "_", index, ".asc.txt.zip"
    )
    data_file <- process_file(url)
    trade <- read_table(
      data_file,
      col_names = c("imp_code", "sitc", "exp_code", "value"),
      col_types = cols(.default = "c", value = "d")
    )
    # The files carry no year column, so record the four-digit year here.
    # `yr` is deliberately not named `year` so it cannot be confused with
    # the column being created.
    trade %>% mutate(year = 1900 + yr)
  }

  # Every (year, part) combination, in the same order as the rows of the
  # grid (year varies fastest).
  grid <- expand.grid(i = 80:97, j = 1:3)
  parts <- Map(load_wtdb, grid$i, grid$j)

  # Stack all pieces in one pass (about 6.3 million rows according to the
  # original author); repeated pairwise rbind() would re-copy the
  # accumulated rows on every step.
  w <- bind_rows(parts)

  # Attach importer and exporter names from the lookup table, renaming
  # after each join so the two `name` columns stay distinguishable, then
  # drop the raw codes.
  w %>%
    left_join(country_names, by = c("imp_code" = "code")) %>%
    rename(imp_name = name) %>%
    left_join(country_names, by = c("exp_code" = "code")) %>%
    rename(exp_name = name) %>%
    select(-imp_code, -exp_code)
}
# Run the full pipeline. main() downloads and parses every data archive
# (years 80-97, three files each) plus the country list, so this needs
# network access and can take a while.
w <- main()
# Save the merged table as "wtdb_clean.csv" (relative path, i.e. in the
# current working directory) so later analysis can read it back instead of
# downloading everything again.
write_csv(w,"wtdb_clean.csv")
library(dplyr)
library(readr)
# load ggplot2
library(ggplot2)
# cowplot was originally loaded for switch_axis_position(), which later
# cowplot releases no longer provide. The axis is now moved with ggplot2's
# own `position` argument below; the package is still attached so the
# script's set of loaded packages is unchanged.
library(cowplot)

# Read the table previously written by the download script, so the data does
# not have to be downloaded again. Column types are spelled out instead of
# guessed: sitc is an identifier and must stay character, value and year are
# numeric, the country names are character.
w <- read_csv(
  "wtdb_clean.csv",
  col_types = cols(
    .default = col_character(),
    value = col_double(),
    year = col_double()
  )
)

# Total exports per exporting country in 1997.
# The database documentation states values are in thousands of current USD,
# so dividing by 10^6 converts the totals to billions.
# NOTE(review): the original author warns this is "harder if you have data
# that includes internal trade" -- confirm whether such rows need excluding.
exports <- w %>%
  filter(year == 1997) %>%
  group_by(exp_name) %>%
  summarize(value = sum(value) / 10^6)

# Dot plot with one row per country. Countries are ordered by export value
# so the largest exporters end up at the top once the axes are flipped.
# position = "right" on the value scale becomes "top" after coord_flip(),
# which keeps the value axis readable next to the largest exporters on this
# very tall figure.
p <- ggplot(exports, aes(reorder(exp_name, value), value)) +
  geom_point() +
  scale_y_continuous(position = "right") +
  coord_flip() +
  labs(
    y = "Value (Billions of current USD)",
    x = "Country",
    title = "Total exports, 1997"
  ) +
  # Explicit theme: older cowplot versions change the default theme on load.
  theme_gray() +
  # Small country labels so all of them fit on the vertical axis.
  theme(axis.text.y = element_text(size = 4))

# Show the plot when the script is run interactively.
p

# Pass the plot explicitly rather than relying on last_plot().
ggsave("exports-1997.png", plot = p, width = 5, height = 16)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment