Created
August 29, 2016 11:48
-
-
Save twedl/73d2e84246deefed77757f8c5a30a68d to your computer and use it in GitHub Desktop.
Download World Trade Database (R)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Download the World Trade Database from http://cid.econ.ucdavis.edu/ucdstats.html
# From the documentation: value is in thousands of current USD.
library(reshape2) | |
library(dplyr) | |
library(readr) | |
process_file <- function(f_name) {
  # Download the zip archive at URL `f_name`, extract it into the current
  # working directory, and return the path of the first extracted file.
  #
  # Args:
  #   f_name: URL of a zip archive.
  # Returns:
  #   The first path reported by `unzip()`. Per the original author's note,
  #   the archives hold two files and the second one is a readme.
  # Errors:
  #   Stops if the download reports a non-zero status or nothing is extracted.

  # the archive itself is only needed until it has been extracted.
  # `fileext` must include the dot to produce a ".zip" suffix.
  tf <- tempfile(pattern = "wtdb", fileext = ".zip")
  # clean up the temp archive on every exit path, including errors.
  on.exit(unlink(tf), add = TRUE)

  # download file at url `f_name` to tempfile `tf`;
  # download.file() returns 0 on success and non-zero on failure.
  status <- download.file(url = f_name, destfile = tf, method = "curl")
  if (status != 0) {
    stop("download failed (status ", status, "): ", f_name, call. = FALSE)
  }

  # unzip() returns the extracted paths (empty if extraction failed).
  extracted <- unzip(tf)
  if (length(extracted) == 0) {
    stop("no files could be extracted from: ", f_name, call. = FALSE)
  }

  # return the filename of the extracted data file
  extracted[[1]]
}
# downloads the data, returns a data frame with years 80-97 and all three
# files per year stacked together.
main <- function() {
  # Returns one row per (importer, sitc, exporter, year) record with columns
  # sitc, value, year, imp_name and exp_name; the country codes are replaced
  # by the names found in the country list.

  country_file <- process_file(
    "http://cid.econ.ucdavis.edu/data/ucdstatcan/COUNTRY.TXT.zip"
  )
  # country names and codes; the first 7 lines of the file are skipped.
  countries <- read_table(
    country_file,
    skip = 7,
    col_names = c("code", "name"),
    col_types = cols(code = col_character(), name = col_character())
  )

  # download/unzip/read one data file.
  # yr = 80,...,97; index = 1,2,3
  load_wtdb <- function(yr, index) {
    f_name <- paste0(
      "http://cid.econ.ucdavis.edu/data/ucdstatcan/Wtdb",
      yr, "_", index, ".asc.txt.zip"
    )
    data_file <- process_file(f_name)
    trade <- read_table(
      data_file,
      col_names = c("imp_code", "sitc", "exp_code", "value"),
      col_types = cols(.default = "c", value = "d")
    )
    # the files carry no year column; add the four-digit year.
    # `yr` is named differently from the new column so the lookup inside
    # mutate() is unambiguous.
    trade %>% mutate(year = 1900 + yr)
  }

  # every year/index pair needed for the full dataset
  # (year varies fastest, as in expand.grid)
  grid <- expand.grid(yr = 80:97, index = 1:3)
  # one data frame per pair
  tables <- Map(load_wtdb, grid$yr, grid$index)
  # stack them in a single pass instead of rbind-ing pairwise, which
  # re-copies the accumulated rows at every step (about 6.3 million rows).
  w <- bind_rows(tables)

  # merge on the importer name using the country code list
  w <- w %>%
    left_join(countries, by = c("imp_code" = "code")) %>%
    rename(imp_name = name)
  # merge on the exporter name using the country code list
  w <- w %>%
    left_join(countries, by = c("exp_code" = "code")) %>%
    rename(exp_name = name)

  # the codes are redundant once the names are attached
  w %>% select(-imp_code, -exp_code)
}
# build the full dataset (downloads every file).
w <- main()
# write it to disk so it can be reloaded later without downloading again.
w %>% write_csv("wtdb_clean.csv")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(dplyr) | |
library(readr) | |
# load ggplot2 | |
library(ggplot2) | |
# and cowplot---required to switch the x-axis to the top so you can read it where it matters. | |
library(cowplot) | |
# if you've already run main() and saved the data, do this instead
# so you don't need to re-download the data
w <- read_csv("wtdb_clean.csv")

# total exports per exporting country in 1997.
# harder if you have data that includes internal trade.
exports <- w %>%
  filter(year == 1997) %>%
  group_by(exp_name) %>%
  mutate(value = value / 10^6) %>% # thousands of USD -> billions of USD
  summarize(value = sum(value))

# countries ordered by export value; the axes are flipped so the country
# names read horizontally (original note: this puts USA on the "right" side
# of the x-axis before the flip).
p <- ggplot(exports, aes(reorder(exp_name, value), value)) +
  geom_point() +
  coord_flip() +
  labs(
    y = "Value (Billions of current USD)",
    x = "Country",
    title = "Total exports, 1997"
  )

# move the value axis to the top so it can be read where it matters.
final_plot <- ggdraw(
  switch_axis_position(
    p +
      theme_gray() +
      theme(axis.text.y = element_text(size = 4)),
    axis = "x"
  )
)
# display it (auto-prints when run interactively)
final_plot

# pass the plot explicitly instead of relying on the implicit last_plot(),
# so the saved image is guaranteed to be the composed figure.
ggsave("exports-1997.png", plot = final_plot, width = 5, height = 16)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment