Skip to content

Instantly share code, notes, and snippets.

@kpq
Last active December 26, 2015 11:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kpq/7147531 to your computer and use it in GitHub Desktop.
Save kpq/7147531 to your computer and use it in GitHub Desktop.
Scrape all U.S. adoptions by country of birth and year from the State Department
#' Fetch the adoptions-by-year table for one country of birth.
#'
#' Builds the statistics URL for `country_name`, reads the HTML table found
#' there and returns it as a tidy data frame.
#'
#' @param country_name A single country name as a character string, e.g.
#'   "Burkina Faso".
#' @return A data frame with numeric columns `year` and `adoptions`, plus a
#'   `country` column holding `country_name` exactly as it was passed in.
#'
#' NOTE(review): `readHTMLTable` is not defined in this file; presumably it is
#' XML::readHTMLTable, so the XML package must be attached before calling.
get_country_data <- function(country_name) {
  stopifnot(is.character(country_name), length(country_name) == 1L)

  # Encode into a separate variable so the caller's spelling survives for the
  # `country` column (previously the column held e.g. "Cote+d%60Ivoire").
  # this puts a plus instead of the space, which is how the URL behaves
  encoded_name <- gsub(" ", "+", country_name, fixed = TRUE)
  # this fixes ivory coast (the backtick must be percent-encoded)
  encoded_name <- gsub("`", "%60", encoded_name, fixed = TRUE)

  # first and last part of the url; the country goes in between
  first_part <- "http://adoption.state.gov/maps/statistics/map_files/statistics.php?special=NONE&year=ALL&country="
  last_part <- "&state=NONE&returnType=TABLE"
  url <- paste0(first_part, encoded_name, last_part)

  # fetch it from the internet and cast the result as a data frame
  this_country <- data.frame(readHTMLTable(url))

  # fail with a clear message instead of silently mislabelling columns
  if (ncol(this_country) != 2L) {
    stop(
      "Expected a 2-column table for '", country_name, "', got ",
      ncol(this_country), " column(s)",
      call. = FALSE
    )
  }
  colnames(this_country) <- c("year", "adoptions")

  # the columns may arrive as factors; go through character so we convert
  # the labels rather than the internal factor codes
  this_country$year <- as.numeric(as.character(this_country$year))
  this_country$adoptions <- as.numeric(as.character(this_country$adoptions))
  this_country$country <- country_name
  this_country
}
# a full list of countries, pasted from the site and formatted by hand
# you might prefer to load them from a csv...whichever works
# NOTE(review): the upper-case entries (e.g. "THE BAHAMAS") are kept exactly
# as given; presumably that is the spelling the site expects -- confirm.
# Each name must sit on a single line: a line break inside the quotes becomes
# part of the string and ends up in the request URL.
countries <- c(
  "Afghanistan", "Albania", "Algeria", "Antigua and Barbuda", "Argentina",
  "Armenia", "Australia", "Austria", "Azerbaijan", "THE BAHAMAS",
  "Bangladesh", "Barbados", "Belarus", "Belgium", "Belize", "Benin",
  "Bermuda", "Bhutan", "Bolivia", "Bosnia-Herzegovina", "Botswana", "Brazil",
  "Bulgaria", "Burkina Faso", "Burundi", "Cambodia", "Cameroon", "Canada",
  "Cape Verde", "Central African Republic", "Chad", "Chile", "China",
  "Colombia", "Congo-Kinshasa", "Congo-Brazzaville", "Costa Rica",
  "Cote d`Ivoire", "Croatia", "Cuba", "Cyprus", "Czech Republic", "Djibouti",
  "Dominica", "Dominican Republic", "Ecuador", "Egypt", "El Salvador",
  "Equatorial Guinea", "Eritrea", "Estonia", "Ethiopia", "Fiji", "Finland",
  "France", "Gabon", "The Gambia", "Georgia", "Germany", "Ghana", "Greece",
  "Grenada", "Guatemala", "Guinea", "Guinea-Bissau", "Guyana", "Haiti",
  "Honduras", "Hong Kong", "Hungary", "Iceland", "India", "Indonesia", "Iran",
  "Iraq", "Ireland", "Israel", "Jamaica", "Japan", "Jordan",
  "Kyrgyz Republic", "Kenya", "Kiribati", "Kosovo", "Kazakhstan", "Laos",
  "Latvia", "Lebanon", "Lesotho", "Liberia", "Libya", "Lithuania",
  "Macedonia", "Madagascar", "Malawi", "Malaysia", "Mali",
  "MARSHALL ISLANDS, REPUBLIC OF THE", "Mauritius", "Mexico",
  "MICRONESIA, FEDERATED STATES OF", "Moldova", "Mongolia", "Montenegro",
  "Morocco", "Mozambique", "Myanmar", "Namibia", "Nepal", "Netherlands",
  "New Zealand", "Nicaragua", "Niger", "Nigeria", "Norway", "Oman",
  "Pakistan", "Palestinian Authority", "Panama", "Papua New Guinea",
  "Paraguay", "Peru", "Philippines", "Poland", "Portugal", "Romania",
  "Russia", "Rwanda", "Samoa", "Saudi Arabia", "Senegal", "Serbia",
  "Seychelles", "Sierra Leone", "Singapore", "Slovakia", "Somalia",
  "South Africa", "South Korea", "Spain", "Sri Lanka", "St. Kitts and Nevis",
  "St. Lucia", "St. Vincent and the Grenadines", "Sudan", "Suriname",
  "Swaziland", "Switzerland", "Syria", "Taiwan", "Tajikistan", "Tanzania",
  "Thailand", "Timor-Leste", "Togo", "Tonga", "Trinidad and Tobago",
  "Tunisia", "Turkey", "Turkmenistan", "Uganda", "Ukraine", "United Kingdom",
  "Uruguay", "Uzbekistan", "Vanuatu", "Venezuela", "Vietnam", "Yemen",
  "Zambia", "Zimbabwe"
)
# run get_country_data() for every country and store the combined result in a
# data frame called 'all' (note: within this script the name masks base::all)
#
# Results are collected in a list and bound once at the end; calling rbind()
# inside the loop re-copies every accumulated row on each iteration.
country_tables <- lapply(countries, function(country) {
  print(country) # progress output: one line per country requested
  get_country_data(country)
})
# The earlier loop prepended each new table, so the last country came first;
# reversing the list keeps that row order.
all <- do.call(rbind, rev(country_tables))
# done! save the combined table to a csv file if you like
write.csv(x = all, file = "all_adoptions.csv")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment