Web scraping all the Bureau of Labor Statistics county tables and joining them into an R data frame
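The script uses rvest, stringr, readxl, purrr, and dplyr; loading them up front makes it runnable top to bottom:

library(rvest)    #read_html, html_nodes, html_attr
library(stringr)  #str_subset, str_c
library(readxl)   #read_excel
library(purrr)    #map, reduce
library(dplyr)    #full_join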
#create the directory for the labor data if it doesn't already exist
if(!dir.exists("./data/labor_data")) {dir.create("./data/labor_data", recursive = TRUE)}
#save the url of the BLS county data page into a variable (fill in before running)
url <- ""
#download the page to disk once, then parse the local copy with read_html
if(!file.exists("./data/uslabor.html")) {download.file(url, destfile = "./data/uslabor.html")}
us_county_labor_html <- read_html("./data/uslabor.html")
#extract the .xlsx links from the page's list items
us_county_labor_html %>%
  rvest::html_nodes("ul") %>%
  rvest::html_nodes("li") %>%
  rvest::html_nodes("a") %>%
  rvest::html_attr("href") %>%
  str_subset("\\.xlsx$") -> us_labor_urls
#the scraped hrefs are relative, so prefix each with the site's domain (fill in before running)
domain <- ""
str_c(domain, us_labor_urls) -> us_labor_urls
#only need the years 2000 through 2016 (the 3rd through 19th links)
us_labor_urls[3:19] -> us_labor_2000_2016
years <- 2000:2016
#a for loop that downloads each file, naming it by year
for(i in seq_along(us_labor_2000_2016)){
  download.file(us_labor_2000_2016[i], destfile = paste("./data/labor_data/", years[i], ".xlsx", sep = ""), mode = "wb")
}
#list the downloaded labor files
labor_files <- dir("./data/labor_data")
#create a function that reads one downloaded file into a data frame
#(the "" entry names a blank spacer column in the spreadsheets)
read_files <- function(x){
  read_excel(path = paste("./data/labor_data/", x, sep = ""), skip = 7,
             col_names = c("laus_code","state_fips_code","county_fips_code","county_name","year","","labor_force","employed","unemployed","unemployment_rate"),
             sheet = 1, na = "")
}
#map the function to read each file
map(labor_files,read_files) -> all_labor_data
#join all the US labor tables
all_labor_data %>% reduce(full_join) -> all_labor_data
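As a quick sanity check on the combined frame, counting rows per year should show one group of county rows for each of 2000 through 2016 (a minimal sketch; count comes from dplyr and is not part of the original script):

#one group of county rows per year confirms all 17 files were joined
all_labor_data %>% count(year)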