Skip to content

Instantly share code, notes, and snippets.

Created October 28, 2019 03:01
Show Gist options
  • Save monipip3/7634c9a3bd5957698e90a7542d760e2c to your computer and use it in GitHub Desktop.
Save monipip3/7634c9a3bd5957698e90a7542d760e2c to your computer and use it in GitHub Desktop.
Web scraping all the Bureau of Labor Statistics tables and joining them into an R data frame
# Packages used by this script, attached up front so it runs in a fresh
# session instead of relying on whatever happens to be loaded already.
library(rvest)    # read_html(), html_nodes(), html_attr(), %>%
library(stringr)  # str_subset(), str_c()
library(readxl)   # read_excel()
library(purrr)    # map(), reduce()
library(dplyr)    # full_join()

# Create the download directory. recursive = TRUE also creates ./data when it
# is missing; without it dir.create() fails with a warning in that case.
if (!dir.exists("./data/labor_data")) {
  dir.create("./data/labor_data", recursive = TRUE)
}

# Save url into a variable.
# NOTE(review): this is an empty string and is not used anywhere below;
# presumably the page address was meant to go here -- confirm.
url <- ""

# Parse the HTML content using read_html. The page is read from a local copy
# at ./data/uslabor.html, which must already exist.
us_county_labor_html <- read_html("./data/uslabor.html")

# Extract the links to the .xlsx files: every <a href> found inside a list
# item of an unordered list, keeping only those ending in ".xlsx". The dot is
# escaped so it matches a literal "." rather than any character.
us_labor_urls <- us_county_labor_html %>%
  rvest::html_nodes("ul") %>%
  rvest::html_nodes("li") %>%
  rvest::html_nodes("a") %>%
  rvest::html_attr("href") %>%
  str_subset("\\.xlsx$")

# Paste domain to urls.
# NOTE(review): domain is an empty string, so the hrefs are used unchanged;
# if they are site-relative the site's domain has to be filled in -- confirm.
domain <- ""
us_labor_urls <- str_c(domain, us_labor_urls)

# Only need years from 2000 to 2016.
# NOTE(review): assumes links 3 to 19 are the 2000-2016 files in page order --
# verify against the current page before relying on this.
us_labor_2000_2016 <- us_labor_urls[3:19]
years <- 2000:2016
# Download each yearly workbook into ./data/labor_data, named after its year.
# Indexing past the end of the scraped links yields NA entries, so stop early
# with a clear failure instead of erroring part-way through the downloads.
stopifnot(
  length(us_labor_2000_2016) == length(years),
  !anyNA(us_labor_2000_2016)
)
for (i in seq_along(us_labor_2000_2016)) {
  download.file(
    us_labor_2000_2016[i],
    # Extension fixed from the misspelled ".xslx" to ".xlsx".
    destfile = file.path("./data/labor_data", paste0(years[i], ".xlsx")),
    mode = "wb"  # binary mode so the workbooks are not corrupted on Windows
  )
}

# Save the file names pertaining to US labor. Only .xlsx files are listed so
# that stray files in the directory (including old ".xslx" downloads) are not
# picked up later.
labor_files <- dir("./data/labor_data", pattern = "\\.xlsx$")
#' Read one downloaded labor workbook into a data frame
#'
#' Reads the first sheet of a workbook stored in `./data/labor_data`, skipping
#' the first 7 rows and applying a fixed set of ten column names. Empty cells
#' are treated as missing values.
#'
#' @param x File name (not a full path) of a workbook inside
#'   `./data/labor_data`, e.g. one element of `labor_files`.
#' @return The data frame returned by `readxl::read_excel()`.
read_files <- function(x) {
  readxl::read_excel(
    # file.path() inserts the separator the original paste(..., sep = "")
    # left out, which produced paths like "./data/labor_data2000.xlsx".
    path = file.path("./data/labor_data", x),
    sheet = 1,
    skip = 7,
    # The sixth name is deliberately empty.
    # NOTE(review): presumably a blank spacer column in the sheet -- confirm.
    col_names = c(
      "laus_code", "state_fips_code", "county_fips_code", "county_name",
      "year", "", "labor_force", "employed", "unemployed",
      "unemployment_rate"
    ),
    na = ""
  )
}
# Read every file listed in labor_files, giving a list with one data frame
# per file.
all_labor_data <- map(labor_files, read_files)
# Collapse the list into a single table by full-joining the data frames
# pairwise, left to right. No `by` is given, so full_join() matches on the
# columns the tables have in common.
all_labor_data <- reduce(all_labor_data, full_join)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment