Created
October 28, 2019 03:01
-
-
Save monipip3/7634c9a3bd5957698e90a7542d760e2c to your computer and use it in GitHub Desktop.
Webscraping all the Bureau Labor Stats tables and joining them into an R data frame
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(purrr) | |
setwd("~/Desktop") | |
library(rvest) | |
if(!file.exists("./data/labor_data")) {dir.create("./data/labor_data")} | |
suppressPackageStartupMessages(library(lubridate)) | |
suppressPackageStartupMessages(library(rvest)) | |
#save url into a variable | |
url <- "https://www.bls.gov/lau/#tables" | |
#download the html content using read_html | |
download.file(url,destfile="./data/uslabor.html") | |
us_county_labor_html <- read_html("./data/uslabor.html") | |
#extract the xslx | |
us_county_labor_html %>% | |
rvest::html_nodes("ul") %>% | |
rvest::html_nodes("li") %>% | |
rvest::html_nodes("a") %>% | |
rvest::html_attr("href") %>% | |
str_subset(".xlsx$") -> us_labor_urls | |
#domain | |
domain <- "https://www.bls.gov" | |
#paste domain to urls | |
str_c(domain,us_labor_urls) -> us_labor_urls | |
#only need years from 2000 to 2016 | |
us_labor_urls[3:19] -> us_labor_2000_2016 | |
years <- rep(2000:2016,1) | |
#a for loop that downloads each file | |
for(i in seq_along(us_labor_2000_2016)){ | |
download.file(us_labor_2000_2016[i],destfile = paste("./data/labor_data/",years[i],".xslx",sep=""),mode="wb") | |
} | |
#save the files pertaining to us labor | |
labor_files <- dir("./data/labor_data") | |
#create a function that downloads each url and saves it #into a dataframe | |
read_files <- function(x){ | |
read_excel(path= paste("./data/labor_data",x,sep=""),skip = 7,col_names = c("laus_code","state_fips_code","county_fips_code","county_name","year","","labor_force","employed","unemployed","unemployment_rate"),sheet = 1,na="") | |
} | |
#map the function to read each file | |
map(labor_files,read_files) -> all_labor_data | |
#join all the US labor tables | |
all_labor_data %>% reduce(full_join) -> all_labor_data |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment