Web-scraping all the Bureau of Labor Statistics tables and joining them into an R data frame
library(purrr)
library(rvest)
library(stringr)
library(readxl)
library(dplyr)
suppressPackageStartupMessages(library(lubridate))
setwd("~/Desktop")
# create the data directory (and its parent folder) if it doesn't exist yet
if(!dir.exists("./data/labor_data")) {dir.create("./data/labor_data", recursive = TRUE)}
# save the URL of the BLS Local Area Unemployment Statistics page
url <- "https://www.bls.gov/lau/#tables"
# download the page, then parse the saved HTML with read_html
download.file(url, destfile = "./data/uslabor.html")
us_county_labor_html <- read_html("./data/uslabor.html")
# extract the .xlsx links from the page's list items
us_county_labor_html %>%
  rvest::html_nodes("ul") %>%
  rvest::html_nodes("li") %>%
  rvest::html_nodes("a") %>%
  rvest::html_attr("href") %>%
  str_subset("\\.xlsx$") -> us_labor_urls
# the hrefs are relative, so paste the BLS domain onto each one
domain <- "https://www.bls.gov"
str_c(domain, us_labor_urls) -> us_labor_urls
# only need the years 2000 to 2016 (positions 3 through 19 in the link list at
# the time of writing; this indexing is brittle if BLS reorders the page)
us_labor_urls[3:19] -> us_labor_2000_2016
years <- 2000:2016
# a for loop that downloads each year's spreadsheet
for(i in seq_along(us_labor_2000_2016)){
  download.file(us_labor_2000_2016[i], destfile = paste("./data/labor_data/", years[i], ".xlsx", sep = ""), mode = "wb")
}
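# Optional check (an assumption on my part, not in the original script): make
# sure one workbook landed on disk for each of the seventeen years before
# reading them back in.
stopifnot(length(dir("./data/labor_data")) == length(years))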
# list the downloaded US labor files
labor_files <- dir("./data/labor_data")
# create a function that reads each downloaded file into a data frame
# (each sheet has 7 header rows to skip and one blank spacer column)
read_files <- function(x){
  read_excel(path = paste("./data/labor_data/", x, sep = ""), skip = 7,
             col_names = c("laus_code","state_fips_code","county_fips_code","county_name","year","blank","labor_force","employed","unemployed","unemployment_rate"),
             sheet = 1, na = "")
}
# map the function over each file name, yielding a list of data frames
map(labor_files, read_files) -> all_labor_data
# join all the US labor tables into one data frame
all_labor_data %>% reduce(full_join) -> all_labor_data
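# A minimal sanity check of the combined frame (my addition, not in the
# original): the year column should span 2000 through 2016 if every sheet was
# read and joined correctly.
sort(unique(all_labor_data$year))
dplyr::glimpse(all_labor_data)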