Created
November 1, 2017 10:01
-
-
Save slarge/1b2245f90eadc96cb259ccc7810bad30 to your computer and use it in GitHub Desktop.
explore ICES advice with tabulizer
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Exploring ICES advice ##
library(tabulizer)
library(glue)
library(dplyr)
library(stringr)
library(purrr)
library(httr)
library(jsonlite)

# Build one row per stock that is active in 2017, together with the URL of
# its advice PDF and a flag saying whether that URL responds without an HTTP
# error.
#
# The stock code used in the URL depends on the year of the last assessment:
# assessments up to 2016 use PreviousStockKeyLabel, assessments from 2017 on
# use StockKeyLabel; anything else (e.g. a missing year) becomes NA.
rawsd <- jsonlite::fromJSON("http://sd.ices.dk/services/odata3/StockListDWs3")$value %>%
  filter(ActiveYear == 2017) %>%
  mutate(
    stock_code = dplyr::case_when(
      YearOfLastAssessment <= 2016 ~ PreviousStockKeyLabel,
      YearOfLastAssessment >= 2017 ~ StockKeyLabel,
      TRUE ~ NA_character_
    ),
    advice_url = glue::glue("http://www.ices.dk/sites/pub/Publication%20Reports/Advice/{YearOfLastAssessment}/{YearOfLastAssessment}/{stock_code}.pdf"),
    # map_lgl() yields a plain logical vector (no list-vs-logical comparison).
    # possibly() turns a request that fails outright (e.g. connection problem)
    # into "has an error", so a single unreachable URL marks that stock as
    # invalid instead of aborting the whole pipeline.
    valid_url = !purrr::map_lgl(
      advice_url,
      purrr::possibly(httr::http_error, otherwise = TRUE)
    )
  ) %>%
  select(
    StockKeyDescription,
    StockKeyLabel,
    YearOfLastAssessment,
    advice_url,
    valid_url
  )
## This downloads all of the first pages of advice into a data.frame.
## The same concept could be done with tables.

# Keep only the stocks whose advice PDF URL was reachable.
# Note: there are ~15 stocks that don't have a valid url... this should be
# explored.
advice_table <- rawsd %>%
  filter(valid_url)
  # %>% mutate(advice_sentence = purrr::map(advice_url, tabulizer::extract_tables, pages = 1, method = "data.frame"))

# Fail early with a clear message rather than handing NA to extract_tables().
if (nrow(advice_table) == 0) {
  stop("No stocks with a valid advice URL were found.", call. = FALSE)
}

# Extract the tables of the first valid advice PDF without table-area guessing.
# NOTE(review): newer tabulizer releases appear to select the return format
# with `output =` and use `method` for the extraction algorithm -- confirm
# against the installed tabulizer version.
td <- tabulizer::extract_tables(advice_table$advice_url[1], guess = FALSE, method = "data.frame")

# Collapse the first extracted table into a single character column VAL.
# NOTE(review): as.character() on a data.frame returns one string per column
# (not per cell) -- confirm this is the intended shape before relying on the
# row searches that follow.
td <- data.frame(VAL = as.character(td[[1]]), stringsAsFactors = FALSE)
## Pieces of the advice sheet still to be extracted:
## Advice value
## TAC over time
## Total wanted and unwanted catch
## Advice at FMSY

# Locate the entries of td$VAL that contain the two section titles. The
# titles are matched literally (fixed = TRUE), not as regular expressions.
# grep() returns integer(0) when a title is not found.
start_row <- grep("ICES stock advice", td$VAL, fixed = TRUE)
end_row <- grep("Stock development over time", td$VAL, fixed = TRUE)

# Show the entry (or entries) holding the "ICES stock advice" title.
td$VAL[start_row]

# Loaded here for later use; nothing above this line calls icesVocab.
library(icesVocab)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment