library(rvest)
library(tidyverse)
library(janitor)
#>
#> Attaching package: 'janitor'
#> The following objects are masked from 'package:stats':
#>
#> chisq.test, fisher.test
# Landing page of the ESAC transformative agreement registry. It is both the
# page that gets scraped and the prefix of every per-agreement detail link.
esac_registry_url <-
  "https://esac-initiative.org/about/transformative-agreements/agreement-registry/"

# Build `esac` from the first HTML table found on the registry page.
esac <- esac_registry_url %>%
  # fetch and parse the page, then extract every <table> it contains
  rvest::read_html() %>%
  rvest::html_table() %>%
  # keep only the first table
  .[[1]] %>%
  # make nicer column names
  janitor::clean_names() %>%
  dplyr::mutate(
    # add url to record: <registry url><details_id>/
    link = paste0(esac_registry_url, details_id, "/"),
    # normalize dates: every column whose name ends in "date" is parsed as
    # month/day/year straight into a Date (no POSIXct round trip); values that
    # do not parse become NA and lubridate warns with the number of failures
    dplyr::across(dplyr::ends_with("date"), lubridate::mdy)
  )
#> Warning: 3 failed to parse.
# Show the scraped registry. A bare name at top level auto-prints its value
# when run interactively or rendered by reprex; it prints nothing under a
# plain source() call.
esac
#> # A tibble: 267 x 8
#> publisher country organization annual_publicat… start_date end_date
#> <chr> <chr> <chr> <int> <date> <date>
#> 1 CSIRO Austral… CAUL 625 2021-01-01 2021-12-31
#> 2 Future Scienc… Austral… CAUL 50 2021-01-01 2022-12-31
#> 3 Karger Austral… CAUL 30 2021-01-01 2021-12-31
#> 4 Microbiology … Austral… CAUL 40 2020-01-01 2021-12-31
#> 5 Portland Press Austral… CAUL 36 2020-01-01 2021-12-31
#> 6 The Geologica… Austral… CAUL 27 2021-01-01 2021-12-31
#> 7 The Royal Soc… Austral… CAUL 255 2021-01-01 2021-12-31
#> 8 AIP Publishing Austria University of… 15 2020-01-01 2022-12-31
#> 9 American Chem… Austria KEMOE 400 2020-01-01 2022-12-31
#> 10 Brill Austria University of… 25 2020-01-01 2020-12-31
#> # … with 257 more rows, and 2 more variables: details_id <chr>, link <chr>
# Persist the scraped registry as a CSV file in the current working directory.
esac %>%
  readr::write_csv("my_esac_dump.csv")
Created on 2021-03-30 by the reprex package (v0.3.0)