Skip to content

Instantly share code, notes, and snippets.

@yjunechoe
Created January 24, 2024 02:22
Show Gist options
  • Save yjunechoe/2116b8ce6221cd84c2dc20325e662aa1 to your computer and use it in GitHub Desktop.
Save yjunechoe/2116b8ce6221cd84c2dc20325e662aa1 to your computer and use it in GitHub Desktop.
chromote + rvest
library(rvest)
library(chromote)
# Scrape the "#til-table" table from a page rendered in headless Chrome:
# chromote drives the browser, rvest/xml2 parse the resulting HTML.

# Open page in headless chrome
url <- "https://www.kulturdirektoratet.no/web/guest/stotteordning/-/vis/digitalisering-mangfold-dialog-samarbeid/tildelinger"
b <- ChromoteSession$new()
b$Page$navigate(url, wait_ = TRUE)
# Fixed pause between navigation and reading the DOM
# (presumably gives client-side scripts time to fill in the table).
Sys.sleep(3)

# Get document: serialise the whole DOM, starting at the root node, to HTML
rootnode <- b$DOM$getDocument()$root$nodeId
page <- b$DOM$getOuterHTML(rootnode)$outerHTML %>%
  read_html()

# The HTML now lives in R as `page`; close the browser session so the
# headless Chrome tab is not left running.
b$close()

# Clean up some junk in the table that messes up parsing with `html_table()`.
# The result is deliberately not assigned: the nodes are removed from `page`
# itself.
page %>%
  xml2::xml_find_all("//div[@class='dropdown-row'] | //*[contains(@class, 'show-for-small-only')]") %>%
  xml2::xml_remove()

# Select and read table as normal
tbl <- page %>%
  html_element("#til-table") %>%
  html_table()
tbl
#> # A tibble: 53 × 7
#> Søker Prosjekttittel Fylke Vedtak År Søknadssum `Totalt tilskudd`
#> <chr> <chr> <chr> <chr> <int> <chr> <chr>
#> 1 ANNO MUSEUM AS Videreutvikli… Innl… Bevil… 2022 13 225 000 2 850 000
#> 2 ANNO MUSEUM AS Videreutvikli… Innl… Bevil… 2023 13 225 000 2 850 000
#> 3 ANNO MUSEUM AS Videreutvikli… Innl… Bevil… 2024 13 225 000 2 850 000
#> 4 NORSK MARITIM… Digitaliserin… Oslo Bevil… 2021 1 497 600 1 166 000
#> 5 NORSK MARITIM… Digitaliserin… Oslo Bevil… 2022 1 687 600 1 167 000
#> 6 NORSK MARITIM… Digitaliserin… Oslo Bevil… 2023 1 727 600 1 167 000
#> 7 RYFYLKEMUSEET Tradisjonshan… Roga… Bevil… 2022 458 644 400 560
#> 8 RYFYLKEMUSEET Tradisjonshan… Roga… Bevil… 2023 458 644 418 750
#> 9 RYFYLKEMUSEET Tradisjonshan… Roga… Bevil… 2024 458 644 418 979
#> 10 SØRLANDETS KU… Digital samli… Agder Bevil… 2021 537 080 670 000
#> # ℹ 43 more rows
@yjunechoe
Copy link
Author

JS-first imperative approach

library(rvest)
library(chromote)

# JS-first variant: remove the unwanted nodes inside the browser with
# JavaScript, then serialise the cleaned DOM and parse it with rvest.

# Open page in headless chrome
url <- "https://www.kulturdirektoratet.no/web/guest/stotteordning/-/vis/digitalisering-mangfold-dialog-samarbeid/tildelinger"
b <- ChromoteSession$new()
# Open a viewer onto the session so the page can be watched while the script runs
b$view()
b$Page$navigate(url, wait_ = TRUE)
# Fixed pause between navigation and touching the DOM
# (presumably gives client-side scripts time to fill in the table).
Sys.sleep(3)

# Clean up junk first by executing JS
b$Runtime$evaluate('
  document.querySelectorAll("div.dropdown-row, .show-for-small-only")
    .forEach(e => e.remove())
')

# Get table from page: serialise the (already cleaned) DOM from the root node
rootnode <- b$DOM$getDocument()$root$nodeId
page <- b$DOM$getOuterHTML(rootnode)$outerHTML %>%
  read_html()

# The HTML now lives in R as `page`; close the browser session so the
# Chrome tab is not left running. This also ends the tab shown by `b$view()`.
b$close()

tbl <- page %>%
  html_element("#til-table") %>%
  html_table()

tbl

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment