library(tidyverse)
library(rvest)
#>
#> Attaching package: 'rvest'
#> The following object is masked from 'package:readr':
#>
#> guess_encoding
library(polite)
# Address of the Wikipedia article that is scraped below
# (the all-time Olympic Games medal table).
url <- "https://en.wikipedia.org/wiki/All-time_Olympic_Games_medal_table"
#' Bow to a URL and scrape it in one step
#'
#' Thin convenience wrapper: passes `x` to `polite::bow()` and hands the
#' resulting session straight to `polite::scrape()`.
#'
#' @param x Passed unchanged to `polite::bow()`; the script supplies a URL
#'   string.
#' @return Whatever `polite::scrape()` returns for that session.
bow_scrape <- function(x) {
  session <- polite::bow(x)
  polite::scrape(session)
}
# Fetch the article once and keep the scraped document around.
wiki_raw <- url %>%
  bow_scrape()
# Convert the scraped document's tables into a list (one element per table).
all_tables <- wiki_raw %>%
  html_table()
#> Warning in table_fill(cells, trim = trim): NAs introduced by coercion
#> Warning in table_fill(cells, trim = trim): NAs introduced by coercion
# Inspect the first parsed table. As the printed output shows, it is the
# article's "Olympic Games" navigation box rather than medal data.
all_tables[[1]]
#> # A tibble: 6 × 1
#> `Olympic Games`
#> <chr>
#> 1 ""
#> 2 "Main topics"
#> 3 "Bids\nBoycotts\nCeremonies\nCharter\nHost cities\nIFs\nIOC\nMedal\nMedal tab…
#> 4 "Games"
#> 5 "Summer\nWinterYouthAfrican\nAsian\nEuropean\nPacific\nPan-AmericanAncient\nI…
#> 6 ".mw-parser-output .navbar{display:inline;font-size:88%;font-weight:normal}.m…
#' Pick a single table out of a list of tables
#'
#' @param table A list (here: the list of parsed tables).
#' @param which Index or name of the element to extract; used with `[[`.
#' @return The selected element itself, not a length-one list.
which_table <- function(table, which) {
  selected <- table[[which]]
  selected
}
# Look at the third table. Its first column holds the team name followed by
# the IOC code in parentheses (see the printed output).
which_table(all_tables, 3)
#> # A tibble: 76 × 4
#> `Team (IOC code)` `No. Summer` `No. Winter` `No. Games`
#> <chr> <int> <int> <int>
#> 1 Albania (ALB) 9 5 14
#> 2 American Samoa (ASA) 9 2 11
#> 3 Andorra (AND) 12 13 25
#> 4 Angola (ANG) 10 0 10
#> 5 Antigua and Barbuda (ANT) 11 0 11
#> 6 Aruba (ARU) 9 0 9
#> 7 Bangladesh (BAN) 10 0 10
#> 8 Belize (BIZ) [BIZ] 13 0 13
#> 9 Benin (BEN) [BEN] 12 0 12
#> 10 Bhutan (BHU) 10 0 10
#> # … with 66 more rows
#> # ℹ Use `print(n = ...)` to see more rows
#' Extract the first parenthesised group from each string
#'
#' @param x A character vector, e.g. `"Albania (ALB)"`.
#' @return A character vector the same length as `x` holding the first
#'   `(...)` match of each element, parentheses included; `NA` where an
#'   element has no such match.
get_brackets <- function(x) {
  # Lazy quantifier so the match stops at the first closing parenthesis.
  paren_group <- "\\((.*?)\\)"
  stringr::str_extract(string = x, pattern = paren_group)
}
#' Strip every round bracket from each string
#'
#' @param x A character vector, e.g. `"(ALB)"`.
#' @return `x` with all `(` and `)` characters deleted; other characters are
#'   left as they are.
remove_brackets <- function(x) {
  # Replacing each match with the empty string is the same as removing it.
  stringr::str_replace_all(
    string = x,
    pattern = "\\(|\\)",
    replacement = ""
  )
}
# Quick check of both helpers on a single team label.
remove_brackets(get_brackets("Albania (ALB)"))
#> [1] "ALB"
# Take the third table, pull out its first column (team labels), and reduce
# each label to the bare code found inside the parentheses.
all_tables %>%
  which_table(3) %>%
  pull(1) %>%
  get_brackets() %>%
  remove_brackets()
#> [1] "ALB" "ASA" "AND" "ANG" "ANT" "ARU" "BAN" "BIZ" "BEN" "BHU" "BOL" "BIH"
#> [13] "IVB" "BRU" "CAM" "CPV" "CAY" "CAF" "CHA" "COM" "CGO" "COD" "COK" "DMA"
#> [25] "ESA" "SWZ" "GEQ" "GAM" "GUM" "GUI" "GBS" "HON" "KIR" "LAO" "LES" "LBR"
#> [37] "LBA" "MAD" "MAW" "MDV" "MLI" "MLT" "MHL" "MTN" "FSM" "MON" "MYA" "NRU"
#> [49] "NEP" "NCA" "OMA" "PLW" "PLE" "PNG" "RWA" "SKN" "LCA" "VIN" "STP" "SEY"
#> [61] "SLE" "SOL" "SOM" "SSD" "TLS" "TUV" "VAN" "YEM" "COR" "MAL" "NBO" "ROC"
#> [73] "SAA" "YAR" "YMD" "ROT"
Created on 2022-09-01 by the reprex package (v2.0.1)
Session info
# Print the R version, platform and package versions used for this run.
sessioninfo::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#> setting value
#> version R version 4.2.0 (2022-04-22)
#> os macOS Monterey 12.3.1
#> system aarch64, darwin20
#> ui X11
#> language (EN)
#> collate en_AU.UTF-8
#> ctype en_AU.UTF-8
#> tz Australia/Perth
#> date 2022-09-01
#> pandoc 2.18 @ /Applications/RStudio.app/Contents/MacOS/quarto/bin/tools/ (via rmarkdown)
#>
#> ─ Packages ───────────────────────────────────────────────────────────────────
#> package * version date (UTC) lib source
#> assertthat 0.2.1 2019-03-21 [1] CRAN (R 4.2.0)
#> backports 1.4.1 2021-12-13 [1] CRAN (R 4.2.0)
#> broom 1.0.0 2022-07-01 [1] CRAN (R 4.2.0)
#> cachem 1.0.6 2021-08-19 [1] CRAN (R 4.2.0)
#> cellranger 1.1.0 2016-07-27 [1] CRAN (R 4.2.0)
#> cli 3.3.0.9000 2022-06-15 [1] Github (r-lib/cli@31a5db5)
#> colorspace 2.0-3 2022-02-21 [1] CRAN (R 4.2.0)
#> crayon 1.5.1 2022-03-26 [1] CRAN (R 4.2.0)
#> curl 4.3.2 2021-06-23 [1] CRAN (R 4.2.0)
#> DBI 1.1.3 2022-06-18 [1] CRAN (R 4.2.0)
#> dbplyr 2.2.1 2022-06-27 [1] CRAN (R 4.2.0)
#> digest 0.6.29 2021-12-01 [1] CRAN (R 4.2.0)
#> dplyr * 1.0.9 2022-04-28 [1] CRAN (R 4.2.0)
#> ellipsis 0.3.2 2021-04-29 [1] CRAN (R 4.2.0)
#> evaluate 0.16 2022-08-09 [1] CRAN (R 4.2.0)
#> fansi 1.0.3 2022-03-24 [1] CRAN (R 4.2.0)
#> fastmap 1.1.0 2021-01-25 [1] CRAN (R 4.2.0)
#> forcats * 0.5.1 2021-01-27 [1] CRAN (R 4.2.0)
#> fs 1.5.2 2021-12-08 [1] CRAN (R 4.2.0)
#> gargle 1.2.0 2021-07-02 [1] CRAN (R 4.2.0)
#> generics 0.1.3 2022-07-05 [1] CRAN (R 4.2.0)
#> ggplot2 * 3.3.6 2022-05-03 [1] CRAN (R 4.2.0)
#> glue 1.6.2 2022-02-24 [1] CRAN (R 4.2.0)
#> googledrive 2.0.0 2021-07-08 [1] CRAN (R 4.2.0)
#> googlesheets4 1.0.1 2022-08-13 [1] CRAN (R 4.2.0)
#> gtable 0.3.0 2019-03-25 [1] CRAN (R 4.2.0)
#> haven 2.5.0 2022-04-15 [1] CRAN (R 4.2.0)
#> highr 0.9 2021-04-16 [1] CRAN (R 4.2.0)
#> hms 1.1.1 2021-09-26 [1] CRAN (R 4.2.0)
#> htmltools 0.5.3 2022-07-18 [1] CRAN (R 4.2.0)
#> httr 1.4.3 2022-05-04 [1] CRAN (R 4.2.0)
#> jsonlite 1.8.0 2022-02-22 [1] CRAN (R 4.2.0)
#> knitr 1.39 2022-04-26 [1] CRAN (R 4.2.0)
#> lifecycle 1.0.1 2021-09-24 [1] CRAN (R 4.2.0)
#> lubridate 1.8.0 2021-10-07 [1] CRAN (R 4.2.0)
#> magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.2.0)
#> memoise 2.0.1 2021-11-26 [1] CRAN (R 4.2.0)
#> mime 0.12 2021-09-28 [1] CRAN (R 4.2.0)
#> modelr 0.1.8 2020-05-19 [1] CRAN (R 4.2.0)
#> munsell 0.5.0 2018-06-12 [1] CRAN (R 4.2.0)
#> pillar 1.8.0 2022-07-18 [1] CRAN (R 4.2.0)
#> pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.2.0)
#> polite * 0.1.2 2022-08-09 [1] CRAN (R 4.2.0)
#> purrr * 0.3.4 2020-04-17 [1] CRAN (R 4.2.0)
#> R.cache 0.16.0 2022-07-21 [1] CRAN (R 4.2.0)
#> R.methodsS3 1.8.2 2022-06-13 [1] CRAN (R 4.2.0)
#> R.oo 1.25.0 2022-06-12 [1] CRAN (R 4.2.0)
#> R.utils 2.12.0 2022-06-28 [1] CRAN (R 4.2.0)
#> R6 2.5.1 2021-08-19 [1] CRAN (R 4.2.0)
#> ratelimitr 0.4.1 2018-10-07 [1] CRAN (R 4.2.0)
#> Rcpp 1.0.9 2022-07-08 [1] CRAN (R 4.2.0)
#> readr * 2.1.2 2022-01-30 [1] CRAN (R 4.2.0)
#> readxl 1.4.0 2022-03-28 [1] CRAN (R 4.2.0)
#> reprex 2.0.1 2021-08-05 [1] CRAN (R 4.2.0)
#> rlang 1.0.4 2022-07-12 [1] CRAN (R 4.2.0)
#> rmarkdown 2.14 2022-04-25 [1] CRAN (R 4.2.0)
#> robotstxt 0.7.13 2020-09-03 [1] CRAN (R 4.2.0)
#> rstudioapi 0.13 2020-11-12 [1] CRAN (R 4.2.0)
#> rvest * 1.0.2 2021-10-16 [1] CRAN (R 4.2.0)
#> scales 1.2.0 2022-04-13 [1] CRAN (R 4.2.0)
#> sessioninfo 1.2.2 2021-12-06 [1] CRAN (R 4.2.0)
#> spiderbar 0.2.4 2021-05-16 [1] CRAN (R 4.2.0)
#> stringi 1.7.8 2022-07-11 [1] CRAN (R 4.2.0)
#> stringr * 1.4.0 2019-02-10 [1] CRAN (R 4.2.0)
#> styler 1.7.0 2022-03-13 [1] CRAN (R 4.2.0)
#> tibble * 3.1.8 2022-07-22 [1] CRAN (R 4.2.0)
#> tidyr * 1.2.0 2022-02-01 [1] CRAN (R 4.2.0)
#> tidyselect 1.1.2 2022-02-21 [1] CRAN (R 4.2.0)
#> tidyverse * 1.3.2 2022-07-18 [1] CRAN (R 4.2.0)
#> tzdb 0.3.0 2022-03-28 [1] CRAN (R 4.2.0)
#> usethis 2.1.6 2022-05-25 [1] CRAN (R 4.2.0)
#> utf8 1.2.2 2021-07-24 [1] CRAN (R 4.2.0)
#> vctrs 0.4.1 2022-04-13 [1] CRAN (R 4.2.0)
#> withr 2.5.0 2022-03-03 [1] CRAN (R 4.2.0)
#> xfun 0.32.1 2022-08-11 [1] https://yihui.r-universe.dev (R 4.2.0)
#> xml2 1.3.3 2021-11-30 [1] CRAN (R 4.2.0)
#> yaml 2.3.5 2022-02-21 [1] CRAN (R 4.2.0)
#>
#> [1] /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/library
#>
#> ──────────────────────────────────────────────────────────────────────────────