Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save Tadge-Analytics/f23e5fe7512e5e30f38d3d72a8fbc9f9 to your computer and use it in GitHub Desktop.
Save Tadge-Analytics/f23e5fe7512e5e30f38d3d72a8fbc9f9 to your computer and use it in GitHub Desktop.
library(tidyverse)
library(rvest)
# Researchers whose Google Scholar profiles are scraped below.
# `name` and `url_path` are parallel vectors: element i of one belongs to
# element i of the other.
google_scholar_urls <- tibble::tibble(
  name = c(
    "Tim Capon",
    "Céline Nauges",
    "Stéphane De Cara",
    "Israel Finkelshtain",
    "Macario Rodríguez-Entrena",
    "Carmen Almansa Sáez"
  ),
  url_path = c(
    "https://scholar.google.com/citations?user=mJeSBzAAAAAJ&hl=en",
    "https://scholar.google.com/citations?hl=en&user=ehxgHeQAAAAJ",
    "https://scholar.google.com/citations?hl=en&user=OjOkZwEAAAAJ",
    "https://scholar.google.com/citations?hl=en&user=ZBlt4VgAAAAJ",
    "https://scholar.google.com/citations?hl=en&user=pkagohUAAAAJ",
    "https://scholar.google.com/citations?hl=en&user=2L3U3doAAAAJ"
  )
)
###################################################################
# test the process with an individual url
# Take the first profile URL from the table as a smoke test.
url <- google_scholar_urls %>%
  slice(1) %>%
  pull(url_path)

# Fetch the page and keep the nodes carrying the `gsc_rsb_std` CSS class;
# their text is reshaped below into a 3-row, 2-column statistics table.
download <- read_html(url) %>%
  html_nodes(".gsc_rsb_std")

stat_values <- html_text(download)

# The reshaping below needs exactly 3 x 2 = 6 values. Fail early with a
# readable message if the page returned anything else, instead of erroring
# later inside the reshaping or silently producing extra columns.
if (length(stat_values) != 6L) {
  stop(
    "Expected 6 '.gsc_rsb_std' cells but found ", length(stat_values),
    " at ", url,
    call. = FALSE
  )
}

# Name the columns on the matrix itself so `as_tibble()` does not have to
# invent V1/V2 names (which raises a name-repair deprecation warning).
# NOTE(review): the "Since 2015" label is hard-coded; confirm it still matches
# the period shown on the profile page.
stat_values %>%
  matrix(
    nrow = 3,
    byrow = TRUE,
    dimnames = list(NULL, c("All", "Since 2015"))
  ) %>%
  as_tibble() %>%
  add_column(stat_name = c("Citations", "h-index", "i10-index"), .before = 1)
###################################################################
# run the process over all urls, with map
# Download one profile page and return its statistics as a tibble.
#
# @param profile_url Character scalar, URL of the profile page to read.
# @return A 3-row tibble with columns `stat_name`, `All` and `Since 2015`
#   (the two value columns are character, as scraped).
# Stops with an error when the page does not contain exactly six
# `.gsc_rsb_std` cells, so a blocked request or changed layout is reported
# with the offending URL rather than failing obscurely later on.
read_scholar_stats <- function(profile_url) {
  stat_values <- profile_url %>%
    read_html() %>%
    html_nodes(".gsc_rsb_std") %>%
    html_text()

  if (length(stat_values) != 6L) {
    stop(
      "Expected 6 '.gsc_rsb_std' cells but found ", length(stat_values),
      " at ", profile_url,
      call. = FALSE
    )
  }

  # Column names are set on the matrix so `as_tibble()` needs no name repair.
  # NOTE(review): the "Since 2015" label is hard-coded; confirm it still
  # matches the period shown on the profile page.
  stat_values %>%
    matrix(
      nrow = 3,
      byrow = TRUE,
      dimnames = list(NULL, c("All", "Since 2015"))
    ) %>%
    as_tibble() %>%
    add_column(stat_name = c("Citations", "h-index", "i10-index"), .before = 1)
}

# Scrape every profile in the table, then expand the per-profile statistics
# so the result has one row per (researcher, statistic). The trailing
# `print()` shows the result while still assigning it.
all_download <- google_scholar_urls %>%
  mutate(stat_table = map(url_path, read_scholar_stats)) %>%
  unnest(stat_table) %>%
  print()
# Save the combined table to an Excel workbook whose file name carries
# today's date. `TRUE` is spelled out because `T` is an ordinary variable
# that can be reassigned.
all_download %>%
  openxlsx::write.xlsx(
    paste0("ggle_schlr_scp_", lubridate::today(), ".xlsx"),
    asTable = TRUE
  )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment