Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
# load libraries -------------------------------------------------------
library(psjournals)
library(tidyverse)
library(rvest)
library(robotstxt)
# get the journals data from the psjournals package ---------------------
# Pull the journal code out of a journal URL: the fifth "/"-separated piece.
extract_journal_code <- function(journal_url) {
  url_pieces <- str_split(journal_url, fixed("/"), simplify = TRUE)
  url_pieces[, 5]
}

# Taylor & Francis journals only, de-duplicated, highest h5-index first.
tf_journals <- psjournals %>%
  filter(publisher == "Taylor & Francis") %>%
  select(journal, publisher, url, h5_index) %>%
  distinct() %>%
  mutate(id = extract_journal_code(url)) %>%
  arrange(desc(h5_index))
# check if scraping metrics is allowed [it is] -------------------------
# Build the robots.txt object for the site, then display its `permissions`
# element so the rules can be inspected before scraping.
tandfonline_robots <- robotstxt(domain = "https://www.tandfonline.com")
tandfonline_robots$permissions
# scrape related pages for metrics --------------------------------------

# Scrape the ".speed li" entries from one journal's metrics page.
#
# Arguments:
#   journal  journal name, stored in the `Journal` column
#   url      journal URL, stored in the `url` column
#   id       journal code appended to the metrics-page URL
#
# Returns a tibble with columns `Journal`, `url` and `metrics`, one row per
# matched element. If the page cannot be fetched or parsed, a warning is
# raised and a zero-row tibble with the same columns is returned, so a single
# failing journal neither stops the run nor inherits another journal's rows.
scrape_journal_metrics <- function(journal, url, id) {
  metrics_url <- paste0(
    "https://www.tandfonline.com/action/journalInformation",
    "?show=journalMetrics&journalCode=",
    id
  )
  tryCatch(
    {
      page <- read_html(metrics_url)
      tibble(
        Journal = journal,
        url = url,
        metrics = page %>%
          html_elements(".speed li") %>%
          html_text()
      )
    },
    error = function(e) {
      warning(
        "Could not scrape metrics for ", journal, ": ", conditionMessage(e),
        call. = FALSE
      )
      tibble(Journal = character(), url = character(), metrics = character())
    }
  )
}

# One list slot per journal, preallocated; seq_len() keeps the loop from
# running at all when tf_journals has no rows.
temp_list <- vector("list", nrow(tf_journals))
for (i in seq_len(nrow(tf_journals))) {
  temp_list[[i]] <- scrape_journal_metrics(
    journal = tf_journals$journal[i],
    url = tf_journals$url[i],
    id = tf_journals$id[i]
  )
}
# create the metrics dataframe ------------------------------------------
# Stack the per-journal tibbles, label each scraped metric, and reshape to one
# row per journal with one column per metric type.
tf_metrics <- bind_rows(temp_list) %>%
  mutate(
    # The scraped tibbles name this column `Journal` (capital J); replace the
    # plain name with an HTML link to the journal page.
    Journal = paste0("<a href='", url, "'>", Journal, "</a>"),
    # The first word of the scraped text is taken as the statistic.
    statistic = word(metrics, 1),
    # Classify each metric by a phrase in its text; text matching none of the
    # phrases gets NA.
    type = case_when(
      str_detect(metrics, "acceptance rate") ~ "Acceptance Rate",
      str_detect(metrics, "first decision") ~ "First Decision",
      str_detect(metrics, "post-review") ~ "First Review",
      str_detect(metrics, "online publication") ~ "Online Publication"
    )
  ) %>%
  pivot_wider(
    id_cols = Journal,
    names_from = type,
    values_from = statistic
  )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment