Instantly share code, notes, and snippets.

Embed
What would you like to do?
NEJM and pre-print policy
## Code for the blog post here:
## https://mathewkiang.com/2017/10/08/using-r-wikipedia-sherparomeo-show-new-england-journal-medicines-pre-print-statement-empirically-false
## You can download the original files from the blogpost.
## If downloading new files, you probably want to change the date in the file names.
## Imports
library(rvest)
library(tidyverse)
## Wiki data
## Reuse the cached journal list when it is already on disk; otherwise scrape
## the first table on Wikipedia's "List of medical journals" page and cache it.
if (file.exists("./list_of_med_journals_20170906.RDS")) {
  list_df <- readRDS("./list_of_med_journals_20170906.RDS")
} else {
  list_url <- "https://en.wikipedia.org/wiki/List_of_medical_journals"

  ## Parse every <table> on the page and keep only the first one.
  list_df <- html_table(
    html_nodes(read_html(list_url), "table"),
    fill = TRUE
  )[[1]]

  saveRDS(list_df, file = "./list_of_med_journals_20170906.RDS")
}
## RoMEO data
## Query the SHERPA/RoMEO API once per journal name in `list_df` and record the
## returned RoMEO colour, ISSN, and API outcome. The result is cached in an RDS
## file, so the API is only contacted when that file does not exist yet.
if (!file.exists("./romeo_colors_20170906.RDS")) {
  ## Make new columns (store ISSN for verification in future).
  ## Typed NAs keep the columns character even before the first assignment.
  romeo_df <- list_df %>%
    mutate(romeo = NA_character_,
           issn = NA_character_,
           api_outcome = NA_character_)

  ## Loop-invariant endpoint; the journal title is appended per request.
  api_url <- "http://www.sherpa.ac.uk/romeo/api29.php?jtitle="

  for (i in seq_along(romeo_df$Name)) {
    print(romeo_df$Name[i])

    ## Percent-encode the whole title, not just spaces: characters such as
    ## "&" or "+" would otherwise be read as query-string syntax.
    api_req <- paste0(
      api_url,
      URLencode(as.character(romeo_df$Name[i]), reserved = TRUE)
    )
    request <- read_xml(api_req)

    temp_issn <- request %>%
      xml_node("issn") %>%
      xml_text()
    temp_color <- request %>%
      xml_node("romeocolour") %>%
      xml_text()
    temp_outcome <- request %>%
      xml_node("outcome") %>%
      xml_text()

    ## Store the first value of each field for this journal, or NA if the
    ## lookup produced nothing.
    romeo_df$issn[i] <-
      if (length(temp_issn) > 0) temp_issn[1] else NA_character_
    romeo_df$romeo[i] <-
      if (length(temp_color) > 0) temp_color[1] else NA_character_
    ## Index by row: assigning to the whole column would overwrite every
    ## journal's outcome with the outcome of the latest request.
    romeo_df$api_outcome[i] <-
      if (length(temp_outcome) > 0) temp_outcome[1] else NA_character_
  }

  saveRDS(romeo_df, file = "./romeo_colors_20170906.RDS")
} else {
  romeo_df <- readRDS("./romeo_colors_20170906.RDS")
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment