Skip to content

Instantly share code, notes, and snippets.

@mathzero
Last active November 6, 2023 04:58
Show Gist options
  • Save mathzero/0fb271725da8ff54de0508f15eba0c64 to your computer and use it in GitHub Desktop.
Save mathzero/0fb271725da8ff54de0508f15eba0c64 to your computer and use it in GitHub Desktop.
This script runs a literature search on PubMed programmatically, then pulls some article metadata on citations and altmetrics
install.packages("pubmedR")
install.packages("rcrossref")
devtools::install_github("ropensci/rAltmetric")
install.packages("tidyverse")
install.packages("janitor")
library(pubmedR)
library(rcrossref)
library(tidyverse)
library(janitor)
library(rAltmetric)
library(curl)
library(readr)
# Conduct pubmed search --------------------------------------------------
# NCBI E-utilities API key, passed to every pubmedR call below.
# Instructions for getting an API key:
#   https://ncbiinsights.ncbi.nlm.nih.gov/2017/11/02/new-api-keys-for-the-e-utilities/
# Register for an NCBI account here: https://account.ncbi.nlm.nih.gov/
api_key <- "INSERT_YOUR_API_KEY_HERE"

# Search expression using PubMed field tags. The literal spans several
# lines, so the line breaks are part of the string that is sent.
query <- "COVID-19*[Title/Abstract] OR SARS-CoV-2*[Title/Abstract]
AND symptoms*[Title/Abstract] AND persistent*[Title/Abstract] OR
long COVID*[Title/Abstract] OR post-COVID syndrome*[Title/Abstract]
AND english[LA] AND Journal Article[PT] AND 2020[DP]"

# Ask how many records match the query, and show that number
res <- pmQueryTotalCount(api_key = api_key, query = query)
res[["total_count"]]

# Request the matching records, using the reported total as the limit
docs <- pmApiRequest(
  api_key = api_key,
  limit = res[["total_count"]],
  query = query
)

# Convert the API response to a data frame with cleaned-up column names
df <- janitor::clean_names(pmApi2df(docs))
# Create local bibtex file ------------------------------------------------------
# Turn DOIs into resolver URLs (rows without a DOI are dropped)
urls <- paste0("https://doi.org/", df$di[!is.na(df$di)])

# One curl handle, reused for every request, that asks for a BibTeX response
h <- new_handle()
handle_setheaders(h, "accept" = "application/x-bibtex")

# Iterate through the URLs, pull the bib info, and append it to a local file.
# Only the first 10 are fetched here, to save time on the demo. head() is used
# instead of urls[1:10] because the latter pads with NA when fewer than 10 DOIs
# are available, which would make curl() fail.
walk(head(urls, 10), function(url) {
  con <- curl(url, handle = h)
  # Release this connection explicitly rather than relying on
  # closeAllConnections(), which would also close unrelated connections/sinks.
  on.exit(close(con), add = TRUE)
  tryCatch(
    con %>%
      readLines(warn = FALSE) %>%
      write(file = "mybibfile.bib", append = TRUE),
    # A single DOI that fails to download should not abort the whole batch
    error = function(e) {
      warning("Could not fetch BibTeX for ", url, ": ", conditionMessage(e),
              call. = FALSE)
    }
  )
})

# Read the file back in, one line per row, to inspect what was written
# (this only reads mybibfile.bib; it does not modify it)
read_delim("mybibfile.bib", delim = "\n")
# Query crossref for citation data ----------------------------------------
# Look up the Crossref citation count for every entry of df$di
# (synchronous requests: async = FALSE)
cites_df <- cr_citation_count(
  df$di,
  async = FALSE,
  key = "cboettig@ropensci.org",
  url = "http://www.crossref.org/openurl/"
)

# Attach the result column-wise (this relies on cites_df having one row per
# row of df, in the same order) and give the `count` column a clearer name
df <- cbind(df, cites_df) %>%
  rename(citations_count = count)
# Add altmetrics data -----------------------------------------------------
# Function for querying altmetrics API
#' Query the Altmetric v1 API for a single DOI
#'
#' @param doi A single, non-missing DOI, e.g. "10.1038/480426a".
#' @param apikey Accepted for interface compatibility; it is not sent with
#'   the request.
#' @param ... Ignored.
#' @return The parsed JSON response as a flattened list with class
#'   "altmetric".
#' @details Stops with an error when `doi` is not a single non-missing value
#'   or when the API answers 404. Any other HTTP error status only raises a
#'   warning before the response body is parsed.
altmetrics_new <-
  function(doi = NULL,
           apikey = NULL,
           ...) {
    # Fail before touching the network: an empty `doi` would otherwise
    # silently query the bare ".../doi/" endpoint.
    if (length(doi) != 1L || is.na(doi)) {
      stop("`doi` must be a single, non-missing value", call. = FALSE)
    }

    base_url <- "https://api.altmetric.com/v1/"
    request <- httr::GET(paste0(base_url, "doi/", doi))

    if (httr::status_code(request) == 404) {
      stop("No metrics found for object")
    }

    httr::warn_for_status(request)
    # State the encoding explicitly so httr does not have to guess it
    # (and does not emit a "No encoding supplied" message per request).
    body <- httr::content(request, as = "text", encoding = "UTF-8")
    results <- jsonlite::fromJSON(body, flatten = TRUE)
    results <- rlist::list.flatten(results)
    class(results) <- "altmetric"
    results
  }
### Batch altmetrics query function
#' Fetch Altmetric data for one DOI, returning NULL on failure
#'
#' Wraps `altmetric_data(altmetrics_new(...))` so that it can be mapped over
#' many DOIs: warnings are suppressed and any error yields NULL instead of
#' stopping the caller.
#'
#' @param x A DOI, passed to `altmetrics_new()` as `doi`.
#' @return Whatever `altmetric_data()` returns, or NULL if the lookup failed.
alm <- function(x) {
  out <- suppressWarnings(try(
    altmetric_data(altmetrics_new(
      doi = x,
      apikey = '37c9ae22b7979124ea650f3412255bf9'
    )),
    silent = TRUE
  ))
  # inherits() instead of class(out) == "try-error": a result with more than
  # one class would give a length > 1 condition, an error in R >= 4.2.
  if (inherits(out, "try-error")) {
    return(NULL)
  }
  out
}
### Get doi list (rows without a DOI are skipped)
dois <- df$di[!is.na(df$di)] %>% as.list()

# Pull altmetrics for each DOI; failed lookups return NULL from alm() and
# contribute no rows
alt_df <- map_df(dois, alm)

# Get rid of all these authors! Keep only the first two "authors" columns.
nms <- grep("authors", x = colnames(alt_df), value = TRUE)
# Negative indexing rather than nms[3:length(nms)]: with fewer than three
# matches, 3:length(nms) counts backwards and yields NA / wrong names.
nms <- nms[-seq_len(2)]
alt_df <- alt_df[, setdiff(colnames(alt_df), nms), drop = FALSE]

# Join with original data (PubMed DOI column `di` against Altmetric `doi`)
df <- df %>% left_join(alt_df, by = c("di" = "doi"))

# Impute the title: where Altmetric returned no title, fall back to the
# PubMed title converted to sentence case
df$ti <- stringr::str_to_sentence(df$ti)
df$title[is.na(df$title)] <- df$ti[is.na(df$title)]

# Score to numeric
df$score <- as.numeric(df$score)
# Summary top papers ------------------------------------------------------
# The ten papers with the highest citation counts
df %>%
  arrange(-citations_count) %>%
  select(title, journal, di, authors1, citations_count) %>%
  slice_head(n = 10)

# The ten papers with the highest Altmetric scores
df %>%
  arrange(-score) %>%
  select(title, journal, di, authors1, score) %>%
  slice_head(n = 10)
@Gabrielle-p
Copy link

Great, thank you @mathzero !

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment