Created
September 19, 2018 18:27
-
-
Save zackbatist/bfeaa66b64c7afe749a7f5c6f9e596c2 to your computer and use it in GitHub Desktop.
A small script that reads DOIs from a bibtex file, fetches abstracts from Crossref when they are available, and exports another bibtex file with that added info.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Enrich a BibTeX file with abstracts fetched from Crossref ----
#
# Reads a BibTeX file into a data frame, looks up the abstract for every
# entry that has a DOI via rcrossref::cr_abstract(), strips a handful of
# HTML tags from the returned text, and writes the result back out as BibTeX.

# install and load packages
# install.packages("bib2df")
# install.packages("rcrossref")
library(bib2df)
library(rcrossref)

# Paths are relative to the working directory.
# NOTE: output_file defaults to the same path as input_file, so the input
# is overwritten in place. Point output_file elsewhere to keep the original.
input_file <- "biblio.bib"
output_file <- "biblio.bib"

# import the bibtex to a data frame
df <- bib2df(input_file)

# fetch the bibtex file from url:
# url <- "https://gist.githubusercontent.com/zackbatist/46c14011fd5dd4e2763842cd98627927/raw/e8678589cbb9f73ada52e7944bf617e588e1a5fe/GS01ax.bib"
# df <- bib2df(url)

# Fetch one abstract, returning NA instead of failing.
#
# Any error raised by cr_abstract() (for example an HTTP 404 or a
# "no abstract found for ..." error) yields NA_character_, so an error
# message can never end up stored as abstract text.
# Entries without a DOI are skipped without contacting Crossref.
fetch_abstract <- function(doi) {
  if (is.na(doi) || !nzchar(doi)) {
    return(NA_character_)
  }
  tryCatch(
    # force a single character value so vapply() below always gets length 1
    as.character(cr_abstract(doi))[1L],
    error = function(e) NA_character_
  )
}

# If no entry in the file carries a DOI field there may be no DOI column at
# all; treat that as "every DOI missing" rather than failing on assignment.
dois <- if ("DOI" %in% names(df)) {
  as.character(df$DOI)
} else {
  rep(NA_character_, nrow(df))
}

# loop through many DOIs, allowing for failures;
# vapply() guarantees exactly one character value per row
df$ABSTRACT <- vapply(dois, fetch_abstract, character(1), USE.NAMES = FALSE)

# clean up the abstract field: drop opening and closing p, strong, li, ul
# and em tags; extend the alternation with other tags as you see fit
df$ABSTRACT <- gsub("</?(p|strong|li|ul|em)>", "", df$ABSTRACT)

# write to bibtex file
# following bibtex formatting rules, if there is no text following the
# abstract field the field will not be written at all
df2bib(df, file = output_file, append = FALSE)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment