A small script that reads DOIs from a bibtex file, fetches abstracts from Crossref when they are available, and exports another bibtex file with that added info.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Enrich a BibTeX file with abstracts fetched from Crossref.
#
# Reads "biblio.bib" from the working directory, looks up every entry's DOI
# with rcrossref::cr_abstract(), stores the result in an ABSTRACT field and
# writes the bibliography back out with df2bib().

# install and run packages
# install.packages("bib2df")
# install.packages("rcrossref")
library(bib2df)
library(rcrossref)

# import the bibtex to a data frame
# biblio.bib is a file in the working directory
df <- bib2df("biblio.bib")

# fetch the bibtex file from url:
# url <- "https://gist.githubusercontent.com/zackbatist/46c14011fd5dd4e2763842cd98627927/raw/e8678589cbb9f73ada52e7944bf617e588e1a5fe/GS01ax.bib"
# df <- bib2df(url)

# Collect the DOIs to look up. If the bibliography has no DOI field at all,
# treat every entry as "no DOI" so the script still completes and simply
# adds no abstracts.
dois <- if ("DOI" %in% names(df)) {
  as.character(df[["DOI"]])
} else {
  rep(NA_character_, nrow(df))
}

# Look up the abstract for a single DOI.
#
# Returns the abstract as a length-one character vector, or NA_character_
# when the DOI is missing/blank, the lookup fails for any reason, or the
# result is not a single string. Failures are reported with message() rather
# than being returned, so error text can never end up in the ABSTRACT field.
fetch_abstract <- function(doi) {
  # skip entries without a DOI instead of sending a pointless request
  if (is.na(doi) || !nzchar(trimws(doi))) {
    return(NA_character_)
  }
  tryCatch(
    {
      abstract <- cr_abstract(doi)
      if (is.character(abstract) && length(abstract) == 1L) {
        abstract
      } else {
        NA_character_
      }
    },
    error = function(e) {
      message("No abstract for DOI ", doi, ": ", conditionMessage(e))
      NA_character_
    }
  )
}

# loop through many DOIs, allowing for failures
# vapply() guarantees exactly one string per entry, so the result always
# lines up with the rows of df
df$ABSTRACT <- vapply(dois, fetch_abstract, character(1), USE.NAMES = FALSE)

# clean up the abstract field
# add any other regular expressions as you see fit
# The two filters below are kept as a safeguard in case a failure message is
# ever handed back as text instead of being raised as an error.
df$ABSTRACT[grepl("HTTP 404", df$ABSTRACT, fixed = TRUE)] <- NA
df$ABSTRACT[grepl("no abstract found for", df$ABSTRACT, fixed = TRUE)] <- NA
# strip opening and closing <p>, <strong>, <li>, <ul> and <em> tags
df$ABSTRACT <- gsub("</?(p|strong|li|ul|em)>", "", df$ABSTRACT)

# write to bibtex file
# following bibtex formatting rules, if there is no text following the abstract field the field will not be written at all
# NOTE(review): this writes to the same path that was read above, so the
# original file is overwritten -- point `file` elsewhere to keep the input.
df2bib(df, file = "biblio.bib", append = FALSE)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment