# A small script that reads DOIs from a BibTeX file, fetches the matching abstracts from Crossref when they are available, and writes the entries back out as BibTeX with the abstracts added.
# Install (once) and load the required packages ----
# install.packages("bib2df")
# install.packages("rcrossref")
library(bib2df)
library(rcrossref)

# Paths, relative to the working directory ----
# NOTE: the output path is the same as the input path, so the source file is
# overwritten in place. Point `output_file` somewhere else to keep the original.
input_file <- "biblio.bib"
output_file <- "biblio.bib"

# Import the BibTeX entries into a data frame (one row per entry) ----
df <- bib2df(input_file)

# Alternatively, fetch the BibTeX file from a URL:
# url <- "https://gist.githubusercontent.com/zackbatist/46c14011fd5dd4e2763842cd98627927/raw/e8678589cbb9f73ada52e7944bf617e588e1a5fe/GS01ax.bib"
# df <- bib2df(url)

# Fetch the abstract for a single DOI.
#
# Returns exactly one character string, or NA_character_ when the DOI is
# missing/blank or when the lookup fails for any reason. Always returning a
# length-one character keeps the result aligned with the rows of `df`, and
# mapping every failure to NA means an error message can never end up being
# written into the bibliography as if it were an abstract.
fetch_abstract <- function(doi) {
  # Entries without a DOI cannot be looked up; skip the network request.
  if (is.na(doi) || !nzchar(trimws(doi))) {
    return(NA_character_)
  }
  tryCatch(
    {
      abstract <- cr_abstract(doi)
      if (length(abstract) == 0L) {
        NA_character_
      } else {
        paste(as.character(abstract), collapse = " ")
      }
    },
    error = function(e) {
      # Report the failure instead of swallowing it, then carry on.
      message("No abstract for ", doi, ": ", conditionMessage(e))
      NA_character_
    }
  )
}

# Look up every DOI, allowing for failures ----
# A file in which no entry has a DOI may not yield a DOI column at all.
dois <- if ("DOI" %in% names(df)) {
  as.character(df$DOI)
} else {
  rep(NA_character_, nrow(df))
}
abstracts <- vapply(dois, fetch_abstract, character(1), USE.NAMES = FALSE)

# Clean up the fetched abstracts ----
# Strip the opening and closing forms of a few HTML tags; add any other tag
# names to the alternation as you see fit.
abstracts <- gsub("</?(p|strong|li|ul|em)>", "", abstracts)
# Treat an abstract that is empty after cleaning as missing.
abstracts[!is.na(abstracts) & !nzchar(trimws(abstracts))] <- NA_character_

# Keep an abstract that was already present in the file when Crossref has
# nothing to offer, rather than replacing it with NA.
if ("ABSTRACT" %in% names(df)) {
  keep_existing <- is.na(abstracts)
  abstracts[keep_existing] <- as.character(df$ABSTRACT)[keep_existing]
}

# Store the results in a field called ABSTRACT as character strings.
df$ABSTRACT <- abstracts

# Write to BibTeX file ----
# Entries whose ABSTRACT is missing (NA) get no abstract field in the output.
df2bib(df, file = output_file, append = FALSE)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment