Skip to content

Instantly share code, notes, and snippets.

@jrsmith3
Created May 5, 2013 22:11
Show Gist options
  • Save jrsmith3/5522397 to your computer and use it in GitHub Desktop.
Save jrsmith3/5522397 to your computer and use it in GitHub Desktop.
Combining ideas from [getcites.py](https://gist.github.com/jrsmith3/5519665) and [doi2bib.py](https://gist.github.com/jrsmith3/5513926), generate a bibtex file of papers referenced by DOIs in the body of a LaTeX document.
# -*- coding: utf-8 -*-
import pycurl
import StringIO
from pybtex import auxfile
from pybtex.database.input import bibtex as bibtexin
from pybtex.database.output import bibtex as bibtexout
import pybtex.database
def doi2bib(doi):
    """
    Return a bibTeX string of metadata for a given DOI.

    The DOI is appended to ``http://dx.doi.org/`` and requested with an
    ``Accept: application/x-bibtex`` header, following redirects. The
    response body is returned unmodified.

    :param doi: DOI to look up, e.g. ``"10.1000/182"``.
    :returns: The raw response body. No HTTP status check is made, so a
        failed lookup returns whatever body the server sent.
    :raises pycurl.error: If the transfer itself fails (for example the
        5 second connection timeout expires).
    """
    # Build the URL
    url = "http://dx.doi.org/" + doi
    # pycurl wants a byte string for the URL under Python 2, and ``str(url)``
    # raises UnicodeEncodeError on non-ASCII unicode text, so encode
    # explicitly instead of relying on the implicit ASCII conversion.
    if not isinstance(url, str):
        url = url.encode("utf-8")

    # Create an object to write data to
    buf = StringIO.StringIO()
    # Create the curl object
    c = pycurl.Curl()
    try:
        # Set up the curl options
        c.setopt(pycurl.URL, url)
        c.setopt(pycurl.HTTPHEADER, ["Accept: application/x-bibtex"])
        c.setopt(pycurl.FOLLOWLOCATION, 1)
        c.setopt(pycurl.WRITEFUNCTION, buf.write)
        c.setopt(pycurl.CONNECTTIMEOUT, 5)
        # Execute the curl request
        c.perform()
        return buf.getvalue()
    finally:
        # Release the curl handle and the buffer even when perform() raises.
        c.close()
        buf.close()
# The citation keys used in the LaTeX document are DOIs. The easiest way to
# collect them is to build the document and let pybtex parse the resulting
# aux file.
aux_data = auxfile.parse_file("paper.aux", None)

# Create empty bibtex database for entries with DOI for keys.
doikey_db = pybtex.database.BibliographyData()

# Guard against the aux file listing the same key more than once: fetching it
# twice wastes a request and adding it twice would be a duplicate entry.
# DOIs are case-insensitive, so compare them lowercased.
seen = set()
for cite in aux_data.citations:
    normalized = cite.lower()
    if normalized in seen:
        continue
    seen.add(normalized)

    # Parse each response with its own parser. The keys generated by the DOI
    # resolver are discarded anyway, and parsing all responses as one stream
    # would let two records with the same generated key collide.
    parsed = bibtexin.Parser().parse_stream(StringIO.StringIO(doi2bib(cite)))
    for en in parsed.entries.values():
        # Key the entry by the citation key itself rather than by the
        # record's "doi" field: that field may be absent or spelled
        # differently, and the key must match what the document cites.
        doikey_db.add_entry(cite, en)

# Write the result to a new bibtex database.
w = bibtexout.Writer()
w.write_file(doikey_db, "bibtex.bib")
@jrsmith3
Copy link
Author

jrsmith3 commented May 5, 2013

One problem with this is that pycurl can't deal with unicode.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment