Skip to content

Instantly share code, notes, and snippets.

@oiao
Last active October 27, 2021 12:19
Show Gist options
  • Save oiao/5753aaac1540e9e6a793d080895877ac to your computer and use it in GitHub Desktop.
Save oiao/5753aaac1540e9e6a793d080895877ac to your computer and use it in GitHub Desktop.
Convert a list of DOIs to BibTeX strings
#!/usr/bin/env python3
def get_json(doi: str) -> dict:
    """Fetch the citation record for a DOI and return it as a dict.

    ``doi`` may be a bare DOI (``10.1000/xyz``) or any string containing
    ``doi.org/<DOI>`` (for example a full resolver URL); in the latter case
    only the part after ``doi.org/`` is used.

    The record is requested from ``https://dx.doi.org/`` with an ``Accept``
    header asking for CSL-JSON, and the response body is parsed with
    ``json.loads``.

    Raises:
        SystemExit: with status 1, after printing a message to stderr, when
            the resolver answers with an HTTP error status.

    Any other failure (connection problems, invalid JSON, ...) propagates
    unchanged to the caller.
    """
    import json
    import re
    import sys
    import urllib.request
    from urllib.error import HTTPError

    # Accept both "10.1000/xyz" and ".../doi.org/10.1000/xyz".
    found = re.search(r'doi\.org/(.*)', doi)
    identifier = found.group(1) if found else doi

    req = urllib.request.Request('https://dx.doi.org/' + identifier)
    # see https://citation.crosscite.org/docs.html
    # ('application/x-bibtex' would return ready-made BibTeX instead of JSON)
    req.add_header('Accept', 'application/vnd.citationstyles.csl+json')

    try:
        with urllib.request.urlopen(req) as response:
            payload = response.read().decode()
    except HTTPError as e:
        # Keep stdout clean: it is where the BibTeX output is written.
        print(f"HTTP Error for {doi}: {e.code}", file=sys.stderr)
        sys.exit(1)

    return json.loads(payload)
def _person_name(person: dict) -> str:
    """Format one CSL name object as ``Family, Given`` where possible."""
    family = person.get('family')
    given = person.get('given')
    if family and given:
        return f"{family}, {given}"
    # Institutional / single-part names carry no family+given pair.
    return family or person.get('literal') or person.get('name') or given or ''


def bibify(d: dict, to_str=True, abbreviate_journal=True, titlecase=True) -> dict:
    """Filter the output of ``get_json()`` down to BibTeX fields.

    Args:
        d: citation record as returned by ``get_json()``.
        to_str: convert every value of the result to ``str``.
        abbreviate_journal: use ``container-title-short`` (with a trailing
            period added to each word) as the journal name when it is
            present and non-empty; otherwise ``container-title`` is used.
        titlecase: apply ``str.title()`` to the title.

    Returns:
        A dict with the keys ``ENTRYTYPE`` and ``ID`` plus the BibTeX fields
        that could be filled from ``d``.

    Raises:
        KeyError: if ``d`` lacks ``type``, ``title``, ``author`` or a
            publication date (``published`` or ``issued``).
    """
    import re

    bibtex = {}
    # Anything that is not some kind of article keeps its own record type.
    bibtex['ENTRYTYPE'] = 'article' if 'article' in d['type'] else d['type']

    bibtex['title'] = d['title'].title() if titlecase else d['title']
    bibtex['author'] = ' and '.join(_person_name(person) for person in d['author'])

    short_title = d.get('container-title-short')
    if abbreviate_journal and short_title:
        bibtex['journal'] = ' '.join(
            word if word.endswith('.') else word + '.' for word in short_title.split()
        )
    elif 'container-title' in d:
        bibtex['journal'] = d['container-title']

    if 'page' in d:
        # Collapse any run of hyphens so "1-9" and "1--9" both give "1--9".
        bibtex['pages'] = re.sub(r'-+', '--', d['page'])
    if 'number' in d:
        bibtex['issue'] = d['number']

    # Some records only carry 'issued'; prefer 'published' when available.
    date = d['published'] if 'published' in d else d['issued']
    y, *md = date['date-parts'][0]
    bibtex['year'] = y
    if md:
        bibtex['month'] = md[0]

    # CSL-JSON spells DOI and URL in upper case; accept either spelling.
    for k in 'doi', 'url', 'volume', 'publisher':
        for source_key in (k, k.upper()):
            if source_key in d:
                bibtex[k] = d[source_key]
                break

    first_author = d['author'][0]
    bibtex['ID'] = (first_author.get('family') or _person_name(first_author)) + f"{y}"

    if to_str:
        bibtex = {k: str(v) for k, v in bibtex.items()}
    return bibtex
def dump(d: dict) -> str:
    """Render one entry dict, as produced by ``bibify()``, as a BibTeX string.

    The ``ENTRYTYPE`` and ``ID`` items form the ``@type{id,`` header; every
    other item becomes one ``key = {value},`` line. The entry is closed with
    ``}`` followed by a blank line.
    """
    header_keys = ('ENTRYTYPE', 'ID')
    header = '@{}{{{},\n'.format(d['ENTRYTYPE'], d['ID'])
    field_lines = [
        ' {} = {{{}}},\n'.format(key, str(value))
        for key, value in d.items()
        if key not in header_keys
    ]
    return ''.join([header, *field_lines, '}\n\n'])
if __name__ == '__main__':
    import argparse

    # Command line: one or more DOIs plus two optional formatting switches.
    parser = argparse.ArgumentParser(description='DOI to BibTex converter')
    parser.add_argument('DOIs', nargs='+', help='DOI strings to be converted to BibTex')
    parser.add_argument('-t', '--titlecase', action='store_true', help='Use title case for titles')
    parser.add_argument('-a', '--abbreviate', action='store_true', help='Abbreviate journal names')
    args = parser.parse_args()

    # Prefer bibtexparser for serialisation when it is installed; otherwise
    # fall back to the built-in dump() formatter.
    try:
        from bibtexparser.bibdatabase import BibDatabase
        from bibtexparser import dumps as _dumps
        bibparser = True
        _db = BibDatabase()
        db = _db.entries  # entries are appended straight into the database

        def dumps(db):
            # The argument is ignored: the entries already live inside _db.
            return _dumps(_db)
    except ImportError:
        bibparser = False
        db = []

        def dumps(db):
            return ''.join(dump(i) for i in db)

    # Resolve every DOI and collect the resulting BibTeX entry dicts.
    for doi in args.DOIs:
        d = bibify(get_json(doi), abbreviate_journal=args.abbreviate, titlecase=args.titlecase)
        db.append(d)

    print()
    print(dumps(db))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment