@ntessore
Last active March 27, 2017 08:49
query the ADS bibliography database for a list of bibcodes, or for the citations reported missing in a LaTeX log file, and print the corresponding BibTeX entries
#!/usr/bin/env python
####
# small command line tool to get a BibTeX bibliography from ADS bibcodes
#
# $ adsbibq.py '2015A&A...580A..79T' '2016MNRAS.463.3115T'
#
# Query Results from the ADS Database
#
# Retrieved 2 abstracts, starting with number 1. Total number selected: 2.
# ...
#
####
from argparse import ArgumentParser
from re import finditer
from sys import exit, stdin, stdout, stderr
from urllib.error import URLError
from urllib.parse import urlencode
from urllib.request import urlopen
# argument parsing
parser = ArgumentParser()
parser.add_argument('bibcode', nargs='*', help='query bibcodes')
parser.add_argument('--log', action='store_true', help='parse log file')
args = parser.parse_args()
# make sure at least one way of input is provided
if (not args.bibcode) and (not args.log):
    parser.error('either bibcodes or the --log option must be given')
# list of bibcodes to query
bibcodes = []
# read log or use bibcodes
if args.log:
    # read the log from stdin and strip newlines (LaTeX warnings can wrap)
    log = stdin.read().replace('\n', '').replace('\r', '')
    # the natbib warning message to search for undefined citations
    pattern = r"Package natbib Warning: Citation `(.*?)' on page \d+ undefined"
    # find all warnings in the log and store citekeys as bibcodes
    for m in finditer(pattern, log):
        bibcodes.append(m.group(1))
else:
    # take bibcodes from arguments
    bibcodes = args.bibcode
# sort and remove duplicates
bibcodes = sorted(set(bibcodes))
# prepare the ADS query
query = [
    ('bibcode', bibcodes),
    ('data_type', 'BIBTEX'),
    ('db_key', 'AST'),
    ('nocookieset', '1'),
]
# the base url and data of the request
urlbase = 'http://adsabs.harvard.edu/cgi-bin/nph-bib_query'
urldata = urlencode(query, True).encode('ascii')
# perform request and output the result
try:
    f = urlopen(urlbase, urldata)
    stdout.write(f.read().decode('utf-8'))
except URLError as e:
    # report a failed request on stderr instead of silently ignoring it
    print('ADS query failed:', e, file=stderr)
    exit(1)