Skip to content

Instantly share code, notes, and snippets.

@AyeGill
Last active August 15, 2019 17:46
Show Gist options
  • Save AyeGill/421785edbe44c9d67a4da15d785d914c to your computer and use it in GitHub Desktop.
Save AyeGill/421785edbe44c9d67a4da15d785d914c to your computer and use it in GitHub Desktop.
Python script to pull a bibtex reference from arxiv.
#!/usr/bin/python3
import arxiv
import sys
import PyPDF4
import re
## Usage: arxiref.py 1711.07059
## or: arxiref.py paper.pdf
## If given something that looks like an arXiv id, looks up the paper with that id.
## If given a filename, treats it as a PDF, looks for an arXiv id on page 1, then proceeds as in the first case.
## The produced BibTeX entry has "**" where you need to choose a citation key for the reference.
# Pattern used to find an arXiv id in text. Group 1 captures the numeric part
# (digits, a dot, digits) that follows the literal "arXiv:" prefix.
# Written as a raw string so that "\." is a regex escape for a literal dot
# instead of an invalid string escape sequence.
regex = r"arXiv:([0-9]+\.[0-9]+)"
p = re.compile(regex)
def check_id(arg):
    """Report whether *arg* looks like an arXiv id.

    An argument qualifies when it contains at least one "." and consists
    solely of the characters "0"-"9", "v" and ".". Anything else (for
    example a file name such as "paper.pdf") is rejected.
    """
    allowed = "1234567890v."
    only_allowed = all(ch in allowed for ch in arg)
    has_dot = "." in arg
    return has_dot and only_allowed
def print_ref(arxiv_id):
    """Query arXiv for *arxiv_id* and print a BibTeX @Article entry for it.

    The entry is written to standard output. Its citation key is the
    placeholder "**", which the user is expected to replace. Title, authors
    (joined with " and ") and year (the first four characters of the
    'published' field) are taken from the first query result.

    Raises:
        IndexError: if the query returns no result for *arxiv_id*.
    """
    results = arxiv.query(id_list=[arxiv_id])
    try:
        paper = results[0]
    except IndexError:
        # Keep the exception type an unguarded results[0] would raise, but
        # say which id could not be found instead of "list index out of range".
        raise IndexError("no arXiv entry found for id %r" % (arxiv_id,)) from None
    # NOTE(review): the template has no comma after the "**" key placeholder;
    # BibTeX normally expects one after the citation key - confirm whether the
    # user is meant to add it together with the key.
    print('''@Article{**
Title = {%s},
Author = {%s},
Year = {%s},
archivePrefix = "arXiv",
eprint = {%s}
}''' % (paper['title'], " and ".join(paper['authors']), paper['published'][:4], arxiv_id))
def get_id_file(filename):
    """Return the arXiv id found on the first page of the PDF *filename*.

    The text of page 1 is extracted and searched with the module-level
    pattern ``p``; the part after the "arXiv:" prefix is returned.

    Raises:
        ValueError: if no "arXiv:<id>" text is found on page 1.
    """
    with open(filename, 'rb') as f:
        pdf = PyPDF4.PdfFileReader(f)
        # Extract while the file is still open: the reader works on the stream.
        text = pdf.getPage(0).extractText()
    m = p.search(text)
    if m is None:
        # Previously this fell through to m.group(...) and crashed with an
        # AttributeError on None; report the actual problem instead.
        raise ValueError("no arXiv id found on page 1 of %r" % (filename,))
    # Group 1 is the id without the "arXiv:" prefix (same as group(0)[6:]).
    return m.group(1)
def main(argv=None):
    """Run the script: print a BibTeX reference for an arXiv id or a PDF.

    Args:
        argv: command-line arguments without the program name; defaults to
            ``sys.argv[1:]``. Only the first argument is used.

    Returns:
        0 on success, 2 if no argument was given (a usage message is
        printed to standard error in that case).
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        # Previously a missing argument surfaced as a bare IndexError.
        print("Usage: arxiref.py <arXiv id | paper.pdf>", file=sys.stderr)
        return 2
    argument = args[0]
    if check_id(argument):
        print_ref(argument)
    else:  # argument is a filename
        print_ref(get_id_file(argument))
    return 0


# Guarded so that importing this module does not trigger a lookup.
if __name__ == "__main__":
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment