Skip to content

Instantly share code, notes, and snippets.

@jdahm
Created March 19, 2015 01:15
Show Gist options
  • Save jdahm/97246bd39cfbe4ace492 to your computer and use it in GitHub Desktop.
Save jdahm/97246bd39cfbe4ace492 to your computer and use it in GitHub Desktop.
Convert bibtex library to use cite keys
#!/usr/bin/env python
import sys
import os.path as op
import re
import glob
import shutil
# One pattern per supported BibTeX entry type. The named group captures the
# cite key: everything after the opening brace up to the first comma.
# Kept as a tuple (not a generator) so it can be iterated more than once.
CITETYPES = tuple(
    '@{}{{(?P<{}>[^,]+)'.format(entry, group)
    for entry, group in (
        ('Article', 'A'),
        ('InCollection', 'I'),
        ('PhdThesis', 'P'),
        ('Unpublished', 'U'),
        ('InProceedings', 'IP'),
    )
)
# Alternation of every entry pattern; after a successful match,
# ``match.lastgroup`` names the alternative that matched.
CITERE = '|'.join(CITETYPES)

# One pattern per field of interest. The named group captures the field value
# from just after the opening brace up to the first comma. Raw string so that
# ``\s`` reaches the regex engine instead of being an invalid string escape.
KEYTYPES = tuple(
    r'\s*{}\s*=\s*{{(?P<{}>[^,]+)'.format(field, group)
    for field, group in (
        ('Title', 'T'),
        ('Author', 'A'),
        ('Year', 'Y'),
    )
)
# Alternation of every field pattern, used the same way as CITERE.
KEYRE = '|'.join(KEYTYPES)
def _pick_match(matches, titlewords):
    """Return the path from *matches* to use for an entry, or None.

    A single candidate is returned as-is. With several candidates, the first
    title word (in title order) that occurs in a candidate path selects that
    candidate. None means no title word occurs in any candidate.
    """
    if len(matches) == 1:
        return matches[0]
    for word in titlewords:
        for candidate in matches:
            if word in candidate:
                return candidate
    return None


def convert_bibfiles(directory, bibfile):
    """Rename the PDF belonging to each BibTeX entry to ``<cite key>.pdf``.

    Reads ``directory/bibfile`` line by line. For every entry whose type is
    matched by ``CITERE`` it collects the Title, Author and Year fields (via
    ``KEYRE``), globs for ``directory/<author>/<year>*`` and moves the chosen
    file to ``directory/<cite key>.pdf``. Progress and diagnostics are
    printed to stdout.

    Args:
        directory: Directory holding the bib file and the per-author folders.
        bibfile: Name of the BibTeX file inside *directory*.

    Raises:
        ValueError: If a line starting with ``}`` is seen outside a
            recognised entry, if an entry lacks a title, author or year, or
            if the year value is not an integer.
    """
    # Bound up front so the diagnostic prints below can never hit an unbound
    # local when the file starts with a stray closing brace.
    key = titlewords = authorname = year = None
    indef = False
    with open(op.join(directory, bibfile), mode='r') as f:
        for line in f:
            print(indef, line, end='')

            # Start of a recognised entry: remember its key, reset the fields.
            m = re.match(CITERE, line)
            if m:
                key = m.group(m.lastgroup)
                indef = True
                titlewords = authorname = year = None

            # A line starting with '}' closes the current entry.
            if re.match('}', line):
                if not indef:
                    print(titlewords, authorname, year)
                    raise ValueError('End of key found when not in key')
                indef = False
                if titlewords is None or authorname is None or year is None:
                    print(titlewords, authorname, year)
                    raise ValueError('Did not find all required fields')
                matches = glob.glob(
                    op.join(directory, authorname, str(year)) + '*')
                print(matches)
                if not matches:
                    print('Could not find matches for {} {}'.format(
                        authorname, year))
                    continue
                if len(matches) > 1:
                    print('Multiple matches found')
                filename = _pick_match(matches, titlewords)
                if filename is None:
                    # Skip rather than move a file left over from an
                    # earlier entry (or crash on an unbound name).
                    print('Could not choose between matches for {}'.format(
                        key))
                    continue
                shutil.move(filename, op.join(directory, key) + '.pdf')

            if indef:
                m = re.match(KEYRE, line)
                if m:
                    kind = m.lastgroup
                    # The capture runs to the first comma, so a field that is
                    # last in its entry (no trailing comma) still carries
                    # "}\n"; drop the whitespace before the closing braces.
                    content = m.group(kind).rstrip().rstrip('}')
                    if kind == 'T':
                        titlewords = content.split()
                    elif kind == 'A':
                        if '{' in content:
                            print(content)
                            # Brace groups (e.g. protected or accented
                            # letters) cannot be matched literally against
                            # file names, so turn them into glob wildcards.
                            content = re.sub(r'\{[^}]+\}', '*', content)
                            print(content)
                        # Any brace still left is unbalanced; wildcard it as
                        # well. A single pass always terminates.
                        content = content.replace('{', '*').replace('}', '*')
                        authorname = content
                    elif kind == 'Y':
                        year = int(content)
if __name__ == '__main__':
    # The library directory is the only required argument; the bib file name
    # is fixed to 'main.bib'. Exit with a usage message instead of an
    # IndexError traceback when the argument is missing.
    if len(sys.argv) < 2:
        sys.exit('usage: {} DIRECTORY'.format(sys.argv[0]))
    convert_bibfiles(sys.argv[1], 'main.bib')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment