Skip to content

Instantly share code, notes, and snippets.

@lucainnocenti
Last active December 13, 2023 06:17
Show Gist options
  • Save lucainnocenti/bbfee069c5bc87c08a308753513acb96 to your computer and use it in GitHub Desktop.
Save lucainnocenti/bbfee069c5bc87c08a308753513acb96 to your computer and use it in GitHub Desktop.
Python script that sorts the `\bibitem` entries of a LaTeX document according to the order in which they are cited in the text. Useful when, for some reason, one cannot use BibTeX and .bib files. Usage: `python bibitem-sorter.py file.tex sorted_file.tex`
import argparse
import os
import sys
import re
def printBold(msg):
    """Print ``msg`` to stdout wrapped in ANSI styling escape sequences.

    The message is prefixed with the SGR codes 95 and 1 and followed by
    the SGR reset code 0, so a terminal that honours ANSI escapes shows
    it highlighted and restores its normal style afterwards.

    Args:
        msg: The string to print.
    """
    style_on = '\033[95m' + '\033[1m'
    style_off = '\x1b[0m'
    print(''.join((style_on, msg, style_off)))
# ``\cite{key1, key2}``, optionally preceded by bracketed arguments such as
# ``\cite[p.~3]{key}``. Group 1 is the comma-separated key list.
_CITE_RE = re.compile(r'\\cite(?:\[[^\]]*\])*\{([^}]+)\}')

# ``\bibitem{key}`` or ``\bibitem[label]{key}``. Group 1 is the optional
# bracketed label (brackets included), group 2 is the key.
_BIBITEM_RE = re.compile(r'\\bibitem\s*(\[[^\]]*\])?\s*\{([^}]+)\}')

_BEGIN_BIBLIOGRAPHY = '\\begin{thebibliography}'
_END_BIBLIOGRAPHY = '\\end{thebibliography}'


def _find_cite_keys(text):
    """Return the cited keys in order of first appearance, without repeats."""
    keys = []
    seen = set()
    for group in _CITE_RE.findall(text):
        for key in group.split(','):
            key = key.strip()
            if key and key not in seen:
                seen.add(key)
                keys.append(key)
    return keys


def _parse_bibitems(content):
    """Split the inside of a thebibliography environment into entries.

    Returns ``(preamble, entries)``. ``preamble`` is everything before the
    first ``\\bibitem``. Each entry is a ``(key, label, body)`` tuple where
    ``label`` is the optional ``[...]`` argument (or ``''``) and ``body`` is
    the text up to the next ``\\bibitem``, stripped of surrounding whitespace.
    """
    matches = list(_BIBITEM_RE.finditer(content))
    if not matches:
        return content, []
    entries = []
    for index, match in enumerate(matches):
        # An entry runs until the next \bibitem (or the end of the
        # environment); no blank-line separator is required.
        if index + 1 < len(matches):
            body_end = matches[index + 1].start()
        else:
            body_end = len(content)
        body = content[match.end():body_end].strip()
        entries.append((match.group(2), match.group(1) or '', body))
    return content[:matches[0].start()], entries


def _sort_bibliography(text):
    """Return ``text`` with its bibliography entries in citation order.

    Cited entries are ordered by the first appearance of their key in a
    ``\\cite`` command; uncited entries keep their relative order and are
    appended after the cited ones (a warning is printed for each).

    Raises:
        ValueError: if there is no complete thebibliography environment,
            or if a cited key has no matching ``\\bibitem``.
    """
    begin = text.find(_BEGIN_BIBLIOGRAPHY)
    if begin == -1:
        raise ValueError('No thebibliography environment found.')
    content_start = begin + len(_BEGIN_BIBLIOGRAPHY)
    content_end = text.find(_END_BIBLIOGRAPHY, content_start)
    if content_end == -1:
        raise ValueError('Missing \\end{thebibliography}.')

    preamble, entries = _parse_bibitems(text[content_start:content_end])
    cites = _find_cite_keys(text)

    # check that every citation has a matching reference
    known_keys = set(entry[0] for entry in entries)
    for cite in cites:
        if cite not in known_keys:
            raise ValueError('Citing missing reference: {}.'.format(cite))

    # separate cited from uncited references, warning about the latter
    rank = dict((key, position) for position, key in enumerate(cites))
    cited = []
    uncited = []
    for entry in entries:
        if entry[0] in rank:
            cited.append(entry)
        else:
            printBold('WARNING: Uncited reference: {}.'.format(entry[0]))
            uncited.append(entry)

    # check if already sorted (uncited references are ignored here)
    if [entry[0] for entry in cited] == cites:
        printBold('References are sorted')
    else:
        printBold('References are not sorted, proceeding to sort them.')

    # list.sort is stable, so duplicated keys keep their relative order
    cited.sort(key=lambda entry: rank[entry[0]])

    rendered = ''.join(
        '\\bibitem' + label + '{' + key + '}\n' + body + '\n\n'
        for key, label, body in cited + uncited
    )
    # Splice by slicing rather than re.sub: a regex replacement template
    # would interpret the backslashes contained in the entry bodies.
    return (text[:content_start] + preamble.rstrip() + '\n\n\n'
            + rendered + '\n' + text[content_end:])


def main():
    """Sort the ``\\bibitem`` entries of a LaTeX file by citation order.

    Command-line arguments (read from ``sys.argv`` by argparse):
        path: the LaTeX file to read.
        output_path: where the rewritten file is saved. It is written
            even when the references were already sorted.

    Raises:
        ValueError: if the file has no complete thebibliography
            environment, or cites a key that has no ``\\bibitem``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('path', type=str)
    parser.add_argument('output_path', type=str)
    args = parser.parse_args()

    # read from file
    with open(args.path, 'r') as fp:
        text = fp.read()

    output_text = _sort_bibliography(text)

    # save result
    printBold('Done. Saving results to {}.'.format(args.output_path))
    with open(args.output_path, 'w') as fp:
        fp.write(output_text)
# Run the sorter only when this file is executed as a script, so that
# importing the module does not parse command-line arguments.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment