Skip to content

Instantly share code, notes, and snippets.

@leifdenby
Created June 10, 2020 09:47
Show Gist options
  • Save leifdenby/a68486a5578c494728af95a6139563b8 to your computer and use it in GitHub Desktop.
Save leifdenby/a68486a5578c494728af95a6139563b8 to your computer and use it in GitHub Desktop.
Filter bibliography file based on citations used in tex document
# coding: utf-8
"""
Create a filtered .bib file containing only the entries cited in a specific .tex source file
"""
import pybtex
import pybtex.database
import pybtex.errors
import re
# Matches \cite, \citep and \citet commands and captures the raw (possibly
# comma-separated) key list between the braces.
#   * `[pt]?`   - optional natbib suffix (a literal "|" is NOT accepted)
#   * `\*?`     - optional starred variant, e.g. \citet*{key}
#   * `(?:\[..\])*` - optional natbib arguments, e.g. \citep[see][p. 3]{key}
#   * key characters include the punctuation commonly used in BibTeX keys
#     ("-", ":", ".", "/", "+") in addition to word characters.
# There must be exactly ONE capturing group: callers use `findall`, which
# only returns plain strings (rather than tuples) in that case.
RE_CITATIONS = re.compile(
    r"\\cite[pt]?\*?(?:\[[^\]]*\])*\{(?P<ref>[\w\s,.:/+\-]+)\}"
)
def create_filtered_bibtex_file(fn_bibtex_in, fn_bibtex_out, needed_refs):
    """Write a BibTeX file holding only the requested entries.

    Parses `fn_bibtex_in`, keeps the entries whose key appears in
    `needed_refs`, prints the keys from `needed_refs` that were not found,
    and writes the reduced bibliography to `fn_bibtex_out`.

    Args:
        fn_bibtex_in: path of the BibTeX file to read entries from.
        fn_bibtex_out: path of the BibTeX file to write.
        needed_refs: collection of citation keys to keep.
    """
    # Non-strict mode: problems such as duplicate entries are reported as
    # warnings instead of aborting the parse.
    pybtex.errors.strict = False

    source_db = pybtex.database.parse_file(fn_bibtex_in)

    kept_entries = {}
    for key, entry in source_db.entries.items():
        if key in needed_refs:
            kept_entries[key] = entry
    filtered_db = pybtex.database.BibliographyData(kept_entries)

    # Report requested keys that the source bibliography did not provide.
    absent = [
        ref for ref in needed_refs if ref not in filtered_db.entries.keys()
    ]
    if absent:
        print("Missing references:\n{}".format("\n".join(absent)))

    filtered_db.to_file(fn_bibtex_out)
def find_citations_in_latex_file(fn):
    """Return the sorted, de-duplicated citation keys used in a LaTeX file.

    Every match of `RE_CITATIONS` in the file is split on commas, so grouped
    citations such as ``\\cite{a, b}`` contribute one key each. Surrounding
    whitespace is removed from every key and empty keys (e.g. from a trailing
    comma) are discarded.

    Args:
        fn: path of the LaTeX source file to scan.

    Returns:
        A sorted list of unique citation keys.
    """
    # Decode explicitly so results do not depend on the platform's default
    # encoding; undecodable bytes are replaced rather than aborting the scan.
    with open(fn, encoding="utf-8", errors="replace") as fh:
        file_content = fh.read()

    all_refs = set()
    for key_group in RE_CITATIONS.findall(file_content):
        # The regex captures the whole brace content, so split grouped keys
        # here. Splitting unconditionally also strips a lone key.
        for key in key_group.split(","):
            key = key.strip()
            if key:
                all_refs.add(key)
    return sorted(all_refs)
def main():
    """Command-line entry point: filter a BibTeX file down to cited entries.

    Parses the command line, collects the citation keys used in the given
    LaTeX document, writes the matching entries of the source BibTeX file to
    the output BibTeX file, and prints a short progress report.
    """
    import argparse
    import os

    argparser = argparse.ArgumentParser(description=__doc__)
    argparser.add_argument(
        'tex_document',
        help="LaTeX source file to scan for citations",
    )
    argparser.add_argument(
        '--src-bibtex-path',
        default='$HOME/Documents/library.bib',
        help="BibTeX file to take entries from (default: %(default)s)",
    )
    argparser.add_argument(
        '--bibtex-out',
        default='library.bib',
        help="filtered BibTeX file to write (default: %(default)s)",
    )
    args = argparser.parse_args()

    # argparse defaults never pass through a shell, so "$HOME" (or a quoted
    # "~") would otherwise be used literally and the file would not be found.
    src_bibtex_path = os.path.expanduser(
        os.path.expandvars(args.src_bibtex_path)
    )

    needed_refs = find_citations_in_latex_file(args.tex_document)
    print("Found {} unique citations in `{}` ({})".format(
        len(needed_refs), args.tex_document, ", ".join(needed_refs)
    ))

    create_filtered_bibtex_file(
        fn_bibtex_in=src_bibtex_path,
        fn_bibtex_out=args.bibtex_out,
        needed_refs=needed_refs,
    )
    print("Wrote new bibtex file to {}".format(args.bibtex_out))


# Run the command-line interface only when executed as a script, not when
# the module is imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment