Created
June 10, 2020 09:47
-
-
Save leifdenby/a68486a5578c494728af95a6139563b8 to your computer and use it in GitHub Desktop.
Filter bibliography file based on citations used in tex document
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
""" | |
Create a filtered .bib-file from the citations used in a specific .tex source file
""" | |
import pybtex | |
import pybtex.database | |
import pybtex.errors | |
import re | |
# Matches \cite, \citep and \citet, optionally starred and optionally followed
# by natbib-style [..] arguments, and captures the raw (possibly
# comma-separated) key list found between the braces in the group `ref`.
# The key list accepts anything except braces so that keys containing
# characters such as ":", "-" or "." are not silently skipped.
RE_CITATIONS = re.compile(
    r"\\cite[pt]?\*?\s*(?:\[[^\]]*\]\s*)*\{(?P<ref>[^{}]+)\}"
)
def create_filtered_bibtex_file(fn_bibtex_in, fn_bibtex_out, needed_refs):
    """Write a copy of a BibTeX file that only contains the requested entries.

    Parameters
    ----------
    fn_bibtex_in
        Path of the BibTeX file to read entries from.
    fn_bibtex_out
        Path the filtered BibTeX file is written to.
    needed_refs
        Iterable of citation keys to keep. Keys that are not found in
        `fn_bibtex_in` are printed under a "Missing references" heading.
    """
    # just issue warnings with duplicate entries
    # NOTE: this assigns a module-level pybtex attribute, so it stays in
    # effect for the rest of the process, not only for this call
    pybtex.errors.strict = False
    bib_all = pybtex.database.parse_file(fn_bibtex_in)

    # materialise once so one-shot iterables can be traversed twice below, and
    # build a set so filtering a large library is not O(entries * refs)
    needed_refs = list(needed_refs)
    wanted_keys = set(needed_refs)

    bib_new = pybtex.database.BibliographyData(
        {k: v for (k, v) in bib_all.entries.items() if k in wanted_keys}
    )

    missing_refs = [r for r in needed_refs if r not in bib_new.entries]
    if missing_refs:
        print("Missing references:\n{}".format("\n".join(missing_refs)))

    bib_new.to_file(fn_bibtex_out)
def find_citations_in_latex_file(fn):
    """Return the sorted, de-duplicated citation keys used in a LaTeX file.

    Parameters
    ----------
    fn
        Path of the .tex file to scan.

    Returns
    -------
    list of str
        Every key found inside a citation command matched by `RE_CITATIONS`,
        with surrounding whitespace removed, each key listed once, in sorted
        order.
    """
    # read as UTF-8 explicitly instead of relying on the platform default
    with open(fn, encoding="utf-8") as fh:
        file_content = fh.read()

    unique_refs = set()
    for ref_group in RE_CITATIONS.findall(file_content):
        # regex doesn't split grouped refs, so we do that here; every key is
        # stripped (also when the group holds a single key) and empty pieces
        # left behind by stray commas such as "a,,b" or "a," are dropped
        for key in ref_group.split(","):
            key = key.strip()
            if key:
                unique_refs.add(key)

    return sorted(unique_refs)
def main():
    """Command-line entry point.

    Reads the citations used in the given .tex document, then writes the
    matching entries from the source BibTeX file (`--src-bibtex-path`) to the
    output BibTeX file (`--bibtex-out`).
    """
    import argparse
    import os.path

    argparser = argparse.ArgumentParser()
    argparser.add_argument('tex_document')
    argparser.add_argument(
        '--src-bibtex-path',
        default='$HOME/Documents/library.bib'
    )
    argparser.add_argument(
        '--bibtex-out', default='library.bib'
    )
    args = argparser.parse_args()

    # argparse performs no shell-style expansion, so the "$HOME" in the
    # default (and any "~" or "$VAR" passed in quoted) is resolved here;
    # otherwise the literal string "$HOME/..." would be opened and fail
    src_bibtex_path = os.path.expandvars(
        os.path.expanduser(args.src_bibtex_path)
    )

    needed_refs = find_citations_in_latex_file(args.tex_document)
    print("Found {} unique citations in `{}` ({})".format(
        len(needed_refs), args.tex_document, ", ".join(needed_refs)
    ))

    create_filtered_bibtex_file(
        fn_bibtex_in=src_bibtex_path,
        fn_bibtex_out=args.bibtex_out,
        needed_refs=needed_refs
    )
    print("Wrote new bibtex file to {}".format(args.bibtex_out))


# only run the command-line interface when executed as a script
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment