Skip to content

Instantly share code, notes, and snippets.

@mbollmann
Created November 17, 2016 11:10
Show Gist options
  • Save mbollmann/54110b5409f956727f7f96a459d8c869 to your computer and use it in GitHub Desktop.
Save mbollmann/54110b5409f956727f7f96a459d8c869 to your computer and use it in GitHub Desktop.
Collecting stats about paper titles per year in a .bib file
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import argparse
import bibtexparser
from collections import Counter
import matplotlib.pyplot as plt
import seaborn as sns
import sys
# Lower-case substrings that mark a paper title as a match. Titles are
# lower-cased before being searched, so every entry here must be lower-case.
keywords = [
    "neural",
    "deep learning",
    "lstm",
    "rnn",
    "long short",
    "cnn",
]
def _tally_entries(entries, search_terms):
    """Count usable entries per year and how many of them match a term.

    Args:
        entries: Iterable of BibTeX entries, each a mapping from field name
            to string value.
        search_terms: Lower-case substrings to look for in each title.

    Returns:
        A ``(per_year_total, per_year_match, skipped)`` tuple of Counters.
        The first two are keyed by the entry's ``year`` value; ``skipped``
        is keyed by a short reason for leaving an entry out of the counts.
    """
    per_year_total = Counter()
    per_year_match = Counter()
    skipped = Counter()

    for entry in entries:
        if 'author' not in entry:
            # probably a full proceedings entry
            skipped['no author'] += 1
            continue
        if 'title' not in entry or 'year' not in entry:
            # probably a parse error...
            skipped['no title/year'] += 1
            continue

        year = entry['year']
        title = entry['title']
        if "Proceedings" in title:
            if 'chapter' not in entry:
                # probably a full proceedings entry
                skipped['proceedings in title'] += 1
                continue
            # Fall back to the 'chapter' field as the title to search.
            title = entry['chapter']

        per_year_total[year] += 1
        title = title.lower()
        if any(term in title for term in search_terms):
            per_year_match[year] += 1

    return per_year_total, per_year_match, skipped


def _plot_matches(years, matches, fromyear, target):
    """Draw the number of matching titles per year as a bar chart.

    Args:
        years: Sorted list of year strings.
        matches: Match counts, parallel to ``years``.
        fromyear: If truthy, only years comparing ``>=`` this string are
            drawn.
        target: ``None`` to show the figure interactively, otherwise the
            path handed to ``fig.savefig``.
    """
    if fromyear:
        # Filter instead of years.index(fromyear): index() raises ValueError
        # when the requested year has no entries. Because ``years`` is
        # sorted, this yields the same slice whenever the year is present.
        kept = [(y, m) for y, m in zip(years, matches) if y >= fromyear]
        years = [y for y, _ in kept]
        matches = [m for _, m in kept]

    if not years:
        sys.stderr.write("### Nothing to plot.\n")
        return

    fig, ax = plt.subplots(figsize=(16, 9))
    # Keyword x=/y= are required by current seaborn releases; a single
    # color replaces the one-entry palette, which is deprecated without hue.
    bar_color = sns.xkcd_palette(("deep sea blue",))[0]
    sns.barplot(x=years, y=matches, color=bar_color, ax=ax)
    if target is None:
        plt.show()
    else:
        fig.savefig(target, dpi=700)


def main(args: argparse.Namespace) -> None:
    """Report, per year, how many paper titles contain one of ``keywords``.

    Loads the BibTeX file in ``args.bibfile``, writes progress and the
    skipped-entry summary to stderr, prints one ``match/total = percent``
    line per year to stdout, and optionally plots the match counts.

    Args:
        args: Parsed command-line arguments. Reads ``bibfile`` (an open
            text file), ``plot`` (``False`` for no plot, ``None`` to show
            the plot, or a file name to save it to) and ``fromyear`` (first
            year to include in the plot, or ``None``).
    """
    sys.stderr.write("### Loading BibTeX file... ")
    sys.stderr.flush()
    bib = bibtexparser.load(args.bibfile)
    sys.stderr.write("done!\n")

    per_year_total, per_year_match, skipped = _tally_entries(
        bib.entries, keywords)

    sys.stderr.write("### Skipped {} items:\n".format(sum(skipped.values())))
    for reason, count in skipped.items():
        sys.stderr.write(" {0:4d} = {1}\n".format(count, reason))

    years = sorted(per_year_total)
    matches = []
    for year in years:
        match = per_year_match[year]
        # Every key of per_year_total was incremented at least once, so
        # total is never zero here.
        total = per_year_total[year]
        percentage = match / total * 100
        print("{0}: {1:3n}/{2:4n} = {3:2.2f}%".format(
            year, match, total, percentage))
        matches.append(match)

    if args.plot is not False:
        _plot_matches(years, matches, args.fromyear, args.plot)
if __name__ == "__main__":
    # Command-line entry point: build the parser, parse argv, run main().
    description = ""
    epilog = ""
    parser = argparse.ArgumentParser(description=description, epilog=epilog)
    # nargs='?' makes the positional optional; without it the STDIN default
    # advertised in the help text could never take effect.
    parser.add_argument('bibfile',
                        metavar='BIBFILE',
                        nargs='?',
                        type=argparse.FileType('r', encoding="UTF-8"),
                        default=sys.stdin,
                        help='BibTeX file to analyze (default: STDIN)')
    # Three states: option absent -> False (no plot); "-p" alone -> None
    # (show the plot); "-p FILE" -> save the plot to FILE.
    # The help text uses %(default)s because "%b" is not a valid str
    # conversion and made rendering --help raise ValueError.
    parser.add_argument('-p', '--plot',
                        nargs='?',
                        type=str,
                        default=False,
                        help='Plot the output (default: %(default)s)')
    # Kept as a string so it compares directly against the BibTeX year field.
    parser.add_argument('-f', '--fromyear',
                        metavar='YEAR',
                        type=str,
                        default=None,
                        help='Plot should start from year (default: %(default)s)')
    args = parser.parse_args()
    main(args)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment