shyuep/collaborators_list.py

## collaborators_list.py
#!/usr/bin/env python

"""
Script for generating a CSV file of collaborators from a BibTeX file, for
easy insertion into the new NSF format.
"""

from __future__ import division

import collections
import csv
import datetime
import os
import re

from pybtex.database import parse_file, BibliographyData


# Maps the name of a Greek letter, as it is spelled in a LaTeX macro (e.g.
# "alpha" for \alpha), to the corresponding unicode character. Capitalised
# names map to the upper-case letters, lower-case names to the lower-case ones.
greek_alphabet = {
    'Alpha': u'\u0391',
    'Beta': u'\u0392',
    'Gamma': u'\u0393',
    'Delta': u'\u0394',
    'Epsilon': u'\u0395',
    'Zeta': u'\u0396',
    'Eta': u'\u0397',
    'Theta': u'\u0398',
    'Iota': u'\u0399',
    'Kappa': u'\u039A',
    # "Lamda" is the spelling used in the Unicode character names and is kept
    # for backward compatibility; LaTeX itself spells the macro "Lambda".
    'Lamda': u'\u039B',
    'Lambda': u'\u039B',
    'Mu': u'\u039C',
    'Nu': u'\u039D',
    'Xi': u'\u039E',
    'Omicron': u'\u039F',
    'Pi': u'\u03A0',
    'Rho': u'\u03A1',
    'Sigma': u'\u03A3',
    'Tau': u'\u03A4',
    'Upsilon': u'\u03A5',
    'Phi': u'\u03A6',
    'Chi': u'\u03A7',
    'Psi': u'\u03A8',
    'Omega': u'\u03A9',
    'alpha': u'\u03B1',
    'beta': u'\u03B2',
    'gamma': u'\u03B3',
    'delta': u'\u03B4',
    'epsilon': u'\u03B5',
    'zeta': u'\u03B6',
    'eta': u'\u03B7',
    'theta': u'\u03B8',
    'iota': u'\u03B9',
    'kappa': u'\u03BA',
    # Same dual spelling as for the upper-case letter above.
    'lamda': u'\u03BB',
    'lambda': u'\u03BB',
    'mu': u'\u03BC',
    'nu': u'\u03BD',
    'xi': u'\u03BE',
    'omicron': u'\u03BF',
    'pi': u'\u03C0',
    'rho': u'\u03C1',
    'sigma': u'\u03C3',
    'tau': u'\u03C4',
    'upsilon': u'\u03C5',
    'phi': u'\u03C6',
    'chi': u'\u03C7',
    'psi': u'\u03C8',
    'omega': u'\u03C9',
}


def process_unicode(s):
    r"""
    Replace LaTeX Greek-letter macros in a string with unicode characters.

    Every occurrence of the form ``$\name$`` (e.g. ``$\alpha$``) whose name
    is a key of ``greek_alphabet`` is replaced by the character stored under
    that key. Occurrences whose name is not in the table (e.g. ``$\times$``)
    are left untouched.

    Args:
        s: The string to process.

    Returns:
        The string with all recognised macros substituted.
    """
    def to_unicode(match):
        # Each match is looked up on its own, so that different letters in
        # the same string are not all replaced by the first one found.
        return greek_alphabet.get(match.group(1), match.group(0))

    return re.sub(r"\$\\(\w+)\$", to_unicode, s)


def _recent_authors(db, min_year):
    """Return the set of (first and middle names, last names) of article authors from min_year on."""
    authors = set()
    for key, entry in db.entries.items():
        if entry.type != "article":
            continue
        try:
            year = int(entry.fields["year"])
        except (KeyError, ValueError):
            # Without a usable year the entry cannot be placed inside the
            # requested window; report it instead of aborting the whole run.
            print("Skipping %s: missing or non-numeric year" % key)
            continue
        if year < min_year:
            continue
        # Entries without an author list simply contribute nobody.
        for person in entry.persons.get("author", []):
            given = " ".join(person.first_names + person.middle_names)
            authors.add((given, " ".join(person.last_names)))
    return authors


def _merge_affiliations(collaborators, csv_path):
    """Copy names and affiliations from an older csv onto matching collaborators; return unmatched rows."""
    unfound = []
    with open(csv_path, "rt") as f:
        for row in csv.reader(f):
            # Blank lines are read as empty rows; the header row is not data.
            if not row or row[0] == "First Name":
                continue
            # Tolerate rows that lack trailing columns.
            first_name, last_name, affiliation = (row + ["", ""])[:3]
            first_lower = first_name.lower()
            last_lower = last_name.lower()
            for c in collaborators:
                # Substring match, so that a shorter form of a name in the
                # bibtex still picks up the fuller form stored in the csv.
                if (c["first_name"].lower() in first_lower
                        and c["last_name"].lower() in last_lower):
                    c["first_name"] = first_name
                    c["last_name"] = last_name
                    c["affiliation"] = affiliation
                    break
            else:
                unfound.append(row)
    return unfound


def _drop_duplicates(collaborators):
    """Keep the first collaborator for every case-insensitive full name."""
    seen = set()
    unique = []
    for c in collaborators:
        full_name = "%s %s" % (c["first_name"].lower(), c["last_name"].lower())
        if full_name not in seen:
            seen.add(full_name)
            unique.append(c)
    return unique


def generate_csv(args):
    """
    Write a csv of collaborators extracted from a bibtex file.

    Authors of all "article" entries whose year is at least ``args.year`` are
    collected, sorted by last name and then first name, optionally merged
    with the names and affiliations of an earlier csv, de-duplicated, and
    written to ``collaborators_<today's date>.csv`` in the current directory.
    Rows of the earlier csv that match no current collaborator are printed
    under the heading "Old collaborators".

    Args:
        args: Parsed command-line arguments providing ``bib`` (path of the
            bibtex file), ``year`` (earliest year to include) and
            ``input_csv`` (optional path of an earlier csv).
    """
    db = parse_file(args.bib)
    names = sorted(_recent_authors(db, args.year), key=lambda c: (c[1], c[0]))
    collaborators = [{"first_name": first, "last_name": last}
                     for first, last in names]

    unfound = []
    if args.input_csv:
        unfound = _merge_affiliations(collaborators, args.input_csv)

    # Merging may have given several bibtex spellings the same csv name.
    collaborators = _drop_duplicates(collaborators)

    with open("collaborators_%s.csv" % datetime.datetime.now().date(), "wt") as f:
        writer = csv.writer(f)
        writer.writerow(["First Name", "Last Name", "Affiliation"])
        for c in collaborators:
            writer.writerow([c["first_name"], c["last_name"], c.get("affiliation", "")])

    if unfound:
        print("Old collaborators")
        print("-----------------")
        for row in unfound:
            print(", ".join(row))


def csv2str(args):
    """
    Print the collaborators of a csv file as a single "; "-separated string.

    Each data row is rendered as "First Last, Affiliation". The header row
    and blank lines are skipped.

    Args:
        args: Parsed command-line arguments providing ``csv`` (path of the
            csv file) and ``excluded_institution`` (optional string; rows
            whose affiliation contains it are left out).
    """
    excluded = args.excluded_institution
    output = []
    with open(args.csv, "rt") as f:
        for row in csv.reader(f):
            if not row or row[0] == "First Name":
                continue
            # Tolerate rows that lack trailing columns.
            first_name, last_name, affiliation = (row + ["", ""])[:3]
            # Only filter when an institution was actually supplied; a
            # placeholder string would also match real affiliations.
            if excluded and excluded in affiliation:
                continue
            output.append("%s %s, %s" % (first_name, last_name, affiliation))
    print("; ".join(output))


if __name__ == "__main__":
    # Command-line entry point. "make_csv" builds the collaborator csv from a
    # bibtex file; "make_str" turns such a csv into a string for pasting.
    import argparse
    import sys

    desc = (
        "\n"
        "This script helps automate the generation and updating of collaborator lists from\n"
        "bibtex files and/or csv file. Note that the first time this script is used, the csv\n"
        "generated do not contain affiliations. These have to be entered by hand.\n"
        "Subsequently, supply old processed csv using -i and affiliations will be obtained\n"
        "from the old list where possible. After you generate the csv, you can convert it to\n"
        "a string for pasting into the biosketch using make_str.\n"
    )

    p = argparse.ArgumentParser(
        description="Generate collaborator list for proposals.\n" + desc.rstrip(),
        # Show the description with the line breaks written above.
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="Author: Shyue Ping Ong")

    sp = p.add_subparsers()

    sp_csv = sp.add_parser(
        "make_csv", help="Make csv file from bibtex.")

    sp_csv.add_argument("bib", metavar="bib", type=str,
                        help="Bibtex file to process")

    sp_csv.add_argument("-y", "--year", dest="year",
                        type=int, default=datetime.datetime.now().year - 4,
                        help="Year from which to update. Defaults to current year - 4, based on the usual NSF guideline of past 48 months.")

    sp_csv.add_argument("-i", "--input_csv", dest="input_csv",
                        type=str,
                        help="An input CSV file. This is used mainly for prior information on institutions.")

    sp_csv.set_defaults(func=generate_csv)

    sp_str = sp.add_parser(
        "make_str", help="Convert data from a csv to a string for pasting into biosketches")

    sp_str.add_argument("csv", metavar="csv", type=str,
                        help="Csv file to process")

    sp_str.add_argument("-e", "--excluded_institution", dest="excluded_institution",
                        type=str,
                        help="Supply a home institution to exclude collaborators within the same institution. A sufficiently unique part of the string should suffice.")

    sp_str.set_defaults(func=csv2str)

    args = p.parse_args()
    if not hasattr(args, "func"):
        # No sub-command was given, so there is nothing to dispatch to.
        p.print_help()
        sys.exit(0)
    args.func(args)
	#!/usr/bin/env python

	"""
	Script for generating a CSV file of collaborators from a BibTeX file, for
	easy insertion into the new NSF format.
	"""

	from __future__ import division

	import datetime
	import os
	import collections
	import csv

	from pybtex.database import parse_file, BibliographyData


	# Maps the name of a Greek letter, as it is spelled in a LaTeX macro (e.g.
	# "alpha" for \alpha), to the corresponding unicode character. Capitalised
	# names map to the upper-case letters, lower-case names to the lower-case ones.
	greek_alphabet = {
	    'Alpha': u'\u0391',
	    'Beta': u'\u0392',
	    'Gamma': u'\u0393',
	    'Delta': u'\u0394',
	    'Epsilon': u'\u0395',
	    'Zeta': u'\u0396',
	    'Eta': u'\u0397',
	    'Theta': u'\u0398',
	    'Iota': u'\u0399',
	    'Kappa': u'\u039A',
	    # "Lamda" is the spelling used in the Unicode character names and is kept
	    # for backward compatibility; LaTeX itself spells the macro "Lambda".
	    'Lamda': u'\u039B',
	    'Lambda': u'\u039B',
	    'Mu': u'\u039C',
	    'Nu': u'\u039D',
	    'Xi': u'\u039E',
	    'Omicron': u'\u039F',
	    'Pi': u'\u03A0',
	    'Rho': u'\u03A1',
	    'Sigma': u'\u03A3',
	    'Tau': u'\u03A4',
	    'Upsilon': u'\u03A5',
	    'Phi': u'\u03A6',
	    'Chi': u'\u03A7',
	    'Psi': u'\u03A8',
	    'Omega': u'\u03A9',
	    'alpha': u'\u03B1',
	    'beta': u'\u03B2',
	    'gamma': u'\u03B3',
	    'delta': u'\u03B4',
	    'epsilon': u'\u03B5',
	    'zeta': u'\u03B6',
	    'eta': u'\u03B7',
	    'theta': u'\u03B8',
	    'iota': u'\u03B9',
	    'kappa': u'\u03BA',
	    # Same dual spelling as for the upper-case letter above.
	    'lamda': u'\u03BB',
	    'lambda': u'\u03BB',
	    'mu': u'\u03BC',
	    'nu': u'\u03BD',
	    'xi': u'\u03BE',
	    'omicron': u'\u03BF',
	    'pi': u'\u03C0',
	    'rho': u'\u03C1',
	    'sigma': u'\u03C3',
	    'tau': u'\u03C4',
	    'upsilon': u'\u03C5',
	    'phi': u'\u03C6',
	    'chi': u'\u03C7',
	    'psi': u'\u03C8',
	    'omega': u'\u03C9',
	}


	def process_unicode(s):
	    r"""
	    Replace LaTeX Greek-letter macros in a string with unicode characters.

	    Every occurrence of the form ``$\name$`` (e.g. ``$\alpha$``) whose name
	    is a key of ``greek_alphabet`` is replaced by the character stored under
	    that key. Occurrences whose name is not in the table (e.g. ``$\times$``)
	    are left untouched.

	    Args:
	        s: The string to process.

	    Returns:
	        The string with all recognised macros substituted.
	    """
	    def to_unicode(match):
	        # Each match is looked up on its own, so that different letters in
	        # the same string are not all replaced by the first one found.
	        return greek_alphabet.get(match.group(1), match.group(0))

	    return re.sub(r"\$\\(\w+)\$", to_unicode, s)


	def _recent_authors(db, min_year):
	    """Return the set of (first and middle names, last names) of article authors from min_year on."""
	    authors = set()
	    for key, entry in db.entries.items():
	        if entry.type != "article":
	            continue
	        try:
	            year = int(entry.fields["year"])
	        except (KeyError, ValueError):
	            # Without a usable year the entry cannot be placed inside the
	            # requested window; report it instead of aborting the whole run.
	            print("Skipping %s: missing or non-numeric year" % key)
	            continue
	        if year < min_year:
	            continue
	        # Entries without an author list simply contribute nobody.
	        for person in entry.persons.get("author", []):
	            given = " ".join(person.first_names + person.middle_names)
	            authors.add((given, " ".join(person.last_names)))
	    return authors


	def _merge_affiliations(collaborators, csv_path):
	    """Copy names and affiliations from an older csv onto matching collaborators; return unmatched rows."""
	    unfound = []
	    with open(csv_path, "rt") as f:
	        for row in csv.reader(f):
	            # Blank lines are read as empty rows; the header row is not data.
	            if not row or row[0] == "First Name":
	                continue
	            # Tolerate rows that lack trailing columns.
	            first_name, last_name, affiliation = (row + ["", ""])[:3]
	            first_lower = first_name.lower()
	            last_lower = last_name.lower()
	            for c in collaborators:
	                # Substring match, so that a shorter form of a name in the
	                # bibtex still picks up the fuller form stored in the csv.
	                if (c["first_name"].lower() in first_lower
	                        and c["last_name"].lower() in last_lower):
	                    c["first_name"] = first_name
	                    c["last_name"] = last_name
	                    c["affiliation"] = affiliation
	                    break
	            else:
	                unfound.append(row)
	    return unfound


	def _drop_duplicates(collaborators):
	    """Keep the first collaborator for every case-insensitive full name."""
	    seen = set()
	    unique = []
	    for c in collaborators:
	        full_name = "%s %s" % (c["first_name"].lower(), c["last_name"].lower())
	        if full_name not in seen:
	            seen.add(full_name)
	            unique.append(c)
	    return unique


	def generate_csv(args):
	    """
	    Write a csv of collaborators extracted from a bibtex file.

	    Authors of all "article" entries whose year is at least ``args.year`` are
	    collected, sorted by last name and then first name, optionally merged
	    with the names and affiliations of an earlier csv, de-duplicated, and
	    written to ``collaborators_<today's date>.csv`` in the current directory.
	    Rows of the earlier csv that match no current collaborator are printed
	    under the heading "Old collaborators".

	    Args:
	        args: Parsed command-line arguments providing ``bib`` (path of the
	            bibtex file), ``year`` (earliest year to include) and
	            ``input_csv`` (optional path of an earlier csv).
	    """
	    db = parse_file(args.bib)
	    names = sorted(_recent_authors(db, args.year), key=lambda c: (c[1], c[0]))
	    collaborators = [{"first_name": first, "last_name": last}
	                     for first, last in names]

	    unfound = []
	    if args.input_csv:
	        unfound = _merge_affiliations(collaborators, args.input_csv)

	    # Merging may have given several bibtex spellings the same csv name.
	    collaborators = _drop_duplicates(collaborators)

	    with open("collaborators_%s.csv" % datetime.datetime.now().date(), "wt") as f:
	        writer = csv.writer(f)
	        writer.writerow(["First Name", "Last Name", "Affiliation"])
	        for c in collaborators:
	            writer.writerow([c["first_name"], c["last_name"], c.get("affiliation", "")])

	    if unfound:
	        print("Old collaborators")
	        print("-----------------")
	        for row in unfound:
	            print(", ".join(row))


	def csv2str(args):
	    """
	    Print the collaborators of a csv file as a single "; "-separated string.

	    Each data row is rendered as "First Last, Affiliation". The header row
	    and blank lines are skipped.

	    Args:
	        args: Parsed command-line arguments providing ``csv`` (path of the
	            csv file) and ``excluded_institution`` (optional string; rows
	            whose affiliation contains it are left out).
	    """
	    excluded = args.excluded_institution
	    output = []
	    with open(args.csv, "rt") as f:
	        for row in csv.reader(f):
	            if not row or row[0] == "First Name":
	                continue
	            # Tolerate rows that lack trailing columns.
	            first_name, last_name, affiliation = (row + ["", ""])[:3]
	            # Only filter when an institution was actually supplied; a
	            # placeholder string would also match real affiliations.
	            if excluded and excluded in affiliation:
	                continue
	            output.append("%s %s, %s" % (first_name, last_name, affiliation))
	    print("; ".join(output))


	if __name__ == "__main__":
	    # Command-line entry point. "make_csv" builds the collaborator csv from a
	    # bibtex file; "make_str" turns such a csv into a string for pasting.
	    import argparse
	    import sys

	    desc = (
	        "\n"
	        "This script helps automate the generation and updating of collaborator lists from\n"
	        "bibtex files and/or csv file. Note that the first time this script is used, the csv\n"
	        "generated do not contain affiliations. These have to be entered by hand.\n"
	        "Subsequently, supply old processed csv using -i and affiliations will be obtained\n"
	        "from the old list where possible. After you generate the csv, you can convert it to\n"
	        "a string for pasting into the biosketch using make_str.\n"
	    )

	    p = argparse.ArgumentParser(
	        description="Generate collaborator list for proposals.\n" + desc.rstrip(),
	        # Show the description with the line breaks written above.
	        formatter_class=argparse.RawDescriptionHelpFormatter,
	        epilog="Author: Shyue Ping Ong")

	    sp = p.add_subparsers()

	    sp_csv = sp.add_parser(
	        "make_csv", help="Make csv file from bibtex.")

	    sp_csv.add_argument("bib", metavar="bib", type=str,
	                        help="Bibtex file to process")

	    sp_csv.add_argument("-y", "--year", dest="year",
	                        type=int, default=datetime.datetime.now().year - 4,
	                        help="Year from which to update. Defaults to current year - 4, based on the usual NSF guideline of past 48 months.")

	    sp_csv.add_argument("-i", "--input_csv", dest="input_csv",
	                        type=str,
	                        help="An input CSV file. This is used mainly for prior information on institutions.")

	    sp_csv.set_defaults(func=generate_csv)

	    sp_str = sp.add_parser(
	        "make_str", help="Convert data from a csv to a string for pasting into biosketches")

	    sp_str.add_argument("csv", metavar="csv", type=str,
	                        help="Csv file to process")

	    sp_str.add_argument("-e", "--excluded_institution", dest="excluded_institution",
	                        type=str,
	                        help="Supply a home institution to exclude collaborators within the same institution. A sufficiently unique part of the string should suffice.")

	    sp_str.set_defaults(func=csv2str)

	    args = p.parse_args()
	    if not hasattr(args, "func"):
	        # No sub-command was given, so there is nothing to dispatch to.
	        p.print_help()
	        sys.exit(0)
	    args.func(args)