Skip to content

Instantly share code, notes, and snippets.

@cthoyt
Created June 2, 2022 10:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cthoyt/f64930564454752991ce53d96aa88566 to your computer and use it in GitHub Desktop.
Save cthoyt/f64930564454752991ce53d96aa88566 to your computer and use it in GitHub Desktop.
Generate author list text from a Google Sheet, originally used for the Bioregistry paper.
"""Generate author list text from a Google Sheet."""
from operator import itemgetter
import click
import pandas as pd
# Sort rank per author role: "Lead" sorts first (0) and "Senior" sorts last (2).
# ``sort_key`` ranks every role that is not listed here as 1, i.e. in the middle.
ROLE = dict(Lead=0, Senior=2)

# Each ASCII digit paired with its Unicode superscript form.
SUPERSCRIPTS = dict(zip("0123456789", "⁰¹²³⁴⁵⁶⁷⁸⁹"))

# Translation table for ``str.translate`` that rewrites digits as superscripts,
# used to render affiliation numbers next to author names.
SUPERSCRIPTS_TRANS = str.maketrans(SUPERSCRIPTS)
def sort_key(row: tuple[str, ...]) -> tuple[int, str]:
    """Return the ordering key for one author row.

    The key is ``(rank, last_name)``. The rank is looked up in ``ROLE`` from
    the author role stored at ``row[3]``, so the lead author sorts first and
    the senior author sorts last; any other role falls back to rank 1 and
    lands in the middle. The last name at ``row[2]`` breaks ties, which
    orders the middle authors alphabetically.
    """
    rank = ROLE.get(row[3], 1)
    last_name = row[2]
    return rank, last_name
def _format_authors(rows, column_to_idx):
    """Build the display names and the numbered affiliation index.

    :param rows: Author rows, already in the order they should be printed.
        Position 1 holds the optional middle name/initial and position 2 the
        last name; position 0 is presumably the first name (confirm against
        the sheet layout).
    :param column_to_idx: Mapping from column header to its position within
        a row. Must contain ``"Affiliation"`` and ``"Affiliation 2"``.
    :returns: A pair ``(names, affiliation_counts)``. ``names`` has one
        formatted string per row, each suffixed with the superscript numbers
        of that author's affiliations. ``affiliation_counts`` maps each
        affiliation to its zero-based index in order of first appearance.
    :raises KeyError: If either affiliation column is missing.
    """
    affiliation_columns = [
        column_to_idx[key] for key in ("Affiliation", "Affiliation 2")
    ]
    affiliation_counts: dict[str, int] = {}
    names: list[str] = []
    for row in rows:
        affiliations = []
        for idx in affiliation_columns:
            affiliation = row[idx]
            # Skip empty cells, and skip an affiliation that was entered in
            # both columns so its marker is not printed twice for one author.
            if pd.notna(affiliation) and affiliation not in affiliations:
                affiliations.append(affiliation)
                # Number affiliations in the order they are first seen.
                affiliation_counts.setdefault(affiliation, len(affiliation_counts))
        # Indices are stored zero-based; the printed markers are one-based.
        affiliation_text = "𝄒".join(
            str(affiliation_counts[affiliation] + 1).translate(SUPERSCRIPTS_TRANS)
            for affiliation in affiliations
        )
        if pd.notna(row[1]):
            # there's a middle name/initial
            name = f"{row[0]} {row[1]} {row[2]}{affiliation_text}"
        else:
            name = f"{row[0]} {row[2]}{affiliation_text}"
        names.append(name)
    return names, affiliation_counts


@click.command()
@click.argument("google_sheet")
@click.option("--gid", type=int, default=0)
def main(google_sheet: str, gid: int) -> None:
    """Create author list text from a google sheet."""
    # NOTE: the docstring above doubles as the ``--help`` text shown by click,
    # so further documentation lives in comments instead.
    #
    # ``google_sheet`` is the spreadsheet identifier interpolated into the
    # export URL and ``gid`` selects the tab within it. The output is one
    # comma-separated line of author names followed by the numbered
    # affiliation list, both written to stdout.
    url = f"https://docs.google.com/spreadsheets/d/{google_sheet}/export?format=tsv&gid={gid}"
    # The first line of the export is skipped, so the column headers are read
    # from its second line.
    df = pd.read_csv(url, sep="\t", skiprows=1)
    column_to_idx = {column: i for i, column in enumerate(df.columns)}
    rows = sorted(df.values, key=sort_key)
    names, affiliation_counts = _format_authors(rows, column_to_idx)
    print(", ".join(names))
    for affiliation, index in sorted(affiliation_counts.items(), key=itemgetter(1)):
        print(f"{index + 1}. {affiliation}")
if __name__ == "__main__":
    # Run the command-line entry point only when this file is executed as a
    # script, not when it is imported.
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment