Created
August 19, 2018 19:00
-
-
Save bbengfort/e451e244f41574de17c018a7ee1851ef to your computer and use it in GitHub Desktop.
Grepping through the reviewers list to find matches
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv
from collections import Counter
from operator import itemgetter

from tabulate import tabulate
def parse_inner_csv(val):
    """Split a comma-separated cell into lowercase, trimmed items.

    Args:
        val: Text of a single CSV cell that itself holds a comma-separated
            list, e.g. ``"Python, R"``.

    Returns:
        A list with one entry per comma-separated piece, in the original
        order, lowercased and stripped of surrounding whitespace. Empty
        pieces are kept, so an empty string yields ``[""]``.
    """
    return [piece.strip() for piece in val.lower().split(",")]
def rank_for_row(row):
    """Score how well a reviewer row matches Python and data/viz topics.

    Points are awarded as follows:

    * +1 if the first listed programming language is ``python``
    * +1 if ``python`` appears anywhere in the language list
    * +1 if the first listed topic is one of the key topics
    * +1 for every distinct key topic that appears in the topic list

    Args:
        row: Mapping for one reviewer, with the columns
            ``'Preferred Programming Languages'`` and
            ``'Domains/topic areas you are comfortable reviewing'``.

    Returns:
        The integer score; higher means a better match.

    Raises:
        KeyError: If either column is absent from ``row``.
    """
    # A set guarantees each key topic is counted at most once; the literal
    # list previously contained 'visualisations' twice, double-scoring it.
    key_topics = {
        'machine learning', 'data visualization', 'visualization',
        'data science', 'plotting', 'plotting in python',
        'visualisations', 'data visualisation', 'information visualization',
        'graphs', 'viz', 'machine-learning', 'scientific writing tools',
        'statistics/data science',
    }

    # csv.DictReader fills cells missing from a short row with None; treat
    # those as empty text instead of failing on None.lower().
    languages = parse_inner_csv(row['Preferred Programming Languages'] or '')
    topics = parse_inner_csv(
        row['Domains/topic areas you are comfortable reviewing'] or ''
    )

    score = 0
    if languages[0] == 'python':
        score += 1
    if 'python' in languages:
        score += 1
    if topics[0] in key_topics:
        score += 1
    # One point per distinct key topic the reviewer lists.
    score += len(key_topics.intersection(topics))
    return score
def get_ranked_reviewers(path='joss_reviewers - reviewers.csv'):
    """Load the reviewers CSV and return its rows ordered by match score.

    Args:
        path: Location of the reviewers CSV file (must have a header row).

    Returns:
        A list of ``(score, row)`` tuples sorted by score, highest first.
        Rows with equal scores keep their order from the file.
    """
    # newline='' is what the csv module expects: it lets the reader handle
    # line endings itself so quoted fields with embedded newlines parse
    # correctly.
    with open(path, 'r', newline='') as f:
        scored = [(rank_for_row(row), row) for row in csv.DictReader(f)]

    # Sort on the score only; the row dicts themselves are not orderable.
    scored.sort(key=itemgetter(0), reverse=True)
    return scored
def unique_topics(path='joss_reviewers - reviewers.csv'):
    """Count how often each review topic appears across all reviewers.

    Args:
        path: Location of the reviewers CSV file (must have a header row).

    Returns:
        A ``collections.Counter`` mapping each normalized topic string (as
        produced by ``parse_inner_csv``) to the number of times it occurs.
    """
    column = 'Domains/topic areas you are comfortable reviewing'
    # newline='' is what the csv module expects so that quoted fields with
    # embedded newlines are read correctly.
    with open(path, 'r', newline='') as f:
        return Counter(
            topic
            for row in csv.DictReader(f)
            for topic in parse_inner_csv(row[column])
        )
if __name__ == "__main__":
    # Uncomment to list every topic with its frequency instead:
    #topics = unique_topics()
    #for topic, count in topics.most_common():
    #    print(count, topic)

    # Print a table of the best-matching reviewers.
    table = [["Rank", "Reviewer", "Email", "Programming Languages", "Topics"]]
    for idx, (rank, row) in enumerate(get_ranked_reviewers()):
        # Reviewers arrive sorted by descending rank, so the first one that
        # scores below 2 means no later one qualifies. The index check caps
        # the output; as written it allows indices 0..50 (51 rows).
        if rank < 2 or idx > 50:
            break
        table.append([
            rank, row['GitHub Username'], row['email'],
            # Truncate the free-text columns to keep the table readable.
            row['Preferred Programming Languages'][:10],
            row['Domains/topic areas you are comfortable reviewing'][:70],
        ])
    print(tabulate(table, headers='firstrow', tablefmt='simple'))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment