Script to get top contributors in a GitHub org
#!/usr/bin/env python3 | |
import datetime | |
import json | |
import os | |
import textwrap | |
import github | |
# Required. Token for interaction with GitHub. Must have scope to write Gists.
# A missing variable raises KeyError when this module is imported.
GITHUB_TOKEN = os.environ['GITHUB_TOKEN']

# Required. Comma-separated list of GitHub organizations to inspect,
# e.g. 'awslabs'. A missing variable raises KeyError at import time.
ORG_NAMES = os.environ['ORG_NAMES']

# Optional. Comma-separated list of logins to ignore, e.g. 'alice, bob'.
# An unset or empty variable collapses to the empty string.
IGNORED_LOGINS = os.getenv('IGNORED_LOGINS') or ''
def valid_login(login):
    """
    Decide whether a login should be counted as a contributor.

    A login is rejected when it contains the substring '[bot]', or when it
    exactly matches one of the comma-separated entries of IGNORED_LOGINS
    (each entry is stripped of surrounding whitespace before comparison).

    :param login: The login name to check
    :return: True if the login should be counted, False otherwise
    """
    if '[bot]' in login:
        return False
    excluded = [entry.strip() for entry in IGNORED_LOGINS.split(',')]
    return login not in excluded
def get_recent_pulls_in_repo(repo):
    """
    Gets a list of pull requests that have been made against a repository
    within the last 30 days.

    The 30-day cutoff is computed in UTC. Creation timestamps that carry no
    timezone information are interpreted as UTC before being compared.

    :param repo: A GitHub Repository object
    :return: A list of pulls on that repository in the last 30 days,
             newest first
    """
    # get_pulls() doesn't accept a timeframe as a search filter
    # so we sort by creation timestamp, then stop iterating over the
    # paginated search once we start seeing pulls older than one month.
    cutoff = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=30)
    pulls = []
    for pull in repo.get_pulls(sort='created', direction='desc', state='all'):
        created_at = pull.created_at
        if created_at.tzinfo is None:
            # NOTE(review): naive timestamps are assumed to be expressed in
            # UTC, as GitHub reports them -- confirm for the PyGithub
            # version in use. Comparing them to local wall-clock time would
            # shift the window by the host's UTC offset.
            created_at = created_at.replace(tzinfo=datetime.timezone.utc)
        if created_at < cutoff:
            break
        pulls.append(pull)
    return pulls
def get_recent_pulls_in_org(org):
    """
    Collects the recent pull requests from every repository of a GitHub
    organization.

    Each repository returned by org.get_repos() is announced on stdout and
    then queried through get_recent_pulls_in_repo().

    :param org: A GitHub organization
    :return: A single flat list of recent pull requests across the org's repos
    """
    collected = []
    for repository in org.get_repos():
        print('Inspecting pulls on repo {0}...'.format(str(repository)))
        collected.extend(get_recent_pulls_in_repo(repository))
    return collected
def get_recent_pulls_for_org_names(gh, org_names):
    """
    Collects the recent pull requests for several organizations, by name.

    Every name is resolved with gh.get_organization() and the resulting
    organization is passed to get_recent_pulls_in_org().

    :param gh: Handle to PyGitHub
    :param org_names: An iterable of organization names
    :return: A single flat list of recent pull requests across all the orgs
    """
    return [
        pull
        for name in org_names
        for pull in get_recent_pulls_in_org(gh.get_organization(name))
    ]
def author_stats(pulls):
    """
    Counts how many of the provided pull requests each login authored.

    Pull requests whose author fails valid_login() are left out. Every
    counted pull request is also reported on stdout.

    :param pulls: A list of pull requests
    :return: A dict from login to number of pulls authored by that login
    """
    counts = {}
    for pr in pulls:
        author = pr.user.login
        if valid_login(author):
            print('{0} created a pull named "{1}."'.format(author, pr.title))
            counts[author] = counts.get(author, 0) + 1
    return counts
def reviewer_stats(pulls):
    """
    Counts how many comments each login left on the provided pull requests.

    Comments by logins that fail valid_login() are ignored, as are comments
    a pull request's author left on their own pull request. Every counted
    comment is also reported on stdout.

    :param pulls: A list of pull requests
    :return: A dict from login to number of comments left on the pull requests by that login
    """
    # Phase 1: gather every comment that counts, across all pull requests.
    counted_comments = []
    for pr in pulls:
        # A pull request carries two kinds of comments: those attached to a
        # review, and those written in the conversation tab. Both paginators
        # are drained into lists before any filtering happens.
        pr_comments = list(pr.get_review_comments()) + list(pr.get_issue_comments())
        for remark in pr_comments:
            commenter = remark.user.login
            if valid_login(commenter) and commenter != pr.user.login:
                counted_comments.append(remark)

    # Phase 2: report and tally the gathered comments per login.
    tally = {}
    for remark in counted_comments:
        commenter = remark.user.login
        print('{0} left a comment: "{1}"'.format(commenter, remark.body))
        tally[commenter] = tally.get(commenter, 0) + 1
    return tally
def top_ten_table(key_label, val_label, entries):
    """
    Renders the highest-valued items of a dictionary as a Markdown table.

    The table has three columns: a 1-based rank, the key and the value.
    Rows are ordered by value, largest first, and at most ten are emitted.

    :param key_label: Column label for the entries keys
    :param val_label: Column label for the entries values
    :param entries: A Python dictionary
    :return: A string containing the Markdown table, one row per line
    """
    def render_row(first, second, third):
        return '|{0}|{1}|{2}|\n'.format(first, second, third)

    # Header line followed by the Markdown column separator line.
    rows = [
        render_row('Rank', key_label, val_label),
        render_row('--------', '--------', '--------'),
    ]

    # Order the (key, value) pairs by value, largest first, and keep ten.
    ranked = sorted(entries.items(), key=lambda pair: pair[1], reverse=True)[:10]
    rows.extend(
        render_row(str(position), key, str(value))
        for position, (key, value) in enumerate(ranked, start=1)
    )
    return ''.join(rows)
def build_document(author_data, reviewer_data):
    """
    Assembles the Markdown report describing the top contributors.

    The report holds a fixed introduction, a top-ten table of authors, a
    top-ten table of reviewers and a "last updated" timestamp.

    :param author_data: A dict with number of PRs authored by login
    :param reviewer_data: A dict with number of comments left by login
    :return: A String containing Markdown
    """
    authors_section = top_ten_table('PR Author', '# PRs Authored', author_data)
    reviewers_section = top_ten_table('PR Reviewer', '# PRs Comments', reviewer_data)

    # The template is indented for readability; dedent() strips that margin
    # before the placeholders are filled in.
    template = textwrap.dedent("""
        # Top Contributors, Last 30 Days
        This is a list of the top contributors to the aws-amplify Github org's public repos.
        Contributions from the last 30 days are considered.
        This document is updated by a cron job every day.
        Contributors are from AWS and from the community.
        Contribution counts are a running sum of a user's contributions across all repos.
        ### Top 10 Authors
        {0}
        ### Top 10 Reviewers (by total comments)
        {1}
        -----------------------
        Last updated {2}.
        """)
    generated_at = str(datetime.datetime.today())
    return template.format(authors_section, reviewers_section, generated_at)
def write_gist(gh, filename, description, content):
    """
    Publishes a single file to the authenticated user's Gists.

    The user's Gists are scanned for one whose description equals
    `description`. The first match is edited in place with the new content;
    if none matches, a new public Gist is created.

    :param gh: Handle to PyGitHub
    :param filename: The name of the file in which to store content, e.g. "content.md"
    :param description: A description for the Gist, also used to find an existing one
    :param content: The content to put in the file in the Gist
    """
    # Payload shape: "files":{"test.txt":{"content":"String file contents"}}
    payload = {filename: github.InputFileContent(content=content)}
    account = gh.get_user()

    print("Looking for matching Gists....")
    existing = next(
        (candidate for candidate in account.get_gists()
         if candidate.description == description),
        None,
    )
    if existing is not None:
        print("Found a matching Gist. We'll updated it.")
        existing.edit(files=payload, description=description)
        return

    print("No existing Gist, creating a new one....")
    account.create_gist(public=True, files=payload, description=description)
def update_top_contributors_gist(gh):
    """
    Runs the whole pipeline: fetch pull requests, compute stats, publish.

    Organization names are taken from the comma-separated ORG_NAMES setting,
    and the finished report is written to a Gist file named 'contrib.md'.

    :param gh: GitHub instance
    """
    organizations = list(map(str.strip, ORG_NAMES.split(',')))
    recent_pulls = get_recent_pulls_for_org_names(gh, organizations)
    report = build_document(author_stats(recent_pulls), reviewer_stats(recent_pulls))
    write_gist(gh, 'contrib.md', 'Top Contributors, Last 30 Days', report)
# noinspection PyUnusedLocal
def lambda_handler(event, context):
    """
    Hook into AWS Lambda.
    This is intended to be triggered by a periodic CloudWatch event.

    Both arguments are only logged; neither influences the report.

    :param event: The triggering event; logged, otherwise unused
    :param context: The Lambda context object; logged, otherwise unused
    """
    # The Lambda runtime supplies `context` as an object rather than plain
    # JSON data, so json.dumps() without a fallback raises TypeError and the
    # handler would fail before doing any work. default=str is a deliberate
    # choice here: the output is a log line only, so falling back to the
    # string form of anything non-serializable is acceptable.
    event_json = json.dumps(event, indent=2, default=str)
    context_json = json.dumps(context, indent=2, default=str)
    print('Lambda handler invoked. event={0}, context={1}.'.format(event_json, context_json))
    gh = github.Github(GITHUB_TOKEN)
    update_top_contributors_gist(gh)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment