-
-
Save jamesonwilliams/bd188d1682e882046bb3bceb327ec666 to your computer and use it in GitHub Desktop.
Script to get top contributors in a GitHub org
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import datetime | |
import json | |
import os | |
import textwrap | |
import github | |
# Required. Token for interaction with GitHub. Must have scope to write Gists.
# A missing variable raises KeyError as soon as the module is imported.
GITHUB_TOKEN = os.environ['GITHUB_TOKEN']

# Required. Comma-separated list of GitHub organizations to inspect,
# e.g. 'awslabs'. A missing variable raises KeyError at import time.
ORG_NAMES = os.environ['ORG_NAMES']

# Optional. Comma-separated list of logins to ignore, e.g. 'alice, bob'.
# An unset or empty variable yields the empty string.
IGNORED_LOGINS = os.environ.get('IGNORED_LOGINS') or ''
def valid_login(login):
    """
    Decide whether a login should be counted in the statistics.

    A login is rejected when it contains the substring '[bot]', or when it
    matches one of the entries of the comma-separated IGNORED_LOGINS setting
    (entries are compared after surrounding whitespace is stripped).

    :param login: The login name to check
    :return: True if the login should be counted, False otherwise
    """
    if '[bot]' in login:
        return False
    excluded = [entry.strip() for entry in IGNORED_LOGINS.split(',')]
    return login not in excluded
def get_recent_pulls_in_repo(repo):
    """
    Gets a list of pull requests that have been made against a repository
    within the last 30 days.

    The cutoff is computed in UTC. Creation timestamps that carry no timezone
    information are interpreted as UTC, so the comparison works both with
    client versions that return naive datetimes and with versions that return
    timezone-aware ones.

    :param repo: A GitHub Repository object
    :return: A list of pulls on that repository in the last 30 days,
             newest first
    """
    utc = datetime.timezone.utc
    month_ago = datetime.datetime.now(utc) - datetime.timedelta(days=30)

    # get_pulls() doesn't accept a timeframe as a search filter
    # so we sort by creation timestamp, then stop iterating over the
    # paginated search once we start seeing pulls older than one month.
    pulls = []
    for pull in repo.get_pulls(sort='created', direction='desc', state='all'):
        created_at = pull.created_at
        if created_at.tzinfo is None:
            # Comparing a naive datetime with an aware one raises TypeError,
            # so give naive timestamps an explicit UTC zone first.
            created_at = created_at.replace(tzinfo=utc)
        if created_at < month_ago:
            break
        pulls.append(pull)
    return pulls
def get_recent_pulls_in_org(org):
    """
    Collects the recent pull requests of every repository in a GitHub
    organization.

    :param org: A GitHub organization
    :return: A flat list of the recent pull requests found across the
             org's repos, as returned by get_recent_pulls_in_repo()
    """
    collected = []
    for repository in org.get_repos():
        print('Inspecting pulls on repo {0}...'.format(str(repository)))
        collected.extend(get_recent_pulls_in_repo(repository))
    return collected
def get_recent_pulls_for_org_names(gh, org_names):
    """
    Gathers the recent pull requests across several GitHub organizations.

    :param gh: GitHub instance; get_organization() is called once per name
    :param org_names: An iterable of organization names
    :return: A single list holding the recent pulls of all named organizations
    """
    all_pulls = []
    for name in org_names:
        organization = gh.get_organization(name)
        all_pulls.extend(get_recent_pulls_in_org(organization))
    return all_pulls
def author_stats(pulls):
    """
    Count how many of the given pull requests each login authored.

    Pull requests whose author fails valid_login() are skipped. A line is
    printed for every pull request that is counted.

    :param pulls: A list of pull requests
    :return: A dict from login to number of pulls authored by that login
    """
    tally = {}
    for pr in pulls:
        author = pr.user.login
        if valid_login(author):
            print('{0} created a pull named "{1}."'.format(author, pr.title))
            tally.setdefault(author, 0)
            tally[author] += 1
    return tally
def reviewer_stats(pulls):
    """
    Count how many comments each login left on the given pull requests.

    Comments by logins that fail valid_login(), and comments left by a pull
    request's own author, are not counted. A line is printed for every
    comment that is counted.

    :param pulls: A list of pull requests
    :return: A dict from login to number of comments left on the pull requests by that login
    """
    kept = []
    for pr in pulls:
        # A pull request has two kinds of comments: those attached to a
        # review, and those posted directly in the conversation tab.
        # Both paginators are fully drained before any filtering happens.
        candidates = list(pr.get_review_comments())
        candidates.extend(pr.get_issue_comments())
        for remark in candidates:
            commenter = remark.user.login
            if valid_login(commenter) and commenter != pr.user.login:
                kept.append(remark)

    counts = {}
    for remark in kept:
        commenter = remark.user.login
        print('{0} left a comment: "{1}"'.format(commenter, remark.body))
        counts[commenter] = counts.get(commenter, 0) + 1
    return counts
def top_ten_table(key_label, val_label, entries):
    """
    Render the largest-valued items of a dictionary as a Markdown table.

    The table has three columns (rank, key, value), a header row, a separator
    row, and at most ten data rows ordered from the largest value downwards.
    Items with equal values keep the order in which the dictionary yields them.

    :param key_label: Column label for the entries keys
    :param val_label: Column label for the entries values
    :param entries: A Python dictionary
    :return: The table as a string; every row ends with a newline
    """
    row = '|{0}|{1}|{2}|\n'
    lines = [
        row.format('Rank', key_label, val_label),
        row.format('--------', '--------', '--------'),
    ]
    # Largest values first; only the first ten entries are rendered.
    ranked = sorted(entries.items(), key=lambda pair: pair[1], reverse=True)
    for rank, (name, count) in enumerate(ranked[:10], start=1):
        lines.append(row.format(str(rank), name, str(count)))
    return ''.join(lines)
def build_document(author_data, reviewer_data, org_names=None):
    """
    Builds a Markdown document describing the top contributors.

    The organizations named in the document are taken from ``org_names`` so
    that the text always matches the organizations that were inspected.

    :param author_data: A dict with number of PRs authored by login
    :param reviewer_data: A dict with number of PR comments left by login
    :param org_names: Optional iterable of the organization names the data
                      was gathered from. When omitted, the names listed in
                      ORG_NAMES are used.
    :return: A String containing Markdown
    """
    if org_names is None:
        org_names = ORG_NAMES.split(',')
    org_list = ', '.join(name.strip() for name in org_names if name.strip())

    authors_table = top_ten_table('PR Author', '# PRs Authored', author_data)
    reviewers_table = top_ten_table('PR Reviewer', '# PRs Comments', reviewer_data)

    # dedent() deliberately runs before format(): the tables are multi-line
    # strings that start at column 0, so substituting them first would leave
    # dedent() without any common indentation to strip.
    template = textwrap.dedent("""
        # Top Contributors, Last 30 Days

        This is a list of the top contributors to the public repos of the following GitHub org(s): {orgs}.
        Contributions from the last 30 days are considered.
        This document is updated by a cron job every day.
        Contributors are from AWS and from the community.
        Contribution counts are a running sum of a user's contributions across all repos.

        ### Top 10 Authors

        {authors}

        ### Top 10 Reviewers (by total comments)

        {reviewers}

        -----------------------

        Last updated {updated}.
        """)
    return template.format(
        orgs=org_list,
        authors=authors_table,
        reviewers=reviewers_table,
        updated=str(datetime.datetime.today()),
    )
def write_gist(gh, filename, description, content):
    """
    A generic utility to write a file out to GitHub's Gist APIs.

    The Gists of the user returned by ``gh.get_user()`` are searched for one
    whose description equals ``description``. The first match is edited in
    place with the given file; if there is no match, a new public Gist is
    created with the given description and file.

    :param gh: Handle to PyGitHub
    :param filename: The name of the file in which to store content, e.g. "content.md"
    :param description: A description for the Gist; also used to find an existing Gist
    :param content: The content to put in the file in the Gist
    """
    # Same shape as the JSON form:
    # "files":{"test.txt":{"content":"String file contents"}}
    payload = {filename: github.InputFileContent(content=content)}
    account = gh.get_user()

    print("Looking for matching Gists....")
    existing = next(
        (candidate for candidate in account.get_gists()
         if candidate.description == description),
        None,
    )
    if existing is None:
        print("No existing Gist, creating a new one....")
        account.create_gist(public=True, files=payload, description=description)
        return

    print("Found a matching Gist. We'll updated it.")
    existing.edit(files=payload, description=description)
def update_top_contributors_gist(gh):
    """
    Top-level driver: collects the recent pull requests of the organizations
    listed in ORG_NAMES, builds the contributor report from them, and writes
    the report to the 'contrib.md' file of a Gist.

    :param gh: GitHub instance
    """
    names = [part.strip() for part in ORG_NAMES.split(',')]
    recent_pulls = get_recent_pulls_for_org_names(gh, names)
    # Author statistics are computed before reviewer statistics.
    report = build_document(author_stats(recent_pulls), reviewer_stats(recent_pulls))
    write_gist(gh, 'contrib.md', 'Top Contributors, Last 30 Days', report)
def lambda_handler(event, context):
    """
    Hook into AWS Lambda.
    This is intended to be triggered by a periodic CloudWatch event.

    :param event: The triggering event; only logged
    :param context: The Lambda runtime context; only logged
    """
    # The Lambda context is a runtime object rather than plain JSON data, and
    # an event may carry non-JSON values too. default=str makes json.dumps
    # fall back to str() for such values, so logging can no longer abort the
    # invocation with a TypeError before any work is done.
    event_json = json.dumps(event, indent=2, default=str)
    context_json = json.dumps(context, indent=2, default=str)
    print('Lambda handler invoked. event={0}, context={1}.'.format(event_json, context_json))

    gh = github.Github(GITHUB_TOKEN)
    update_top_contributors_gist(gh)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment