Skip to content

Instantly share code, notes, and snippets.

@jamesonwilliams

jamesonwilliams/top_contribs.py Secret

Created Dec 25, 2020
Embed
What would you like to do?
Script to get top contributors in a GitHub org
#!/usr/bin/env python3
import datetime
import json
import os
import textwrap
import github
# Configuration, read from the process environment when this module is imported.

# Looked up with os.environ[...], so importing the module raises KeyError
# when the variable is not set. Passed to github.Github() in lambda_handler().
GITHUB_TOKEN = os.environ['GITHUB_TOKEN']
"""Required. Token for interaction with GitHub. Must have scope to write Gists."""
# Same KeyError-on-missing behaviour as GITHUB_TOKEN. Split on commas (and
# whitespace-stripped) by update_top_contributors_gist().
ORG_NAMES = os.environ['ORG_NAMES']
"""Required. Comma-separated list of GitHub organizations to inspect, e.g. 'awslabs'."""
# Falls back to '' when the variable is unset or empty. Read by valid_login().
IGNORED_LOGINS = os.environ.get('IGNORED_LOGINS') or ''
"""An optional comma-separated list of logins to ignore, e.g. 'alice, bob.'"""
def valid_login(login):
    """
    Decide whether a login should be counted in the contributor stats.

    A login is rejected when it contains the marker '[bot]', or when it
    matches one of the entries of the comma-separated IGNORED_LOGINS
    setting (entries are compared after stripping surrounding whitespace).

    :param login: The login name to check
    :return: True when the login should be counted, False otherwise
    """
    excluded = [entry.strip() for entry in IGNORED_LOGINS.split(',')]
    if '[bot]' in login:
        return False
    return login not in excluded
def get_recent_pulls_in_repo(repo):
    """
    Gets a list of pull requests that have been made against a repository
    within the last 30 days.

    GitHub reports timestamps in UTC, so the 30-day cutoff is computed in
    UTC as well (rather than in the host's local time). ``created_at``
    values without tzinfo are interpreted as UTC, which lets the comparison
    work whether the client library returns naive or timezone-aware
    datetimes.

    :param repo: A GitHub Repository object
    :return: A list of pulls on that repository in the last 30 days,
             newest first
    """
    utc = datetime.timezone.utc
    cutoff = datetime.datetime.now(utc) - datetime.timedelta(days=30)
    pulls = []
    # get_pulls() doesn't accept a timeframe as a search filter, so we sort
    # by creation timestamp (newest first) and stop iterating over the
    # paginated results as soon as we see a pull older than the cutoff.
    for pull in repo.get_pulls(sort='created', direction='desc', state='all'):
        created_at = pull.created_at
        if created_at.tzinfo is None:
            # Naive timestamp: treat it as UTC so it is comparable with the
            # timezone-aware cutoff.
            created_at = created_at.replace(tzinfo=utc)
        if created_at < cutoff:
            break
        pulls.append(pull)
    return pulls
def get_recent_pulls_in_org(org):
    """
    Collect the recent pull requests from every repository of a GitHub
    organization.

    :param org: A GitHub organization object
    :return: A flat list of the recent pull requests found across the
             organization's repositories
    """
    collected = []
    for repository in org.get_repos():
        print('Inspecting pulls on repo {0}...'.format(str(repository)))
        collected.extend(get_recent_pulls_in_repo(repository))
    return collected
def get_recent_pulls_for_org_names(gh, org_names):
    """
    Collect the recent pull requests for several organizations, by name.

    Each name is resolved through ``gh.get_organization`` and the pulls of
    all organizations are returned as one flat list, in the order the names
    were given.

    :param gh: GitHub client used to look up each organization
    :param org_names: An iterable of organization names
    :return: A list of recent pull requests across all named organizations
    """
    return [
        pull
        for name in org_names
        for pull in get_recent_pulls_in_org(gh.get_organization(name))
    ]
def author_stats(pulls):
    """
    Count how many of the given pull requests each login authored.

    Pulls whose author fails valid_login() are left out. A line is printed
    for every pull that is counted.

    :param pulls: A list of pull requests
    :return: A dict mapping login to the number of pulls that login authored
    """
    tally = {}
    for pr in pulls:
        author = pr.user.login
        if valid_login(author):
            print('{0} created a pull named "{1}."'.format(author, pr.title))
            tally[author] = 1 + tally.get(author, 0)
    return tally
def reviewer_stats(pulls):
    """
    Count the comments each login left on other people's pull requests.

    Both review comments and plain conversation (issue) comments are
    considered. A comment is dropped when its author fails valid_login()
    or when it was written by the author of the pull request itself.

    :param pulls: A list of pull requests
    :return: A dict mapping login to the number of counted comments
    """
    # Phase 1: gather every comment that counts, across all pulls.
    kept = []
    for pr in pulls:
        # list() drains each paginated result before the two are combined.
        candidates = list(pr.get_review_comments()) + list(pr.get_issue_comments())
        for candidate in candidates:
            if not valid_login(candidate.user.login):
                continue
            if candidate.user.login == pr.user.login:
                continue
            kept.append(candidate)

    # Phase 2: log each kept comment and tally it per commenter.
    tally = {}
    for remark in kept:
        commenter = remark.user.login
        print('{0} left a comment: "{1}"'.format(commenter, remark.body))
        tally[commenter] = 1 + tally.get(commenter, 0)
    return tally
def top_ten_table(key_label, val_label, entries):
    """
    Render the ten largest-valued entries of a dictionary as a Markdown table.

    The table has three columns: a 1-based rank, the entry's key and the
    entry's value. Rows are ordered by value, largest first; entries with
    equal values keep the dictionary's own ordering.

    :param key_label: Column heading for the keys
    :param val_label: Column heading for the values
    :param entries: A dictionary whose values can be ordered
    :return: The Markdown table as a string, one newline-terminated row per
             line, always including the header and separator rows
    """
    def as_row(first, second, third):
        return '|{0}|{1}|{2}|\n'.format(first, second, third)

    rows = [
        as_row('Rank', key_label, val_label),
        as_row('--------', '--------', '--------'),
    ]
    # Largest values first; keep at most ten entries.
    ranked = sorted(entries.items(), key=lambda pair: pair[1], reverse=True)[:10]
    for rank, (name, count) in enumerate(ranked, start=1):
        rows.append(as_row(str(rank), name, str(count)))
    return ''.join(rows)
def build_document(author_data, reviewer_data):
    """
    Assemble the Markdown report of top contributors.

    The report contains a fixed introduction, a top-ten table of pull
    request authors, a top-ten table of commenters, and a footer carrying
    the time at which the document was generated.

    :param author_data: A dict mapping login to number of PRs authored
    :param reviewer_data: A dict mapping login to number of comments left
    :return: A string containing the Markdown document
    """
    top_authors = top_ten_table('PR Author', '# PRs Authored', author_data)
    top_reviewers = top_ten_table('PR Reviewer', '# PRs Comments', reviewer_data)
    # The template is dedented before the tables are substituted in, so the
    # multi-line table text cannot affect the computed margin.
    template = textwrap.dedent("""
        # Top Contributors, Last 30 Days
        This is a list of the top contributors to the aws-amplify Github org's public repos.
        Contributions from the last 30 days are considered.
        This document is updated by a cron job every day.
        Contributors are from AWS and from the community.
        Contribution counts are a running sum of a user's contributions across all repos.
        ### Top 10 Authors
        {0}
        ### Top 10 Reviewers (by total comments)
        {1}
        -----------------------
        Last updated {2}.
    """)
    generated_at = str(datetime.datetime.today())
    return template.format(top_authors, top_reviewers, generated_at)
def write_gist(gh, filename, description, content):
    """
    Create or update a single-file Gist for the authenticated user.

    The user's Gists are searched for one whose description equals
    `description`. The first match is edited in place with the new file
    content; when nothing matches, a new public Gist is created instead.

    :param gh: Handle to PyGitHub
    :param filename: The name of the file in which to store content, e.g. "content.md"
    :param description: The Gist description, also used to find an existing Gist
    :param content: The content to put in the file in the Gist
    """
    # Shape sent to the API: {"<filename>": {"content": "<file contents>"}}
    payload = {filename: github.InputFileContent(content=content)}
    account = gh.get_user()
    print("Looking for matching Gists....")
    # next() stops paging through the Gists at the first description match.
    existing = next(
        (candidate for candidate in account.get_gists()
         if candidate.description == description),
        None,
    )
    if existing is None:
        print("No existing Gist, creating a new one....")
        account.create_gist(public=True, files=payload, description=description)
        return
    print("Found a matching Gist. We'll updated it.")
    existing.edit(files=payload, description=description)
def update_top_contributors_gist(gh):
    """
    Run the whole pipeline: fetch recent pulls for the configured
    organizations, compute author and reviewer statistics, render the
    Markdown report and publish it as a Gist.

    :param gh: GitHub instance
    """
    # ORG_NAMES is a comma-separated string; tolerate spaces around names.
    org_names = list(map(str.strip, ORG_NAMES.split(',')))
    recent_pulls = get_recent_pulls_for_org_names(gh, org_names)
    report = build_document(
        author_stats(recent_pulls),
        reviewer_stats(recent_pulls),
    )
    write_gist(gh, 'contrib.md', 'Top Contributors, Last 30 Days', report)
# noinspection PyUnusedLocal
def lambda_handler(event, context):
    """
    Hook into AWS Lambda.
    This is intended to be triggered by a periodic CloudWatch event.

    The event and context are only logged; neither influences the report.

    :param event: Unused apart from logging
    :param context: Unused apart from logging
    """
    # The context handed over by the Lambda runtime is a plain Python object
    # rather than a dict, so json.dumps() would raise TypeError on it and
    # abort the invocation before any work is done. default=str makes the
    # log line best-effort: anything that is not JSON-serialisable is
    # rendered via str() instead of raising.
    event_json = json.dumps(event, indent=2, default=str)
    context_json = json.dumps(context, indent=2, default=str)
    print('Lambda handler invoked. event={0}, context={1}.'.format(event_json, context_json))
    gh = github.Github(GITHUB_TOKEN)
    update_top_contributors_gist(gh)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment