Skip to content

Instantly share code, notes, and snippets.

Last active December 5, 2017 15:43
Show Gist options
  • Save jwhitlock/53b52e44f412e596ceba592b6e20d92b to your computer and use it in GitHub Desktop.
Save jwhitlock/53b52e44f412e596ceba592b6e20d92b to your computer and use it in GitHub Desktop.
Generate the first draft of the monthly Kuma Report
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import print_function, unicode_literals

from argparse import ArgumentParser
from collections import defaultdict, Counter, OrderedDict
from datetime import date, datetime, timedelta
from textwrap import wrap
import pprint
import re
import sys

try:
    # cgi.escape was removed in Python 3.8; html.escape is its replacement.
    from html import escape
except ImportError:  # Python 2
    from cgi import escape

import requests
import requests_cache
# Owner / Repository -> Friendly name
# The keys select which GitHub repositories are scanned for merged pull
# requests; the values are the short names used in the report text.
repos = {
    ('mozilla', 'kuma'): 'Kuma',
    ('mdn', 'interactive-examples'): 'Interactive Examples',
    ('mdn', 'kumascript'): 'KumaScript',
    ('mdn', 'browser-compat-data'): 'BCD',
    ('mdn', 'doc-linter-webextension'): 'Doc Linter WebExtension',
    ('mdn', 'data'): 'Data',
    ('mozmeao', 'infra'): 'Infra',
}
# Markdown skeleton (Jekyll-style post) for the report draft.  It is filled in
# with %-style named placeholders; see the ``params`` dict in get_report().
# The "Item N" / "Plan N" sections are stubs meant to be edited by hand.
template = """\
---
layout: post
title: Kuma Report, %(month)s %(year)d
author: John Whitlock
excerpt_separator: <!--more-->
---
Here's what happened in %(month)s in
the engine of
[MDN Web Docs](https://developer.mozilla.org):

- [Item 1](#item1-%(month_id_suffix)s)
- [Item 2](#item2-%(month_id_suffix)s)
- [Item 3](#item3-%(month_id_suffix)s)
- [Shipped tweaks and fixes](#tweaks-%(month_id_suffix)s)
  by merging %(total)s pull requests,
  including %(prs_from_new_users)s pull requests
  from %(new_users)s new contributors.

Here's the plan for %(next_month)s:

- [Plan 1](#plan1-%(month_id_suffix)s)
- [Plan 2](#plan2-%(month_id_suffix)s)
- [Plan 3](#plan3-%(month_id_suffix)s)

<!--more-->

Done in %(month)s
===

<a name="item1-%(month_id_suffix)s">Item 1</a>
---
Here's the first item, including lots of images.

<a name="item2-%(month_id_suffix)s">Item 2</a>
---
The second item is also important.

<a name="item3-%(month_id_suffix)s">Item 3</a>
---
Everyone stopped reading.

<a name="tweaks-%(month_id_suffix)s">Shipped Tweaks and Fixes</a>
---
There were %(total)s PRs merged in %(month)s:

%(pr_counts_text)s

Many of these were from external contributors, including several first-time
contributions. Here are some of the highlights:

%(pr_details)s

Planned for %(next_month)s
===

<a name="plan1-%(month_id_suffix)s">Plan 1</a>
---
Main effort

<a name="plan2-%(month_id_suffix)s">Plan 2</a>
---
Second effort

<a name="plan3-%(month_id_suffix)s">Plan 3</a>
---
Thing we promised last month
"""
def get_report(client_id, client_secret):
    """Create the Kuma Report template for the month.

    Picks the report month, gathers merged pull requests and their authors
    from GitHub for every repository in ``repos``, and renders ``template``.

    :param client_id: GitHub OAuth2 client ID, passed to the fetch helpers.
    :param client_secret: GitHub OAuth2 client secret, passed likewise.
    :return: The rendered report text.
    """
    month = pick_month()

    # Find the 1st of the following month: 28 days ahead is still inside
    # (or just past) the current month, then walk forward day by day.
    next_month = month + timedelta(days=28)
    while next_month.month == month.month:
        next_month += timedelta(days=1)
    last_day = next_month - timedelta(days=1)

    pr_data = get_pr_data(repos.keys(), client_id, client_secret)
    user_data = get_user_data(pr_data, client_id, client_secret)
    pr_counts_text, pr_counts = get_pr_counts(pr_data, month, last_day)
    pr_details, total, new_users, new_prs = get_pr_details(
        pr_data, user_data, month, last_day, pr_counts)

    params = {
        'month': month.strftime('%B'),
        'next_month': next_month.strftime('%B'),
        'month_id_suffix': month.strftime('%b-%y').lower(),
        'year': month.year,
        'total': total,
        'pr_counts_text': pr_counts_text,
        'pr_details': pr_details,
        'prs_from_new_users': new_prs,
        'new_users': new_users,
    }
    return template % params
def pick_month(today=None):
    """Pick the report month based on the current date.

    The month is picked by which end-of-month is closest: before the 14th
    the previous month is reported, otherwise the current month.
    For example, on Oct 20th and November 5th, October is picked.

    :param today: Date to decide from; defaults to ``date.today()``.
    :return: The 1st of the selected month, such as Oct 1, 2017.
    """
    if today is None:
        today = date.today()
    month = date(today.year, today.month, 1)
    if today.day < 14:
        # Use last month: the day before the 1st is always in the previous
        # month (and previous year, for January).
        month = (month - timedelta(days=1)).replace(day=1)
    return month
def get_pr_data(repos, client_id, client_secret):
    """Get the data for merged pull requests across several repositories.

    :param repos: Iterable of ``(owner, repo)`` pairs.
    :param client_id: GitHub OAuth2 client ID.
    :param client_secret: GitHub OAuth2 client secret.
    :return: One flat list with the entries returned by
        ``get_pr_data_for_repo`` for each repository, in iteration order.
    """
    pr_data = []
    for owner, repo in repos:
        repo_prs = get_pr_data_for_repo(
            owner, repo, client_id, client_secret)
        # Accumulate; without this the per-repo results are discarded.
        pr_data.extend(repo_prs)
    return pr_data
class PullRequest(object):
    """Represents a merged GitHub pull request."""

    def __init__(self, owner, repo, github_response):
        """Initialize from the GitHub API response.

        :param owner: Repository owner (user or organization name).
        :param repo: Repository name.
        :param github_response: Decoded JSON dict for one pull request; the
            keys read are ``number``, ``user`` (``login`` and ``url``),
            ``title``, ``merged_at`` and ``created_at``.
        """
        self.owner = owner
        self.repo = repo
        self.number = github_response['number']
        self.username = github_response['user']['login']
        self.user_url = github_response['user']['url']
        self.title = github_response['title']
        self.merged_at = self.to_datetime(github_response['merged_at'])
        self.created_at = self.to_datetime(github_response['created_at'])

    def to_datetime(self, raw_date_str):
        """Convert a GitHub date string to a naive datetime.

        :param raw_date_str: String such as ``2017-10-03T12:34:56Z``, or
            None when the date is absent.
        :return: The parsed datetime, or None if no date was given.
        """
        if raw_date_str is None:
            # None rather than '' so the attribute is never a string that
            # would later be compared against datetimes.
            return None
        dateformat = '%Y-%m-%dT%H:%M:%S'
        date_str = raw_date_str
        if raw_date_str.endswith('Z'):
            # Drop the trailing "Z" suffix; strptime's format has no slot
            # for it.
            date_str = date_str[:-1]
        return datetime.strptime(date_str, dateformat)
def get_pr_data_for_repo(owner, repo, client_id, client_secret):
    """Get the data for pull requests merged in a repo.

    Walks every page of the repository's closed pull requests and keeps the
    ones that have a ``merged_at`` value.

    :param owner: Repository owner (user or organization name).
    :param repo: Repository name.
    :param client_id: GitHub OAuth2 client ID.
    :param client_secret: GitHub OAuth2 client secret.
    :return: List of ``(owner, repo, number, PullRequest)`` tuples.
    :raises Exception: If GitHub answers with an error document (a JSON
        object carrying a ``message`` key).
    """
    url_params = {'owner': owner, 'repo': repo}
    url_pat = 'https://api.github.com/repos/%(owner)s/%(repo)s/pulls'
    # Send the OAuth app credentials as HTTP basic auth rather than in the
    # query string: GitHub no longer accepts them as query parameters, and
    # this keeps the secret out of the URLs echoed to stderr below.
    auth = (client_id, client_secret)

    prs = []
    next_url = url_pat % url_params
    payload = {'state': 'closed'}
    while next_url is not None:
        resp = requests.get(next_url, params=payload, auth=auth)
        print(resp.url, file=sys.stderr)

        # Abort if the return is an error (success is a JSON list)
        out = resp.json()
        if isinstance(out, dict) and 'message' in out:
            pprint.pprint(out, stream=sys.stderr)
            raise Exception(resp.text)

        # Process the PRs
        for pr in out:
            if pr['merged_at']:
                # Record the PR
                pr_obj = PullRequest(owner, repo, pr)
                prs.append((owner, repo, pr_obj.number, pr_obj))

        # Process the links and get the next URL.  The Link header is
        # absent when the whole result fits on a single page.
        links = get_links(resp.headers.get('Link', ''))
        next_url = links.get('next')
        # The "next" URL already carries the query string.
        payload = None
    return prs
class User(object):
    """Represents a GitHub user and the merged PRs seen for them."""

    def __init__(self, github_response):
        """Initialize from the GitHub API response.

        :param github_response: Decoded JSON dict for one user; the keys
            read are ``login`` and ``name``.
        """
        self.username = github_response['login']
        # Fall back to the login when no display name is set
        self.fullname = github_response['name'] or self.username
        # (owner, repo) -> list of PullRequest objects added via add_pr
        self.prs = defaultdict(list)
        # (owner, repo) -> (earliest merged_at, latest merged_at)
        self.pr_merge_range = dict()

    def add_pr(self, pr_obj):
        """Add a merged PullRequest.

        Records the PR under its repository and widens that repository's
        merge range to include the PR's ``merged_at``.
        """
        key = (pr_obj.owner, pr_obj.repo)
        self.prs[key].append(pr_obj)
        merged_at = pr_obj.merged_at
        if key in self.pr_merge_range:
            start, end = self.pr_merge_range[key]
            start = min(start, merged_at)
            end = max(end, merged_at)
        else:
            start = end = merged_at
        self.pr_merge_range[key] = (start, end)

    def is_new(self, owner, repo, start, end):
        """Is the user a new contributor in this range?

        :param owner: Repository owner.
        :param repo: Repository name.
        :param start: First date of the range (inclusive).
        :param end: Last date of the range (inclusive).
        :return: True when the user's earliest recorded merge in the
            repository falls inside the range; False otherwise, including
            when no PR has been recorded for that repository.
        """
        key = (owner, repo)
        if key not in self.pr_merge_range:
            return False
        merge_start, _merge_end = self.pr_merge_range[key]
        # merged_at is a datetime while the range bounds are dates
        return start <= merge_start.date() <= end
def get_user_data(prs, client_id, client_secret):
    """Get user data from PR data.

    Fetches each distinct PR author once from GitHub and attaches every PR
    to its author's ``User`` object.

    :param prs: Iterable of ``(owner, repo, number, PullRequest)`` tuples.
    :param client_id: GitHub OAuth2 client ID.
    :param client_secret: GitHub OAuth2 client secret.
    :return: Dict mapping username to ``User``.
    :raises Exception: If GitHub answers a user request with an error
        document (a JSON object carrying a ``message`` key).
    """
    users = {}
    # Credentials go in HTTP basic auth; GitHub no longer accepts them as
    # query parameters.
    auth = (client_id, client_secret)
    for _owner, _repo, _number, pr in prs:
        username = pr.username

        # Initialize the User if needed
        if username not in users:
            print(pr.user_url, file=sys.stderr)
            resp = requests.get(pr.user_url, auth=auth)

            # Abort if the return is an error
            out = resp.json()
            if 'message' in out:
                pprint.pprint(out, stream=sys.stderr)
                raise Exception(resp.text)
            users[username] = User(out)

        # Record the PR so the user's merge range (and therefore
        # User.is_new) reflects it.
        users[username].add_pr(pr)
    return users
# Matches one entry of a Link: header, e.g.  <https://...?page=2>; rel="next"
re_link_spec = re.compile(r"""(?x)  # Be verbose
    <(?P<url>[^>]*)>;               # An URL wrapped in <>
    \srel="(?P<rel>[^"]*)"          # rel="type"
""")


def get_links(link_header):
    """Process the links from a GitHub API Link: header.

    :param link_header: Raw header value; None or an empty string is
        treated as "no links".
    :return: Dict mapping each rel type (such as ``next``) to its URL.
    """
    if not link_header:
        return {}
    return {rel: url for url, rel in re_link_spec.findall(link_header)}
def get_pr_counts(pr_data, start, end):
    """Create links to merged PR lists, with counts.

    :param pr_data: Iterable of ``(owner, repo, number, pr)`` tuples where
        ``pr.merged_at`` is a datetime.
    :param start: First date of the range (inclusive).
    :param end: Last date of the range (inclusive).
    :return: Tuple ``(text, repo_counts)``: ``text`` is one Markdown list
        item per repository, joined by newlines; ``repo_counts`` is an
        OrderedDict of ``(owner, repo) -> count``, most PRs first.
    """
    # Count all the PRs per repo that are in the range.  merged_at is a
    # datetime while the bounds are dates, so compare on the date part.
    counts = Counter()
    for owner, repo, _number, obj in pr_data:
        if start <= obj.merged_at.date() <= end:
            counts[(owner, repo)] += 1

    # Create Markdown links to the GitHub paginated view
    line_tmpl = ("- [%(count)s %(owner)s/%(repo)s PR%(s)s]"
                 "(https://github.com/%(owner)s/%(repo)s/pulls"
                 "?q=is:pr+is:closed+merged:%(start)s..%(end)s)")
    lines = []
    repo_counts = OrderedDict()
    for key, count in counts.most_common():
        repo_counts[key] = count
        owner, repo = key
        lines.append(line_tmpl % {
            'count': count,
            's': '' if count == 1 else 's',
            'owner': owner,
            'repo': repo,
            'start': start.isoformat(),
            'end': end.isoformat(),
        })
    return "\n".join(lines), repo_counts
def get_pr_details(pr_data, user_data, start, end, counts):
    """Create summaries of the PRs merged.

    :param pr_data: Iterable of ``(owner, repo, number, PullRequest)``.
    :param user_data: Dict mapping username to ``User``.
    :param start: First date of the range (inclusive).
    :param end: Last date of the range (inclusive).
    :param counts: Mapping keyed by ``(owner, repo)``; only its iteration
        order is used, to order the repositories in the output.
    :return: Tuple ``(details, total, new_user_count, new_prs)``:
        the Markdown list text, the number of entries, the number of
        distinct first-time contributors, and the number of PRs from them.
    """
    entry_tmpl = (
        "- %(title_wrap)s\n"
        " ([%(repo_short)s PR %(number)s]"
        "(https://github.com/%(owner)s/%(repo)s/pull/%(number)s)),"
        " from%(first_time)s\n"
        " [%(fullname)s](https://github.com/%(username)s).")

    # Bucket the in-range PRs by repository in one pass, rather than
    # rescanning all of pr_data for each repository.
    in_range = defaultdict(list)
    for owner, repo, number, pr in pr_data:
        if start <= pr.merged_at.date() <= end:
            in_range[(owner, repo)].append((number, pr))

    entries = []
    new_usernames = set()
    new_prs = 0
    for key in counts:
        owner, repo = key
        for number, pr in in_range.get(key, ()):
            # quote=False keeps quotation marks in titles unescaped
            title_wrap = ' '.join(wrap(escape(pr.title, quote=False), 75))
            user = user_data[pr.username]
            is_new = user.is_new(owner, repo, start, end)
            if is_new:
                new_prs += 1
                new_usernames.add(pr.username)
            entries.append(entry_tmpl % {
                'repo_short': repos[key],
                'number': number,
                'owner': owner,
                'repo': repo,
                'title_wrap': title_wrap,
                'first_time': ' first-time contributor' if is_new else '',
                'fullname': user.fullname,
                'username': pr.username,
            })
    details = "\n".join(entries)
    total = len(entries)
    return details, total, len(new_usernames), new_prs
def get_args(argv=None):
    """Parse the command line.

    :param argv: Argument list to parse; defaults to ``sys.argv[1:]``.
    :return: Namespace with ``client_id``, ``client_secret`` and
        ``cachefile`` (None when ``--cachefile`` is not given).
    """
    parser = ArgumentParser(
        description='Generate the first draft of the monthly Kuma Report.')
    parser.add_argument('client_id', type=str, help='GitHub OAuth2 client ID')
    parser.add_argument('client_secret', type=str,
                        help='GitHub OAuth2 client secret')
    parser.add_argument('--cachefile', type=str,
                        help='cache file for GitHub requests')
    args = parser.parse_args(argv)
    return args
if __name__ == '__main__':
    args = get_args()
    if args.cachefile:
        # Cache GitHub responses so re-runs do not refetch every page
        requests_cache.install_cache(args.cachefile)
    report = get_report(args.client_id, args.client_secret)
    # The report goes to stdout; progress and status go to stderr
    print(report)
    print("Done!", file=sys.stderr)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment