List pull requests merged since the last tag on the given release branch which have not yet been merged into the release branch.
#!/usr/bin/env python
# I wasn't happy with any of the GitHub libraries for Python that I tried so I
# just used the GitHub API directly. If someone would like to rewrite this
# using a library please be my guest
from __future__ import unicode_literals
import argparse
import base64
import getpass
import io
import json
import logging
import os
import re
import stat
import sys
from urllib.request import Request, urlopen
from urllib.error import HTTPError
from urllib.parse import urlencode
except ImportError:
from urllib2 import Request, urlopen, HTTPError
from urllib import urlencode
input = raw_input
except NameError:
# Because pkg_resources provides better version parsing than distutils
import pkg_resources
# This regex ensures that only the 'Conflicts:' section at the end of the
# commit message is matched (in case there are multiple 'Conflicts' sections
# which can happen).
# Group 1 is greedy, so it captures everything up to the last 'Conflicts:'
# line that is followed by at least one more line; substituting r'\1' keeps
# only that leading part of the message.
CONFLICTS_RE = re.compile(r'((?:.|\n)+)\nConflicts:(\n.*)+', flags=re.M)
# Logger obtained without a name (i.e. the root logger); shared by the code
# below.
log = logging.getLogger()
class _MaxLevelFilter(logging.Filter):
    """Logging filter that only lets through records up to a maximum level.

    ``maxlevel`` is a numeric logging level (e.g. ``logging.INFO``); records
    whose ``levelno`` is greater than it are rejected.
    """

    def __init__(self, maxlevel):
        # Initialise the base class as well so the standard Filter attributes
        # (such as ``name``) exist on the instance.
        logging.Filter.__init__(self)
        self.maxlevel = maxlevel

    def filter(self, record):
        """Return True when ``record`` is at or below ``maxlevel``."""
        return record.levelno <= self.maxlevel
class GithubRequestError(Exception):
    """Raised when a GitHub API request fails with an error message.

    The exception's first argument is the 'message' text taken from the
    API's JSON error response.
    """
# Wraps the GitHub API requests made for one owner/repo/branch and works out
# which merged pull requests are still missing from that branch.
class GithubSuggestBackports(object):
# Cache all the commits found for the given branch so we don't have to
# re-request them for each pull request
# NOTE(review): presumably a class-level attribute (indentation is lost in
# this copy); if so the same list is shared by every instance, including
# instances created for a different repository or branch -- confirm.
_cached_commits = []
def __init__(self, owner, repo, branch, username=None, password=None):
    """Store the repository coordinates and optional credentials.

    ``owner``, ``repo`` and ``branch`` are kept as given.  When both
    ``username`` and ``password`` are supplied they are combined into a
    pre-computed HTTP Basic credential string stored in ``self._auth``;
    otherwise ``self._auth`` is ``None`` and requests are sent
    unauthenticated.
    """
    self.owner = owner
    self.repo = repo
    self.branch = branch
    if username is None or password is None:
        self._auth = None
    else:
        # We can't rely on urllib2 to handle basic authentication in the
        # normal way since GitHub requests don't always have
        # www-authenticate in the headers
        auth = ':'.join((username, password)).encode('ascii')
        self._auth = base64.b64encode(auth).decode('ascii')
def _github_repo_request(self, *resource, **parameters):
    """Request a resource of this repository and return the decoded JSON.

    The positional ``resource`` parts are converted to strings and joined
    with '/' after ``BASE_URL`` + owner/repo; ``parameters`` are sent as the
    URL query string.  ``BASE_URL`` is expected to be defined at module
    level.

    Raises ``GithubRequestError`` when the server answers with an HTTP error
    whose JSON body contains a 'message'; any other ``HTTPError`` is
    re-raised.
    """
    resource = tuple(str(r) for r in resource)
    url = BASE_URL + '/'.join((self.owner, self.repo) + resource)
    if parameters:
        url += '?' + urlencode(parameters)
    log.debug('Requesting ' + url)
    req = Request(url)
    if self._auth:
        req.add_header('Authorization', 'Basic ' + self._auth)

    try:
        f = urlopen(req)
    except HTTPError as e:
        try:
            response = json.loads(e.read().decode('utf8'))
        except ValueError:
            # The error body was not valid JSON; fall back to the HTTP error
            response = None
        if isinstance(response, dict) and 'message' in response:
            raise GithubRequestError(response['message'])
        # Re-raise the original HTTP error
        raise e

    try:
        headers = f.headers
        try:
            enc = headers.get_content_charset()
        except AttributeError:
            # Python 2's header object has no get_content_charset()
            enc = headers.getparam('charset')
        # The charset may be absent from Content-Type; default to UTF-8
        content = f.read().decode(enc or 'utf8')
    finally:
        f.close()
    return json.loads(content)
def get_tags(self):
    """Return the decoded response of the repository's 'tags' resource.

    A single request is made and no page parameter is sent.
    """
    return self._github_repo_request('tags')
def get_milestones(self, state=None):
    """Return the decoded response of the repository's 'milestones' resource.

    When ``state`` is given it is forwarded as the 'state' query parameter;
    otherwise no query parameters are sent.
    """
    parameters = {} if state is None else {'state': state}
    return self._github_repo_request('milestones', **parameters)
def iter_issues(self, milestone=None, state=None):
    """Lazily yield the repository's issues, one page of results at a time.

    ``milestone`` and ``state`` are forwarded as query parameters when they
    are not ``None``.  Pages are requested starting at page 1, and the next
    page is only requested once every issue of the current page has been
    yielded.  Iteration ends at the first empty page.
    """
    parameters = {}
    if milestone is not None:
        parameters['milestone'] = milestone
    if state is not None:
        parameters['state'] = state
    parameters['page'] = 1

    while True:
        response = self._github_repo_request('issues', **parameters)
        if not response:
            # A plain return ends the generator; raising StopIteration here
            # would be turned into a RuntimeError on current Python 3
            return
        for issue in response:
            yield issue
        parameters['page'] += 1
def iter_issue_events(self, issue, filter_=None, count=None):
    """Lazily yield the events of an issue, following pagination.

    ``filter_``, when given, restricts the result to events whose 'event'
    field equals it.  ``count``, when given, is the maximum number of events
    to yield; ``None`` means no limit and ``0`` yields nothing.  A further
    page is only requested when more events are still wanted, and iteration
    ends at the first empty page.
    """
    page = 1
    while count is None or count:
        # Events can be paginated
        batch = self._github_repo_request('issues', issue, 'events',
                                          page=page)
        if not batch:
            return
        page += 1

        # A page without any matching event simply falls through to the
        # request for the next page
        for event in batch:
            if filter_ is not None and event['event'] != filter_:
                continue
            yield event
            if count is not None:
                count -= 1
                if not count:
                    return
def get_pull_request_merge_commit(self, pr):
    """Returns the full commit object of the merge commit for a pull
    request or `None` if the given PR has not been merged.

    This is different from the commit named by merge_commit_sha listed in a
    pull request in that it's the commit that actually goes into mainline
    branch.  The commit listed in merge_commit_sha only seems to be an
    artifact of how GitHub implements pull requests.
    """
    # Only the first 'merged' event of the pull request is needed
    merged_events = self.iter_issue_events(pr, filter_='merged', count=1)
    event = next(iter(merged_events), None)
    if event is None:
        return None
    return self.get_commit(event['commit_id'])
def get_commits(self, sha):
    """Get the first page of commits in the tree starting at sha.

    Commits are returned 30 at a time and paginated according to sha.  So in
    order to get the second page of commits it's necessary to use a
    subsequent call to get_commits using the sha of the last commit from
    the previous call (which will be the first commit listed in the second
    page).
    """
    return self._github_repo_request('commits', sha=sha)
def get_commit(self, sha):
    """Return a single commit.

    ``sha`` is appended to the 'commits' resource path, so the result is the
    decoded response for that one commit.
    """
    return self._github_repo_request('commits', sha)
def iter_pull_requests(self, state=None):
    """Lazily yield the repository's pull requests, one page at a time.

    ``state`` is forwarded as a query parameter when it is not ``None``.
    Pages are requested starting at page 1, and the next page is only
    requested once every pull request of the current page has been yielded.
    Iteration ends at the first empty page.
    """
    parameters = {}
    if state is not None:
        parameters['state'] = state
    parameters['page'] = 1

    while True:
        response = self._github_repo_request('pulls', **parameters)
        if not response:
            # A plain return ends the generator; raising StopIteration here
            # would be turned into a RuntimeError on current Python 3
            return
        for pr in response:
            yield pr
        parameters['page'] += 1
def get_pull_request(self, number):
    """Return the pull request with the given number, or None if not found.

    A ``GithubRequestError`` whose message is 'Not Found' is translated into
    a ``None`` result; any other ``GithubRequestError`` is re-raised.
    """
    try:
        return self._github_repo_request('pulls', str(number))
    except GithubRequestError as e:
        # Read the message from args: exceptions have no ``.message``
        # attribute on Python 3
        if e.args and e.args[0] == 'Not Found':
            return None
        raise
def find_merged_commit(self, commit, since=None):
    """
    Determines whether or not this commit was already merged into the
    release branch, and if so returns the merge commit from the branch.
    Returns `None` if the commit was not found to be merged.

    ``since`` is an optional committer date string; once the oldest cached
    commit is at or before it, no further commits are requested.  With
    ``since=None`` the search continues until no more commits are returned.
    """

    def expand_cache():
        """Append one more page of branch commits to the cache.

        Returns True if the cache grew, False if there is nothing more to
        fetch.
        """
        if not self._cached_commits:
            # Initialize with the first page of commits from the bug fix
            # branch
            next_commits = self.get_commits(self.branch)
        else:
            last_commit = self._cached_commits[-1]
            last_date = last_commit['commit']['committer']['date']
            # Only compare dates when a cut-off was actually given
            if since is not None and last_date <= since:
                return False
            # The page starting at the last cached commit repeats that
            # commit as its first entry, so skip it
            next_commits = self.get_commits(last_commit['sha'])[1:]

        if not next_commits:
            return False
        self._cached_commits.extend(next_commits)
        return True

    target = commit['commit']
    idx = 0
    while True:
        # Try growing the list of commits; but if there are no more to be
        # found return None
        if idx >= len(self._cached_commits) and not expand_cache():
            return None

        merged_commit = self._cached_commits[idx]

        # For cherry-picks we can't rely on comparing the sha, but the
        # author and commit message should be close enough
        candidate = merged_commit['commit']

        # Remove conflicts from the cherry-picked commit's commit message;
        # conflicts can cause the message to be different where it
        # otherwise wouldn't have been, and we don't care if there were
        # conflicts so long as it was merged successfully
        candidate['message'] = CONFLICTS_RE.sub(r'\1', candidate['message'])

        if (target['author'] == candidate['author'] and
                target['message'] == candidate['message']):
            return merged_commit

        idx += 1
def get_next_milestone(self):
    """Get the next open milestone that has the same version prefix as the
    branch.  For example if the repo has milestones v0.2.1 and v0.2.2 and the
    branch is v0.2.x, this will return v0.2.1.

    The prefix is the branch name without its last character, and
    milestones are ordered by the integer after the last '.' in their title.
    Raises IndexError if no open milestone matches the prefix.
    """
    prefix = self.branch[:-1]
    milestones = [m for m in self.get_milestones(state='open')
                  if m['title'].startswith(prefix)]
    if not milestones:
        raise IndexError(
            'no open milestone found with prefix {0}'.format(prefix))

    def last_version_component(milestone):
        return int(milestone['title'].rsplit('.', 1)[1])

    return min(milestones, key=last_version_component)
# Memo for get_last_tag(): None means "not looked up yet"; False is stored
# when no tag matched, so the lookup is not repeated.
_last_tag = None
# Return the tag considered to be the last one on this release branch,
# caching the answer in self._last_tag.
def get_last_tag(self):
if self._last_tag is not None:
return self._last_tag
# Parse the branch name (without a leading 'v') as a version
branch_ver = pkg_resources.parse_version(self.branch.lstrip('v'))
# NOTE(review): this sorted() call is not closed in this copy; its trailing
# argument(s) and closing parenthesis appear to be missing.
# NOTE(review): the sort key parses t['name'] without stripping 'v', unlike
# the comparison below -- confirm this is intended.
tags = sorted(self.get_tags(),
key=lambda t: pkg_resources.parse_version(t['name']),
# Get the last tag that should be in this branch
for tag in tags:
tag_ver = pkg_resources.parse_version(tag['name'].lstrip('v'))
# NOTE(review): slicing the parsed version and looking up '*x' in it assumes
# parse_version() returns a tuple-like sequence of string parts; verify
# against the pkg_resources version in use.
branch_base_ver = branch_ver[:branch_ver.index('*x')]
cmp_indx = len(branch_base_ver)
# A tag belongs to the branch when its leading components equal the branch's
if tag_ver[:cmp_indx] == branch_ver[:cmp_indx]:
self._last_tag = tag
return tag
# No tag matched: remember that with False (distinct from the None sentinel)
self._last_tag = False
# Memo for get_last_tag_commit(): None means "not looked up yet"; False is
# stored when there is no last tag.
_last_tag_commit = None

def get_last_tag_commit(self):
    """Return the commit of the last tag on this branch, or False if none.

    The result (including False) is cached in ``self._last_tag_commit`` so
    the lookup only happens once.
    """
    if self._last_tag_commit is not None:
        return self._last_tag_commit
    last_tag = self.get_last_tag()
    if last_tag:
        last_tag_commit = self.get_commit(last_tag['commit']['sha'])
    else:
        last_tag_commit = False
    self._last_tag_commit = last_tag_commit
    return last_tag_commit
# Generator yielding (pull request, merge commit sha) pairs for pull requests
# that are candidates for backporting to self.branch.
# NOTE(review): several statements in this copy are truncated or fused
# together (see the notes below); the code is left untouched.
def iter_suggested_prs(self):
next_milestone = self.get_next_milestone()
# NOTE(review): the string fragments on the next three lines are fused onto
# the preceding statements; the calls that consumed them (apparently two
# logging calls) are missing from this copy.
next_ms_num = next_milestone['number']"Finding PRs in milestone {0} that haven't been merged into "
"{1}".format(next_milestone['title'], self.branch))'Merge these into {0} by doing "git checkout {0}; git pull; '
'git cherry-pick -m 1 <SHA>"'.format(self.branch))
last_tag_commit = self.get_last_tag_commit()
if not last_tag_commit:
# There have *been* no tags of this release line so just quit
# NOTE(review): raising StopIteration inside a generator is converted to a
# RuntimeError on Python 3.7+ (PEP 479); a plain `return` is the safe form.
raise StopIteration
last_tag_date = last_tag_commit['commit']['committer']['date']
# Get the issue #s of all closed issues in the relevant milestone
# NOTE(review): the iterable of this set(...) expression is missing;
# presumably the closed issues of milestone next_ms_num -- confirm.
milestone_issues = set(issue['number'] for issue in
# Now get all PRs and filter by whether or not they belong to the
# milestone; requesting them all at once is still faster than
# requesting one at a time. This would also be easier if the API
# supported sorting on PR lists
for pr in self.iter_pull_requests(state='closed'):
# NOTE(review): the statement guarded by this `if` is missing (presumably a
# `continue` that skips PRs outside the milestone or never merged).
if (pr['number'] not in milestone_issues or not pr['merged_at']):
merge_commit = self.get_pull_request_merge_commit(pr['number'])
# Ignore commits that were merged before the last tag date
# NOTE(review): the statement guarded by this `if` is missing as well
# (presumably `continue`).
if merge_commit['commit']['committer']['date'] < last_tag_date:
# NOTE(review): this call is truncated after its first argument.
if not self.find_merged_commit(merge_commit,
yield pr, merge_commit['sha']
# Command-line entry point: parses argv, asks for GitHub credentials, collects
# the suggested pull requests and builds a shell script of git cherry-pick
# commands for them.
# NOTE(review): this copy of the function is truncated in several places (see
# the notes below); the code is left untouched.
def main(argv):
# NOTE(review): the description string and the closing parenthesis of this
# call are cut off.
parser = argparse.ArgumentParser(
description='Find pull requests that need be backported to a bug fix '
parser.add_argument('owner', metavar='OWNER',
help='owner of the repository')
parser.add_argument('repo', metavar='REPO', help='the repository name')
parser.add_argument('branch', metavar='BRANCH',
help='the name of the bug fix branch (eg. v0.2.x)')
parser.add_argument('-f', '--file', metavar='FILE',
help='save the cherry-pick script to a file; '
'otherwise it is written to stdout')
parser.add_argument('--debug', action='store_true')
args = parser.parse_args(argv)
# Configure log
# NOTE(review): most of the handler configuration is missing here: nothing
# is done with stdout_handler and the `if args.debug:` body is absent.
stdout_handler = logging.StreamHandler(sys.stdout)
if args.debug:
stderr_handler = logging.StreamHandler(sys.stderr)
# NOTE(review): the prompt text below is fused onto the addHandler() call;
# the call that emitted it is missing.
log.addHandler(stderr_handler)"Enter your GitHub username and password so that API requests "
"aren't as severely rate-limited...")
username = input('Username: ')
# getpass reads the password without echoing it
password = getpass.getpass('Password: ')
suggester = GithubSuggestBackports(args.owner, args.repo, args.branch,
username, password)
# Format fields are: PR number, merge commit sha, PR title
pr_format = '[#{0}][{1}]: {2}'
suggestions = []
for pr, sha in suggester.iter_suggested_prs():
# If sys.stdout's default encoding has a limited codepage this blows up if
# the PR title contains unencodable characters =_=
# NOTE(review): the tail of the next statement belongs to a different,
# missing call that used pr['number'], sha and title.
title = pr['title'].encode('ascii',
errors='replace').decode('ascii')['number'], sha, title))
suggestions.append((pr, sha))
# Order the suggestions by the time each pull request was merged
suggestions.sort(key=lambda p: p[0]['merged_at'])
# NOTE(review): the closing bracket of this list literal is missing.
script_lines = [
'# git commands:',
'git checkout {0} || exit 1'.format(args.branch),
'git pull upstream {0} || exit 1'.format(args.branch)
for pr, sha in suggestions:
# NOTE(review): this append() call is truncated after `sha,`.
script_lines.append('# ' + pr_format.format(pr['number'], sha,
script_lines.append('git cherry-pick -m 1 {0} || exit 1'.format(sha))
if args.file:
# NOTE(review): the open call of this `with` statement is missing.
with, 'w', encoding='utf8') as f:
f.writelines(line + '\n' for line in script_lines)
# Give the owner read/write/execute permission on the generated script
os.chmod(args.file, stat.S_IRWXU)
# NOTE(review): the `else:` before this loop and the loop body are missing;
# per the --file help text the script is otherwise written to stdout.
for line in script_lines:
# NOTE(review): the body of this guard (the call to main) is missing.
if __name__ == '__main__':
@BlackRider97 I had to change line 89 (presumably the `enc = f.headers.get_content_charset()` line in `_github_repo_request`) to:

enc = f.headers['Content-Type'].split('charset=')[1] # Content-Type will look like:   application/json; charset=utf-8

