Skip to content

Instantly share code, notes, and snippets.

@jdurgin
Created May 4, 2021 23:31
Show Gist options
  • Save jdurgin/2be3708337e6b3ffec8e2dd23cd3b4c6 to your computer and use it in GitHub Desktop.
Save jdurgin/2be3708337e6b3ffec8e2dd23cd3b4c6 to your computer and use it in GitHub Desktop.
use py3 strings to avoid b'' around output
#!/usr/bin/env python
# Originally modified from A. Israel's script seen at
# https://gist.github.com/aisrael/b2b78d9dfdd176a232b9
"""To run this script first install the dependencies
virtualenv v
source v/bin/activate
pip install githubpy GitPython requests
Generate a GitHub access token; it is needed because anonymous access
to GitHub's API easily hits the rate limit, even with a single invocation.
For details see:
https://help.github.com/articles/creating-an-access-token-for-command-line-use/
Next, either set the GitHub token in the `GITHUB_ACCESS_TOKEN`
environment variable or invoke the script with the
`--token` switch.
Example:
ceph-release-notes -r tags/v0.87..origin/giant \
$(git rev-parse --show-toplevel)
"""
from __future__ import print_function
import argparse
import github
import os
import re
import sys
import requests
import time
from git import Repo
# Patterns are raw strings so that regex escapes such as ``\d`` are not
# parsed as (invalid) Python string escapes.

# "Fixes: #123" / "Fixes #123" references; group 1 is the number
fixes_re = re.compile(r"Fixes\:? #(\d+)")
# start of a line that reads "Rev...By" in any letter case
reviewed_by_re = re.compile(r"Rev(.*)By", re.IGNORECASE)
# labels is the list of relevant labels defined for github.com/ceph/ceph
labels = {'bluestore', 'build/ops', 'cephfs', 'common', 'core', 'mgr',
          'mon', 'performance', 'pybind', 'rdma', 'rgw', 'rbd', 'tests',
          'tools'}
# summary line of a merge commit; group 2 is the pull request number
merge_re = re.compile(r"Merge (pull request|PR) #(\d+).*")
# prefixes is the list of commit description prefixes we recognize
prefixes = ['bluestore', 'build/ops', 'cephfs', 'cephx', 'cli', 'cmake',
            'common', 'core', 'crush', 'doc', 'fs', 'librados', 'librbd',
            'log', 'mds', 'mgr', 'mon', 'msg', 'objecter', 'osd', 'pybind',
            'rbd', 'rbd-mirror', 'rbd-nbd', 'rgw', 'tests', 'tools']
# "Signed-off-by: Name <"; group 1 is the name
signed_off_re = re.compile(r"Signed-off-by: (.+) <")
# tracker issue links; accepts both http and https and matches the dots of
# the host name literally; group 1 is the issue number
tracker_re = re.compile(r"https?://tracker\.ceph\.com/issues/(\d+)")
# an underscore that follows an alphanumeric and precedes a non-word
# character
rst_link_re = re.compile(r"([a-zA-Z0-9])_(\W)")
# URL template of the JSON description of a tracker issue
tracker_uri = "http://tracker.ceph.com/issues/{0}.json"
def get_original_issue(issue, verbose):
    """Map an issue of the "Backport" tracker to the issue it was copied from.

    The issue is downloaded as JSON, relations included, from the URL built
    with the module-level ``tracker_uri`` template.

    Arguments:
        issue: the issue number (normally a string of digits).
        verbose: when true, print why the issue was or was not replaced.

    Returns:
        The ``issue_id`` (as a string) of the single ``copied_to`` relation
        when the issue belongs to the "Backport" tracker. In every other
        case ``issue`` is returned unchanged.

    Raises:
        requests.RequestException: on network failure, timeout or an HTTP
            error status.
    """
    issue_url = "http://tracker.ceph.com/issues/" + str(issue)
    # a timeout keeps one stalled connection from hanging the whole run
    response = requests.get(tracker_uri.format(issue),
                            params={"include": "relations"},
                            timeout=60)
    # fail with the HTTP error rather than on an unparsable error page
    response.raise_for_status()
    r = response.json()

    # looking up for the original issue only makes sense
    # when dealing with an issue in the Backport tracker
    tracker_name = r["issue"]["tracker"]["name"]
    if tracker_name != "Backport":
        if verbose:
            print(issue_url +
                  " is from the tracker " + tracker_name +
                  ", do not look for the original issue")
        return issue

    # if a Backport issue does not have a relation, keep it
    if "relations" not in r["issue"]:
        if verbose:
            print(issue_url +
                  " has no relations, do not look for the original issue")
        return issue

    copied_to = [
        str(i['issue_id']) for i in r["issue"]["relations"]
        if i["relation_type"] == "copied_to"
    ]
    if not copied_to:
        if verbose:
            print(issue_url +
                  " has no copied_to relations; do not look for the" +
                  " original issue")
        return issue

    # more than one candidate is ambiguous: keep the backport issue
    if len(copied_to) > 1:
        if verbose:
            print("ERROR: " + issue_url +
                  " has more than one Copied To relation")
        return issue

    if verbose:
        print(issue_url +
              " is the backport of http://tracker.ceph.com/issues/" +
              copied_to[0])
    return copied_to[0]
def split_component(title, gh, number):
    """Return ``title`` with a leading ``component:`` prefix.

    A title that already starts with one of the module-level ``prefixes``
    followed by a colon is returned from that prefix up to the end of its
    first line. Otherwise the labels of issue ``number`` of ceph/ceph are
    requested through ``gh`` and used to pick the prefix.

    Arguments:
        title: the pull request title.
        gh: the GitHub API client; only
            ``gh.repos("ceph")("ceph").issues(number).get()`` is called.
        number: the pull request number.

    Returns:
        The prefixed title:
        - ``doc: <title>`` when the issue carries the ``documentation``
          label;
        - the sorted, comma-joined labels that are also known prefixes,
          followed by ``: <title>``;
        - ``UNKNOWN: <title>`` when no label is a known prefix.
    """
    # escape the prefixes so they are always matched literally
    title_re = '(' + '|'.join(re.escape(p) for p in prefixes) + ')(:.*)'
    match = re.match(title_re, title)
    if match:
        return match.group(1) + match.group(2)

    issue = gh.repos("ceph")("ceph").issues(number).get()
    issue_labels = {it['name'] for it in issue['labels']}
    if 'documentation' in issue_labels:
        return 'doc: ' + title
    item = set(prefixes).intersection(issue_labels)
    if item:
        return ",".join(sorted(item)) + ': ' + title
    return 'UNKNOWN: ' + title
def _title_message(commit, pr, strict):
    """Pick the title and the optional message of a release note entry.

    Arguments:
        commit: the merge commit; only ``commit.message`` is read.
        pr: the pull request description; only ``pr['title']`` is read.
        strict: when true, the commit message is ignored altogether.

    Returns:
        A ``(title, message)`` tuple. ``message`` is ``None`` unless the
        commit message carries at least two lines of content after its
        first line.
    """
    title = pr['title']
    if strict:
        return (title, None)

    # content of the commit message after its first line: lines matching
    # reviewed_by_re and blank lines are dropped, the rest is stripped
    lines = []
    for line in commit.message.split('\n')[1:]:
        if reviewed_by_re.match(line):
            continue
        line = line.strip()
        if line:
            lines.append(line)

    # nothing to add, or the body merely repeats the PR title
    if not lines or lines[0] == title.strip():
        return (title, None)
    if len(lines) == 1:
        # assume that a single line means the intention is to
        # re-write the PR title
        return (lines[0], None)
    message = " " + "\n ".join(lines)
    return (title, message)
def make_release_notes(gh, repo, ref, plaintext, html, verbose, strict, use_tags):
    """Print one release note entry per pull request merged in ``ref``.

    Merge commits of ``ref`` whose summary matches ``merge_re`` are looked
    up on GitHub. For each pull request the title, the authors (names from
    ``Signed-off-by`` lines of the merged commits, falling back to the
    author of the last parent of the merge) and the referenced issues are
    collected. Entries are then printed sorted by title.

    Arguments:
        gh: the GitHub API client used to fetch pull requests and issues.
        repo: the git repository; ``iter_commits`` is called on it.
        ref: the revision or revision range to inspect.
        plaintext: print plain text entries without links.
        html: print HTML list items. When neither ``plaintext`` nor
            ``html`` is set, reStructuredText is printed.
        verbose: forwarded to ``get_original_issue``.
        strict: skip pull requests without an issue, require
            ``<release>: <prefix>: ...`` titles and map backport issues to
            their original issue.
        use_tags: derive the component prefix with ``split_component``.

    Raises:
        Exception: whatever the GitHub client raised when a pull request
            could not be fetched after all retries.
    """
    issue2prs = {}
    pr2issues = {}
    pr2info = {}

    # loop invariants, built once
    signed_off_by_re = re.compile(r"Signed-off-by:\s*(.*?)\s*<")
    strict_title_re = (
        r'^(?:hammer|infernalis|jewel|kraken|luminous|mimic|nautilus|octopus|pacific):\s+(' +
        '|'.join(prefixes) +
        ')(:.*)'
    )

    for commit in repo.iter_commits(ref, merges=True):
        merge = merge_re.match(commit.summary)
        if not merge:
            continue
        number = merge.group(2)
        print("Considering PR#" + number)
        # do not pick up ceph/ceph-qa-suite.git PRs
        if int(number) < 1311:
            print("Ignoring low-numbered PR, probably picked up from"
                  " ceph/ceph-qa-suite.git")
            continue

        pr = _fetch_pull_request(gh, number)
        (title, message) = _title_message(commit, pr, strict)

        issues = []
        if pr['body']:
            issues = (fixes_re.findall(pr['body']) +
                      tracker_re.findall(pr['body']))

        # a dict is used as an insertion-ordered set of author names
        authors = {}
        # the commits brought in by the merge
        merged = "{sha1}^1..{sha1}^2".format(sha1=commit.hexsha)
        for c in repo.iter_commits(merged):
            for author in signed_off_by_re.findall(c.message):
                authors[author] = 1
            issues.extend(fixes_re.findall(c.message) +
                          tracker_re.findall(c.message))
        if authors:
            author = ", ".join(authors.keys())
        else:
            author = commit.parents[-1].author.name

        if strict and not issues:
            print("ERROR: https://github.com/ceph/ceph/pull/" +
                  str(number) + " has no associated issue")
            continue

        if strict:
            match = re.match(strict_title_re, title)
            if not match:
                # reported only: the entry is kept with its title as is
                print("ERROR: https://github.com/ceph/ceph/pull/" +
                      str(number) + " title " + title +
                      " does not match " + strict_title_re)
            else:
                title = match.group(1) + match.group(2)
        if use_tags:
            title = split_component(title, gh, number)

        # trim surrounding whitespace, backslashes and punctuation
        title = title.strip(' \t\n\r\f\v\\.,;:-=')
        # escape asterisks, which are used by reStructuredText for inline
        # emphasis
        title = title.replace('*', '\\*')
        # and escape the underscores for noting a link
        title = rst_link_re.sub(r'\1\_\2', title)
        pr2info[number] = (author, title, message)

        for issue in set(issues):
            if strict:
                issue = get_original_issue(issue, verbose)
            issue2prs.setdefault(issue, set()).add(number)
            pr2issues.setdefault(number, set()).add(issue)
        sys.stdout.write('.')

    print(" done collecting merges.")

    if strict:
        for (issue, prs) in issue2prs.items():
            if len(prs) > 1:
                print(">>>>>>> " + str(len(prs)) + " pr for issue " +
                      issue + " " + str(prs))

    if plaintext:
        issue_format = '#{issue}'
    elif html:
        issue_format = (
            '<a href="http://tracker.ceph.com/issues/{issue}">issue#{issue}</a>'
        )
    else:
        issue_format = (
            '`issue#{issue} <http://tracker.ceph.com/issues/{issue}>`_'
        )

    for (pr, (author, title, message)) in sorted(
        pr2info.items(), key=lambda item: item[1][1]
    ):
        if pr in pr2issues:
            # sorted numerically so that the output is the same on every run
            issues = ", ".join(
                issue_format.format(issue=issue)
                for issue in sorted(pr2issues[pr], key=int)
            ) + ", "
        else:
            issues = ''
        if plaintext:
            print("* {title} ({issues}{author})".format(
                title=title,
                issues=issues,
                author=author
            ))
        elif html:
            print(
                (
                    "<li><p>{title} ({issues}<a href=\""
                    "https://github.com/ceph/ceph/pull/{pr}\""
                    ">pr#{pr}</a>, {author})</p></li>"
                ).format(
                    title=title,
                    issues=issues,
                    author=author, pr=pr
                )
            )
        else:
            print(
                (
                    "* {title} ({issues}`pr#{pr} <"
                    "https://github.com/ceph/ceph/pull/{pr}"
                    ">`_, {author})"
                ).format(
                    title=title,
                    issues=issues,
                    author=author, pr=pr
                )
            )
        if message:
            print(message)


def _fetch_pull_request(gh, number, retries=30):
    """Fetch pull request ``number`` of ceph/ceph, retrying on failure.

    After the n-th failed attempt the function sleeps ``2 * n`` seconds
    and tries again. The error of the last attempt is re-raised once
    ``retries`` attempts have failed, so the caller never carries on
    without a pull request.
    """
    for attempt in range(1, retries + 1):
        try:
            return gh.repos("ceph")("ceph").pulls(number).get()
        except Exception:
            if attempt == retries:
                print(f"Could not fetch PR {number} in {retries} tries.")
                raise
            sleep_time = 2 * attempt
            print(f"Failed to fetch PR {number}, sleeping for {sleep_time} seconds")
            time.sleep(sleep_time)
# Command line entry point: parse the arguments, build the GitHub client
# and the git repository, then print the release notes.
if __name__ == "__main__":
    # The backslash at the end of the usage line is a line continuation
    # inside the literal: both lines show up joined in the help output.
    desc = '''
Make ceph release notes for a given revision. Eg usage:
$ ceph-release-notes -r tags/v0.87..origin/giant \
$(git rev-parse --show-toplevel)
It is recommended to set the github env. token in order to avoid
hitting the api rate limits.
'''
    parser = argparse.ArgumentParser(
        description=desc,
        formatter_class=argparse.RawTextHelpFormatter
    )
    parser.add_argument("--rev", "-r",
                        help="git revision range for creating release notes")
    parser.add_argument("--text", "-t",
                        action='store_true', default=None,
                        help="output plain text only, no links")
    parser.add_argument("--html",
                        action='store_true', default=None,
                        help="output html format for website blog")
    parser.add_argument("--verbose", "-v",
                        action='store_true', default=None,
                        help="verbose")
    parser.add_argument("--strict",
                        action='store_true', default=None,
                        help="strict, recommended only for backport releases")
    parser.add_argument("repo", metavar="repo",
                        help="path to ceph git repo")
    parser.add_argument(
        "--token",
        default=os.getenv("GITHUB_ACCESS_TOKEN"),
        help="Github Access Token ($GITHUB_ACCESS_TOKEN otherwise)",
    )
    # a plain switch like the other flags: without store_true the option
    # demanded a value and any value, "false" included, enabled it
    parser.add_argument("--use-tags",
                        action='store_true', default=False,
                        help="Use github tags to guess the component")
    args = parser.parse_args()
    gh = github.GitHub(
        access_token=args.token)
    make_release_notes(
        gh,
        Repo(args.repo),
        args.rev,
        args.text,
        args.html,
        args.verbose,
        args.strict,
        args.use_tags
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment