jdurgin/ceph-release-notes

## ceph-release-notes
#!/usr/bin/env python
# Originally modified from A. Israel's script seen at
# https://gist.github.com/aisrael/b2b78d9dfdd176a232b9
"""To run this script first install the dependencies


  virtualenv v
  source v/bin/activate
  pip install githubpy GitPython requests

Generate a GitHub access token; this is needed because anonymous access
to GitHub's API will easily hit the rate limit even with a single invocation.
For details see:
https://help.github.com/articles/creating-an-access-token-for-command-line-use/

Next either set the github token as an env variable
`GITHUB_ACCESS_TOKEN` or alternatively invoke the script with
`--token` switch.

Example:

  ceph-release-notes -r tags/v0.87..origin/giant \
      $(git rev-parse --show-toplevel)

"""

from __future__ import print_function
import argparse
import github
import os
import re
import sys
import requests
import time

from git import Repo


# All patterns are raw strings so that regex escapes such as \d are not
# interpreted as (invalid) Python string escapes.

# "Fixes: #1234" / "Fixes #1234" references in PR bodies and commit messages
fixes_re = re.compile(r"Fixes\:? #(\d+)")
# lines starting with "Rev...By" (e.g. "Reviewed-by:"), case-insensitive
reviewed_by_re = re.compile(r"Rev(.*)By", re.IGNORECASE)
# labels is the list of relevant labels defined for github.com/ceph/ceph
labels = {'bluestore', 'build/ops', 'cephfs', 'common', 'core', 'mgr',
          'mon', 'performance', 'pybind', 'rdma', 'rgw', 'rbd', 'tests',
          'tools'}
# summary line of a merge commit; group 2 is the pull request number
merge_re = re.compile(r"Merge (pull request|PR) #(\d+).*")
# prefixes is the list of commit description prefixes we recognize
prefixes = ['bluestore', 'build/ops', 'cephfs', 'cephx', 'cli', 'cmake',
            'common', 'core', 'crush', 'doc', 'fs', 'librados', 'librbd',
            'log', 'mds', 'mgr', 'mon', 'msg', 'objecter', 'osd', 'pybind',
            'rbd', 'rbd-mirror', 'rbd-nbd', 'rgw', 'tests', 'tools']
signed_off_re = re.compile(r"Signed-off-by: (.+) <")
# tracker issue URLs; both http and https are accepted and the dots of the
# host name are matched literally
tracker_re = re.compile(r"https?://tracker\.ceph\.com/issues/(\d+)")
# a word character followed by "_" and a non-word character, which
# reStructuredText would read as the end of a link reference
rst_link_re = re.compile(r"([a-zA-Z0-9])_(\W)")
tracker_uri = "http://tracker.ceph.com/issues/{0}.json"


def get_original_issue(issue, verbose):
    """Resolve a Backport tracker issue to the issue it was copied from.

    Fetches ``issue`` together with its relations from the tracker's JSON
    API and inspects the relations of type ``copied_to``.

    :param issue: tracker issue number, as a string
    :param verbose: when true, print the reason for the decision taken
    :returns: the ``issue_id`` (as a string) of the single ``copied_to``
              relation when ``issue`` is in the "Backport" tracker;
              otherwise ``issue`` unchanged
    :raises requests.exceptions.RequestException: when the HTTP request
            fails or does not answer within the timeout
    """
    issue_url = "http://tracker.ceph.com/issues/" + issue

    # the timeout keeps a stalled tracker connection from blocking the
    # whole run indefinitely
    r = requests.get(tracker_uri.format(issue),
                     params={"include": "relations"},
                     timeout=60).json()

    # looking up for the original issue only makes sense
    # when dealing with an issue in the Backport tracker
    tracker_name = r["issue"]["tracker"]["name"]
    if tracker_name != "Backport":
        if verbose:
            print(issue_url + " is from the tracker " + tracker_name +
                  ", do not look for the original issue")
        return issue

    # if a Backport issue does not have a relation, keep it
    if "relations" not in r["issue"]:
        if verbose:
            print(issue_url +
                  " has no relations, do not look for the original issue")
        return issue

    copied_to = [
        str(relation['issue_id']) for relation in r["issue"]["relations"]
        if relation["relation_type"] == "copied_to"
    ]
    if not copied_to:
        if verbose:
            print(issue_url +
                  " has no copied_to relations; do not look for the" +
                  " original issue")
        return issue

    # more than one candidate is ambiguous: keep the backport issue itself
    if len(copied_to) > 1:
        if verbose:
            print("ERROR: " + issue_url +
                  " has more than one Copied To relation")
        return issue

    if verbose:
        print(issue_url +
              " is the backport of http://tracker.ceph.com/issues/" +
              copied_to[0])
    return copied_to[0]


def split_component(title, gh, number):
    """Return ``title`` carrying a component prefix.

    A title that already starts with one of ``prefixes`` followed by a
    colon is returned from that prefix onwards.  Otherwise the labels of
    GitHub issue ``number`` in ceph/ceph decide the prefix: ``doc`` for
    the ``documentation`` label, else the sorted labels that are also
    known prefixes, else ``UNKNOWN``.

    :param title: pull request title
    :param gh: GitHub API client
    :param number: pull request / issue number
    :returns: the prefixed title
    """
    pattern = '(' + '|'.join(prefixes) + ')(:.*)'
    found = re.match(pattern, title)
    if found:
        return found.group(1) + found.group(2)

    # no recognised prefix in the title: fall back to the GitHub labels
    issue = gh.repos("ceph")("ceph").issues(number).get()
    issue_labels = {entry['name'] for entry in issue['labels']}
    if 'documentation' in issue_labels:
        return 'doc: ' + title

    components = sorted(set(prefixes) & issue_labels)
    if not components:
        return 'UNKNOWN: ' + title
    return ",".join(components) + ': ' + title

def _title_message(commit, pr, strict):
    title = pr['title']
    message_lines = commit.message.split('\n')
    if strict or len(message_lines) < 1:
        return (title, None)
    lines = []
    for line in message_lines[1:]:
        if reviewed_by_re.match(line):
            continue
        line = line.strip()
        if line:
            lines.append(line)
    if len(lines) == 0:
        return (title, None)
    duplicates_pr_title = lines[0] == pr['title'].strip()
    if duplicates_pr_title:
        return (title, None)
    assert len(lines) > 0, "missing message content"
    if len(lines) == 1:
        # assume that a single line means the intention is to
        # re-write the PR title
        return (lines[0], None)
    message = "    " + "\n    ".join(lines)
    return (title, message)

def _fetch_pull_request(gh, number, retries=30):
    """Fetch pull request ``number`` of ceph/ceph, retrying on failure.

    Sleeps ``2 * attempt`` seconds after each failed attempt but the last.

    :param gh: GitHub API client
    :param number: pull request number
    :param retries: maximum number of attempts
    :returns: the pull request data returned by the API client
    :raises Exception: the error of the last attempt, once all ``retries``
                       attempts have failed
    """
    for attempt in range(1, retries + 1):
        try:
            return gh.repos("ceph")("ceph").pulls(number).get()
        except Exception:
            if attempt == retries:
                # give up loudly instead of carrying on without PR data
                print(f"Could not fetch PR {number} in {retries} tries.")
                raise
            sleep_time = 2 * attempt
            print(f"Failed to fetch PR {number}, sleeping for {sleep_time} seconds")
            time.sleep(sleep_time)


def make_release_notes(gh, repo, ref, plaintext, html, verbose, strict, use_tags):
    """Print release notes for the pull requests merged in ``ref``.

    Walks the merge commits of ``ref``, fetches every merged pull request
    from GitHub, collects the issues referenced by the PR body and by the
    merged commits as well as the Signed-off-by authors, and finally
    prints one entry per pull request, sorted by title.

    :param gh: GitHub API client
    :param repo: git repository object providing ``iter_commits``
    :param ref: git revision (range) whose merge commits are examined
    :param plaintext: print plain text entries without links
    :param html: print HTML list items (only when ``plaintext`` is false);
                 with both false, reStructuredText entries are printed
    :param verbose: forwarded to ``get_original_issue``
    :param strict: skip PRs without an issue, expect a release-prefixed
                   title and map backport issues to their originals
    :param use_tags: pass the title through ``split_component``
    :raises Exception: when a pull request cannot be fetched
    """
    issue2prs = {}
    pr2issues = {}
    pr2info = {}

    for commit in repo.iter_commits(ref, merges=True):
        merge = merge_re.match(commit.summary)
        if not merge:
            continue
        number = merge.group(2)
        print("Considering PR#" + number)
        # do not pick up ceph/ceph-qa-suite.git PRs
        if int(number) < 1311:
            print("Ignoring low-numbered PR, probably picked up from"
                  " ceph/ceph-qa-suite.git")
            continue

        pr = _fetch_pull_request(gh, number)
        (title, message) = _title_message(commit, pr, strict)
        issues = []
        if pr['body']:
            issues = fixes_re.findall(pr['body']) + tracker_re.findall(
                pr['body']
            )

        # dict used as an insertion-ordered set of author names
        authors = {}
        for c in repo.iter_commits(
            "{sha1}^1..{sha1}^2".format(sha1=commit.hexsha)
        ):
            for author in re.findall(
                r"Signed-off-by:\s*(.*?)\s*<", c.message
            ):
                authors[author] = 1
            issues.extend(fixes_re.findall(c.message) +
                          tracker_re.findall(c.message))
        if authors:
            author = ", ".join(authors.keys())
        else:
            author = commit.parents[-1].author.name

        if strict and not issues:
            print("ERROR: https://github.com/ceph/ceph/pull/" +
                  str(number) + " has no associated issue")
            continue

        if strict:
            title_re = (
                r'^(?:hammer|infernalis|jewel|kraken|luminous|mimic|nautilus|octopus|pacific):\s+(' +
                '|'.join(prefixes) +
                ')(:.*)'
            )
            match = re.match(title_re, title)
            if not match:
                # reported only; the unmodified title is still used
                print("ERROR: https://github.com/ceph/ceph/pull/" +
                      str(number) + " title " + title +
                      " does not match " + title_re)
            else:
                title = match.group(1) + match.group(2)
        if use_tags:
            title = split_component(title, gh, number)

        # trim whitespace and punctuation; the backslash is part of the
        # set because the historical literal contained it as well
        title = title.strip(' \t\n\r\f\v.,;:-=\\')
        # escape asterisks, which is used by reStructuredText for inline
        # emphasis
        title = title.replace('*', '\\*')
        # and escape the underscores for noting a link
        title = rst_link_re.sub(r'\1\_\2', title)
        pr2info[number] = (author, title, message)

        for issue in set(issues):
            if strict:
                issue = get_original_issue(issue, verbose)
            issue2prs.setdefault(issue, set()).add(number)
            pr2issues.setdefault(number, set()).add(issue)
        sys.stdout.write('.')

    print(" done collecting merges.")

    if strict:
        for (issue, prs) in issue2prs.items():
            if len(prs) > 1:
                print(">>>>>>> " + str(len(prs)) + " pr for issue " +
                      issue + " " + str(prs))

    for (pr, (author, title, message)) in sorted(
        pr2info.items(), key=lambda item: item[1][1]
    ):
        if pr in pr2issues:
            if plaintext:
                issue_fmt = '#{issue}'
            elif html:
                issue_fmt = (
                    '<a href="http://tracker.ceph.com/issues/{issue}">issue#{issue}</a>'
                )
            else:
                issue_fmt = (
                    '`issue#{issue} <http://tracker.ceph.com/issues/{issue}>`_'
                )
            # numeric order keeps the output identical between runs
            issues = ", ".join(
                issue_fmt.format(issue=issue)
                for issue in sorted(pr2issues[pr], key=int)
            ) + ", "
        else:
            issues = ''
        if plaintext:
            print("* {title} ({issues}{author})".format(
                title=title,
                issues=issues,
                author=author
            ))
        elif html:
            print(
                (
                    "<li><p>{title} ({issues}<a href=\""
                    "https://github.com/ceph/ceph/pull/{pr}\""
                    ">pr#{pr}</a>, {author})</p></li>"
                ).format(
                    title=title,
                    issues=issues,
                    author=author, pr=pr
                )
            )
        else:
            print(
                (
                    "* {title} ({issues}`pr#{pr} <"
                    "https://github.com/ceph/ceph/pull/{pr}"
                    ">`_, {author})"
                ).format(
                    title=title,
                    issues=issues,
                    author=author, pr=pr
                )
            )
        if message:
            print(message)


if __name__ == "__main__":
    # The doubled backslash yields a literal "\" at the end of the first
    # example line; a single one would act as a line continuation inside
    # the string literal and join both example lines in the help output.
    desc = '''
    Make ceph release notes for a given revision. Eg usage:

    $ ceph-release-notes -r tags/v0.87..origin/giant \\
        $(git rev-parse --show-toplevel)

    It is recommended to set the github env. token in order to avoid
    hitting the api rate limits.
    '''

    def _flag_value(text):
        """Interpret an explicit ``--use-tags`` value as a boolean."""
        return text.strip().lower() not in ('', '0', 'false', 'no', 'off')

    parser = argparse.ArgumentParser(
        description=desc,
        formatter_class=argparse.RawTextHelpFormatter
    )

    parser.add_argument("--rev", "-r",
                        help="git revision range for creating release notes")
    parser.add_argument("--text", "-t",
                        action='store_true', default=None,
                        help="output plain text only, no links")
    parser.add_argument("--html",
                        action='store_true', default=None,
                        help="output html format for website blog")
    parser.add_argument("--verbose", "-v",
                        action='store_true', default=None,
                        help="verbose")
    parser.add_argument("--strict",
                        action='store_true', default=None,
                        help="strict, recommended only for backport releases")
    parser.add_argument("repo", metavar="repo",
                        help="path to ceph git repo")
    parser.add_argument(
        "--token",
        default=os.getenv("GITHUB_ACCESS_TOKEN"),
        help="Github Access Token ($GITHUB_ACCESS_TOKEN otherwise)",
    )
    # Usable as a bare flag (const=True).  A value is still accepted so
    # that existing "--use-tags VALUE" invocations keep working, but it is
    # now read as a boolean instead of any string counting as true.
    parser.add_argument("--use-tags", nargs='?', const=True, default=False,
                        type=_flag_value,
                        help="Use github tags to guess the component")

    args = parser.parse_args()
    gh = github.GitHub(
        access_token=args.token)

    make_release_notes(
        gh,
        Repo(args.repo),
        args.rev,
        args.text,
        args.html,
        args.verbose,
        args.strict,
        args.use_tags
    )
	#!/usr/bin/env python
	# Originally modified from A. Israel's script seen at
	# https://gist.github.com/aisrael/b2b78d9dfdd176a232b9
	"""To run this script first install the dependencies


	virtualenv v
	source v/bin/activate
	pip install githubpy GitPython requests

	Generate a github access token; this is needed as the anonymous access
	to Github's API will easily hit the limit even with a single invocation.
	For details see:
	https://help.github.com/articles/creating-an-access-token-for-command-line-use/

	Next either set the github token as an env variable
	`GITHUB_ACCESS_TOKEN` or alternatively invoke the script with
	`--token` switch.

	Example:

	ceph-release-notes -r tags/v0.87..origin/giant \
	$(git rev-parse --show-toplevel)

	"""

	from __future__ import print_function
	import argparse
	import github
	import os
	import re
	import sys
	import requests
	import time

	from git import Repo


	# All patterns are raw strings so that regex escapes such as \d are not
	# interpreted as (invalid) Python string escapes.

	# "Fixes: #1234" / "Fixes #1234" references in PR bodies and commit messages
	fixes_re = re.compile(r"Fixes\:? #(\d+)")
	# lines starting with "Rev...By" (e.g. "Reviewed-by:"), case-insensitive
	reviewed_by_re = re.compile(r"Rev(.*)By", re.IGNORECASE)
	# labels is the list of relevant labels defined for github.com/ceph/ceph
	labels = {'bluestore', 'build/ops', 'cephfs', 'common', 'core', 'mgr',
	          'mon', 'performance', 'pybind', 'rdma', 'rgw', 'rbd', 'tests',
	          'tools'}
	# summary line of a merge commit; group 2 is the pull request number
	# (the alternation uses a plain "|"; an escaped one matches a literal pipe)
	merge_re = re.compile(r"Merge (pull request|PR) #(\d+).*")
	# prefixes is the list of commit description prefixes we recognize
	prefixes = ['bluestore', 'build/ops', 'cephfs', 'cephx', 'cli', 'cmake',
	            'common', 'core', 'crush', 'doc', 'fs', 'librados', 'librbd',
	            'log', 'mds', 'mgr', 'mon', 'msg', 'objecter', 'osd', 'pybind',
	            'rbd', 'rbd-mirror', 'rbd-nbd', 'rgw', 'tests', 'tools']
	signed_off_re = re.compile(r"Signed-off-by: (.+) <")
	# tracker issue URLs; both http and https are accepted and the dots of the
	# host name are matched literally
	tracker_re = re.compile(r"https?://tracker\.ceph\.com/issues/(\d+)")
	# a word character followed by "_" and a non-word character, which
	# reStructuredText would read as the end of a link reference
	rst_link_re = re.compile(r"([a-zA-Z0-9])_(\W)")
	tracker_uri = "http://tracker.ceph.com/issues/{0}.json"


	def get_original_issue(issue, verbose):
	    """Resolve a Backport tracker issue to the issue it was copied from.

	    Fetches ``issue`` together with its relations from the tracker's JSON
	    API and inspects the relations of type ``copied_to``.

	    :param issue: tracker issue number, as a string
	    :param verbose: when true, print the reason for the decision taken
	    :returns: the ``issue_id`` (as a string) of the single ``copied_to``
	              relation when ``issue`` is in the "Backport" tracker;
	              otherwise ``issue`` unchanged
	    :raises requests.exceptions.RequestException: when the HTTP request
	            fails or does not answer within the timeout
	    """
	    issue_url = "http://tracker.ceph.com/issues/" + issue

	    # the timeout keeps a stalled tracker connection from blocking the
	    # whole run indefinitely
	    r = requests.get(tracker_uri.format(issue),
	                     params={"include": "relations"},
	                     timeout=60).json()

	    # looking up for the original issue only makes sense
	    # when dealing with an issue in the Backport tracker
	    tracker_name = r["issue"]["tracker"]["name"]
	    if tracker_name != "Backport":
	        if verbose:
	            print(issue_url + " is from the tracker " + tracker_name +
	                  ", do not look for the original issue")
	        return issue

	    # if a Backport issue does not have a relation, keep it
	    if "relations" not in r["issue"]:
	        if verbose:
	            print(issue_url +
	                  " has no relations, do not look for the original issue")
	        return issue

	    copied_to = [
	        str(relation['issue_id']) for relation in r["issue"]["relations"]
	        if relation["relation_type"] == "copied_to"
	    ]
	    if not copied_to:
	        if verbose:
	            print(issue_url +
	                  " has no copied_to relations; do not look for the" +
	                  " original issue")
	        return issue

	    # more than one candidate is ambiguous: keep the backport issue itself
	    if len(copied_to) > 1:
	        if verbose:
	            print("ERROR: " + issue_url +
	                  " has more than one Copied To relation")
	        return issue

	    if verbose:
	        print(issue_url +
	              " is the backport of http://tracker.ceph.com/issues/" +
	              copied_to[0])
	    return copied_to[0]


	def split_component(title, gh, number):
	    """Return ``title`` carrying a component prefix.

	    A title that already starts with one of ``prefixes`` followed by a
	    colon is returned from that prefix onwards.  Otherwise the labels of
	    GitHub issue ``number`` in ceph/ceph decide the prefix: ``doc`` for
	    the ``documentation`` label, else the sorted labels that are also
	    known prefixes, else ``UNKNOWN``.

	    :param title: pull request title
	    :param gh: GitHub API client
	    :param number: pull request / issue number
	    :returns: the prefixed title
	    """
	    # joined with a plain "|" so the prefixes form a regex alternation
	    title_re = '(' + '|'.join(prefixes) + ')(:.*)'
	    match = re.match(title_re, title)
	    if match:
	        return match.group(1) + match.group(2)

	    # no recognised prefix in the title: fall back to the GitHub labels
	    issue = gh.repos("ceph")("ceph").issues(number).get()
	    issue_labels = {it['name'] for it in issue['labels']}
	    if 'documentation' in issue_labels:
	        return 'doc: ' + title
	    item = set(prefixes).intersection(issue_labels)
	    if item:
	        return ",".join(sorted(item)) + ': ' + title
	    return 'UNKNOWN: ' + title

	def _title_message(commit, pr, strict):
	title = pr['title']
	message_lines = commit.message.split('\n')
	if strict or len(message_lines) < 1:
	return (title, None)
	lines = []
	for line in message_lines[1:]:
	if reviewed_by_re.match(line):
	continue
	line = line.strip()
	if line:
	lines.append(line)
	if len(lines) == 0:
	return (title, None)
	duplicates_pr_title = lines[0] == pr['title'].strip()
	if duplicates_pr_title:
	return (title, None)
	assert len(lines) > 0, "missing message content"
	if len(lines) == 1:
	# assume that a single line means the intention is to
	# re-write the PR title
	return (lines[0], None)
	message = " " + "\n ".join(lines)
	return (title, message)

	def _fetch_pull_request(gh, number, retries=30):
	    """Fetch pull request ``number`` of ceph/ceph, retrying on failure.

	    Sleeps ``2 * attempt`` seconds after each failed attempt but the last.

	    :param gh: GitHub API client
	    :param number: pull request number
	    :param retries: maximum number of attempts
	    :returns: the pull request data returned by the API client
	    :raises Exception: the error of the last attempt, once all ``retries``
	                       attempts have failed
	    """
	    for attempt in range(1, retries + 1):
	        try:
	            return gh.repos("ceph")("ceph").pulls(number).get()
	        except Exception:
	            if attempt == retries:
	                # give up loudly instead of carrying on without PR data
	                print(f"Could not fetch PR {number} in {retries} tries.")
	                raise
	            sleep_time = 2 * attempt
	            print(f"Failed to fetch PR {number}, sleeping for {sleep_time} seconds")
	            time.sleep(sleep_time)


	def make_release_notes(gh, repo, ref, plaintext, html, verbose, strict, use_tags):
	    """Print release notes for the pull requests merged in ``ref``.

	    Walks the merge commits of ``ref``, fetches every merged pull request
	    from GitHub, collects the issues referenced by the PR body and by the
	    merged commits as well as the Signed-off-by authors, and finally
	    prints one entry per pull request, sorted by title.

	    :param gh: GitHub API client
	    :param repo: git repository object providing ``iter_commits``
	    :param ref: git revision (range) whose merge commits are examined
	    :param plaintext: print plain text entries without links
	    :param html: print HTML list items (only when ``plaintext`` is false);
	                 with both false, reStructuredText entries are printed
	    :param verbose: forwarded to ``get_original_issue``
	    :param strict: skip PRs without an issue, expect a release-prefixed
	                   title and map backport issues to their originals
	    :param use_tags: pass the title through ``split_component``
	    :raises Exception: when a pull request cannot be fetched
	    """
	    issue2prs = {}
	    pr2issues = {}
	    pr2info = {}

	    for commit in repo.iter_commits(ref, merges=True):
	        merge = merge_re.match(commit.summary)
	        if not merge:
	            continue
	        number = merge.group(2)
	        print("Considering PR#" + number)
	        # do not pick up ceph/ceph-qa-suite.git PRs
	        if int(number) < 1311:
	            print("Ignoring low-numbered PR, probably picked up from"
	                  " ceph/ceph-qa-suite.git")
	            continue

	        pr = _fetch_pull_request(gh, number)
	        (title, message) = _title_message(commit, pr, strict)
	        issues = []
	        if pr['body']:
	            issues = fixes_re.findall(pr['body']) + tracker_re.findall(
	                pr['body']
	            )

	        # dict used as an insertion-ordered set of author names
	        authors = {}
	        for c in repo.iter_commits(
	            "{sha1}^1..{sha1}^2".format(sha1=commit.hexsha)
	        ):
	            for author in re.findall(
	                r"Signed-off-by:\s*(.*?)\s*<", c.message
	            ):
	                authors[author] = 1
	            issues.extend(fixes_re.findall(c.message) +
	                          tracker_re.findall(c.message))
	        if authors:
	            author = ", ".join(authors.keys())
	        else:
	            author = commit.parents[-1].author.name

	        if strict and not issues:
	            print("ERROR: https://github.com/ceph/ceph/pull/" +
	                  str(number) + " has no associated issue")
	            continue

	        if strict:
	            title_re = (
	                r'^(?:hammer|infernalis|jewel|kraken|luminous|mimic|nautilus|octopus|pacific):\s+(' +
	                '|'.join(prefixes) +
	                ')(:.*)'
	            )
	            match = re.match(title_re, title)
	            if not match:
	                # reported only; the unmodified title is still used
	                print("ERROR: https://github.com/ceph/ceph/pull/" +
	                      str(number) + " title " + title +
	                      " does not match " + title_re)
	            else:
	                title = match.group(1) + match.group(2)
	        if use_tags:
	            title = split_component(title, gh, number)

	        # trim whitespace and punctuation; the backslash is part of the
	        # set because the historical literal contained it as well
	        title = title.strip(' \t\n\r\f\v.,;:-=\\')
	        # escape asterisks, which is used by reStructuredText for inline
	        # emphasis
	        title = title.replace('*', '\\*')
	        # and escape the underscores for noting a link
	        title = rst_link_re.sub(r'\1\_\2', title)
	        pr2info[number] = (author, title, message)

	        for issue in set(issues):
	            if strict:
	                issue = get_original_issue(issue, verbose)
	            issue2prs.setdefault(issue, set()).add(number)
	            pr2issues.setdefault(number, set()).add(issue)
	        sys.stdout.write('.')

	    print(" done collecting merges.")

	    if strict:
	        for (issue, prs) in issue2prs.items():
	            if len(prs) > 1:
	                print(">>>>>>> " + str(len(prs)) + " pr for issue " +
	                      issue + " " + str(prs))

	    for (pr, (author, title, message)) in sorted(
	        pr2info.items(), key=lambda item: item[1][1]
	    ):
	        if pr in pr2issues:
	            if plaintext:
	                issue_fmt = '#{issue}'
	            elif html:
	                issue_fmt = (
	                    '<a href="http://tracker.ceph.com/issues/{issue}">issue#{issue}</a>'
	                )
	            else:
	                issue_fmt = (
	                    '`issue#{issue} <http://tracker.ceph.com/issues/{issue}>`_'
	                )
	            # numeric order keeps the output identical between runs
	            issues = ", ".join(
	                issue_fmt.format(issue=issue)
	                for issue in sorted(pr2issues[pr], key=int)
	            ) + ", "
	        else:
	            issues = ''
	        if plaintext:
	            print("* {title} ({issues}{author})".format(
	                title=title,
	                issues=issues,
	                author=author
	            ))
	        elif html:
	            print(
	                (
	                    "<li><p>{title} ({issues}<a href=\""
	                    "https://github.com/ceph/ceph/pull/{pr}\""
	                    ">pr#{pr}</a>, {author})</p></li>"
	                ).format(
	                    title=title,
	                    issues=issues,
	                    author=author, pr=pr
	                )
	            )
	        else:
	            print(
	                (
	                    "* {title} ({issues}`pr#{pr} <"
	                    "https://github.com/ceph/ceph/pull/{pr}"
	                    ">`_, {author})"
	                ).format(
	                    title=title,
	                    issues=issues,
	                    author=author, pr=pr
	                )
	            )
	        if message:
	            print(message)


	if __name__ == "__main__":
	    # The doubled backslash yields a literal "\" at the end of the first
	    # example line; a single one would act as a line continuation inside
	    # the string literal and join both example lines in the help output.
	    desc = '''
	    Make ceph release notes for a given revision. Eg usage:

	    $ ceph-release-notes -r tags/v0.87..origin/giant \\
	        $(git rev-parse --show-toplevel)

	    It is recommended to set the github env. token in order to avoid
	    hitting the api rate limits.
	    '''

	    def _flag_value(text):
	        """Interpret an explicit ``--use-tags`` value as a boolean."""
	        return text.strip().lower() not in ('', '0', 'false', 'no', 'off')

	    parser = argparse.ArgumentParser(
	        description=desc,
	        formatter_class=argparse.RawTextHelpFormatter
	    )

	    parser.add_argument("--rev", "-r",
	                        help="git revision range for creating release notes")
	    parser.add_argument("--text", "-t",
	                        action='store_true', default=None,
	                        help="output plain text only, no links")
	    parser.add_argument("--html",
	                        action='store_true', default=None,
	                        help="output html format for website blog")
	    parser.add_argument("--verbose", "-v",
	                        action='store_true', default=None,
	                        help="verbose")
	    parser.add_argument("--strict",
	                        action='store_true', default=None,
	                        help="strict, recommended only for backport releases")
	    parser.add_argument("repo", metavar="repo",
	                        help="path to ceph git repo")
	    parser.add_argument(
	        "--token",
	        default=os.getenv("GITHUB_ACCESS_TOKEN"),
	        help="Github Access Token ($GITHUB_ACCESS_TOKEN otherwise)",
	    )
	    # Usable as a bare flag (const=True).  A value is still accepted so
	    # that existing "--use-tags VALUE" invocations keep working, but it is
	    # now read as a boolean instead of any string counting as true.
	    parser.add_argument("--use-tags", nargs='?', const=True, default=False,
	                        type=_flag_value,
	                        help="Use github tags to guess the component")

	    args = parser.parse_args()
	    gh = github.GitHub(
	        access_token=args.token)

	    make_release_notes(
	        gh,
	        Repo(args.repo),
	        args.rev,
	        args.text,
	        args.html,
	        args.verbose,
	        args.strict,
	        args.use_tags
	    )