Skip to content

Instantly share code, notes, and snippets.

@oliverholworthy
Created March 10, 2023 12:48
Show Gist options
  • Save oliverholworthy/6d4ce5eebd077b85f234919fc5fcfc75 to your computer and use it in GitHub Desktop.
Save oliverholworthy/6d4ce5eebd077b85f234919fc5fcfc75 to your computer and use it in GitHub Desktop.
Check Release Descriptions - Linked Pull Requests
#!/usr/bin/env python3
#
# Copyright (c) 2022, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Check Release Description contains correct set of Pull Requests
This script is intended to be run from the directory of the repository to be checked.
Compares:
- pull requests mentioned in release descriptions
- pull requests mentioned in git log (between two tags)
If there are any differences:
Prints out the Pull Requests that are either:
- missing from the release description
- present in the release description, but shouldn't be
(because it wasn't present in the git history between the tags)
Assumptions:
If not true, can result in invalid results reported by this script:
- Every tag has a corresponding release, and the releases were published in logical tag order
- Pull requests are linked using `(#123)` or with a url https://github.com/{org}/{repo}/pull/123
- development branch is called `main` (can be changed in the script options)
"""
import requests
import re
import os
import click
import subprocess
from colorama import Fore, Style
from typing import List, Set
# GitHub API token sent as the Bearer credential on the REST requests below.
# It is read from the environment when the module is loaded, so a missing
# GITHUB_API_TOKEN variable raises KeyError before any command runs.
GITHUB_API_TOKEN = os.environ["GITHUB_API_TOKEN"]
def list_releases(owner: str, repo: str) -> List[dict]:
    """List all releases for a repository.

    Follows the pagination links returned by the GitHub API, so repositories
    with more releases than fit on a single page are listed completely.

    Args:
        owner: Organization or user that owns the repository.
        repo: Repository name.

    Returns:
        The release objects from the GitHub "list releases" endpoint, in the
        order the API returned them.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the API does not answer within the timeout.
    """
    url = f"https://api.github.com/repos/{owner}/{repo}/releases"
    headers = {
        "Accept": "application/vnd.github+json",
        "Authorization": f"Bearer {GITHUB_API_TOKEN}",
        "X-GitHub-Api-Version": "2022-11-28",
    }
    # Ask for the largest page size to keep the number of round trips low.
    params = {"per_page": 100}
    releases: List[dict] = []
    while url:
        response = requests.get(url, headers=headers, params=params, timeout=30)
        # Without this check an error payload (a JSON object) would be
        # returned to the caller as if it were a list of releases.
        response.raise_for_status()
        releases.extend(response.json())
        # The "next" link already carries its own query string, so the
        # explicit params are only needed for the first request.
        url = response.links.get("next", {}).get("url")
        params = None
    return releases
def pull_requests_from_release(release: dict) -> Set[str]:
    """Get the set of pull request numbers mentioned in a release body.

    References written as ``(#123)`` take precedence. Only when the body
    contains none of those does the function fall back to links of the form
    ``by @user in .../pull/123``.

    Args:
        release: Release object; its ``"body"`` entry holds the description.

    Returns:
        Pull request numbers as strings. Empty when the release has no
        description or the description mentions no pull request.
    """
    # The body may be None (or absent) when a release has no description;
    # treat that as an empty description instead of failing in re.findall.
    description = release.get("body") or ""
    pull_requests = re.findall(r"\(\#(\d+)\)", description)
    if not pull_requests:
        pull_requests = re.findall(r"by @.* in .*pull/(\d+)", description)
    return set(pull_requests)
def should_skip_changelog(owner: str, repo: str, issue_number: str) -> bool:
    """Check whether a pull request carries the ``skip-changelog`` label.

    Args:
        owner: Organization or user that owns the repository.
        repo: Repository name.
        issue_number: Number of the pull request, queried through the issue
            labels endpoint.

    Returns:
        True if one of the labels is named ``skip-changelog``.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the API does not answer within the timeout.
    """
    url = f"https://api.github.com/repos/{owner}/{repo}/issues/{issue_number}/labels"
    headers = {
        "Accept": "application/vnd.github+json",
        "Authorization": f"Bearer {GITHUB_API_TOKEN}",
        "X-GitHub-Api-Version": "2022-11-28",
    }
    response = requests.get(url, headers=headers, timeout=30)
    # An error payload is a JSON object rather than a list of labels;
    # iterating it would yield its keys and fail with an unrelated TypeError,
    # so surface the HTTP error itself.
    response.raise_for_status()
    label_names = {label["name"] for label in response.json()}
    return "skip-changelog" in label_names
def get_branch_pull_requests(branch_name: str, development_branch_name: str):
    """Collect pull request numbers from commits on a branch but not on the development branch.

    Runs ``git log`` in the current working directory and scans the commit
    subjects for a ``(#123)`` reference.

    Args:
        branch_name: Branch or tag whose own commits are inspected.
        development_branch_name: Branch whose commits are excluded.

    Returns:
        List of pull request numbers as strings, one per matching commit
        subject (duplicates are not removed).
    """
    git_command = [
        "git",
        "log",
        "--format=format:'%H %s'",
        # Per the git documentation, --not flips the meaning of the ^ prefix
        # for the revisions after it: the development branch is excluded and
        # ^branch_name is included.
        "--not",
        development_branch_name,
        f"^{branch_name}",
    ]
    log_text = subprocess.check_output(git_command).decode("utf-8")
    # No shell is involved, so the single quotes of the format string appear
    # literally around every output line; the pattern relies on the closing
    # quote and therefore only accepts a "(#N)" directly followed by it.
    return re.findall(r"\(\#(\d+)\)'", log_text)
def get_pull_requests_between(from_ref, to_ref):
    """Return the pull request numbers referenced by commits in ``from_ref..to_ref``.

    Runs ``git log`` in the current working directory and scans the commit
    subjects for a ``(#123)`` reference.

    Args:
        from_ref: Ref (for example a tag) that marks the excluded start of the range.
        to_ref: Ref that marks the included end of the range.

    Returns:
        Set of pull request numbers as strings.
    """
    revision_range = f"{from_ref}..{to_ref}"
    raw_log = subprocess.check_output(
        ["git", "log", "--format=format:'%H %s'", revision_range]
    )
    # The quotes of the format string are emitted literally (no shell strips
    # them), so a "(#N)" only matches when the closing quote follows it.
    numbers = re.findall(r"\(\#(\d+)\)'", raw_log.decode("utf-8"))
    return set(numbers)
def pull_requests_from_git(from_ref, to_ref, development_branch_name):
    """Determine from the git log which pull requests belong to the step ``from_ref`` -> ``to_ref``.

    Takes the pull requests found in ``from_ref..to_ref`` and removes those
    that appear on ``from_ref`` but not on the development branch.

    Args:
        from_ref: Previous tag.
        to_ref: Tag being checked.
        development_branch_name: Name of the development branch.

    Returns:
        Set of pull request numbers as strings.
    """
    in_range = get_pull_requests_between(from_ref, to_ref)
    # difference() accepts any iterable, which matters here because the
    # branch-only helper returns a list rather than a set.
    return in_range.difference(
        get_branch_pull_requests(from_ref, development_branch_name)
    )
@click.command()
@click.option("--repo", help="Repository name", required=True)
@click.option("--org", help="Organization name", default="NVIDIA-Merlin")
@click.option("--development-branch-name", help="Development Branch Name", default="main")
def compare_releases(repo, org, development_branch_name):
    """Compare pull requests in release descriptions with those in the git log.

    For every pair of consecutive releases, prints the pull requests that
    appear only in the release description or only in the git history.
    """
    # Releases without a publication date (drafts) cannot be ordered against
    # the others: comparing None with a timestamp string raises TypeError in
    # sorted(). Leave them out of the comparison.
    releases = [
        release
        for release in list_releases(org, repo)
        if release.get("published_at")
    ]
    releases = sorted(releases, key=lambda release: release["published_at"])
    tags = [release["tag_name"] for release in releases]
    release_by_tag = {release["tag_name"]: release for release in releases}

    for prev_tag, tag in zip(tags, tags[1:]):
        print("")
        print(tag)

        release_body_prs = pull_requests_from_release(release_by_tag[tag])
        git_prs = pull_requests_from_git(prev_tag, tag, development_branch_name)

        only_in_release_body = release_body_prs.difference(git_prs)
        # Pull requests labelled skip-changelog are expected to be absent
        # from the release description, so they are not reported as missing.
        only_in_git_body = {
            pr_number
            for pr_number in git_prs.difference(release_body_prs)
            if not should_skip_changelog(org, repo, pr_number)
        }

        if only_in_git_body or only_in_release_body:
            # The numbers are strings of digits; sort by numeric value so
            # that "9" is listed before "10".
            print("release body:")
            print(sorted(release_body_prs, key=int))
            print("git:")
            print(sorted(git_prs, key=int))
        else:
            print(f"{Fore.GREEN}{release_body_prs}{Style.RESET_ALL}")

        if only_in_release_body:
            print("only in release body:")
            print(f"{Fore.RED}{only_in_release_body}{Style.RESET_ALL}")
        if only_in_git_body:
            print("only in git log:")
            print(f"{Fore.RED}{only_in_git_body}{Style.RESET_ALL}")
# Run the click command only when the file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    compare_releases()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment