Skip to content

Instantly share code, notes, and snippets.

@Remi-Gau
Last active March 2, 2023 13:20
Show Gist options
  • Save Remi-Gau/6194563cabcfb1b89ce885a4f762ecbc to your computer and use it in GitHub Desktop.
Save Remi-Gau/6194563cabcfb1b89ce885a4f762ecbc to your computer and use it in GitHub Desktop.
Script to get all files touched by PRs
"""Script to get all files touched by PRs."""
import shutil
from pathlib import Path
from warnings import warn
import requests
from rich import print
# GitHub account name sent together with the token as request credentials.
USERNAME = "Remi-Gau"
# may require a token if run often
# (the token is read from a "token.txt" file located next to this script)
TOKEN_FILE = Path(__file__).parent.joinpath("token.txt")
# repo to check
# (owner and repository name inserted into the GitHub API URL)
GH_USERNAME = "nilearn"
GH_REPO = "nilearn"
# When True, only the first two pull requests are downloaded.
DEBUG = False
# When True, nothing is downloaded and the diffs already present in
# OUTPUT_FOLDER are analysed instead.
USE_LOCAL = True
# Folder holding one ".diff" file per pull request.
OUTPUT_FOLDER = Path(__file__).parent / "tmp"
# Markdown table listing each touched file and the number of PRs touching it.
OUTPUT_FILE = OUTPUT_FOLDER / "output.md"
# Pull requests whose title contains one of these words are skipped.
# NOTE(review): the name is misspelled ("EXLUDE"); left as is because
# main() refers to it under this name.
EXLUDE_PR = {"title": ["format"]}
def print_to_output(output_file, all_files):
    """Write a markdown table of the touched files and their PR counts.

    Parameters
    ----------
    output_file : path-like or None
        Forwarded unchanged to ``print_line_to_output`` for every row.
    all_files : iterable of str
        File paths, with one entry for each time a file was found in a
        pull request diff. Duplicates are what gets counted.
    """
    from collections import Counter

    # Count every path in a single pass instead of rescanning the whole
    # list with ``list.count`` for each unique file.
    pr_count_per_file = Counter(all_files)

    print_line_to_output(
        output_file=output_file, text="| file | nb of PR found in |"
    )
    print_line_to_output(
        output_file=output_file, text="| ---- | ----------------- |"
    )
    # Rows are emitted in alphabetical order of the file path.
    for file in sorted(pr_count_per_file):
        print_line_to_output(
            output_file=output_file,
            text=f"| {file} | {pr_count_per_file[file]} |",
        )
def print_line_to_output(output_file, text):
    """Print to file or stdout.

    Parameters
    ----------
    output_file : path-like or None
        File the line is appended to. When None the line is printed to
        stdout instead.
    text : str
        Line to write, without trailing newline.
    """
    if output_file is None:
        print(text)
        return

    # Write the line verbatim rather than through the module-level ``print``:
    # this file imports ``print`` from rich, which is meant for console
    # rendering (markup in square brackets, line wrapping) and could alter
    # a table row on its way to the file.
    with open(output_file, "a", encoding="utf-8") as f:
        f.write(f"{text}\n")
def save_diffs(pulls, auth, output_folder):
    """Download the diff of every pull request and save it to a folder.

    Parameters
    ----------
    pulls : list of dict or None
        Pull request descriptions; each one must provide the keys
        ``"number"``, ``"title"`` and ``"diff_url"``. ``None`` or an empty
        list results in no download at all.
    auth : tuple or None
        Credentials forwarded to ``get_this_pr_diff``.
    output_folder : pathlib.Path
        Folder the ``.diff`` files are written to.
    """
    # The PR listing is None when the request failed; iterating over it
    # would raise a TypeError, so there is simply nothing to save.
    if not pulls:
        return

    for i, pull_ in enumerate(pulls):
        # In debug mode only the first two pull requests are processed.
        if DEBUG and i == 2:
            break

        print(f"\n{pull_['number']}, {pull_['title']}")

        diff = get_this_pr_diff(url=pull_["diff_url"], auth=auth)
        if diff is None:
            # The download failed: skip this pull request.
            continue

        save_diff_to_file(
            number=pull_["number"],
            name=pull_["title"],
            diff=diff,
            output_folder=output_folder,
        )
def save_diff_to_file(number, name, diff, output_folder):
    """Store one pull request diff as ``<number>_<title>.diff``.

    Parameters
    ----------
    number : int or str
        Pull request number, used as the first part of the file name.
    name : str
        Pull request title; spaces and slashes are turned into underscores.
    diff : str
        Content written to the file.
    output_folder : pathlib.Path
        Folder the file is created in.
    """
    # Both separators map to "_" so the title fits in a single path component.
    to_underscore = str.maketrans({" ": "_", "/": "_"})
    safe_title = name.translate(to_underscore)

    target = output_folder / f"{number}_{safe_title}.diff"
    with open(target, "w") as handle:
        handle.write(diff)
def get_list_of_prs(gh_username, gh_repo, auth=None, timeout=30):
    """Return the pull requests listed by the GitHub API for a repository.

    Parameters
    ----------
    gh_username : str
        Owner of the repository.
    gh_repo : str
        Name of the repository.
    auth : tuple or None
        Credentials passed to ``requests.get``.
    timeout : float
        Seconds to wait for each response, so that a stalled connection
        cannot block the script forever.

    Returns
    -------
    list of dict or None
        The decoded JSON items of all result pages, or None (after a
        warning) as soon as one request does not answer with status 200.
    """
    url = f"https://api.github.com/repos/{gh_username}/{gh_repo}/pulls?per_page=100"

    pulls = []
    while url:
        response = requests.get(url, auth=auth, timeout=timeout)
        if response.status_code != 200:
            warn(f"Error {response.status_code}: {response.text}")
            return None
        pulls.extend(response.json())
        # A page holds at most 100 items; further pages are advertised in
        # the "Link" response header, which requests exposes as `links`.
        url = response.links.get("next", {}).get("url")

    return pulls
def get_this_pr_diff(url, auth=None, timeout=30):
    """Download the diff of a single pull request.

    Parameters
    ----------
    url : str
        Address the diff is fetched from.
    auth : tuple or None
        Credentials passed to ``requests.get``.
    timeout : float
        Seconds to wait for the response, so that a stalled connection
        cannot block the script forever.

    Returns
    -------
    str or None
        Body of the response, or None (after a warning) when the request
        does not answer with status 200.
    """
    response = requests.get(url, auth=auth, timeout=timeout)
    if response.status_code != 200:
        warn(f"Error {response.status_code}: {response.text}")
        return None
    return response.text
def list_all_files_in_prs(input_folder, exclude_pr):
    """List the files touched by the pull request diffs of a folder.

    Parameters
    ----------
    input_folder : pathlib.Path
        Folder containing ``<number>_<title words joined by "_">.diff`` files.
    exclude_pr : dict
        Its ``"title"`` entry lists words; a pull request is skipped when
        one of them is exactly equal to a word of its title.

    Returns
    -------
    list of str
        One entry per ``diff --git`` header found, so a file touched by
        several pull requests appears several times.
    """
    all_files = []

    # glob yields files in arbitrary order: sort for reproducible output.
    for pull_ in sorted(input_folder.glob("*.diff")):
        pr_number, *pr_title = pull_.stem.split("_")

        if any(ex in pr_title for ex in exclude_pr["title"]):
            print(f"[red]skipping {pr_number}, {' '.join(pr_title)}[/red]")
            continue

        print(f"{pr_number}, {' '.join(pr_title)}")

        for line in pull_.read_text().splitlines():
            if line.startswith("diff --git "):
                all_files.append(_path_from_diff_header(line))

    return all_files


def _path_from_diff_header(line):
    """Extract the file path from a ``diff --git a/<path> b/<path>`` line."""
    rest = line[len("diff --git "):]

    # When both sides name the same file, the header is symmetric around
    # its middle space; splitting there keeps paths that contain spaces
    # in one piece.
    middle, remainder = divmod(len(rest) - 1, 2)
    if remainder == 0 and rest[middle:middle + 1] == " ":
        old_side, new_side = rest[:middle], rest[middle + 1:]
        if (
            old_side.startswith("a/")
            and new_side.startswith("b/")
            and old_side[2:] == new_side[2:]
        ):
            return old_side[2:]

    # Otherwise (e.g. a rename) take the first space-separated token and
    # drop its two-character "a/" prefix.
    return rest.split(" ")[0][2:]
def main():
    """Collect the files touched by pull requests and write the summary.

    Unless ``USE_LOCAL`` is set, the output folder is wiped and the diffs of
    the pull requests are downloaded into it first. The diffs found in the
    folder are then analysed and the resulting table is written to
    ``OUTPUT_FILE``.
    """
    if not USE_LOCAL:
        shutil.rmtree(OUTPUT_FOLDER, ignore_errors=True)
    # Rows are appended to the output file, so a table left over from a
    # previous run has to be removed first.
    OUTPUT_FILE.unlink(missing_ok=True)
    OUTPUT_FOLDER.mkdir(exist_ok=True)

    token = None
    if TOKEN_FILE.exists():
        # An empty token file is treated like a missing one.
        token = TOKEN_FILE.read_text().strip() or None
    auth = None if USERNAME is None or token is None else (USERNAME, token)

    if not USE_LOCAL:
        pulls = get_list_of_prs(
            gh_username=GH_USERNAME, gh_repo=GH_REPO, auth=auth
        )
        if pulls is None:
            # The listing failed (a warning was already issued) and the
            # folder was wiped above, so there is nothing to analyse.
            return
        save_diffs(pulls=pulls, auth=auth, output_folder=OUTPUT_FOLDER)

    all_files = list_all_files_in_prs(
        input_folder=OUTPUT_FOLDER, exclude_pr=EXLUDE_PR
    )
    print_to_output(output_file=OUTPUT_FILE, all_files=all_files)
if __name__ == "__main__":
    # Only run the workflow when this file is executed as a script.
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment