Skip to content

Instantly share code, notes, and snippets.

@tomncooper
Created December 17, 2024 16:11
Script for analysing the age of PRs on a given GitHub repo
from argparse import ArgumentParser, Namespace
from github import Github, PullRequest
from pandas import DataFrame
def process_pr(pull_request: "PullRequest.PullRequest") -> dict:
    """Summarise a single pull request as a flat dictionary.

    Args:
        pull_request: The PyGithub pull request to summarise.

    Returns:
        A dict with the keys ``title``, ``number``, ``created``, ``updated``,
        ``comments`` (the count reported by ``get_comments``) and
        ``latest_comment`` (the ``updated_at`` of the most recently updated
        comment, or ``None`` when there are no comments). Every key is always
        present so that all rows share the same schema.
    """
    # GitHub sorts review comments by "created" or "updated"; "update" is not
    # a recognised sort key, so the newest-first ordering was not guaranteed.
    comments = pull_request.get_comments(sort="updated", direction="desc")
    # Read the count once: ``totalCount`` may need an API request each time.
    comment_count = comments.totalCount
    return {
        "title": pull_request.title,
        "number": pull_request.number,
        "created": pull_request.created_at,
        "updated": pull_request.updated_at,
        "comments": comment_count,
        # Sorted newest-first, so the first entry is the latest activity.
        "latest_comment": comments[0].updated_at if comment_count else None,
    }
def read_pull_requests(gh: "Github", repository: str) -> DataFrame:
    """Collect a summary row for each pull request of a repository.

    The pull requests are those returned by ``repo.get_pulls()`` with its
    default arguments. Progress is printed to stdout as each one is processed.

    Args:
        gh: The PyGithub client used to query the API.
        repository: The repository path in ``{owner}/{repo}`` form.

    Returns:
        A DataFrame with one row per pull request (as built by
        ``process_pr``); an empty DataFrame when there are no pull requests.
    """
    pull_requests = gh.get_repo(repository).get_pulls()
    # Read the count once instead of on every loop iteration.
    total = pull_requests.totalCount
    print(f"Found {total} PRs in repository {repository}")

    if not total:
        # Still hand back a DataFrame so callers can use it unconditionally.
        return DataFrame()

    rows: list[dict] = []
    for position, pull_request in enumerate(pull_requests, start=1):
        percent_complete = (position / total) * 100
        print(f"({percent_complete:.2f}%) Processing PR {pull_request.number}")
        rows.append(process_pr(pull_request))
    return DataFrame(rows)
def create_parser() -> ArgumentParser:
    """Build the command-line parser for the script.

    Returns:
        A parser accepting a positional ``repo`` argument and an optional
        ``-t``/``--token`` argument (``None`` when omitted).
    """
    # ArgumentParser's first positional parameter is ``prog``; pass the text
    # as ``description`` so it appears as help text, not as the program name.
    parser = ArgumentParser(
        description="Script for analysing PRs in a GitHub Repository"
    )
    parser.add_argument(
        "repo",
        help="The path ({owner}/{repo}) to the repository to analyse.",
    )
    parser.add_argument(
        "-t",
        "--token",
        required=False,
        help="The GitHub access token to use",
    )
    return parser
def main() -> None:
    """Parse the command line, fetch the PR data and write it to a CSV file.

    The output file is named ``raw_pull_request_data_{owner}-{repo}.csv``
    (the ``/`` in the repository path is replaced by ``-``) and is given as a
    relative path, so it is created in the current working directory.
    """
    args: Namespace = create_parser().parse_args()
    if args.token is not None:
        gh = Github(args.token)
    else:
        print("No access token provided, this will use the public API rate limit (60 rph)")
        gh = Github()

    pull_requests: DataFrame = read_pull_requests(gh, args.repo)
    # Build the file name outside the f-string: reusing double quotes inside a
    # double-quoted f-string is a syntax error before Python 3.12.
    repo_slug = args.repo.replace("/", "-")
    pull_requests.to_csv(f"raw_pull_request_data_{repo_slug}.csv")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment