Created
July 9, 2024 09:49
-
-
Save codeinthehole/a356d4fbf5b729c23b280dea6193340c to your computer and use it in GitHub Desktop.
A Python script to categorise a list of filepaths by code owners
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
""" | |
Print a breakdown of the passed filepaths by CODEOWNER team. | |
This script requires the `codeowners` CLI tool to be installed and available on the $PATH. | |
https://github.com/hmarr/codeowners | |
Intended usage is to pipe filepaths into this script from the root of a repo: | |
cat filepaths.txt | codeowner-breakdown | |
On an M3 MBP, breaking down 5000 lines of input takes ~1 minute. | |
""" | |
import sys | |
import itertools | |
import collections | |
import subprocess | |
def _group_by_codeowner(filepaths: list[str]) -> str:
    """
    Build a per-codeowner tally for the passed filepaths.

    Each distinct filepath is looked up once and its number of occurrences is
    credited to every codeowner returned for it. The result is one
    "<codeowner>: <count>" line per codeowner, highest count first, joined
    with newlines.
    """
    # Collapse duplicates up front so each distinct path costs one lookup.
    # Tallying the sorted paths keeps the lookup order deterministic.
    occurrences = collections.Counter(sorted(filepaths))

    totals = collections.Counter()
    for path, occurrence_count in occurrences.items():
        for owner in _determine_codeowners(path):
            totals[owner] += occurrence_count

    # most_common() orders by descending count and keeps first-seen order
    # between owners whose counts are equal.
    summary = []
    for owner, total in totals.most_common():
        summary.append(f"{owner}: {total}")
    return "\n".join(summary)
def _determine_codeowners(filepath: str) -> list[str]:
    """
    Return the codeowners of the passed filepath.

    Runs the `codeowners` CLI tool once for the filepath and parses its
    output, which has the form:

        <filepath> @team1 @team2 ...

    Every whitespace-separated token after the filepath is returned, in the
    order printed. `subprocess.check_output` raises `CalledProcessError` if
    the tool exits with a non-zero status.
    """
    command = ["codeowners", filepath]
    output = subprocess.check_output(command, shell=False).decode("utf-8")

    # Strip the echoed filepath as a prefix rather than assuming it is a
    # single token, so a path containing whitespace is not mistaken for a
    # list of owners. The prefix must be followed by whitespace so that a
    # longer path sharing the same prefix is not split in the middle.
    remainder = output[len(filepath):]
    if filepath and output.startswith(filepath) and remainder[:1].isspace():
        return remainder.split()

    # The path was echoed in some other form: drop the first token instead.
    return output.split()[1:]
if __name__ == "__main__":
    # Convert STDIN into a list of filepaths, one per line. Blank lines are
    # skipped so an empty path is never handed to the `codeowners` tool.
    stripped_lines = (line.strip() for line in sys.stdin)
    filepaths = [filepath for filepath in stripped_lines if filepath]
    print(_group_by_codeowner(filepaths))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment