Skip to content

Instantly share code, notes, and snippets.

@kalj
Last active December 15, 2023 10:24
Show Gist options
  • Save kalj/80b94b0a529d3426db7fd47b0f076c7b to your computer and use it in GitHub Desktop.
Save kalj/80b94b0a529d3426db7fd47b0f076c7b to your computer and use it in GitHub Desktop.
Clang format a repository
#!/usr/bin/env python3
# With -i, apply the formatting in place instead of only reporting which files need it.
# With -r <refspec>, only format the lines changed relative to revision <refspec>.
#
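# The script looks in the repo root for a `.clang-format-ignore` file containing one regular expression per line; files whose repo-relative path matches a pattern (the leading `/` is dropped before matching) are ignored, e.g.,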
#
# /some/subdir/.*
#
#
# As always, clang-format settings are taken from `.clang-format` in the repo root.
#
import argparse
from pathlib import Path
import difflib
import re
import subprocess
import sys
import multiprocessing
from multiprocessing.pool import ThreadPool
# Name of the clang-format executable that do_formatting() invokes.
BINARY_NAME = "clang-format-11"
# File suffixes eligible for formatting; gather_files() excludes every other suffix.
SUFFIXES = [".h", ".c", ".cpp", ".cc", ".cu", ".cuh", ".t", ".hpp", ".cut"]
# Top-level directory of the current git repository, resolved once at import time
# by running `git rev-parse --show-toplevel`. Used as cwd for all subprocess calls.
REPO_ROOT = Path(subprocess.check_output(["git", "rev-parse", "--show-toplevel"]).decode().strip())
def gather_files(revision):
    """Collect the files (and, in diff mode, the line ranges) to run clang-format on.

    Files whose suffix is not in ``SUFFIXES`` are skipped, as are files whose
    repo-relative POSIX path matches (from its start) one of the regular
    expressions listed in ``<repo root>/.clang-format-ignore``.

    Args:
        revision: Git revision to diff the working tree against. When truthy,
            only lines added or changed relative to it are selected; otherwise
            every tracked file is selected as a whole.

    Returns:
        A list of ``(filename, line_ranges)`` tuples. ``filename`` is relative
        to the repository root. In diff mode ``line_ranges`` is a list of
        ``"start:end"`` strings (inclusive, taken from the ``+`` side of each
        hunk header); in whole-file mode it is an empty list.

    Exits the process with git's return code (after printing its stderr) when
    ``git diff`` fails, e.g. because ``revision`` does not exist.
    """
    ignores_file = REPO_ROOT / ".clang-format-ignore"
    ignore_patterns = []
    if ignores_file.exists():
        with open(ignores_file) as handle:
            for raw_pattern in handle:
                pattern = raw_pattern.strip()
                if not pattern:
                    # A blank line would compile to an empty regex, which
                    # matches every path and would exclude the whole repo.
                    continue
                if pattern.startswith("/"):
                    # Patterns are written repo-rooted ("/sub/dir/.*") but are
                    # matched against paths without a leading slash.
                    pattern = pattern[1:]
                # Compile once here rather than once per checked path.
                ignore_patterns.append(re.compile(pattern))

    def excluded(relpath):
        """Return True if the repo-relative path must not be formatted."""
        if relpath.suffix not in SUFFIXES:
            return True
        posix_path = relpath.as_posix()
        return any(p.match(posix_path) for p in ignore_patterns)

    if not revision:
        ret = subprocess.run(["git", "ls-files"], capture_output=True, cwd=REPO_ROOT)
        files = ret.stdout.decode().splitlines()
        # `git ls-files` runs in REPO_ROOT, so its paths must be resolved
        # against REPO_ROOT, not against the current working directory.
        return [(fn, []) for fn in files if not excluded(Path(fn)) and (REPO_ROOT / fn).is_file()]

    # Diff mode: extract the changed line ranges for each file.
    ret = subprocess.run(["git", "diff", revision], capture_output=True, cwd=REPO_ROOT)
    if ret.returncode != 0:
        # Without this check a bad revision would look like "nothing to format".
        print(ret.stderr.decode(errors="replace"))
        sys.exit(ret.returncode)

    # NOTE(review): `\S*` stops at whitespace, so paths containing spaces (or
    # paths git emits in quoted form) are not handled by this header pattern.
    file_header = re.compile(r"^\+\+\+\ [^/]/(\S*)")
    hunk_header = re.compile(r"^@@.*\+(\d+)(,(\d+))?")

    lines_per_file = {}
    filename = None
    for raw_line in ret.stdout.splitlines():
        # Diff bodies may contain non-UTF-8 bytes; only the ASCII header
        # lines matter here, so undecodable bytes are replaced, not fatal.
        line = raw_line.decode(errors="replace")
        if line.startswith("diff --git "):
            # New per-file section: forget the previous file so a section
            # without a usable "+++" header (e.g. a deletion) cannot inherit it.
            filename = None
            continue
        match = file_header.search(line)
        if match:
            filename = match.group(1)
        if filename is None:
            continue
        match = hunk_header.search(line)
        if not match:
            continue
        start_line = int(match.group(1))
        # A hunk header without ",<count>" covers exactly one line.
        line_count = int(match.group(3)) if match.group(3) else 1
        if line_count == 0:
            # Pure deletion: nothing on the new side to format.
            continue
        end_line = start_line + line_count - 1
        if not excluded(Path(filename)):
            lines_per_file.setdefault(filename, []).append(f"{start_line}:{end_line}")
    return list(lines_per_file.items())
def do_formatting(files_to_format, in_place, verbose):
    """Run clang-format on the given files in parallel, printing progress.

    Args:
        files_to_format: List of ``(filename, line_ranges)`` tuples as produced
            by ``gather_files``. ``filename`` is relative to ``REPO_ROOT``;
            each entry of ``line_ranges`` is passed to clang-format as a
            ``--lines`` argument (an empty list formats the whole file).
        in_place: If true, pass ``-i`` so clang-format rewrites the files.
            Otherwise the formatted output is compared with the file on disk.
        verbose: If true (and ``in_place`` is false), print the unified diff
            of every file whose formatted output differs.

    Returns:
        The filenames whose formatted output differs from their current
        contents. Always empty when ``in_place`` is true.

    Exits the process with clang-format's return code, after printing its
    stderr, as soon as one invocation reports failure. Exceptions raised while
    launching clang-format propagate to the caller.
    """
    command_prefix = [BINARY_NAME]
    if in_place:
        command_prefix.append("-i")

    num_files = len(files_to_format)

    def format_file(ctx):
        """Run clang-format for one (filename, line_ranges) entry."""
        filename, lines = ctx
        command = command_prefix.copy()
        for ln in lines:
            command.extend(["--lines", ln])
        command.append(filename)
        return ctx, subprocess.run(command, capture_output=True, cwd=REPO_ROOT)

    needed_formatting = []
    with ThreadPool() as pool:
        # imap_unordered hands back each result as soon as its worker is done
        # and re-raises worker exceptions. Polling a multiprocessing.Queue with
        # empty() instead is unreliable and could drop the last results.
        results = pool.imap_unordered(format_file, files_to_format)
        for i, (ctx, ret) in enumerate(results, start=1):
            if ret.returncode != 0:
                print(ret.stderr.decode())
                sys.exit(ret.returncode)

            # successful
            filename, lines = ctx
            msg = f"{i}/{num_files}: "
            msg += "Formatting" if in_place else "Checking"
            msg += f" {filename}"
            if lines:
                msg += ":" + ",".join(lines)
            # Trailing padding plus "\r" lets the next message overwrite this one.
            print(msg + " " * 30, end="\r")

            if in_place:
                continue

            newcontents = ret.stdout.decode().splitlines(keepends=True)
            # filename is relative to repo root. Read raw bytes and decode the
            # same way as clang-format's output: read_text() would translate
            # CRLF to LF (and use the locale encoding), making every line of a
            # CRLF file look different from the formatted output.
            oldcontents = (REPO_ROOT / filename).read_bytes().decode().splitlines(keepends=True)
            diff = difflib.unified_diff(
                oldcontents,
                newcontents,
                filename,
                filename,
                "(before formatting)",
                "(after formatting)",
            )
            diff_string = "".join(diff)
            if diff_string:
                needed_formatting.append(filename)
                if verbose:
                    print("")  # skip a line to remove garbled remains of loop output
                    print(diff_string)
    return needed_formatting
if __name__ == "__main__":
    # Command-line entry point. `args` is deliberately kept as a module-level
    # name (no main() wrapper) because functions in this file can see it as a
    # global.
    parser = argparse.ArgumentParser(description="")
    parser.add_argument(
        "-i", "--in-place", action="store_true", help="Apply formatting in-place instead of reporting the diff"
    )
    parser.add_argument("-r", "--revision", help="Format only the diff relative to <REVISION>")
    parser.add_argument(
        "-c",
        "--until-convergence",
        action="store_true",
        default=True,
        help="Format diff iteratively convergence (only in effect together with -i and -r)",
    )
    parser.add_argument("-v", "--verbose", action="store_true", help="be verbose")
    args = parser.parse_args()

    # Iterating only makes sense when formatting in place in diff mode:
    # reformatting changes the diff, which can expose further lines to format.
    converge = args.until_convergence and args.revision is not None and args.in_place

    if converge:
        if args.verbose:
            print("Doing in-place formatting in diff mode until convergence")

        # Check first; as long as the check reports files, format them in
        # place and re-gather, since the changed line ranges may have moved.
        targets = gather_files(args.revision)
        needed_formatting = do_formatting(targets, False, False)
        while needed_formatting:
            do_formatting(targets, True, args.verbose)
            targets = gather_files(args.revision)
            needed_formatting = do_formatting(targets, False, False)

        print("")  # skip a line to remove garbled remains of formatting loop output
        if args.verbose:
            print("Formatting converged")
    else:
        targets = gather_files(args.revision)
        needed_formatting = do_formatting(targets, args.in_place, args.verbose)
        print("")  # skip a line to remove garbled remains of formatting loop output

    # In check mode, unformatted files are reported and make the run fail.
    if needed_formatting and not args.in_place:
        print("The following files need to be formatted:")
        for fn in needed_formatting:
            print(" ", fn)
        sys.exit(1)

    sys.exit(0)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment