Skip to content

Instantly share code, notes, and snippets.

@mossheim
Last active March 30, 2019 19:47
Show Gist options
  • Save mossheim/cba31152d8757e957cbf013d36bee2a8 to your computer and use it in GitHub Desktop.
Save mossheim/cba31152d8757e957cbf013d36bee2a8 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
from __future__ import print_function, absolute_import
import difflib
import glob
import os
import re
import string
import subprocess
import sys
import threading
from argparse import ArgumentParser
##############################################################################
#
# Constants
#
# clang-format release numbers this script accepts; the output of
# `clang-format --version` is checked against every entry, and the list is
# printed verbatim in the error raised when none of them is found.
CLANG_FORMAT_ACCEPTABLE_VERSIONS = ["7.0.0", "7.0.1", "8.0.0"]
# Matches file names that end in one of the listed C / C++ / Objective-C
# extensions; anything else is never handed to clang-format.
FILES_REGEX = re.compile('\\.(cpp|hpp|h|c|m|mm)$') # all the extensions we format in SC (no JS!)
##############################################################################
def callo(args):
    """Run the command `args` and hand back everything it wrote to stdout.

    The output is returned exactly as subprocess.check_output() produced it;
    a non-zero exit status surfaces as subprocess.CalledProcessError.
    """
    captured_stdout = subprocess.check_output(args)
    return captured_stdout
class ClangFormat(object):
    """Wrapper around one clang-format executable.

    Constructing an instance verifies that the executable reports one of
    CLANG_FORMAT_ACCEPTABLE_VERSIONS. Afterwards the instance can check
    (`_lint`) or rewrite (`format`) individual files; clang-format is always
    invoked with `--style=file`.
    """

    def __init__(self, cf_cmd):
        """Remember the clang-format command and validate its version.

        Args:
            cf_cmd: name or path of the clang-format executable to run.

        Raises:
            ValueError: if the executable reports an unacceptable version.
        """
        self.cf_cmd = cf_cmd
        self._validate_version()

    @staticmethod
    def _to_text(data):
        """Return captured subprocess output as a native `str`.

        On Python 3 subprocess output is `bytes`, which cannot be searched
        with or concatenated to `str`; on Python 2 it already is `str` and is
        returned unchanged.
        """
        if isinstance(data, bytes) and not isinstance(data, str):
            return data.decode("utf-8", "replace")
        return data

    @staticmethod
    def _version_matches(version, cf_version):
        """Tell whether `cf_version` mentions exactly the release `version`.

        A plain substring test would accept "17.0.0" when looking for "7.0.0",
        so the match must not be preceded by a digit or dot, nor followed by
        a digit.
        """
        pattern = r"(?<![\d.])" + re.escape(version) + r"(?!\d)"
        return re.search(pattern, cf_version) is not None

    def _validate_version(self):
        """Check the `--version` output against the acceptable versions.

        Prints the version that was found on success.

        Raises:
            ValueError: if none of CLANG_FORMAT_ACCEPTABLE_VERSIONS is reported.
        """
        cf_version = self._to_text(callo([self.cf_cmd, "--version"]))
        for version in CLANG_FORMAT_ACCEPTABLE_VERSIONS:
            if self._version_matches(version, cf_version):
                print("clang-format " + version + " found: " + cf_version)
                return
        raise ValueError("clang-format found, but incorrect version at " +
                         self.cf_cmd + " with version: " + cf_version + "\nAcceptable versions: " +
                         str(CLANG_FORMAT_ACCEPTABLE_VERSIONS))

    def _lint(self, file_name, print_diff):
        """Check whether `file_name` is already formatted.

        Args:
            file_name: path of the file to check.
            print_diff: when true and the file differs, print a unified diff
                together with the command that fixes it.

        Returns:
            True if the file content equals clang-format's output for it,
            False otherwise.
        """
        with open(file_name, 'rb') as original_text:
            original_file = original_text.read()
        # Get formatted file as clang-format would format the file
        formatted_file = callo([self.cf_cmd, "--style=file", file_name])
        if original_file == formatted_file:
            return True
        if print_diff:
            # difflib refuses bytes on Python 3, so diff the text form; the
            # equality test above still compares the raw bytes.
            original_lines = self._to_text(original_file).splitlines()
            formatted_lines = self._to_text(formatted_file).splitlines()
            result = difflib.unified_diff(original_lines, formatted_lines)
            print("ERROR: Found diff for " + file_name)
            print("To fix formatting errors, run %s --style=file -i %s" % (self.cf_cmd, file_name))
            for line in result:
                print(line.rstrip())
        return False

    def format(self, file_name):
        """Reformat `file_name` in place if it is not already formatted.

        Returns:
            True if the file needed no change or clang-format exited with
            status 0, False if clang-format reported a failure.
        """
        if self._lint(file_name, print_diff=False):
            return True
        # Update the file with clang-format
        formatted = not subprocess.call([self.cf_cmd, "--style=file", "-i", file_name])
        # Version 3.8 generates files like foo.cpp~RF83372177.TMP when it formats foo.cpp
        # on Windows, we must clean these up
        if sys.platform == "win32":
            glob_pattern = file_name + "*.TMP"
            for fglob in glob.glob(glob_pattern):
                os.unlink(fglob)
        return formatted
def get_base_dir():
    """Return the top-level directory of the git work tree containing the cwd.

    The path comes from `git rev-parse --show-toplevel`. If git cannot be run
    or reports an error (for example outside a repository), a message is
    printed and the process exits with status 2.

    Returns:
        The repository root as a native `str`, without trailing whitespace.
    """
    try:
        toplevel = subprocess.check_output(['git', 'rev-parse', '--show-toplevel'])
    except (subprocess.CalledProcessError, OSError):
        # CalledProcessError: git ran but failed (not inside a work tree).
        # OSError: the git executable itself could not be started.
        print("This script must be running in a git repo")
        sys.exit(2)
    # Python 3 returns bytes here; callers compare the result with
    # os.getcwd() and join it with str paths, so hand back str everywhere.
    if not isinstance(toplevel, str):
        toplevel = toplevel.decode(sys.getfilesystemencoding() or "utf-8")
    return toplevel.rstrip()
class Repo(object):
    """Wrapper around the git command line for a single repository.

    Every git invocation is pointed at `path` through --git-dir/--work-tree,
    and `root` caches the top-level directory git reports for it. Captured
    git output is always handed back as native `str`.
    """

    def __init__(self, path):
        """Bind to the repository at `path` and look up its top-level directory.

        Args:
            path: directory that contains the repository's `.git` directory.
        """
        self.path = self._to_text(path)
        self.root = self._get_root()

    @staticmethod
    def _to_text(data):
        """Return `data` as a native `str`.

        Python 3 subprocess output is `bytes`, but this class matches it
        against str prefixes, a str regex and joins it with str paths.
        Python 2 output already is `str` and is returned unchanged.
        surrogateescape keeps undecodable bytes recoverable.
        """
        if isinstance(data, bytes) and not isinstance(data, str):
            return data.decode("utf-8", "surrogateescape")
        return data

    def _git_command(self, args):
        """Build the full argv for running `git <args>` against this repo."""
        # These two flags are the equivalent of -C in newer versions of Git
        # but we use these to support versions pre 1.8.5 but it depends on the command
        # and what the current directory is
        return ['git', '--git-dir', os.path.join(self.path, ".git"),
                '--work-tree', self.path] + args

    def _callgito(self, args):
        """Call git for this repository, and return the captured output as text."""
        return self._to_text(callo(self._git_command(args)))

    def _callgit(self, args, stdout=None):
        """Call git for this repository without capturing output.

        This is designed to be used when git returns non-zero exit codes.

        Returns:
            The exit status of the git process.
        """
        return subprocess.call(self._git_command(args), stdout=stdout)

    def _get_local_dir(self, path):
        """Get a path relative to the git root directory.

        Absolute paths are made relative to `root`; anything else is
        returned unchanged.
        """
        if os.path.isabs(path):
            return os.path.relpath(path, self.root)
        return path

    def get_candidates(self, candidates):
        """Get the set of candidate files to check by querying the repository.

        Args:
            candidates: optional list of paths to restrict the result to;
                when empty or None every tracked candidate file is returned.

        Returns:
            Full, normalized paths for clang-format to consume.
        """
        if candidates:
            candidates = [self._get_local_dir(f) for f in candidates]
            valid_files = list(set(candidates).intersection(self.get_candidate_files()))
        else:
            valid_files = list(self.get_candidate_files())
        # Get the full file name here
        return [os.path.normpath(os.path.join(self.root, f)) for f in valid_files]

    def get_root(self):
        """Return the cached top-level directory of the repository."""
        return self.root

    def _get_root(self):
        """Ask git for the top-level directory of the repository."""
        gito = self._callgito(['rev-parse', '--show-toplevel'])
        return gito.rstrip()

    def _git_ls_files(self, cmd):
        """Run git-ls-files and filter the list of files to a valid candidate list.

        Args:
            cmd: the git arguments to run, starting with "ls-files".
        """
        gito = self._callgito(cmd)
        # Keep paths starting with "jstests" or "src" but not "src/third_party".
        # The original comment says this picks the interesting files in the
        # mongo and mongo-enterprise repos.
        # NOTE(review): confirm the prefixes suit the repository this runs on.
        file_list = [line.rstrip()
                     for line in gito.splitlines()
                     if line.startswith(("jstests", "src"))
                     and not line.startswith("src/third_party")]
        return [a for a in file_list if FILES_REGEX.search(a)]

    def get_candidate_files(self):
        """Query git to get a list of all files in the repo to consider for analysis."""
        return self._git_ls_files(["ls-files", "--cached"])

    def get_working_tree_candidate_files(self):
        """Query git to get a list of all files in the working tree to consider for analysis."""
        return self._git_ls_files(["ls-files", "--cached", "--others"])

    def get_working_tree_candidates(self):
        """Get the working-tree candidate files as full, normalized paths."""
        valid_files = list(self.get_working_tree_candidate_files())
        # Get the full file name here
        return [os.path.normpath(os.path.join(self.root, f)) for f in valid_files]

    def is_detached(self):
        """Return True if HEAD is detached (or symbolic-ref fails otherwise)."""
        # symbolic-ref returns 1 if the repo is in a detached HEAD state
        with open(os.devnull, 'w') as DEVNULL:
            return self._callgit(["symbolic-ref", "--quiet", "HEAD"], stdout=DEVNULL) != 0

    def is_ancestor(self, parent, child):
        """Return True if `parent` is an ancestor of `child`."""
        # merge base returns 0 if parent is an ancestor of child
        return not self._callgit(["merge-base", "--is-ancestor", parent, child])

    def is_commit(self, sha1):
        """Return True if `sha1` names a commit in this repository."""
        # cat-file -e returns 0 if it is a valid hash
        return not self._callgit(["cat-file", "-e", "%s^{commit}" % sha1])

    def is_working_tree_dirty(self):
        """Return True if `git diff --quiet` exits non-zero."""
        # diff returns 1 if the working tree has local changes
        return self._callgit(["diff", "--quiet"]) != 0

    def does_branch_exist(self, branch):
        """Return True if `branch` can be resolved by rev-parse."""
        # rev-parse returns 0 if the branch exists
        return not self._callgit(["rev-parse", "--verify", "--quiet", branch])

    def get_merge_base(self, commit):
        """Return the merge base of HEAD and `commit`."""
        return self._callgito(["merge-base", "HEAD", commit]).rstrip()

    def get_branch_name(self):
        """Get the current branch name, short form.

        This returns "master", not "refs/heads/master".

        Raises:
            ValueError: if HEAD is currently detached.
        """
        branch = self.rev_parse(["--abbrev-ref", "HEAD"])
        if branch == "HEAD":
            raise ValueError("Branch is currently detached")
        return branch

    # Thin pass-throughs: each runs `git <subcommand> <command...>` and returns
    # the captured output (rev_parse additionally strips trailing whitespace).

    def add(self, command):
        """Run `git add` with the given argument list."""
        return self._callgito(["add"] + command)

    def checkout(self, command):
        """Run `git checkout` with the given argument list."""
        return self._callgito(["checkout"] + command)

    def commit(self, command):
        """Run `git commit` with the given argument list."""
        return self._callgito(["commit"] + command)

    def diff(self, command):
        """Run `git diff` with the given argument list."""
        return self._callgito(["diff"] + command)

    def log(self, command):
        """Run `git log` with the given argument list."""
        return self._callgito(["log"] + command)

    def rev_parse(self, command):
        """Run `git rev-parse` and return its output without trailing whitespace."""
        return self._callgito(["rev-parse"] + command).rstrip()

    def rm(self, command):
        """Run `git rm` with the given argument list."""
        return self._callgito(["rm"] + command)

    def show(self, command):
        """Run `git show` with the given argument list."""
        return self._callgito(["show"] + command)
def get_list_from_lines(lines):
    """Split a multi-line string into a list with one entry per line.

    Trailing whitespace is removed from every entry; leading whitespace and
    empty lines are kept.
    """
    stripped = []
    for raw_line in lines.splitlines():
        stripped.append(raw_line.rstrip())
    return stripped
def validate_repo_state(commit_before_reformat, commit_after_reformat, target_branch):
    """Verify the repository is in a state the branch reformat can start from.

    The checks run in this order: both commits exist, the first is an
    ancestor of the second, HEAD is attached, the working tree is clean,
    HEAD's merge base with `commit_before_reformat` is that commit itself,
    and HEAD's merge base with `target_branch` is that same commit.

    Returns:
        The Repo object for the repository found via get_base_dir().

    Raises:
        ValueError: describing the first check that failed.
    """
    repo = Repo(get_base_dir())

    if not repo.is_commit(commit_before_reformat):
        message = "Commit before reformat '%s' is not a valid commit in this repo"
        raise ValueError(message % commit_before_reformat)

    if not repo.is_commit(commit_after_reformat):
        message = "Commit after reformat '%s' is not a valid commit in this repo"
        raise ValueError(message % commit_after_reformat)

    if not repo.is_ancestor(commit_before_reformat, commit_after_reformat):
        message = ("Commit before reformat '%s' is not a valid ancestor of commit after"
                   " reformat '%s' in this repo")
        raise ValueError(message % (commit_before_reformat, commit_after_reformat))

    if repo.is_detached():
        raise ValueError("You must not run this script in a detached HEAD state")

    if repo.is_working_tree_dirty():
        raise ValueError(
            "Your working tree has pending changes. You must have a clean working"
            " tree before proceeding.\n\nRun `git status` to see your pending changes, and then"
            " try `git stash save`, `git reset --hard`, `git submodule update` and/or committing"
            " your changes.")

    # The branch must sit directly on top of the pre-reformat commit.
    base_with_before = repo.get_merge_base(commit_before_reformat)
    if base_with_before != repo.rev_parse([commit_before_reformat]):
        message = ("Merge base is '%s'. Please rebase to '%s' and resolve all conflicts"
                   " before running this script.\n\nTo interactively rebase, use `git rebase -i %s`")
        raise ValueError(message % (base_with_before, commit_before_reformat,
                                    commit_before_reformat))

    # We assume the target branch is master, it could be a different branch if needed for testing
    base_with_target = repo.get_merge_base(target_branch)
    if base_with_target != repo.rev_parse([commit_before_reformat]):
        raise ValueError("This branch appears to already have advanced too far through the merge process")

    return repo
def get_branch_names(repo):
    """Work out the current branch name and the name for its reformatted copy.

    Returns:
        A tuple (current branch, current branch + "-reformatted").

    Raises:
        ValueError: if the "-reformatted" branch already exists.
    """
    # Everything looks good so lets start going through all the commits
    current = repo.get_branch_name()
    reformatted = current + "-reformatted"
    if not repo.does_branch_exist(reformatted):
        return (current, reformatted)
    raise ValueError("The branch '%s' already exists. Please delete the branch '%s', or rename the current branch." % (reformatted, reformatted))
def reformat_branch(clang_format, commit_before_reformat, commit_after_reformat, target_branch):
    """Reformat a branch made before a clang-format run.

    Each commit between `commit_before_reformat` and HEAD is checked out, the
    files it touches are run through clang-format, and the result is
    re-committed on top of `commit_after_reformat`. The rewritten history is
    finally given the name "<current branch>-reformatted".

    Args:
        clang_format: command used to invoke clang-format.
        commit_before_reformat: commit immediately prior to the reformat.
        commit_after_reformat: commit that contains the reformat.
        target_branch: branch the current branch is based on.

    Raises:
        ValueError: if clang-format has an unacceptable version, the script is
            not run from the repository root, or the repository state is not
            suitable (see validate_repo_state / get_branch_names).
    """
    clang_format = ClangFormat(clang_format)

    def _canonical(path):
        # git may spell the top level differently from os.getcwd() (forward
        # slashes on Windows, letter case), so compare normalized paths.
        return os.path.normcase(os.path.realpath(path))

    if _canonical(os.getcwd()) != _canonical(get_base_dir()):
        raise ValueError("reformat-branch must be run from the repo root")
    repo = validate_repo_state(commit_before_reformat, commit_after_reformat, target_branch)
    old_branch, new_branch = get_branch_names(repo)
    commits = get_list_from_lines(repo.log(["--reverse", "--pretty=format:%H", "%s..HEAD" % commit_before_reformat]))
    previous_commit_base = commit_after_reformat
    # Go through all the commits the user made on the local branch and migrate to a new branch
    # that is based on post_reformat commits instead
    for idx, commit_hash in enumerate(commits):
        print("--- Formatting " + commit_hash + (" (%s of %s)" % (idx + 1, len(commits))))
        repo.checkout(["--quiet", "--detach", commit_hash])
        deleted_files = set()
        # Format each of the files by checking out just a single commit from the user's branch
        commit_files = get_list_from_lines(repo.diff(["HEAD~", "--name-only"]))
        for commit_file in commit_files:
            # Format each file needed if it was not deleted
            if not os.path.exists(commit_file):
                print("\tSkipping file '%s' since it has been deleted in commit '%s'" % (
                    commit_file, commit_hash))
                deleted_files.add(commit_file)
                continue
            if FILES_REGEX.search(commit_file):
                clang_format.format(commit_file)
            else:
                print("\tSkipping file '%s' since it is not a file clang_format should format" %
                      commit_file)
        # Check if anything needed reformatting, and if so amend the commit
        if not repo.is_working_tree_dirty():
            print("Commit %s needed no reformatting" % commit_hash)
        else:
            repo.commit(["--all", "--amend", "--no-edit"])
        # Rebase our new commit on top the post-reformat commit
        previous_commit = repo.rev_parse(["HEAD"])
        # Checkout the new branch with the reformatted commits
        # Note: we will not name as a branch until we are done with all commits on the local branch
        repo.checkout(["--quiet", "--detach", previous_commit_base])
        # Copy each file from the reformatted commit on top of the post reformat
        diff_files = get_list_from_lines(repo.diff(["%s~..%s" % (previous_commit, previous_commit),
                                                    "--name-only"]))
        for diff_file in diff_files:
            # If the file was deleted in the commit we are reformatting, we need to delete it again
            if diff_file in deleted_files:
                repo.rm([diff_file])
                continue
            # The file has been added or modified, continue as normal
            file_contents = repo.show(["%s:%s" % (previous_commit, diff_file)])
            root_dir = os.path.dirname(diff_file)
            if root_dir and not os.path.exists(root_dir):
                os.makedirs(root_dir)
            # Write the blob byte-for-byte: a text-mode handle would translate
            # newlines on Windows and rejects bytes on Python 3. Text input is
            # encoded as UTF-8, with surrogateescape restoring any bytes that
            # could not be decoded.
            if not isinstance(file_contents, bytes):
                file_contents = file_contents.encode("utf-8", "surrogateescape")
            with open(diff_file, "wb") as new_file:
                new_file.write(file_contents)
            repo.add([diff_file])
        # Create a new commit onto clang-formatted branch
        repo.commit(["--reuse-message=%s" % previous_commit])
        previous_commit_base = repo.rev_parse(["HEAD"])
    # Create a new branch to mark the hashes we have been using
    repo.checkout(["-b", new_branch])
    print("reformat-branch is done running.\n")
    print("A copy of your branch has been made named '%s', and formatted with clang-format.\n" % new_branch)
    print("The original branch has been left unchanged.")
    print("If you have not just done so, the next step is to rebase the new branch on '%s'.\n" % target_branch)
    print("To undo this, run `git checkout %s && git branch -D %s`" % (old_branch, new_branch))
def main():
    """Main entry point.

    Parses the command line and runs reformat_branch(). Either all three
    positional arguments (commit before reformat, commit after reformat,
    target branch) are given, or `-b/--base` selects a known preset.

    Exit status: 2 when neither form is supplied (help is printed), 3 for an
    unknown base branch, 1 when the reformat is rejected with a ValueError.
    """
    parser = ArgumentParser(
        usage='''clang_format.py -b 3.10 ~OR~ clang_format.py tag-reformat-3.10 3.10 3.10'
PLEASE READ.
This script formats a branch past the great reformatting wall. It can be run two ways:
1. clang_format.py commit-right-before-reformat commit-after-reformat original-branch
2. clang_format.py -b 3.10 # or develop
The first usage is better if you think you may encounter extra merge conflicts.
This script requires:
- you have a clean working directory
- you have rebased your branch on commit-right-before-reformat (implicitly for the second usage)
- you have the branch currently checked out
- various other logical requirements for the rebase to work
If there is an issue, this script will most likely detect it and provide you with
commands to fix it.
''')
    parser.add_argument("-c", "--clang-format", dest="clang_format", default='clang-format',
                        help='Command to use for clang-format')
    parser.add_argument("-b", "--base", dest="base_branch", help='Tries to rebase on the tip of this'
                        + ' branch given a base branch name (experimental). This should be the main branch the'
                        + ' current branch is based on (3.10 or develop)')
    parser.add_argument("commit1", help="commit immediately prior to reformat", nargs='?', default='')
    parser.add_argument("commit2", help="commit after reformat", nargs='?', default='')
    parser.add_argument("target", help="target branch name (likely 3.10 or develop)", nargs='?', default='')
    options = parser.parse_args()

    if not (options.commit1 and options.commit2 and options.target):
        # The three-argument form is incomplete, so a base branch is required.
        if not options.base_branch:
            parser.print_help()
            sys.exit(2)
        # TODO update this when formatting is done
        # base branch -> (commit before reformat, commit after reformat);
        # the commit after the reformat doubles as the target.
        known_bases = {
            '3.10': ('tag-reformat-3.10', 'format-3.10'),
            'develop': ('tag-reformat-develop', 'format-develop'),
        }
        if options.base_branch not in known_bases:
            print("Don't know how to use this base branch: %s. Try using the three-argument version of this script"
                  % options.base_branch)
            sys.exit(3)
        options.commit1, options.commit2 = known_bases[options.base_branch]
        options.target = options.commit2

    try:
        reformat_branch(options.clang_format, options.commit1, options.commit2, options.target)
    except ValueError as ve:
        print("*** ERROR:\n" + str(ve) + "\n")
        # Report the failure to the caller instead of exiting with status 0.
        sys.exit(1)
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment