Skip to content

Instantly share code, notes, and snippets.

Last active March 30, 2019 19:47
Show Gist options
  • Save mossheim/cba31152d8757e957cbf013d36bee2a8 to your computer and use it in GitHub Desktop.
Save mossheim/cba31152d8757e957cbf013d36bee2a8 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
from __future__ import print_function, absolute_import
import difflib
import glob
import os
import re
import string
import subprocess
import sys
import threading
from argparse import ArgumentParser
# Constants

# clang-format releases whose output this script accepts.
CLANG_FORMAT_ACCEPTABLE_VERSIONS = [
    "7.0.0",
    "7.0.1",
    "8.0.0",
]

# Matches every file extension we format in SC (no JS!).
FILES_REGEX = re.compile(r'\.(cpp|hpp|h|c|m|mm)$')
def callo(args):
    """Call a program, and capture its output.

    args -- the command and its arguments, as a list.

    Returns whatever subprocess.check_output() returns for the command
    (bytes on Python 3). Raises subprocess.CalledProcessError when the
    command exits with a non-zero status.
    """
    return subprocess.check_output(args)
class ClangFormat(object):
    """Class encapsulates a copy of clang-format, and linting/formatting
    an individual file with it.
    """

    def __init__(self, cf_cmd):
        """Remember the clang-format command to run.

        cf_cmd -- name or path of the clang-format executable.
        """
        # NOTE(review): nothing visible in this file calls _validate_version();
        # confirm whether construction was meant to run the version check.
        self.cf_cmd = cf_cmd

    @staticmethod
    def _to_text(data):
        """Return data as text, decoding it when it is Python 3 bytes.

        On Python 2 bytes is str, so the value is returned untouched there.
        """
        if isinstance(data, bytes) and not isinstance(data, str):
            return data.decode("utf-8", "replace")
        return data

    def _validate_version(self):
        """Check that clang-format reports one of the acceptable versions.

        Raises ValueError when none of CLANG_FORMAT_ACCEPTABLE_VERSIONS occurs
        in the output of `<cf_cmd> --version`.
        """
        # check_output() yields bytes on Python 3; substring tests and string
        # concatenation below need text.
        cf_version = self._to_text(callo([self.cf_cmd, "--version"]))

        for version in CLANG_FORMAT_ACCEPTABLE_VERSIONS:
            if version in cf_version:
                print("clang-format " + version + " found: " + cf_version)
                return

        raise ValueError("clang-format found, but incorrect version at " +
                         self.cf_cmd + " with version: " + cf_version + "\nAcceptable versions: " +
                         str(CLANG_FORMAT_ACCEPTABLE_VERSIONS))

    def _lint(self, file_name, print_diff):
        """Check the specified file has the correct format.

        file_name  -- path of the file to check.
        print_diff -- when true, print a unified diff for a mis-formatted file.

        Returns True when the file already matches clang-format's output.
        """
        with open(file_name, 'rb') as original_text:
            original_file = original_text.read()

        # Get formatted file as clang-format would format the file
        formatted_file = callo([self.cf_cmd, "--style=file", file_name])

        if original_file == formatted_file:
            return True

        if print_diff:
            # difflib needs text lines; the raw comparison above stays byte-exact.
            original_lines = self._to_text(original_file).splitlines()
            formatted_lines = self._to_text(formatted_file).splitlines()
            result = difflib.unified_diff(original_lines, formatted_lines)

            print("ERROR: Found diff for " + file_name)
            print("To fix formatting errors, run %s --style=file -i %s" % (self.cf_cmd, file_name))
            for line in result:
                print(line.rstrip())

        return False

    def format(self, file_name):
        """Update the format of the specified file.

        Returns True when the file was already formatted or clang-format
        exited with status 0 after rewriting it in place.
        """
        if self._lint(file_name, print_diff=False):
            return True

        # Update the file with clang-format
        formatted = not subprocess.call([self.cf_cmd, "--style=file", "-i", file_name])

        # Version 3.8 generates files like foo.cpp~RF83372177.TMP when it formats foo.cpp
        # on Windows, we must clean these up
        if sys.platform == "win32":
            glob_pattern = file_name + "*.TMP"
            for fglob in glob.glob(glob_pattern):
                os.unlink(fglob)

        return formatted
def get_base_dir():
    """Get the top-level directory of the git repository we are running in.

    Asks git via `git rev-parse --show-toplevel`. When git cannot be run or
    reports an error, prints a message and exits with status 1.
    """
    try:
        base_dir = subprocess.check_output(['git', 'rev-parse', '--show-toplevel']).rstrip()
    except (subprocess.CalledProcessError, OSError):
        print("This script must be running in a git repo")
        # NOTE(review): the statement following the message was lost from the
        # source; exiting is assumed, since no directory can be returned here.
        sys.exit(1)

    # check_output() yields bytes on Python 3; callers compare the result with
    # os.getcwd() and join it with text path components, so hand back text.
    if isinstance(base_dir, bytes) and not isinstance(base_dir, str):
        base_dir = base_dir.decode(sys.getfilesystemencoding() or "utf-8")
    return base_dir
class Repo(object):
    """Class encapsulates all knowledge about a git repository, and its metadata
    to run clang-format.
    """

    def __init__(self, path):
        """Bind to the repository at path and look up its top-level directory.

        path -- directory that contains the repository's .git directory.
        """
        self.path = self._to_text(path, sys.getfilesystemencoding() or "utf-8")
        self.root = self._get_root()

    @staticmethod
    def _to_text(data, encoding="utf-8"):
        """Return data as text, decoding it when it is Python 3 bytes.

        On Python 2 bytes is str, so the value is returned untouched there.
        """
        if isinstance(data, bytes) and not isinstance(data, str):
            return data.decode(encoding, "replace")
        return data

    def _git_command(self, args):
        """Build the full git argument list for this repository."""
        # These two flags are the equivalent of -C in newer versions of Git
        # but we use these to support versions pre 1.8.5 but it depends on the command
        # and what the current directory is
        return ['git', '--git-dir', os.path.join(self.path, ".git"),
                '--work-tree', self.path] + args

    def _callgito(self, args):
        """Call git for this repository, and return the captured output as callo() gives it."""
        return callo(self._git_command(args))

    def _callgito_text(self, args):
        """Call git for this repository, and return the captured output as text."""
        return self._to_text(self._callgito(args))

    def _callgit(self, args, stdout=None):
        """Call git for this repository without capturing output.

        This is designed to be used when git returns non-zero exit codes.
        Returns git's exit status.
        """
        return subprocess.call(self._git_command(args), stdout=stdout)

    def _get_local_dir(self, path):
        """Get a directory path relative to the git root directory."""
        if os.path.isabs(path):
            return os.path.relpath(path, self.root)
        return path

    def get_candidates(self, candidates):
        """Get the set of candidate files to check by querying the repository.

        candidates -- optional list of paths; when non-empty only those that are
                      also candidate files in the repository are kept.

        Returns the full path to the file for clang-format to consume.
        """
        if candidates:
            candidates = [self._get_local_dir(f) for f in candidates]
            valid_files = list(set(candidates).intersection(self.get_candidate_files()))
        else:
            valid_files = list(self.get_candidate_files())

        # Get the full file name here
        return [os.path.normpath(os.path.join(self.root, f)) for f in valid_files]

    def get_root(self):
        """Return the repository's top-level directory."""
        return self.root

    def _get_root(self):
        """Ask git for the repository's top-level directory."""
        return self._callgito_text(['rev-parse', '--show-toplevel']).rstrip()

    def _git_ls_files(self, cmd):
        """Run git-ls-files and filter the list of files to a valid candidate list."""
        gito = self._callgito_text(cmd)

        # This allows us to pick all the interesting files
        # in the mongo and mongo-enterprise repos
        file_list = [line.rstrip()
                     for line in gito.splitlines()
                     if (line.startswith("jstests") or line.startswith("src"))
                     and not line.startswith("src/third_party")]

        return [a for a in file_list if FILES_REGEX.search(a)]

    def get_candidate_files(self):
        """Query git to get a list of all files in the repo to consider for analysis."""
        return self._git_ls_files(["ls-files", "--cached"])

    def get_working_tree_candidate_files(self):
        """Query git to get a list of all files in the working tree to consider for analysis."""
        return self._git_ls_files(["ls-files", "--cached", "--others"])

    def get_working_tree_candidates(self):
        """Get the set of candidate files to check by querying the repository.

        Returns the full path to the file for clang-format to consume.
        """
        valid_files = list(self.get_working_tree_candidate_files())

        # Get the full file name here
        return [os.path.normpath(os.path.join(self.root, f)) for f in valid_files]

    def is_detached(self):
        """Return git's exit status for symbolic-ref on HEAD (non-zero when detached)."""
        # symbolic-ref returns 1 if the repo is in a detached HEAD state
        with open(os.devnull, 'w') as DEVNULL:
            return self._callgit(["symbolic-ref", "--quiet", "HEAD"], stdout=DEVNULL)

    def is_ancestor(self, parent, child):
        """Return True when parent is an ancestor of child."""
        # merge base returns 0 if parent is an ancestor of child
        return not self._callgit(["merge-base", "--is-ancestor", parent, child])

    def is_commit(self, sha1):
        """Return True when sha1 names a commit in this repository."""
        # cat-file -e returns 0 if it is a valid hash
        return not self._callgit(["cat-file", "-e", "%s^{commit}" % sha1])

    def is_working_tree_dirty(self):
        """Return git's exit status for `diff --quiet` (non-zero when there are local changes)."""
        # diff returns 1 if the working tree has local changes
        return self._callgit(["diff", "--quiet"])

    def does_branch_exist(self, branch):
        """Return True when branch can be resolved by rev-parse."""
        # rev-parse returns 0 if the branch exists
        return not self._callgit(["rev-parse", "--verify", "--quiet", branch])

    def get_merge_base(self, commit):
        """Return the merge base of HEAD and commit."""
        return self._callgito_text(["merge-base", "HEAD", commit]).rstrip()

    def get_branch_name(self):
        """Get the current branch name, short form.

        This returns "master", not "refs/head/master".
        Will not work if the current branch is detached; raises ValueError then.
        """
        branch = self.rev_parse(["--abbrev-ref", "HEAD"])
        if branch == "HEAD":
            raise ValueError("Branch is currently detached")
        return branch

    def add(self, command):
        """Run `git add` with the given arguments and return its output."""
        return self._callgito_text(["add"] + command)

    def checkout(self, command):
        """Run `git checkout` with the given arguments and return its output."""
        return self._callgito_text(["checkout"] + command)

    def commit(self, command):
        """Run `git commit` with the given arguments and return its output."""
        return self._callgito_text(["commit"] + command)

    def diff(self, command):
        """Run `git diff` with the given arguments and return its output."""
        return self._callgito_text(["diff"] + command)

    def log(self, command):
        """Run `git log` with the given arguments and return its output."""
        return self._callgito_text(["log"] + command)

    def rev_parse(self, command):
        """Run `git rev-parse` with the given arguments and return its stripped output."""
        return self._callgito_text(["rev-parse"] + command).rstrip()

    def rm(self, command):
        """Run `git rm` with the given arguments and return its output."""
        return self._callgito_text(["rm"] + command)

    def show(self, command):
        """Run `git show` with the given arguments and return its output undecoded.

        The output is left exactly as captured so that file contents can be
        written back without being altered by a text decode.
        """
        return self._callgito(["show"] + command)
def get_list_from_lines(lines):
    """Convert a string containing a series of lines into a list of strings.

    Each line has its trailing whitespace removed; an empty input gives an
    empty list.
    """
    return [line.rstrip() for line in lines.splitlines()]
def validate_repo_state(commit_before_reformat, commit_after_reformat, target_branch):
    """Check that the repository is in a state reformat-branch can work with.

    commit_before_reformat -- commit immediately prior to the reformat.
    commit_after_reformat  -- commit that applied the reformat.
    target_branch          -- branch the current branch is based on.

    Returns the Repo for the current repository. Raises ValueError, with a
    message describing the problem, when any precondition fails.
    """
    repo = Repo(get_base_dir())

    if not repo.is_commit(commit_before_reformat):
        raise ValueError("Commit before reformat '%s' is not a valid commit in this repo" %
                         commit_before_reformat)

    if not repo.is_commit(commit_after_reformat):
        raise ValueError("Commit after reformat '%s' is not a valid commit in this repo" %
                         commit_after_reformat)

    if not repo.is_ancestor(commit_before_reformat, commit_after_reformat):
        raise ValueError(("Commit before reformat '%s' is not a valid ancestor of commit after" +
                          " reformat '%s' in this repo") % (commit_before_reformat, commit_after_reformat))

    if repo.is_detached():
        raise ValueError("You must not run this script in a detached HEAD state")

    if repo.is_working_tree_dirty():
        raise ValueError("Your working tree has pending changes. You must have a clean working" +
                         " tree before proceeding.\n\nRun `git status` to see your pending changes, and then" +
                         " try `git stash save`, `git reset --hard`, `git submodule update` and/or committing" +
                         " your changes.")

    # The pre-reformat ref may be a tag or branch name, so compare resolved hashes.
    before_hash = repo.rev_parse([commit_before_reformat])

    merge_base = repo.get_merge_base(commit_before_reformat)
    if merge_base != before_hash:
        raise ValueError(("Merge base is '%s'. Please rebase to '%s' and resolve all conflicts" +
                          " before running this script.\n\nTo interactively rebase, use `git rebase -i %s`") %
                         (merge_base, commit_before_reformat, commit_before_reformat))

    # We assume the target branch is master, it could be a different branch if needed for testing
    merge_base = repo.get_merge_base(target_branch)
    if merge_base != before_hash:
        raise ValueError("This branch appears to already have advanced too far through the merge process")

    return repo
def get_branch_names(repo):
    """Return (current branch name, name for the reformatted copy of it).

    repo -- object providing get_branch_name() and does_branch_exist().

    The new name is the current branch name with "-reformatted" appended.
    Raises ValueError when a branch with the new name already exists.
    """
    # Everything looks good so lets start going through all the commits
    branch_name = repo.get_branch_name()
    new_branch = branch_name + "-reformatted"

    if repo.does_branch_exist(new_branch):
        raise ValueError("The branch '%s' already exists. Please delete the branch '%s', or rename the current branch." % (new_branch, new_branch))

    return (branch_name, new_branch)
def reformat_branch(clang_format, commit_before_reformat, commit_after_reformat, target_branch):
    """Reformat a branch made before a clang-format run.

    clang_format           -- clang-format command to use.
    commit_before_reformat -- commit immediately prior to the reformat.
    commit_after_reformat  -- commit that applied the reformat.
    target_branch          -- branch the current branch is based on.

    Replays every commit in commit_before_reformat..HEAD, formatted with
    clang-format, on top of commit_after_reformat and names the result
    "<current branch>-reformatted". Raises ValueError when a precondition fails.
    """
    clang_format = ClangFormat(clang_format)

    if os.getcwd() != get_base_dir():
        raise ValueError("reformat-branch must be run from the repo root")

    repo = validate_repo_state(commit_before_reformat, commit_after_reformat, target_branch)
    old_branch, new_branch = get_branch_names(repo)

    commits = get_list_from_lines(repo.log(["--reverse", "--pretty=format:%H", "%s..HEAD" % commit_before_reformat]))

    previous_commit_base = commit_after_reformat

    # Go through all the commits the user made on the local branch and migrate to a new branch
    # that is based on post_reformat commits instead
    for idx, commit_hash in enumerate(commits):
        print("--- Formatting " + commit_hash + (" (%s of %s)" % (idx + 1, len(commits))))
        repo.checkout(["--quiet", "--detach", commit_hash])

        deleted_files = []

        # Format each of the files by checking out just a single commit from the user's branch
        commit_files = get_list_from_lines(repo.diff(["HEAD~", "--name-only"]))

        for commit_file in commit_files:
            # Format each file needed if it was not deleted
            if not os.path.exists(commit_file):
                print("\tSkipping file '%s' since it has been deleted in commit '%s'" % (
                    commit_file, commit_hash))
                deleted_files.append(commit_file)
                continue

            if FILES_REGEX.search(commit_file):
                clang_format.format(commit_file)
            else:
                print("\tSkipping file '%s' since it is not a file clang_format should format" %
                      commit_file)

        # Check if anything needed reformatting, and if so amend the commit
        if not repo.is_working_tree_dirty():
            print("Commit %s needed no reformatting" % commit_hash)
        else:
            repo.commit(["--all", "--amend", "--no-edit"])

        # Rebase our new commit on top the post-reformat commit
        previous_commit = repo.rev_parse(["HEAD"])

        # Checkout the new branch with the reformatted commits
        # Note: we will not name as a branch until we are done with all commits on the local branch
        repo.checkout(["--quiet", "--detach", previous_commit_base])

        # Copy each file from the reformatted commit on top of the post reformat
        diff_files = get_list_from_lines(repo.diff(["%s~..%s" % (previous_commit, previous_commit),
                                                    "--name-only"]))

        for diff_file in diff_files:
            # If the file was deleted in the commit we are reformatting, we need to delete it again
            if diff_file in deleted_files:
                repo.rm([diff_file])
                continue

            # The file has been added or modified, continue as normal
            file_contents = repo.show(["%s:%s" % (previous_commit, diff_file)])

            root_dir = os.path.dirname(diff_file)
            if root_dir and not os.path.exists(root_dir):
                os.makedirs(root_dir)

            # Captured git output is bytes on Python 3; those must be written in
            # binary mode. A str value keeps the original text-mode write.
            write_mode = "w+" if isinstance(file_contents, str) else "wb"
            with open(diff_file, write_mode) as new_file:
                new_file.write(file_contents)

            repo.add([diff_file])

        # Create a new commit onto clang-formatted branch
        repo.commit(["--reuse-message=%s" % previous_commit])

        previous_commit_base = repo.rev_parse(["HEAD"])

    # Create a new branch to mark the hashes we have been using
    repo.checkout(["-b", new_branch])

    print("reformat-branch is done running.\n")
    print("A copy of your branch has been made named '%s', and formatted with clang-format.\n" % new_branch)
    print("The original branch has been left unchanged.")
    print("If you have not just done so, the next step is to rebase the new branch on '%s'.\n" % target_branch)
    print("To undo this, run `git checkout %s && git branch -D %s`" % (old_branch, new_branch))
def main():
    """Main entry point.

    Parses the command line, fills in the commits for a known base branch when
    -b is used instead of the three positional arguments, and runs
    reformat_branch(). Exits with status 1 when the arguments are unusable or
    reformat_branch() raises ValueError.
    """
    # NOTE(review): the program-name tokens in the usage text were lost from the
    # source; argparse's %(prog)s placeholder stands in for them.
    parser = ArgumentParser(
        usage='''%(prog)s -b 3.10 ~OR~ %(prog)s tag-reformat-3.10 3.10 3.10
This script formats a branch past the great reformatting wall. It can be run two ways:
1. %(prog)s commit-right-before-reformat commit-after-reformat original-branch
2. %(prog)s -b 3.10 # or develop
The first usage is better if you think you may encounter extra merge conflicts.
This script requires:
- you have a clean working directory
- you have rebased your branch on commit-right-before-reformat (implicitly for the second usage)
- you have the branch currently checked out
- various other logical requirements for the rebase to work
If there is an issue, this script will most likely detect it and provide you with
commands to fix it.
''')
    parser.add_argument("-c", "--clang-format", dest="clang_format", default='clang-format',
                        help='Command to use for clang-format')
    parser.add_argument("-b", "--base", dest="base_branch", help='Tries to rebase on the tip of this'
                        + ' branch given a base branch name (experimental). This should be the main branch the'
                        + ' current branch is based on (3.10 or develop)')
    parser.add_argument("commit1", help="commit immediately prior to reformat", nargs='?', default='')
    parser.add_argument("commit2", help="commit after reformat", nargs='?', default='')
    parser.add_argument("target", help="target branch name (likely 3.10 or develop)", nargs='?', default='')

    options = parser.parse_args()

    if not options.commit1 or not options.commit2 or not options.target:
        if not options.base_branch:
            # NOTE(review): the body of this branch was lost from the source;
            # showing the help text and exiting is assumed.
            parser.print_help()
            sys.exit(1)

        # TODO update this when formatting is done
        if options.base_branch == '3.10':
            options.commit1 = 'tag-reformat-3.10'
            options.commit2 = options.target = 'format-3.10'
        elif options.base_branch == 'develop':
            options.commit1 = 'tag-reformat-develop'
            options.commit2 = options.target = 'format-develop'
        else:
            # The original message had a %s placeholder but never supplied the value.
            print("Don't know how to use this base branch: %s. Try using the three-argument version of this script"
                  % options.base_branch)
            sys.exit(1)

    try:
        reformat_branch(options.clang_format, options.commit1, options.commit2,
                        options.target)
    except ValueError as ve:
        print("*** ERROR:\n" + str(ve) + "\n")
        sys.exit(1)
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment