Skip to content

Instantly share code, notes, and snippets.

@dgobbi
Created July 3, 2016 19:31
Show Gist options
  • Save dgobbi/bc08a5d7dc76e200467e740556e7e9e4 to your computer and use it in GitHub Desktop.
Save dgobbi/bc08a5d7dc76e200467e740556e7e9e4 to your computer and use it in GitHub Desktop.
Modernize VTK brace style, optionally convert to my own K&R-based style
#! /usr/bin/env python
"""
Usage: python bracefix.py [--test] <file1> [<file2> ...]
This script takes old-style "Whitesmiths" indented VTK source files as
input, and re-indents the braces according to the new VTK style.
Only the brace indentation is modified.
If called with the --test option, then it will print an error message
for each file that it would modify, but it will not actually modify the
files.
If called with the --kr option, then opening braces are moved to the
end of the preceding line if the preceding statement does flow control
(i.e. is if, else, while, for, do, switch, or a label), is not split
across multiple lines, and if the move will not cause the line to go
over the 80 character limit.
If called with the --move-case-break option, then if a case label is
followed by a braced block and then a break, the break will be moved
into the block. This improves code appearance and doesn't change
behavior.
Written by David Gobbi on Sep 30, 2015.
"""
import sys
import os
import re
def reindent(filename, dry_run=False, kr=False, move_case_break=False):
"""Reindent a file from Whitesmiths style to Allman style"""
# The first part of this function clears all strings and comments
# where non-grammatical braces might be hiding. These changes will
# not be saved back to the file, they just simplify the parsing.
# look for ', ", /*, and //
keychar = re.compile(r"""[/"']""")
# comments of the form /* */
c_comment = re.compile(r"\/\*(\*(?!\/)|[^*])*\*\/")
c_comment_start = re.compile(r"\/\*(\*(?!\/)|[^*])*$")
c_comment_end = re.compile(r"^(\*(?!\/)|[^*])*\*\/")
# comments of the form //
cpp_comment = re.compile(r"\/\/.*")
# string literals ""
string_literal = re.compile(r'"([^\\"]|\\.)*"')
string_literal_start = re.compile(r'"([^\\"]|\\.)*\\$')
string_literal_end = re.compile(r'^([^\\"]|\\.)*"')
# character literals ''
char_literal = re.compile(r"'([^\\']|\\.)*'")
char_literal_start = re.compile(r"'([^\\']|\\.)*\\$")
char_literal_end = re.compile(r"^([^\\']|\\.)*'")
# read the file
try:
f = open(filename)
lines = f.readlines()
f.close()
except:
sys.stderr.write(filename + ": ")
sys.stderr.write(str(sys.exc_info()[1]) + "\n")
sys.exit(1)
# convert strings to "", char constants to '', and remove comments
n = len(lines) # 'lines' is the input
newlines = [] # 'newlines' is the output
cont = None # set if e.g. we found /* and we are looking for */
for i in range(n):
line = lines[i].rstrip()
if cont is not None:
# look for closing ' or " or */
match = cont.match(line)
if match:
# found closing ' or " or */
line = line[match.end():]
cont = None
else:
# this whole line is in the middle of a string or comment
if cont is c_comment_end:
# still looking for */, clear the whole line
newlines.append("")
continue
else:
# still looking for ' or ", set line to backslash
newlines.append('\\')
continue
# start at column 0 and search for ', ", /*, or //
pos = 0
while True:
match = keychar.search(line, pos)
if match is None:
break
pos = match.start()
end = match.end()
# was the match /* ... */ ?
match = c_comment.match(line, pos)
if match:
line = line[0:pos] + " " + line[match.end():]
pos += 1
continue
# does the line have /* ... without the */ ?
match = c_comment_start.match(line, pos)
if match:
if line[-1] == '\\':
line = line[0:pos] + ' \\'
else:
line = line[0:pos]
cont = c_comment_end
break
# does the line have // ?
match = cpp_comment.match(line, pos)
if match:
if line[-1] == '\\':
line = line[0:pos] + ' \\'
else:
line = line[0:pos]
break
# did we find "..." ?
match = string_literal.match(line, pos)
if match:
line = line[0:pos] + "\"\"" + line[match.end():]
pos += 2
continue
# did we find "... without the final " ?
match = string_literal_start.match(line, pos)
if match:
line = line[0:pos] + "\"\"\\"
cont = string_literal_end
break
# did we find '...' ?
match = char_literal.match(line, pos)
if match:
line = line[0:pos] + "\' \'" + line[match.end():]
pos += 3
continue
# did we find '... without the final ' ?
match = char_literal_start.match(line, pos)
if match:
line = line[0:pos] + "\' \'\\"
cont = char_literal_end
break
# if we got to here, we found / that wasn't /* or //
pos += 1
# strip any trailing whitespace!
newlines.append(line.rstrip())
# The second part of this function looks for braces in the simplified
# code that we wrote to "newlines" after removing the contents of all
# string literals, character literals, and comments.
# Whenever we encounter an opening brace, we push its position onto a
# stack. Whenever we encounter the matching closing brace, we indent
# the braces as a pair.
# For #if directives, we check whether there are mismatched braces
# within the conditional block, and if so, we print a warning and reset
# the stack to the depth that it had at the start of the block.
# For #define directives, we save the stack and then restart counting
# braces until the end of the #define. Then we restore the stack.
# all changes go through this function
lines_changed = {} # keeps track of each line that was changed
def changeline(i, newtext, lines_changed=lines_changed):
if newtext != lines[i]:
lines[i] = newtext
lines_changed[i] = newtext
# we push a tuple (delim, row, col, newcol) onto this stack whenever
# we find a {, (, or [ delimiter, this keeps track of where we found
# the delimeter and what column we want to move it to
stack = []
lastdepth = 0
# this is a superstack that allows us to save the entire stack when we
# enter into an #if conditional block
dstack = []
# these are syntactic elements we need to look for
directive = re.compile(r" *# *(..)")
label = re.compile(r"""(\s*(case[(\s]+)?(' '|""|\w|\s*::\s*)+[)\s]*:)+$""")
cflow = re.compile(r"\s*(if|else|for|do|while|switch)(\W|\Z)")
cbreak = re.compile(r"\s*break\s*;\s*$")
delims = re.compile(r"[{}()\[\]]")
spaces = re.compile(r" *")
cplusplus = re.compile(r" *# *ifdef *__cplusplus")
lastpos = 0 # previous indentation column
newpos = 0 # current indentation column
continuation = False # true if line continues an unfinished statement
new_context = True # also set when we enter a #define statement
in_else = False # set if in an #else
in_define = False # set if in #define
in_assign = False # set to deal with "= {" or #define x {"
leaving_define = False # set if at the end of a #define
save_stack = None # save stack when entering a #define
for i in range(n):
line = newlines[i]
pos = 0 # column position
# restore stack when leaving #define
if leaving_define:
stack, lastpos, newpos, continuation = save_stack
save_stack = None
in_define = False
leaving_define = False
# handle #if conditionals
is_directive = False
in_else = False
match = directive.match(line)
if match:
is_directive = True
if match.groups()[0] == 'if':
dstack.append((list(stack), line))
elif match.groups()[0] in ('en', 'el'):
oldstack, dline = dstack.pop()
if len(stack) > len(oldstack) and not cplusplus.match(dline):
sys.stderr.write(filename + ":" + str(i) + ": ")
sys.stderr.write("mismatched delimiter in \"" +
dline + "\" block\n")
if match.groups()[0] == 'el':
in_else = True
stack = oldstack
dstack.append((list(stack), line))
elif match.groups()[0] == 'de':
in_define = True
leaving_define = False
save_stack = (stack, lastpos, newpos, continuation)
stack = []
new_context = True
# remove backslash at end of line, if present
if len(line) > 0 and line[-1] == '\\':
line = line[0:-1].rstrip()
elif in_define:
leaving_define = True
if not is_directive and len(line) > 0 and not continuation:
# what is the indentation of the current line?
match = spaces.match(line)
newpos = match.end()
# check for end of statement vs. continuation of statement
if len(line) > 0:
# if #define, {, }, ;, flow control keyword, or label
if (new_context or line[-1] in ('{', '}', ';') or
cflow.match(line) or label.match(line)):
continuation = False
new_context = False
elif not is_directive:
continuation = True
# search for braces
while True:
match = delims.search(line, pos)
if match is None:
break
pos = match.start()
delim = line[pos]
if delim in ('(', '['):
# save delim, row, col, and current indentation
stack.append((delim, i, pos, newpos))
elif delim == '{':
if in_assign:
# do not adjust braces for initializer lists
stack.append((delim, i, pos, pos))
elif ((in_else or in_define) and spaces.sub("", line) == "{"):
# for opening braces that might have no match
indent = " "*lastpos
changeline(i, spaces.sub(indent, lines[i], count=1))
stack.append((delim, i, lastpos, lastpos))
else:
# save delim, row, col, and previous indentation
stack.append((delim, i, pos, lastpos))
newpos = pos + 2
lastpos = newpos
else:
# found a ')', ']', or '}' delimiter, so pop its partner
try:
ldelim, j, k, newpos = stack.pop()
except IndexError:
ldelim = ""
if ldelim != {'}':'{', ')':'(', ']':'['}[delim]:
sys.stderr.write(filename + ":" + str(i) + ": ")
sys.stderr.write("mismatched \'" + delim + "\'\n")
# adjust the indentation of matching '{', '}'
if (ldelim == '{' and delim == '}' and
spaces.sub("", lines[i][0:pos]) == "" and
spaces.sub("", lines[j][0:k]) == ""):
# if case break, put break inside the braces
if (move_case_break and
j > 0 and i+1 < len(lines) and
label.match(lines[j-1]) and
cbreak.match(lines[i+1])):
tmpline = lines[i]
changeline(i, lines[i+1])
changeline(i+1, tmpline)
i += 1
indent = " "*newpos
# reindent the matched braces according to style
if (kr and j > 0 and len(lines[j-1]) < 78 and
(cflow.match(newlines[j-1]) or
label.match(newlines[j-1])) and
not newlines[j-1][-1:] in '{};\\' and
not cpp_comment.search(lines[j-1]) and
not c_comment.search(lines[j-1])):
# change to K&R style braces
changeline(j-1, lines[j-1].rstrip() + " {\n")
temp = lines[j][lines[j].find('{')+1:].lstrip()
if temp:
temp = indent + " " + temp
else:
temp = None
changeline(j, temp)
else:
# reindent the leading brace to Allman style
changeline(j, spaces.sub(indent, lines[j], count=1))
# reindent the trailing brace
changeline(i, spaces.sub(indent, lines[i], count=1))
pos += 1
# check for " = " and #define assignments for the sake of
# the { inializer list } that might be on the following line
if len(line) > 0:
if (line[-1] == '=' or
(is_directive and in_define and not leaving_define)):
in_assign = True
elif not is_directive:
in_assign = False
lastpos = newpos
if len(dstack) != 0:
sys.stderr.write(filename + ": ")
sys.stderr.write("mismatched #if conditional.\n")
if len(stack) != 0:
sys.stderr.write(filename + ":" + str(stack[0][1]) + ": ")
sys.stderr.write("no match for " + stack[0][0] +
" before end of file.\n")
if lines_changed:
# remove any trailing whitespace
trailing = re.compile(r" *$")
for i in range(n):
if lines[i] is not None:
lines[i] = trailing.sub("", lines[i])
while n > 0 and lines[n-1].rstrip() == "":
n -= 1
if dry_run:
errcount = len(lines_changed)
line_numbers = list(lines_changed.keys())
line_numbers.sort()
line_numbers = [str(l + 1) for l in line_numbers[0:10] ]
if errcount > len(line_numbers):
line_numbers.append("...")
sys.stderr.write("Warning: " + filename +
": incorrect brace indentation on " +
str(errcount) +
(" lines: ", "line: ")[errcount == 1] +
", ".join(line_numbers) + "\n")
else:
# rewrite the file
ofile = open(filename, 'w')
ofile.writelines(filter(None, lines))
ofile.close()
return True
return False
if __name__ == "__main__":
# ignore generated files
ignorefiles = ["lex.yy.c", "vtkParse.tab.c"]
files = []
opt_ignore = False # ignore all further options
opt_test = False # the --test option
opt_kr = False # the --kr option, for K&R style
opt_case = False # the --move-case-break option
for arg in sys.argv[1:]:
if arg[0:1] == '-' and not opt_ignore:
if arg == '--':
opt_ignore = True
elif arg == '--test':
opt_test = True
elif arg == '--kr':
opt_kr = True
elif arg == '--move-case-break':
opt_case = True
else:
sys.stderr.write("%s: unrecognized option %s\n" %
(os.path.split(sys.argv[0])[-1], arg))
sys.exit(1)
elif os.path.split(arg)[-1] not in ignorefiles:
files.append(arg)
# if --test was set, whenever a file needs modification, we set
# "failed" and continue checking the rest of the files
failed = False
for filename in files:
# repeat until no further changes occur
while reindent(filename,
dry_run=opt_test,
kr=opt_kr,
move_case_break=opt_case):
if opt_test:
failed = True
break
if failed:
sys.exit(1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment