Skip to content

Instantly share code, notes, and snippets.

@Teemperor
Last active February 10, 2017 15:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Teemperor/767cbd5ec74a3c6444d9548414150c0a to your computer and use it in GitHub Desktop.
Save Teemperor/767cbd5ec74a3c6444d9548414150c0a to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
from __future__ import print_function
import re, os
import ntpath
# print to stderr...
import sys
def eprint(*args, **kwargs):
    """Print like the built-in print(), but to standard error.

    All positional and keyword arguments are forwarded to print() unchanged;
    only the destination stream is fixed to sys.stderr.
    """
    stream = sys.stderr
    print(*args, file=stream, **kwargs)
# Path of the file currently being rewritten. It is set by the main walk loop
# and read by the reporting/diagnostic code in similar_guards and fix_guard.
currentFile = ""
# Number of guarded includes that fix_guard has replaced by a plain #include.
processedIncludes = 0
def edit_distance(s1, s2):
    """Return the Levenshtein distance between the sequences s1 and s2.

    The distance is the minimum number of single-element insertions,
    deletions and substitutions needed to turn s1 into s2.

    Only two rows of the dynamic-programming table are kept alive, and the
    result is read from an explicit position instead of relying on loop
    variables that happen to still be bound after the loops finish.
    """
    # Distances from the empty prefix of s1 to every prefix of s2.
    previous = list(range(len(s2) + 1))
    for row, ch1 in enumerate(s1, 1):
        # First column: deleting all `row` leading elements of s1.
        current = [row]
        for col, ch2 in enumerate(s2, 1):
            cost = 0 if ch1 == ch2 else 1
            current.append(min(current[col - 1] + 1,        # insertion
                               previous[col] + 1,           # deletion
                               previous[col - 1] + cost))   # substitution
        previous = current
    return previous[-1]
def readlines(fname):
    """Return the lines of the text file fname as a list.

    Line terminators are kept, exactly as file.readlines() delivers them.
    """
    with open(fname) as handle:
        return handle.readlines()
def write(fname, content):
    """Replace the contents of the text file fname with content.

    The file is created if it does not exist and truncated otherwise.
    """
    with open(fname, "w") as sink:
        sink.write(content)
def read(fname):
    """Return the complete contents of the text file fname as one string."""
    with open(fname) as handle:
        return handle.read()
# Maps a header file name to the path find_header() resolved it to. A value
# of None records that the header was searched for and not found, so that the
# directory trees are not walked again for the same missing header.
header_path_cache = {}


def _search_tree(top, header_name):
    """Return the path of the first file named header_name below top, or None."""
    for root, _dirs, files in os.walk(top):
        if header_name in files:
            return os.path.join(root, header_name)
    return None


def find_header(header_name):
    """Locate a header by its file name.

    The current directory tree is searched first; if nothing is found there,
    the ../build/ tree is used as a fallback. Results (including misses) are
    memoised in header_path_cache.

    Returns the path of the first match in os.walk() order, or None if no
    file with that name exists in either tree.
    """
    if header_name in header_path_cache:
        return header_path_cache[header_name]

    result = None
    for top in (".", "../build/"):
        result = _search_tree(top, header_name)
        if result is not None:
            break

    header_path_cache[header_name] = result
    return result
def clean_include_path(include):
    """Strip the "" or <> delimiters from an #include operand.

    At most one leading '"' and then one leading '<' are removed, followed by
    at most one trailing '"' and then one trailing '>'. Anything else is
    returned untouched.
    """
    for opener in ('"', '<'):
        if include[:1] == opener:
            include = include[1:]
    for closer in ('"', '>'):
        if include[-1:] == closer:
            include = include[:-1]
    return include
def is_no_op_line(line):
    """Tell whether line is blank or looks like part of a comment.

    After stripping surrounding whitespace, a line counts as a no-op when it
    is empty, starts with "*", "//" or "/*", or ends with "*/".
    """
    stripped = line.strip()
    if not stripped:
        return True
    return stripped.startswith(("*", "//", "/*")) or stripped.endswith("*/")
def get_guard_symbol(path):
    """Return the include-guard symbol of the header at path, or None.

    For a header that starts with

        #ifndef BLA_H
        #define BLA_H
        ...
        #endif

    the result is "BLA_H".

    The header is scanned from the top. Blank lines and comment lines are
    skipped; the first other line must be an "#ifndef", whose symbol is
    returned. If some other kind of line comes first, or the "#ifndef" cannot
    be parsed, a diagnostic is written to stderr and None is returned.

    Only the "#ifndef" is inspected: the function does not check that a
    matching "#define" follows.
    """
    ifndef_re = re.compile(r"#ifndef[ ]+([\S]+) ?")
    in_block_comment = False
    # Stream the file: the guard sits near the top, so there is no need to
    # load the whole header or to keep scanning once it has been seen.
    with open(path) as header:
        for line in header:
            # Coarse /* ... */ tracking: lines inside a multi-line block
            # comment are ignored entirely.
            if "/*" in line:
                in_block_comment = True
            if "*/" in line:
                in_block_comment = False
            if in_block_comment:
                continue

            stripped = line.strip()
            if stripped.startswith("#ifndef"):
                # Match against the stripped text so that an indented
                # "#ifndef" is parsed as well; the pattern is anchored at the
                # start of the string.
                ifndef_match = ifndef_re.match(stripped)
                if ifndef_match:
                    return ifndef_match.group(1)
                eprint("IFNDEF REGEX DID NOT WORK: " + line)
                return None
            if not is_no_op_line(line):
                eprint("NO NOOP: " + path + ":" + line)
                return None
    return None
# Report entries for guard pairs that similar_guards found equal after
# stripping the usual prefixes/suffixes.
assumptions = []
# Report entries for guard pairs that similar_guards accepted only because
# their edit distance was small.
unsure_assumptions = []
# Report entries for guard pairs that fix_guard could not reconcile; those
# includes are left unchanged.
failed_assumptions = []
# Utility functions for removing prefix/suffix from a string
def remove_prefix(text, prefix):
    """Return text without a leading prefix; text is returned as-is if it
    does not start with prefix."""
    if not text.startswith(prefix):
        return text
    return text[len(prefix):]
def remove_suffix(text, suffix):
    """Return text without a trailing suffix; text is returned as-is if it
    does not end with suffix.

    An empty suffix leaves text unchanged. (Slicing with text[:-0] would
    otherwise yield the empty string.)
    """
    if suffix and text.endswith(suffix):
        return text[:-len(suffix)]
    return text
def similar_guards(a, b):
    """Decide whether two header guard symbols are meant to be the same.

    Examples of symbols treated as similar:
        ROOT_TTREE    and ROOT_TTREE_H
        ROOT_TMVA_XXX and ROOT_XXX
        ROOT_TFILE    and ROOT_TFILE

    When the symbols become equal after stripping the common prefixes and
    suffixes, the pair is recorded in `assumptions` (we are confident they
    are the same symbol). When they merely end up within an edit distance of
    5, the pair is recorded in `unsure_assumptions`. In both cases True is
    returned. Two shortcut cases return True without recording anything.
    Otherwise False is returned and no list is modified.
    """
    global assumptions
    global unsure_assumptions

    original_a, original_b = a, b

    # This pair shows up so often that it gets a special case.
    tg_pair = ("ROOT_TGButton", "ROOT_TGWidget")
    if a in tg_pair and b in tg_pair:
        return True

    a, b = a.lower(), b.lower()

    # A symbol carrying a _h/_hh suffix is taken as an obvious header guard.
    # NOTE(review): the original comment talked about "the ifndef", but the
    # check looks at `a`, which fix_guard passes as the guard found inside
    # the header itself - confirm which symbol was intended.
    if a.endswith(("_h", "_hh")):
        return True

    prefixes = ("roott", "root", "roo", "_", "tmva_", "fit_", "math_",
                "genvector_")
    suffixes = ("_hh", "_h")

    def strip_decorations(symbol):
        # Keep peeling off the prefixes/suffixes people like to add or drop
        # until a full pass no longer changes the symbol.
        while True:
            before = symbol
            for prefix in prefixes:
                symbol = remove_prefix(symbol, prefix)
            for suffix in suffixes:
                symbol = remove_suffix(symbol, suffix)
            if symbol == before:
                return symbol

    a = strip_decorations(a)
    b = strip_decorations(b)

    if a == b:
        assumptions.append("\n" + currentFile + ":\n" + original_a + "\n" + original_b)
        return True

    if edit_distance(a, b) <= 5:
        unsure_assumptions.append("\n" + currentFile + "\n" + original_a + "\n" + original_b)
        return True

    return False
def fix_guard(guard):
    """Substitution callback for one "#ifndef X / #include Y / #endif" match.

    guard is the regex match object: group(0) is the whole matched text,
    group(1) the symbol after #ifndef and group(2) the #include operand
    (still wrapped in "" or <>).

    Returns the replacement text: a bare '#include Y' line when X is (or is
    similar to) the include guard found in header Y, otherwise the original
    matched text unchanged. Successful replacements bump processedIncludes;
    symbol pairs that could not be reconciled are appended to
    failed_assumptions.
    """
    global processedIncludes
    global failed_assumptions

    untouched = guard.group(0)
    ifndef_symbol = guard.group(1)
    include_operand = guard.group(2)

    # Some symbols are known not to be header guards (e.g. NO_MATHCORE and
    # friends), so leave those constructs alone.
    not_a_guard = (
        ifndef_symbol in ("__CINT__", "NDEBUG")
        or "WIN32" in ifndef_symbol
        or ifndef_symbol.startswith("NO_")
    )
    if not_a_guard:
        return untouched

    included_header = ntpath.basename(clean_include_path(include_operand))
    header_path = find_header(included_header)
    if header_path is None:
        eprint(currentFile + ": Coulnd't find file for header: " + included_header)
        return untouched

    guard_symbol = get_guard_symbol(header_path)
    if guard_symbol is None:
        eprint(currentFile + ": Couldn't find a header guard for " + header_path)
        return untouched

    # The exact comparison comes first so that similar_guards (which records
    # report entries) is only consulted for symbols that actually differ.
    if guard_symbol == ifndef_symbol or similar_guards(guard_symbol, ifndef_symbol):
        processedIncludes += 1
        return "#include " + include_operand

    failed_assumptions.append("\n" + currentFile + ":\n" + guard_symbol + "\n" + ifndef_symbol)
    return untouched
def fix_double_guards(path):
    """Rewrite the file at path, removing redundant external include guards.

    Every "#ifndef X / #include Y / #endif" construct in the file is handed to
    fix_guard, which decides on the replacement text. The file is written
    back only if at least one construct was actually changed.
    """
    pattern = re.compile(r"#ifndef[ ]+([\S]+)[^\n]*\n[\n ]*#include[ ]*([\S]+)\n[\n ]*#endif")
    original_text = read(path)
    rewritten_text = pattern.sub(fix_guard, original_text)
    if rewritten_text == original_text:
        return
    write(path, rewritten_text)
def run_tests():
    """Sanity-check is_no_op_line on a blank line and on comment lines.

    Raises AssertionError if any of the samples is not classified as a no-op.
    """
    for sample in ("\n", " /* \n", " // \n", " * \n"):
        assert is_no_op_line(sample)
# Run the built-in self checks before touching any file.
run_tests()

# Walk the tree below the current directory and rewrite every file in place.
for root, subdirs, files in os.walk("."):
    # Skip roottest for now. We should remove this when we do the same for
    # roottest :)
    # Pruning the directory list in place stops os.walk from descending into
    # roottest at all. This also covers files that sit directly inside the
    # roottest directory and does not depend on the platform's path separator.
    if "roottest" in subdirs:
        subdirs.remove("roottest")
    for file_name in files:
        path = os.path.join(root, file_name)
        # Published for the diagnostics emitted while this file is processed.
        currentFile = path
        try:
            fix_double_guards(path)
        except (KeyboardInterrupt, AssertionError):
            sys.exit(1)
        except UnicodeDecodeError:
            # Best effort: files that cannot be decoded as text (binaries and
            # the like) are deliberately skipped.
            pass

# Final report: one section per confidence class, each with its entry count.
for title, entries in (
        ("Probably correct typo corrections:", assumptions),
        ("Dubious typo corrections:", unsure_assumptions),
        ("Unuccessful typo corrections:", failed_assumptions)):
    print("\n\n\n\n\n" + title + str(len(entries)))
    for entry in entries:
        print(entry)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment