Last active
February 10, 2017 15:38
-
-
Save Teemperor/767cbd5ec74a3c6444d9548414150c0a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
from __future__ import print_function | |
import re, os | |
import ntpath | |
# print to stderr... | |
import sys | |
def eprint(*args, **kwargs):
    """Print a message to standard error, with normal print() semantics."""
    print(*args, file=sys.stderr, **kwargs)
# Path of the file currently being processed; used in log and report messages.
currentFile = ""
# Count of redundant double-guarded #includes that were rewritten.
processedIncludes = 0
def edit_distance(s1, s2):
    """Return the Levenshtein (edit) distance between strings s1 and s2.

    Classic dynamic-programming table: tbl[i, j] is the distance between
    the first i characters of s1 and the first j characters of s2.
    """
    m = len(s1) + 1
    n = len(s2) + 1
    tbl = {}
    for i in range(m):
        tbl[i, 0] = i
    for j in range(n):
        tbl[0, j] = j
    for i in range(1, m):
        for j in range(1, n):
            cost = 0 if s1[i - 1] == s2[j - 1] else 1
            tbl[i, j] = min(tbl[i, j - 1] + 1, tbl[i - 1, j] + 1, tbl[i - 1, j - 1] + cost)
    # Index the table explicitly: the original returned tbl[i, j] using loop
    # variables leaked out of the for-loops, which was only correct for empty
    # inputs by coincidence.
    return tbl[m - 1, n - 1]
def readlines(fname):
    """Return the list of lines of file fname (trailing newlines kept)."""
    with open(fname) as handle:
        return handle.readlines()
def write(fname, content):
    """Overwrite file fname with the given string content."""
    with open(fname, "w") as out:
        out.write(content)
def read(fname):
    """Return the entire contents of file fname as a single string."""
    with open(fname) as handle:
        return handle.read()
# Memoization cache for find_header(): header file name -> path where found.
header_path_cache = {}
# Searches the current directory and ../build/ for a header by its name.
def find_header(header_name):
    """Return the path of the first file named header_name found under "."
    (or, as a fallback, "../build/"), or None when it exists in neither tree.

    Results are memoized in header_path_cache — including misses, so an
    unknown header does not trigger a full directory re-walk on every call.
    """
    global header_path_cache
    if header_name in header_path_cache:
        return header_path_cache[header_name]
    result = None
    # Search the source tree first, then fall back to the build folder.
    for search_root in (".", "../build/"):
        for root, dirs, files in os.walk(search_root):
            if header_name in files:
                result = os.path.join(root, header_name)
                break
        if result is not None:
            break
    header_path_cache[header_name] = result
    return result
# removes "" and <> around paths
def clean_include_path(include):
    """Strip one layer of quoting ("..." or <...>) from an include target."""
    for opener in ('"', '<'):
        if include.startswith(opener):
            include = include[1:]
    for closer in ('"', '>'):
        if include.endswith(closer):
            include = include[:-1]
    return include
def is_no_op_line(line):
    """Return True when line is blank or (part of) a comment.

    Treated as no-ops: empty lines, lines starting with '*', '//' or '/*',
    and lines ending with '*/'.
    """
    stripped = line.strip()
    return (not stripped
            or stripped.startswith(("*", "//", "/*"))
            or stripped.endswith("*/"))
# Parses the header and finds the symbol of the header guard
# e.g. BLA_H for
#   #ifndef BLA_H
#   #define BLA_H
#   #endif
def get_guard_symbol(path):
    """Return the #ifndef guard symbol of the header file at `path`.

    Scans the file top-down, skipping blank lines and comments, and returns
    the symbol of the first #ifndef directive found. Returns None (via the
    bare `return`) when real code appears before any #ifndef, i.e. the file
    does not look like a guarded header.
    """
    ifndef_re = re.compile(r"#ifndef[ ]+([\S]+) ?")
    # NOTE(review): define_re is never used — state 1 ("looking for define")
    # below is never implemented; only state 0 is handled.
    define_re = re.compile(r"#define[ ]+([\S]+) ?")
    lines = readlines(path)
    symbol = None
    # 0 = looking for ifndef
    # 1 = looking for define
    # 2 = looking for endif
    state = 0
    in_block_comment = False
    for line in lines:
        if state == 0:
            # Crude block-comment tracking: assumes /* and */ do not share a
            # line with the directives we care about — TODO confirm.
            if "/*" in line:
                in_block_comment = True
            if "*/" in line:
                in_block_comment = False
            if not in_block_comment:
                if line.strip().startswith("#ifndef"):
                    ifndef_match = re.match(ifndef_re, line)
                    if ifndef_match:
                        symbol = ifndef_match.group(1)
                    else:
                        eprint("IFNDEF REGEX DID NOT WORK: " + line)
                    # Leave state 0 so the rest of the file is ignored.
                    state += 1
                elif not is_no_op_line(line):
                    # Real code before any #ifndef: not a guarded header.
                    eprint("NO NOOP: " + path + ":" + line)
                    return
                    pass
    return symbol
# Report buffers filled while processing and printed at the end of the run:
# guard pairs we are confident refer to the same header.
assumptions = []
# guard pairs that only matched fuzzily (small edit distance).
unsure_assumptions = []
# guard pairs that could not be matched at all.
failed_assumptions = []
# Utility functions for removing prefix/suffix from a string
def remove_prefix(text, prefix):
    """Return text without a leading prefix; unchanged if prefix is absent."""
    return text[len(prefix):] if text.startswith(prefix) else text
def remove_suffix(text, suffix):
    """Return text without a trailing suffix; unchanged if suffix is absent.

    Guards against an empty suffix: text.endswith("") is always True and
    text[:-0] evaluates to "", which would wrongly wipe the whole string.
    """
    if suffix and text.endswith(suffix):
        return text[:-len(suffix)]
    return text
# Checks if two header guard symbols are similar.
# Examples for similar symbols are:
#   ROOT_TTREE and ROOT_TTREE_H
#   ROOT_TMVA_XXX and ROOT_XXX
#   ROOT_TFILE and ROOT_TFILE
# Depending on the classification of the similarity,
# the comparison is written as a single string to
# either assumptions (if we're sure they are intended
# to be the same symbol) or in unsure_assumptions if
# we are not 100% sure if they are supposed to be
# the same symbol.
# Otherwise returns False and adds no message to any
# list.
def similar_guards(a, b):
    """Heuristically decide whether guard symbols a and b mean the same header."""
    global assumptions
    global unsure_assumptions
    global failed_assumptions
    original_a = a[:]
    original_b = b[:]
    # This pair shows up so often that it gets its own special case.
    special = ("ROOT_TGButton", "ROOT_TGWidget")
    if a in special and b in special:
        return True
    a = a.lower()
    b = b.lower()
    # A _h/_hh suffix on the #ifndef symbol makes the double guard obvious.
    if a.endswith(("_h", "_hh")):
        return True
    # Keep stripping the prefixes/suffixes people like to add or drop,
    # until neither symbol changes any more.
    strip_prefixes = ("roott", "root", "roo", "_",
                      "tmva_", "fit_", "math_", "genvector_")
    strip_suffixes = ("_hh", "_h")
    while True:
        before = (a, b)
        for prefix in strip_prefixes:
            a = remove_prefix(a, prefix)
            b = remove_prefix(b, prefix)
        for suffix in strip_suffixes:
            a = remove_suffix(a, suffix)
            b = remove_suffix(b, suffix)
        if (a, b) == before:
            break
    if a == b:
        assumptions.append("\n" + currentFile + ":\n" + original_a + "\n" + original_b)
        return True
    if edit_distance(a, b) <= 5:
        unsure_assumptions.append("\n" + currentFile + "\n" + original_a + "\n" + original_b)
        return True
    return False
def fix_guard(guard):
    """re.sub callback for the double-guard pattern in fix_double_guards.

    guard.group(1) is the #ifndef symbol, guard.group(2) the #include target,
    guard.group(0) the whole matched block. Returns the bare #include when
    the #ifndef symbol is (or is similar to) the included header's own guard
    symbol; otherwise returns the match unchanged and records the failure.
    """
    global processedIncludes
    global assumptions
    global unsure_assumptions
    global failed_assumptions
    # Check for some false-positives.
    if guard.group(1) == "__CINT__":
        return guard.group(0)
    if guard.group(1) == "NDEBUG":
        return guard.group(0)
    if "WIN32" in guard.group(1):
        return guard.group(0)
    # This is probably not for header guard checking... e.g. NO_MATHCORE and so on.
    if guard.group(1).startswith("NO_"):
        return guard.group(0)
    included_header = ntpath.basename(clean_include_path(guard.group(2)))
    header_path = find_header(included_header)
    if header_path is None:
        # Fixed typo in the user-facing message ("Coulnd't").
        eprint(currentFile + ": Couldn't find file for header: " + included_header)
        return guard.group(0)
    guard_symbol = get_guard_symbol(header_path)
    if guard_symbol is None:
        eprint(currentFile + ": Couldn't find a header guard for " + header_path)
        return guard.group(0)
    # Exact match short-circuits; similar_guards also records its verdict.
    if guard_symbol == guard.group(1) or similar_guards(guard_symbol, guard.group(1)):
        processedIncludes += 1
        return "#include " + guard.group(2)
    failed_assumptions.append("\n" + currentFile + ":\n" + guard_symbol + "\n" + guard.group(1))
    return guard.group(0)
def fix_double_guards(path):
    """Rewrite file at path, collapsing '#ifndef X / #include Y / #endif'
    blocks into a bare #include where fix_guard approves the match."""
    double_guard_regex = re.compile(
        r"#ifndef[ ]+([\S]+)[^\n]*\n[\n ]*#include[ ]*([\S]+)\n[\n ]*#endif")
    original = read(path)
    fixed = double_guard_regex.sub(fix_guard, original)
    # Only touch the file on disk if something actually changed.
    if fixed != original:
        write(path, fixed)
def run_tests():
    """Tiny smoke tests for is_no_op_line; raises AssertionError on failure."""
    for sample in ("\n", " /* \n", " // \n", " * \n"):
        assert is_no_op_line(sample)
run_tests()

# Walk the whole tree and rewrite every file with double header guards.
for root, subdirs, files in os.walk("."):
    # Skip roottest for now. We should remove this when we do the same for roottest :)
    if "/roottest/" in root:
        continue
    for fname in files:  # renamed from `file`, which shadows a builtin
        path = os.path.join(root, fname)
        currentFile = path
        try:
            fix_double_guards(path)
        except KeyboardInterrupt:
            # Use sys.exit instead of the site-module exit() helper.
            sys.exit(1)
        except AssertionError:
            sys.exit(1)
        except UnicodeDecodeError:
            # Binary or non-UTF-8 file: skip it silently.
            pass

# Final report, grouped by how confident we are in each rewrite.
print("\n\n\n\n\nProbably correct typo corrections:" + str(len(assumptions)))
for a in assumptions:
    print(a)
print("\n\n\n\n\nDubious typo corrections:" + str(len(unsure_assumptions)))
for a in unsure_assumptions:
    print(a)
# Fixed typo in the user-facing heading ("Unuccessful").
print("\n\n\n\n\nUnsuccessful typo corrections:" + str(len(failed_assumptions)))
for a in failed_assumptions:
    print(a)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment