Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
A helper script to create summarize flake8 output.
#!/usr/bin/env python
"""
A script for parsing a flake8 error log and generating useful stats about the
errors in the code.
Author: Ben Lopatin (I think I wrote it, at least, no guarantee)
License: BSD
"""
import sys
from collections import OrderedDict
ERROR_CODE_GROUPS = {
"E1": "Indentation",
"E2": "Whitespace",
"E3": "Blank line",
"E4": "Import",
"E5": "Line length",
"E7": "Statement",
"E9": "Runtime",
"W1": "Indentation warning",
"W2": "Whitespace warning",
"W3": "Blank line warning",
"W6": "Deprecation warning",
"C9": "Excessive complexity",
"N8": "Naming error",
"F4": "Import warning",
"F8": "Undefined or unused name",
}
ERROR_CODES = {
# PEP8
"E273": "tab after keyword",
"E224": "tab after operator",
"E274": "tab before keyword",
"E223": "tab before operator",
"W291": "trailing whitespace",
"E201": "whitespace after '('",
"E202": "whitespace before ')'",
"E203": "whitespace before ':'",
"E211": "whitespace before '('",
"E113": "unexpected indentation",
"E303": "too many blank lines (3)",
"W191": "indentation contains tabs",
"W292": "no newline at end of file",
"W391": "blank line at end of file",
"E112": "expected an indented block",
"E231": "missing whitespace after ','",
"E401": "multiple imports on one line",
"W603": "'<>' is deprecated, use '!='",
"E271": "multiple spaces after keyword",
"E222": "multiple spaces after operator",
"E272": "multiple spaces before keyword",
"E301": "expected 1 blank line, found 0",
"W293": "blank line contains whitespace",
"E221": "multiple spaces before operator",
"E302": "expected 2 blank lines, found 0",
"E703": "statement ends with a semicolon",
"E901": "SyntaxError or IndentationError",
"E116": "unexpected indentation (comment)",
"E225": "missing whitespace around operator",
"W601": ".has_key() is deprecated, use 'in'",
"E115": "expected an indented block (comment)",
"E265": "block comment should start with '# '",
"W602": "deprecated form of raising exception",
"E111": "indentation is not a multiple of four",
"E262": "inline comment should start with '# '",
"E704": "multiple statements on one line (def)",
"E266": "too many leading '#' for block comment",
"E713": "test for membership should be 'not in'",
"W604": "backticks are deprecated, use 'repr()'",
"E701": "multiple statements on one line (colon)",
"E721": "do not compare types, use 'isinstance()'",
"E228": "missing whitespace around modulo operator",
"E261": "at least two spaces before inline comment",
"E101": "indentation contains mixed spaces and tabs",
"E304": "blank lines found after function decorator",
"E502": "the backslash is redundant between brackets",
"E702": "multiple statements on one line (semicolon)",
"E714": "test for object identity should be 'is not'",
"E731": "do not assign a lambda expression, use a def",
"E114": "indentation is not a multiple of four (comment)",
"E227": "missing whitespace around bitwise or shift operator",
"E251": "unexpected spaces around keyword / parameter equals",
"E242": "tab after ','",
"E241": "multiple spaces after ','",
"E501": "line too long (82 > 79 characters)",
"E133": "closing bracket is missing indentation",
"E226": "missing whitespace around arithmetic operator",
"E131": "continuation line unaligned for hanging indent",
"E711": "comparison to None should be 'if cond is None:'",
"E124": "closing bracket does not match visual indentation",
"E127": "continuation line over-indented for visual indent",
"E122": "continuation line missing indentation or outdented",
"E126": "continuation line over-indented for hanging indent",
"E128": "continuation line under-indented for visual indent",
"E121": "continuation line under-indented for hanging indent",
"E125": "continuation line with same indent as next logical line",
"E129": "visually indented line with same indent as next logical line",
"E712": "comparison to True should be 'if cond is True:' or 'if cond:'",
"E123": "closing bracket does not match indentation of opening bracket's line",
# McCabe
'C901': "function is too complex",
# Naming",
"N801": "class names should use CapWords convention",
"N802": "function name should be lowercase",
"N803": "argument name should be lowercase",
"N804": "first argument of a classmethod should be named 'cls'",
"N805": "first argument of a method should be named 'self'",
"N806": "variable in function should be lowercase",
"N811": "constant imported as non constant",
"N812": "lowercase imported as non lowercase",
"N813": "camelcase imported as lowercase",
"N814": "camelcase imported as constant",
# PyFlakes
"F401": "module imported but unused",
"F402": "import module from line N shadowed by loop variable",
"F403": "'from module import *' used; unable to detect undefined names",
"F404": "future import(s) name after other statements",
"F811": "redefinition of unused name from line N",
"F812": "list comprehension redefines name from line N",
"F821": "undefined name name",
"F822": "undefined name name in __all__",
"F823": "local variable name ... referenced before assignment",
"F831": "duplicate argument name in function definition",
"F841": "local variable name is assigned to but never used",
}
class CodeTree(dict):
"""
>>> x = CodeTree(('mycode/module.py', 10, 2, 'E225', 'missing whitespace'))
>>> x = CodeTree(('mycode/module.py', 10, 2, 'E225', 'missing whitespace'))
"""
pass
def main(filename):
"""
Runs the program
"""
filedata = None
parsing_error_count = 0
parsed_rows = []
with open(filename, 'r') as f:
filedata = f.readlines()
if filedata is None:
print("No file data")
exit(1)
for filerow in filedata:
try:
code, error, msg = filerow.split(' ', 2)
except ValueError:
parsing_error_count += 1
continue
try:
module, line, column = code.split(':')[:3]
except ValueError:
parsing_error_count += 1
continue
parsed_rows.append((module, line, column, error, msg))
print("Skipped {} rows due to parsing errors".format(parsing_error_count))
module_stats = pep8_module_stats(parsed_rows)
format_stats(module_stats)
format_error_summary(module_stats)
def pep8_module_stats(parsed_rows):
"""
:param parsed_rows: list of tuples
:returns: dictionary like {'module/file.py': {'E225': 5, 'E110': 1}, ... }
"""
error_stats = {}
for row in parsed_rows:
file_name, err = row[0], row[3]
if file_name not in error_stats:
error_stats[file_name] = {}
if err in error_stats[file_name]:
error_stats[file_name][err] += 1
else:
error_stats[file_name][err] = 1
return error_stats
def format_stats(stats):
"""
Prints an error stats dictionary of the sum of each error across all files.
"""
ordered_keys = sorted(stats.keys())
for key in ordered_keys:
print(key)
print("~" * len(key))
# Sorted list of counts (values) as keys
counted_stats = OrderedDict(sorted(stats[key].items(),
key=lambda t: t[1], reverse=True))
for stat in counted_stats:
print("({}) {}: {}".format(counted_stats[stat], stat,
ERROR_CODES.get(stat, "<code not matched>")))
print("\n")
def format_error_summary(stats):
"""
Prints a summary of errors across all modules by major category
"""
group_counts = {key: 0 for key in ERROR_CODE_GROUPS.keys()}
key_errors = []
for module, errors in stats.items():
for error, count in errors.items():
try:
group_counts[error[:2]] += count
except KeyError:
key_errors.append(error)
continue
ordered_errors = OrderedDict(sorted(group_counts.items(),
key=lambda t: t[1], reverse=True))
header = "Errors & warnings summary"
print(header)
print("-" * len(header))
for error, total in ordered_errors.items():
print("{desc} ({err}): {count}".format(
desc=ERROR_CODE_GROUPS.get(error, "Unknown"),
err=error, count=total))
if __name__ == '__main__':
if len(sys.argv) != 2:
print("You must provide 1 filename argument")
exit(1)
main(sys.argv[1])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment