Last active
November 7, 2017 12:41
-
-
Save tueda/489f55142c212aa80ce54f1256028d5e to your computer and use it in GitHub Desktop.
FORM lint. #bin #form #python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh | |
""":" . | |
exec python "$0" "$@" | |
""" | |
import argparse | |
import copy | |
import re | |
import sys | |
from collections import namedtuple | |
__doc__ = """FORM lint. | |
Example | |
------- | |
$ formlint.py myprog.frm | |
Python versions | |
--------------- | |
2.7, 3.2, 3.3, 3.4, 3.5, 3.6 | |
Additionally, 2.6 + argparse should work. | |
""" | |
# Python 2/3 compatibility: tuples usable as the second argument of
# isinstance() for "any integer" and "any string".
_PY3 = sys.version_info[0] >= 3
if _PY3:
    integer_types = (int,)
    string_types = (str,)
else:
    integer_types = (int, long)  # noqa: F821
    string_types = (basestring,)  # noqa: F821

# colorama is optional: try the standalone package first, then the copy
# vendored inside pip.  If neither import succeeds the name ``colorama`` stays
# undefined, which the NameError handler below relies on.
try:
    import colorama
except ImportError:
    try:
        import pip._vendor.colorama as colorama
    except ImportError:
        pass

try:
    # Raises NameError when no colorama module could be imported above.
    colorama.init()

    def _error_str(s):
        """Return ``s`` wrapped in bright-red terminal styling."""
        return (colorama.Style.BRIGHT + colorama.Fore.RED +
                s + colorama.Style.RESET_ALL)

    def _warning_str(s):
        """Return ``s`` wrapped in bright-magenta terminal styling."""
        return (colorama.Style.BRIGHT + colorama.Fore.MAGENTA +
                s + colorama.Style.RESET_ALL)
except NameError:
    # No colorama available: fall back to plain, uncoloured text.
    def _error_str(s):
        """Return ``s`` unchanged (no colour support)."""
        return s

    def _warning_str(s):
        """Return ``s`` unchanged (no colour support)."""
        return s
class LintError(Exception):
    """Lint error.

    An instance carries an error code (a key of ``_messages``) plus the
    positional arguments that are substituted into the message template.
    Codes whose first character is ``W`` are rendered as warnings; all
    others are rendered as errors.
    """

    def __init__(self, error_code, *args):
        """Construct a lint error object.

        ``error_code`` is a key of ``_messages``; ``args`` are the values for
        the ``{0}``, ``{1}``, ... placeholders of that message.
        """
        self._error_code = error_code
        self._args = args

    @property
    def error_code(self):
        """Return the error code."""
        return self._error_code

    def format(self, filename, lineno):
        """Format the error message.

        Return ``"<filename>:<lineno>: <code> <message>"`` where the code is
        passed through ``_warning_str`` for ``W`` codes and ``_error_str``
        otherwise.
        """
        msg = self._messages[self._error_code]
        msg = msg.format(*self._args)
        return '{0}:{1}: {2} {3}'.format(
            filename,
            lineno,
            _error_str(self._error_code) if self._error_code[0] != 'W'
            else _warning_str(self._error_code),
            msg
        )

    # Message templates, keyed by error code.
    _messages = {
        # pattern matchings
        'E101': 'may-not-work pattern: f(x?)*x? for {0}',
        'E102': 'may-not-work pattern: f1(f2(?a,x?,?b))*f3(x?) for {0}',
        'E103': 'may-not-work pattern: f(x^n?) for {0}',
        # folds
        'E201': 'end of fold without starting',
        'E202': 'fold not ended',
        'E203': 'fold name mismatch',
        # line length
        'E301': 'line too long ({0} > {1} characters)',
        # folds
        'W201': 'unclosed fold',
    }

    # One FORM snippet per error code that is expected to trigger that code;
    # the accompanying unit test lints each snippet and checks that at least
    # one error is counted.
    _explanations = {
        'E101': '''
S x,y;
CF f;
L F = f(x)*f(y)*x; * match
L G = f(x)*f(y)*y; * no match (bug)
id f(x?)*x? = 1;
P;
.end
''',
        'E102': '''
* Issue #98
S x,y,z;
CF f1,f2,f3;
L F = f1(f2(x,y,z))*f3(x); * no match(bug)
L G = f1(f2(x,y,z))*f3(y); * no match(bug)
L H = f1(f2(x,y,z))*f3(z); * match
id f1(f2(?a,x?,?b))*f3(x?) = 1;
P;
.end
''',
        'E103': '''
* Issue #63
CF f;
S x,n;
L F = f(x); * no match (bug)
L G = f(x^2); * match
id f(x^n?) = 1;
P;
.end
''',
        'E201': '''
*--#] name : {0}
'''.format(''),  # workaround for W291
        'E202': '''
*--#[ name :
''',
        'E203': '''
*--#[ name1 :
*--#] name2 : {0}
'''.format(''),  # workaround for W291
        'E301': '''
* The stedi editor cannot edit lines longer than 255 characters.
L F ={0};
'''.format(' + 1234567890' * 20),
        'W201': '''
* The autoclose feature of the stedi editor requires a tailing
* space for closed folds.
*--#[ name :
*--#] name :
''',
    }
class Node(list):
    """Node for expression trees.

    Expressions in patterns are stored as expression trees. Because we do not
    go for parsing all syntax contexts in FORM, there is no distinction for
    symbols, vectors, functions etc. of FORM. We call an expression tree with
    a string head but without children a symbol. When a tree has children,
    it is called a function. The head of a node may contain a leading/trailing
    ``?`` for a wildcard. We also allow an integer for a head.

    Unary/binary operations ``+``, ``-``, ``*``, ``/``, ``^`` and ``.`` are
    expressed by functions with the heads ``plus_``, ``times_``, ``power_``
    and ``dot_``.

    The node itself is the list of its children.

    Examples
    --------
    >>> f = Node('f')
    >>> x = Node('x')
    >>> f.append(x)
    >>> print(f)
    f(x)
    """

    def __init__(self, value):
        """Construct a node whose head is ``value`` (a string or integer)."""
        self._str = str(value)
        self._head = value  # without set, $-variable
        self._refs = tuple()  # wildcards referred in the set
        self._special = value in ('plus_', 'times_', 'power_', 'dot_')
        self._parent = None
        self._free_cache = {}  # memo for free(): head -> bool
        if isinstance(value, string_types) and not value.startswith('?'):
            # Remove set specification and $-variables,
            # e.g., x?xset$x -> x?
            self._head = re.sub(r'\?.*', '?', value)
            # TODO: consider refs

    @property
    def head(self):
        """Return the head."""
        return self._head

    @property
    def parent(self):
        """Return the parent."""
        return self._parent

    @property
    def is_special(self):
        """Return True for special functions."""
        return self._special

    @property
    def is_symbol(self):
        """Return True for symbols. Maybe a wildcard."""
        return len(self) == 0 and isinstance(self._head, string_types)

    @property
    def is_function(self):
        """Return True for (non-special) functions. Maybe a wildcard."""
        return not self._special and len(self) >= 1

    @property
    def is_symbol_wildcard(self):
        """Return True for symbol wildcards."""
        return (len(self) == 0 and isinstance(self._head, string_types) and
                self._head.endswith('?'))

    @property
    def is_argument_field_wildcard(self):
        """Return True for argument field wildcards."""
        return (len(self) == 0 and isinstance(self._head, string_types) and
                self._head.startswith('?'))

    def __bool__(self):
        """Return True.

        A list subclass would otherwise be falsy when it has no children;
        callers use ``if node:`` to distinguish a node from ``None``.
        """
        return True

    __nonzero__ = __bool__  # for python2

    def __str__(self):
        """Informal string representation."""
        if len(self):
            return '{0}({1})'.format(
                self._str, ', '.join(str(x) for x in self))
        else:
            return self._str

    def append(self, x):
        """Add an item to the end of the list.

        If ``x`` already has a parent, a shallow copy of it is appended
        instead, so the original keeps its parent link.
        """
        if x._parent:
            x = copy.copy(x)
        x._parent = self
        list.append(self, x)

    def optimize(self):
        """Optimize the node.

        Works in place: flattens nested ``plus_``/``times_`` nodes, folds
        integer children together, and replaces a one-child
        ``plus_``/``times_`` node by that child.
        """
        # First, optimize the children.
        for a in self:
            a.optimize()
        if self._head == 'plus_' or self._head == 'times_':
            # plus_(?a,plus(?b),?c) -> plus(?a,?b,?c)
            # same for times_
            loop = True
            while loop:
                loop = False
                for i in range(len(self)):
                    a = self[i]
                    if a._head == self._head:
                        for b in a:
                            b._parent = self
                        # Splice the grandchildren in place of the child,
                        # then rescan because the list has changed.
                        self[i:i + 1] = a[:]
                        loop = True
                        break
            # Numbers come first.
            self.sort(key=lambda x: not isinstance(x._head, integer_types))
            # Combine numbers.
            if self._head == 'plus_':
                while len(self) >= 2:
                    x = self[0]
                    y = self[1]
                    if (isinstance(x._head, integer_types) and
                            isinstance(y._head, integer_types)):
                        assert len(x) == len(y) == 0
                        x._head += y._head
                        x._str = str(x._head)
                        del self[1]
                    else:
                        break
            if self._head == 'times_':
                while len(self) >= 2:
                    x = self[0]
                    y = self[1]
                    if (isinstance(x._head, integer_types) and
                            isinstance(y._head, integer_types)):
                        assert len(x) == len(y) == 0
                        x._head *= y._head
                        x._str = str(x._head)
                        del self[1]
                    else:
                        break
        if (len(self) == 1 and
                (self._head == 'plus_' or self._head == 'times_')):
            # plus_(x?) -> x
            # same for times_
            a = self[0]
            self._str = a._str
            self._head = a._head
            self._refs = a._refs
            self._special = a._special
            self[:] = a[:]
            for a in self:
                a._parent = self
        # The subtree may have changed, so drop memoized free() answers.
        self._free_cache = {}

    def free(self, x):
        """Return True if the subtree from this node does not contain ``x``.

        ``x`` is compared against node heads; results are memoized per node
        in ``_free_cache``.
        """
        if x in self._free_cache:
            return self._free_cache[x]
        if self._head == x:
            self._free_cache[x] = False
            return False
        for a in self:
            if not a.free(x):
                self._free_cache[x] = False
                return False
        self._free_cache[x] = True
        return True
class Token(namedtuple('Token', 'type value')):
    """Token produced by the pattern scanner.

    ``type`` is one of the integer constants ``NONE`` (end marker),
    ``SYMBOL`` or ``NUMBER``, or, for operators and punctuation, the
    operator string itself (``'+'``, ``'('``, ...).  ``value`` is the matched
    text, or an ``int`` for ``NUMBER`` tokens.
    """

    __slots__ = ()

    NONE = 0
    SYMBOL = -1
    NUMBER = -2

    def __bool__(self):
        """Return True unless it is the end."""
        return self.type != self.NONE

    __nonzero__ = __bool__  # for python2

    # xyz, xyz`abc' `abc'xyz, xyz{`j'+1} etc.
    # TODO: nested constructs, e.g., `a{`j'+1}'
    _raw_symbol = (
        r'(?:'
        r"(?:`[^`']+')?[0-9a-zA-Z]*`[^`']*`[^`']+'[^`']*'"  # hack: `a'b`c`d''
        r'|'
        r"[a-zA-Z][0-9a-zA-Z]*(?:[0-9a-zA-Z]|`[^`']+'|{[^{}]+})*_?"
        r'|'
        r"`[^`']+'(?:[0-9a-zA-Z]|`[^`']+'|{[^{}]+})*_?"
        r')'
    )

    # raw-symbol + [...]
    # TODO: nested constructs, e.g., [a+[b]]
    # All fragments are raw strings: the regex text is the same as before,
    # but "\[" in a non-raw literal is an invalid escape sequence.
    _symbol = (
        r'(?:' + _raw_symbol +
        r'|'
        r'\[[^\[\]]+\]'
        r')'
    )

    _dollar = (
        r'\$' + _raw_symbol
    )

    _trailer = (
        r'(?:'
        r'\?'
        # set
        r'(?:'
        r'!?'
        r'(?:'
        r'\{[^}]+\}'
        r'|' + _symbol +
        r')'
        r'(?:'
        r'\[[^]]+\]'
        r')?'
        r')?'
        # $-variable
        r'(?:' + _dollar +
        r')?'
        # end
        r')?'
    )

    _scanner = re.Scanner([
        # symbol and optionally wildcard
        (_symbol + _trailer,
         lambda self, token: Token(Token.SYMBOL, token)),
        # $-variable
        (_dollar,
         lambda self, token: Token(Token.SYMBOL, token)),
        # argument field wildcard
        (r'\?' + _raw_symbol + r'(?:' + _dollar + r')?',
         lambda self, token: Token(Token.SYMBOL, token)),
        # ...: well, it will be expanded as symbols
        (r'\.\.\.',
         lambda self, token: Token(Token.SYMBOL, token)),
        # integer, e.g., '123'
        (r'[1-9][0-9]*|0',
         lambda self, token: Token(Token.NUMBER, int(token))),
        # operators, parentheses, etc.
        # '**' is normalized to '^': the parser dispatches on the token
        # *type*, so the type (not only the value) must be '^'.
        (r'\*\*', lambda self, token: Token('^', '^')),
        (r'\+', lambda self, token: Token(token, token)),
        (r'\-', lambda self, token: Token(token, token)),
        (r'\*', lambda self, token: Token(token, token)),
        (r'\/', lambda self, token: Token(token, token)),
        (r'\^', lambda self, token: Token(token, token)),
        (r'\(', lambda self, token: Token(token, token)),
        (r'\)', lambda self, token: Token(token, token)),
        (r'\,', lambda self, token: Token(token, token)),
        (r'\.', lambda self, token: Token(token, token)),
        # Ignore < and > of <...>*...*<...>. After this operation, the pattern
        # should be still syntactically correct.
        (r'[<>]', None),
        # skip spaces
        (r'\s+', None),
    ])

    @classmethod
    def scan(cls, s):
        """Tokenize the given string.

        Return ``(tokens, remainder)``: the list of tokens, always terminated
        by a falsy end token of type ``NONE``, and the unscanned tail of
        ``s`` (empty when everything was recognized).
        """
        tokens, remainder = cls._scanner.scan(s)
        tokens.append(Token(cls.NONE, '$'))  # append the end token
        return tokens, remainder
def parse_pattern(s):
    """Parse the given string as a pattern.

    Return ``(node, remainder_tokens, remainder_str)``: the optimized
    expression tree (``None`` if nothing could be parsed), the tokens left
    over after the tree (end token removed), and the text the scanner could
    not tokenize.
    """
    tokens, remainder_str = Token.scan(s)
    node, remainder_tokens = _parse_expr(tokens)
    while node:
        node.optimize()
        text = str(node)
        is_preprocessor_symbol = (node.is_symbol and
                                  text.startswith("`") and
                                  text.endswith("'"))
        if not (remainder_tokens and is_preprocessor_symbol):
            break
        # This happens when a preprocessor variable is used for
        # id-statement options, e.g.,
        #   id `IDOPT' x = 1;
        # Skip it (and one following comma) and parse the rest instead.
        tokens = remainder_tokens
        if tokens[0].type == ',':
            tokens = tokens[1:]
        node, remainder_tokens = _parse_expr(tokens)
    # Drop the end token before handing the leftovers back.
    return node, remainder_tokens[:-1], remainder_str
# Syntax | |
# ------ | |
# expr: term (('+'|'-') term)* | |
# term: factor (('*'|'/') factor)* | |
# factor: ('+'|'-') factor | power | |
# power: atom ['^' factor] | |
# atom: func | dot | SYMBOL | NUMBER | '(' expr ')' | |
# func: SYMBOL '(' [arglist] ')' | |
# dot: SYMBOL '.' SYMBOL | |
# arglist: expr (',' expr)* | |
# Each _parse_xxx() function accepts a list of tokens as the argument, | |
# and returns a tuple of the parsed expression tree (None if failed) and | |
# remainder tokens. | |
def _parse_expr(tokens):
    """Parse ``term (('+'|'-') term)*`` into a ``plus_`` node.

    A subtracted term is stored as ``times_(-1, term)``.  Return
    ``(None, tokens)`` when no leading term can be parsed.
    """
    first, tokens = _parse_term(tokens)
    if not first:
        return None, tokens
    total = Node('plus_')
    total.append(first)
    while tokens[0].type in ('+', '-'):
        operator = tokens[0].type
        operand, rest = _parse_term(tokens[1:])
        if not operand:
            # Dangling operator: leave it among the remainder tokens.
            break
        if operator == '-':
            negated = Node('times_')
            negated.append(Node(-1))
            negated.append(operand)
            operand = negated
        total.append(operand)
        tokens = rest
    return total, tokens
def _parse_term(tokens):
    """Parse ``factor (('*'|'/') factor)*`` into a ``times_`` node.

    A divisor is stored as ``power_(factor, -1)``.  Return ``(None, tokens)``
    when no leading factor can be parsed.
    """
    first, tokens = _parse_factor(tokens)
    if not first:
        return None, tokens
    product = Node('times_')
    product.append(first)
    while tokens[0].type in ('*', '/'):
        operator = tokens[0].type
        operand, rest = _parse_factor(tokens[1:])
        if not operand:
            # Dangling operator: leave it among the remainder tokens.
            break
        if operator == '/':
            inverse = Node('power_')
            inverse.append(operand)
            inverse.append(Node(-1))
            operand = inverse
        product.append(operand)
        tokens = rest
    return product, tokens
def _parse_factor(tokens):
    """Parse ``('+'|'-')* power``.

    An odd number of leading minus signs wraps the result in
    ``times_(-1, power)``.  On failure the *original* token list is returned
    so the consumed signs are not lost.
    """
    start = tokens
    negative = False
    # Consume any run of unary signs, tracking the overall parity.
    while tokens[0].type in ('+', '-'):
        if tokens[0].type == '-':
            negative = not negative
        tokens = tokens[1:]
    operand, tokens = _parse_power(tokens)
    if not operand:
        return None, start
    if not negative:
        return operand, tokens
    negated = Node('times_')
    negated.append(Node(-1))
    negated.append(operand)
    return negated, tokens
def _parse_power(tokens):
    """Parse ``atom ['^' factor]``.

    Return a ``power_(base, exponent)`` node when an exponent follows,
    otherwise just the atom.  Return ``(None, tokens)`` when no atom can be
    parsed.
    """
    base, tokens = _parse_atom(tokens)
    if not base:
        return None, tokens
    if tokens[0].type != '^':
        return base, tokens
    exponent, rest = _parse_factor(tokens[1:])
    if not exponent:
        # '^' without a usable exponent: keep it in the remainder.
        return base, tokens
    power = Node('power_')
    power.append(base)
    power.append(exponent)
    return power, rest
def _parse_atom(tokens):
    """Parse ``func | dot | SYMBOL | NUMBER | '(' expr ')'``.

    Return ``(node, remaining_tokens)``, or ``(None, tokens)`` when the
    leading token starts none of these.
    """
    lead = tokens[0]

    if lead.type == Token.NUMBER:
        return Node(lead.value), tokens[1:]

    if lead.type == '(':
        inner, rest = _parse_expr(tokens[1:])
        if inner and rest[0].type == ')':
            return inner, rest[1:]
        return None, tokens

    if lead.type != Token.SYMBOL:
        return None, tokens

    symbol = Node(lead.value)
    tokens = tokens[1:]
    follower = tokens[0].type

    if follower == '(':
        # Check func.
        first_arg, rest = _parse_expr(tokens[1:])
        if first_arg is None:
            # Empty argument list: f()
            if rest[0].type == ')':
                return symbol, tokens[2:]
        else:
            args = [first_arg]
            while rest[0].type == ',':
                arg, after = _parse_expr(rest[1:])
                if not arg:
                    break
                args.append(arg)
                rest = after
            if rest[0].type == ')':
                for arg in args:
                    symbol.append(arg)
                return symbol, rest[1:]
    elif follower == '.':
        # Check dot.
        right = tokens[1]
        if right.type == Token.SYMBOL:
            dot = Node('dot_')
            dot.append(symbol)
            dot.append(Node(right.value))
            return dot, tokens[2:]

    # Plain symbol (or a call/dot that failed to parse).
    return symbol, tokens
def filter_error(error_code):
    """Return True if the given lint error is enabled.

    A code is disabled when it starts with any prefix in ``opts.ignore``.
    Otherwise it is enabled when ``opts.select`` is empty or when it starts
    with any prefix in ``opts.select``.
    """
    if any(error_code.startswith(prefix) for prefix in opts.ignore):
        return False
    if not opts.select:
        return True
    return any(error_code.startswith(prefix) for prefix in opts.select)
def print_lint_error(filename, lineno, e, lines_str):
    """Print a lint error.

    ``e`` is either a ``LintError`` or a plain message.  Unless ``opts.quiet``
    is set, the message is printed, followed by the lines of ``lines_str``
    (if any), each stripped and truncated to fit the terminal width.  A
    ``LintError`` always increments ``stats.nerrors``, even in quiet mode;
    plain messages are not counted.
    """
    if not opts.quiet:
        if isinstance(e, LintError):
            print(e.format(filename, lineno))
        else:
            print('{0}:{1}: {2}'.format(filename, lineno, e))
        if lines_str:
            lines = lines_str.strip().split('\n')
            for i in range(len(lines)):
                s = lines[i].strip()
                if len(s) > 75:
                    s = s[:71] + ' ...'  # fit in 79 characters
                # NOTE(review): both indent literals read as a single space
                # here; they may originally have differed in width (first
                # line vs. continuation lines) -- confirm against upstream.
                s = (' ' if i == 0 else ' ') + s
                lines[i] = s
            print('\n'.join(lines))
    if isinstance(e, LintError):
        stats.nerrors += 1
def remove_preprocessor_instructions(lines):
    """Remove preprocessor instructions.

    Return a new list of (approximately) preprocessed lines, one entry per
    input line so that line numbers are preserved:

    * comment lines (starting with ``*`` in column 1) become ``''``;
    * preprocessor instructions (first non-blank character ``#``) become
      ``''``, and so does every following line for as long as the previous
      line ended with a backslash (line continuation);
    * all other lines are stripped of surrounding whitespace and of a
      trailing ``; * comment``.

    The input sequence is not modified.
    """
    result = []
    continued = False  # True while inside a backslash-continued instruction
    for raw in lines:
        line = raw.rstrip()  # don't need trailing spaces + line break
        if continued:
            # continued preprocessor instruction
            # NOTE: the continuation flag must be read *before* the line is
            # blanked; testing the already-emptied string would always
            # yield False.
            continued = line.endswith('\\')
            result.append('')
        elif line.startswith('*'):
            # comment line
            result.append('')
        elif line.lstrip().startswith('#'):
            # preprocessor instruction
            continued = line.endswith('\\')
            # TODO: multiline "...".
            result.append('')
        else:
            # don't need leading spaces
            line = line.lstrip()
            # remove trailing comments, e.g., "sth; * comment".
            result.append(re.sub(r';\s*\*.*$', ';', line))
    return result
def enum_patterns(lines):
    """Find all patterns.

    ``lines`` is a list of (approximately) preprocessed source lines.  Yield
    ``(lineno, pattern)`` for every ``id``/``identify`` statement found, where
    ``lineno`` is the 1-based line on which the statement starts and
    ``pattern`` is the left-hand side with the statement options removed.
    Statements spanning several lines are joined with ``'\\n'``.
    """
    def id_pattern(s):
        """Return the LHS pattern of the id-statement body ``s``."""
        # Ignore options of the id-statement.
        while True:
            s = re.sub(r'^[\s,]+', '', s)  # ignore spaces and commas
            m = re.match(r'(?:multi|many|select|once|only|disorder|all)\b',
                         s, re.IGNORECASE)
            if m:
                s = s[m.end():]
                continue
            m = re.match(r'(?:ifmatch|ifnot?match)\s*->\s*[^\s,]+',
                         s, re.IGNORECASE)
            if m:
                s = s[m.end():]
                continue
            break
        # Ignore the RHS.
        # XXX: symbols like [x=1]
        m = re.search(r'(?<![<>])=', s)
        if m:
            s = s[:m.start()]
        return s.rstrip()

    # ``continued`` is False, or the kind of statement ('id') that is still
    # waiting for its terminating ';' on a later line.
    continued = False
    continued_lineno = -1
    continued_str = ''
    for i, line in enumerate(lines):
        while line:
            if not continued:
                # Find a Identify statement.
                # NOTE: We hope misrecognition of 'CF ID;', 'P "ID";' etc.
                #       doesn't make any mess...
                m = re.search((
                    r"(?<![0-9a-zA-Z_])`id'(?![0-9a-zA-Z_])"  # hack: `id'
                    r'|'
                    r'\b'
                    r'id(?:e(?:n(?:t(?:i(?:f(?:y)?)?)?)?)?)?'  # identify
                    r'\b'
                ), line, re.IGNORECASE)
                if m:
                    j = line.find(';', m.end())
                    if j >= 0:
                        yield i + 1, id_pattern(line[m.end(): j])
                        # More statements may follow on the same line.
                        line = line[j + 1:]
                        continue
                    else:
                        continued = 'id'
                        continued_lineno = i + 1
                        continued_str = line[m.end():] + '\n'
                # TODO: ifmatch, ifnomatch, if(match(...))
            elif continued == 'id':
                line = line.lstrip()
                j = line.find(';')
                if j >= 0:
                    yield continued_lineno, id_pattern(continued_str +
                                                       line[:j])
                    line = line[j + 1:]
                    continued = False
                    continue
                else:
                    continued_str += line + '\n'
            # Nothing more to extract from this line.
            line = None
def check_node(node):
    """Check the given node.

    Recursively inspect the expression tree (children first) and raise
    ``LintError`` for the first enabled may-not-work pattern found:

    * E101: a symbol wildcard that is a direct factor of a product which
      also contains a function mentioning the same wildcard, ``f(x?)*x?``;
    * E102: a symbol wildcard sandwiched between two argument field
      wildcards inside a nested function, while a sibling factor of the
      product also mentions it, ``f1(f2(?a,x?,?b))*f3(x?)``;
    * E103: ``symbol^wildcard`` as a function argument, ``f(x^n?)``.

    Does nothing when ``node`` is ``None``.
    """
    # TODO: How to detect may-not-work patterns for symmetric functions?
    if not node:
        return
    for a in node:
        check_node(a)
    if filter_error('E101'):
        if node.is_symbol_wildcard:
            if node.parent and node.parent.head == 'times_':
                # Look at the sibling factors of this wildcard.
                for f in node.parent:
                    if (f.is_function and f.head != node.head and
                            not f.free(node.head)):
                        raise LintError('E101', node.head)
    if filter_error('E102'):
        if node.is_function:
            # node is f2, node.parent is f1, node.parent.parent the product.
            if (node.parent and node.parent.is_function and
                    node.parent.parent and
                    node.parent.parent.head == 'times_'):
                # TODO: more nested cases
                # the first/last argument field wildcard
                i1 = next((i for i, x in enumerate(node)
                           if x.is_argument_field_wildcard), -1)
                if i1 >= 0:
                    i2 = next((i for i, x in reversed(tuple(enumerate(node)))
                               if x.is_argument_field_wildcard), -1)
                    if i1 < i2:
                        for x in node[i1 + 1:i2]:
                            # x is sandwiched by two argument field wildcards
                            if x.is_symbol_wildcard:
                                for f in node.parent.parent:
                                    if (f.is_function and
                                            f is not node.parent and
                                            f.head != x.head and
                                            not f.free(x.head)):
                                        raise LintError('E102', x.head)
    if filter_error('E103'):
        if (node.parent and node.parent.is_function and
                node.head == 'power_' and node[0].is_symbol and
                node[1].is_symbol_wildcard):
            raise LintError('E103', '{0}^{1}'.format(
                node[0].head, node[1].head))
def lint_patterns(filename, lines):
    """Do lint for patterns.

    ``lines`` are the preprocessed lines of ``filename``.  Every pattern
    found by ``enum_patterns`` is parsed and checked; each ``LintError``
    raised by ``check_node`` is reported via ``print_lint_error`` against
    the line on which the statement starts.  With ``opts.debug_pattern`` set,
    a dump of every pattern and its parse result is printed first.
    """
    if opts.debug_pattern:
        # Debug mode.
        maxlineno = len(lines) + 1
        lineno_width = len(str(maxlineno))
        fmt = '{{0:{0}}} {{1}}'.format(lineno_width)
        # Continuation lines of a multi-line pattern are indented so that
        # they line up under the pattern column.
        line_break = '\n ' + ' ' * lineno_width
        print('* {0} --debug-enum-pattern'.format(filename))
        for lineno, pattern in enum_patterns(lines):
            node, remainder_tokens, remainder_str = parse_pattern(pattern)
            print(fmt.format(lineno, re.sub(r'\r\n?|\n', line_break, pattern)))
            print(fmt.format('', 'node: {0}'.format(node)))
            if remainder_tokens:
                print(fmt.format('', 'remainder_tokens: {0}'.format(
                    remainder_tokens)))
            if remainder_str:
                print(fmt.format('', 'remainder_str: {0}'.format(
                    re.sub(r'\r\n?|\n', ' ', remainder_str))))
        print('')
    for lineno, pattern in enum_patterns(lines):
        # We ignore the remainders because preprocessor variable substitutions
        # can easily confuse us. Just take the part that we can recognize.
        node, _remainder_tokens, _remainder_str = parse_pattern(pattern)
        try:
            check_node(node)
        except LintError as e:
            print_lint_error(filename, lineno, e, pattern)
# An open fold: the 1-based line where it starts and its name (the text
# between the bracket and the colon, spaces included).
Fold = namedtuple('Fold', 'lineno name')


def lint_folds(filename, lines):
    """Do lint for folds.

    ``lines`` are the raw lines of ``filename`` (line breaks kept).  Reports,
    subject to ``filter_error``:

    * E301 for lines longer than ``opts.max_line_length``;
    * E201 for a fold end without an open fold;
    * E203 for a fold end whose name differs from the innermost open fold;
    * W201 for a fold end line that does not finish with a space;
    * E202 for every fold still open at the end of the file.
    """
    folds = []  # stacks fold names
    for i, line in enumerate(lines):
        lineno = i + 1
        text = line.rstrip('\r\n')
        # Check the line length.
        if len(text) > opts.max_line_length:
            if filter_error('E301'):
                print_lint_error(
                    filename,
                    lineno,
                    LintError('E301', len(text), opts.max_line_length),
                    text
                )
        # Check if the line has a fold.
        # A fold line is any three characters, '#', then '[' or ']',
        # a name and a colon, e.g. "*--#[ name :".
        m = re.match(r'...#(\[|\])([^:]*):', line)
        if not m:
            continue
        foldname = m.group(2)
        starting = (line[4] == '[')
        # text cannot be empty here: the regex matched at least six chars.
        closed = (text[-1] == ' ')
        if starting:
            folds.append(Fold(lineno, foldname))
            continue
        if not folds:
            if filter_error('E201'):
                print_lint_error(filename, lineno, LintError('E201'), text)
            continue
        if folds[-1].name != foldname:
            if filter_error('E203'):
                print_lint_error(filename, lineno, LintError('E203'), text)
                # Plain-string message: shown as context, not counted as an
                # additional error.
                print_lint_error(
                    filename,
                    folds[-1].lineno,
                    'started with',
                    lines[folds[-1].lineno - 1]
                )
        if not closed:
            if filter_error('W201'):
                print_lint_error(filename, lineno, LintError('W201'),
                                 text + ' <-- no whitespace')
        # The innermost fold is popped even when the names mismatch.
        folds.pop()
    if filter_error('E202'):
        # Anything left on the stack was never ended; lineno is the last
        # line of the file (the loop above ran at least once if folds is
        # non-empty).
        while folds:
            print_lint_error(filename, lineno, LintError('E202'), '')
            print_lint_error(
                filename,
                folds[-1].lineno,
                'started with',
                lines[folds[-1].lineno - 1]
            )
            folds.pop()
def lint_file(filename):
    """Do lint for the given file.

    Reads ``filename``, bumps ``stats.nfiles``, then runs the pattern checks
    on the preprocessed lines and the fold/line-length checks on the raw
    lines.  With ``opts.debug_preprocessor`` set, the preprocessed lines are
    dumped first.
    """
    # Slurp the file, keeping the line breaks on every line.
    with open(filename) as source:
        raw_lines = source.read().splitlines(True)

    stats.nfiles += 1

    processed_lines = remove_preprocessor_instructions(raw_lines)

    if opts.debug_preprocessor:
        # Debug mode: show what is left after preprocessing.
        width = len(str(len(processed_lines) + 1))
        fmt = '{{0:{0}}} {{1}}'.format(width)
        print('* {0} --debug-preprocessor'.format(filename))
        for lineno, text in enumerate(processed_lines, 1):
            print(fmt.format(lineno, text))
        print('')

    lint_patterns(filename, processed_lines)
    lint_folds(filename, raw_lines)
def main():
    """Entry point.

    Parse the command line, publish the parsed options and the run
    statistics as the module globals ``opts`` and ``stats``, lint every file
    given, print a summary line and exit with status 1 if any error was
    counted.
    """
    # Parse the command line arguments.
    parser = argparse.ArgumentParser(
        usage='%(prog)s [options] [--] [files..]'
    )
    parser.add_argument(
        'files',
        nargs='*',
        help=argparse.SUPPRESS,
    )
    parser.add_argument(
        '-q',
        '--quiet',
        action='store_true',
        help='do not print errors',
    )
    parser.add_argument(
        '-w',
        '--warn',
        action='store_true',
        help='enable errors in the category of warnings',
    )
    parser.add_argument(
        '--max-line-length',
        type=int,
        default=255,
        help='set maximum allowed line length (default: 255)',
        # This option takes a number, not a list of error codes.
        metavar='N',
    )
    parser.add_argument(
        '--ignore',
        help='ignore these errors',
        metavar='errors',
    )
    parser.add_argument(
        '--select',
        help='select only these errors',
        metavar='errors',
    )
    parser.add_argument(
        '--debug-preprocessor',
        action='store_true',
        help=argparse.SUPPRESS,
    )
    parser.add_argument(
        '--debug-pattern',
        action='store_true',
        help=argparse.SUPPRESS,
    )

    # The lint functions read these as module-level globals.
    global opts, stats

    opts = parser.parse_args()

    stats = argparse.Namespace(
        nfiles=0,
        nerrors=0,
    )

    if opts.max_line_length <= 0:
        parser.error('--max-line-length must be > 0')

    # opts.ignore and opts.select as lists
    if not opts.ignore and not opts.select:
        if not opts.warn:
            # Default setting: ignore all warnings.
            opts.ignore = 'W'
    if opts.ignore:
        opts.ignore = opts.ignore.upper().split(',')
    else:
        opts.ignore = []
    if opts.select:
        opts.select = opts.select.upper().split(',')
    else:
        opts.select = []

    # Perform lint.
    for filename in opts.files:
        lint_file(filename)

    # Print the statistics.
    print('{0} file{1}, {2} error{3}'.format(
        stats.nfiles,
        's' if stats.nfiles != 1 else '',
        stats.nerrors,
        's' if stats.nerrors != 1 else '',
    ))

    # Non-zero exit code for errors.
    if stats.nerrors:
        sys.exit(1)
if __name__ == '__main__': | |
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Test formlint.""" | |
import argparse | |
import tempfile | |
import textwrap | |
import unittest | |
import formlint | |
def lint_str(string, expected_error_code=None):
    """Do lint for the given string.

    The (dedented) string is written to a temporary file which is then
    linted by ``formlint.lint_file`` in quiet mode.  When
    ``expected_error_code`` is given, only that error code is selected.
    The outcome is left in ``formlint.stats``.
    """
    string = textwrap.dedent(string.lstrip('\r\n'))
    # The context manager guarantees that the temporary file is closed (and
    # thereby removed) even if linting raises.
    with tempfile.NamedTemporaryFile() as f:
        f.write(string.encode())
        f.flush()  # make the content visible to the reader in lint_file
        formlint.opts = argparse.Namespace(
            quiet=True,
            max_line_length=255,
            ignore=[],
            select=[expected_error_code] if expected_error_code else [],
            debug_preprocessor=False,
            debug_pattern=False,
        )
        formlint.stats = argparse.Namespace(
            nfiles=0,
            nerrors=0,
        )
        formlint.lint_file(f.name)
class TesFORMLint(unittest.TestCase):
    """Tests for formlint."""

    def test_examples(self):
        """Each example snippet must trigger its own error code."""
        examples = formlint.LintError._explanations
        for ecode in sorted(list(examples)):
            # lint_str resets formlint.stats and selects only ``ecode``.
            lint_str(examples[ecode], ecode)
            self.assertTrue(formlint.stats.nerrors > 0)
if __name__ == '__main__': | |
unittest.main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment