Skip to content

Instantly share code, notes, and snippets.

@clee704
Last active November 28, 2015 12:34
Show Gist options
  • Save clee704/4ad73918d1c408b8814c to your computer and use it in GitHub Desktop.
Save clee704/4ad73918d1c408b8814c to your computer and use it in GitHub Desktop.
Simple tag expression matcher
LPAR = 0
RPAR = 1
AND = 2
OR = 3
NOT = 4
IDENT = 5
EOF = 6
token_names = ('LPAR', 'RPAR', 'AND', 'OR', 'NOT', 'IDENT', 'EOF')
class Lexer(object):
symbol_map = {
'(': LPAR,
')': RPAR,
'+': AND,
',': OR,
'~': NOT,
}
keyword_map = {
'and': AND,
'or': OR,
'not': NOT,
}
def __init__(self, input):
self.input = input
self.curpos = -1
self.nextpos = 0
self.token = (EOF, '')
def next(self):
n = len(self.input)
i = self.nextpos
while i < n and self.input[i].isspace():
i += 1
self.curpos = i
if i == n:
self.nextpos = i
self.token = (EOF, '')
return self.token
c = self.input[i]
if c in self.symbol_map:
self.nextpos = i + 1
self.token = (self.symbol_map[c], c)
return self.token
j = i + 1
while j < n and not (self.input[j].isspace() or
self.input[j] in self.symbol_map):
j += 1
self.nextpos = j
tok = self.input[i:j]
self.token = (self.keyword_map.get(tok.lower(), IDENT), tok)
return self.token
def generate(self):
if self.token[0] == EOF:
self.next()
while self.token[0] != EOF:
yield self.token
self.next()
class Node(object):
def __init__(self, kind, *children):
self.kind = kind
self.children = tuple(children)
def __eq__(self, rhs):
return (isinstance(rhs, Node) and self.kind == rhs.kind and
self.children == rhs.children)
def __repr__(self):
return 'Node(kind={!r}, {})'.format(self.kind,
', '.join(repr(c) for c in self.children))
class ParserException(Exception):
pass
class Parser(object):
def __init__(self, lexer):
self.lexer = lexer
self.stash = None
def parse(self):
ret = self.parse_expr()
tok = self._next_token()
if tok[0] != EOF:
raise ParserException('expected EOF, got {} at pos {}'.format(
token_names[tok[0]], self.lexer.curpos + 1))
return ret
def parse_expr(self):
return self._parse_nary(OR, self.parse_term)
def parse_term(self):
return self._parse_nary(AND, self.parse_atom)
def parse_atom(self):
tok = self._next_token()
if tok[0] == IDENT:
return tok[1]
elif tok[0] == NOT:
return Node(NOT, self.parse_atom())
elif tok[0] == LPAR:
ret = self.parse_expr()
tok = self._next_token()
if tok[0] != RPAR:
raise ParserException('expected RPAR, got {} at pos {}'
.format(token_names[tok[0]], self.lexer.curpos + 1))
return ret
else:
raise ParserException('expected IDENT, NOT, or LPAR, got {} at '
'pos {}' .format(token_names[tok[0]],
self.lexer.curpos + 1))
def _next_token(self):
if self.stash is not None:
ret = self.stash
self.stash = None
return ret
else:
return self.lexer.next()
def _putback(self, token):
assert self.stash is None
self.stash = token
def _parse_nary(self, kind, parse_child):
nodes = [parse_child()]
tok = self._next_token()
while tok[0] == kind:
nodes.append(parse_child())
tok = self._next_token()
self._putback(tok)
return nodes[0] if len(nodes) == 1 else Node(kind, *nodes)
class And(object):
def __init__(self, *matchers):
self.matchers = matchers
def __call__(self, arg):
return all(m(arg) for m in self.matchers)
def __repr__(self):
return 'And({})'.format(', '.join(repr(m) for m in self.matchers))
class Or(object):
def __init__(self, *matchers):
self.matchers = matchers
def __call__(self, arg):
return any(m(arg) for m in self.matchers)
def __repr__(self):
return 'Or({})'.format(', '.join(repr(m) for m in self.matchers))
class Not(object):
def __init__(self, matcher):
self.matcher = matcher
def __call__(self, arg):
return not self.matcher(arg)
def __repr__(self):
return 'Not({!r})'.format(self.matcher)
class Contains(object):
def __init__(self, expected):
self.expected = expected
def __call__(self, arg):
return self.expected in arg
def __repr__(self):
return 'Contains({!r})'.format(self.expected)
def get_matcher(expr):
return _get_matcher(Parser(Lexer(expr)).parse())
def _get_matcher(ast):
if isinstance(ast, Node):
if ast.kind == AND:
return And(*[_get_matcher(c) for c in ast.children])
elif ast.kind == OR:
return Or(*[_get_matcher(c) for c in ast.children])
elif ast.kind == NOT:
return Not(_get_matcher(ast.children[0]))
else:
raise RuntimeError('unknown AST type: {!r}'.format(ast.kind))
else:
return Contains(ast)
import pytest
from matcher import (AND, IDENT, LPAR, NOT, OR, RPAR, Lexer, Node, Parser,
ParserException, get_matcher)
@pytest.mark.parametrize('input, output', [
('a + b', [(IDENT, 'a'), (AND, '+'), (IDENT, 'b')]),
('((anderson and oreo) or not notton)', [
(LPAR, '('),
(LPAR, '('),
(IDENT, 'anderson'),
(AND, 'and'),
(IDENT, 'oreo'),
(RPAR, ')'),
(OR, 'or'),
(NOT, 'not'),
(IDENT, 'notton'),
(RPAR, ')'),
]),
('Dumb And Dumber', [(IDENT, 'Dumb'), (AND, 'And'), (IDENT, 'Dumber')]),
(' a\n +\n b \n', [(IDENT, 'a'), (AND, '+'), (IDENT, 'b')]),
('a+b', [(IDENT, 'a'), (AND, '+'), (IDENT, 'b')]),
])
def test_lexer(input, output):
assert list(Lexer(input).generate()) == output
@pytest.mark.parametrize('input, output', [
('a + b', Node(AND, 'a', 'b')),
('(a and b)', Node(AND, 'a', 'b')),
('((a and ((b))))', Node(AND, 'a', 'b')),
('((anderson and oreo) or not notton)',
Node(OR,
Node(AND, 'anderson', 'oreo'),
Node(NOT, 'notton'))),
('Dumb And Dumber', Node(AND, 'Dumb', 'Dumber')),
('a and b or c', Node(OR, Node(AND, 'a', 'b'), 'c')),
('a and (b or c)', Node(AND, 'a', Node(OR, 'b', 'c'))),
('a or b and c', Node(OR, 'a', Node(AND, 'b', 'c'))),
('a and b and c', Node(AND, 'a', 'b', 'c')),
('a or b or c', Node(OR, 'a', 'b', 'c')),
('not a or b', Node(OR, Node(NOT, 'a'), 'b')),
('a or not b', Node(OR, 'a', Node(NOT, 'b'))),
('not (a or b)', Node(NOT, Node(OR, 'a', 'b'))),
('~a+b,c', Node(OR, Node(AND, Node(NOT, 'a'), 'b'), 'c')),
('a,b,c,d', Node(OR, 'a', 'b', 'c', 'd')),
('a+b+c+d', Node(AND, 'a', 'b', 'c', 'd')),
('~a+b+~c+d', Node(AND, Node(NOT, 'a'), 'b', Node(NOT, 'c'), 'd')),
('~a+b+c,~d',
Node(OR,
Node(AND, Node(NOT, 'a'), 'b', 'c'),
Node(NOT, 'd'))),
('(a,b)+c', Node(AND, Node(OR, 'a', 'b'), 'c')),
])
def test_parser(input, output):
assert Parser(Lexer(input)).parse() == output
@pytest.mark.parametrize('input, error', [
('', "expected IDENT, NOT, or LPAR, got EOF at pos 1"),
('a +', "expected IDENT, NOT, or LPAR, got EOF at pos 4"),
('a a', "expected EOF, got IDENT at pos 3"),
('a (and) b', "expected EOF, got LPAR at pos 3"),
('and and and', "expected IDENT, NOT, or LPAR, got AND at pos 1"),
('(())', "expected IDENT, NOT, or LPAR, got RPAR at pos 3"),
])
def test_parser_error(input, error):
parser = Parser(Lexer(input))
try:
parser.parse()
except ParserException as e:
assert e.message == error
else:
assert False, 'exception should have been thrown'
@pytest.mark.parametrize('input, arg, result', [
('a', ['a'], True),
('a or b', ['a'], True),
('a or b', ['b'], True),
('a or b', ['c'], False),
('a and b or c', ['a'], False),
('a and b or c', ['b'], False),
('a and b or c', ['a', 'b'], True),
('a and b or c', ['c'], True),
('not a', ['a'], False),
('not a', ['b'], True),
('(not a or not b) and (c or d)', ['a'], False),
('(not a or not b) and (c or d)', ['a', 'b'], False),
('(not a or not b) and (c or d)', ['a', 'c'], True),
('(not a or not b) and (c or d)', ['a', 'b', 'c'], False),
('(not a or not b) and (c or d)', ['e'], False),
('(not a or not b) and (c or d)', ['c'], True),
('(not a or not b) and (c or d)', ['d'], True),
('(not a or not b) and (c or d)', ['c', 'e'], True),
])
def test_get_matcher(input, arg, result):
assert get_matcher(input)(arg) == result
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment