Problem: I keep needing a new parsing system.
Solution?: A simple-enough library could be adapted or rewritten as needed.
Related problem: Newish programmers learning about parsing usually face either toy demos or beasts like yacc.
import re
def parse(p, s):
"Return the value from p parsing (a prefix of) s, or None on failure."
for value, rest in p(s): return value
def give(val): return lambda s: [(val, s)]
def take(p, f): return lambda s: [res for val, s1 in p(s) for res in f(val)(s1)]
def drop(p, q): return take(p, lambda _: q)
def cons(p, q): return take(p, lambda vp: take(q, lambda vq: give([vp]+vq)))
def match(regex): return lambda s: [(m.group(0), s[m.end():])
for m in [re.match(regex, s)] if m]
def alt(p, *qs): return lambda s: p(s) or (alt(*qs)(s) if qs else [])
def complement(p): return lambda s: [] if p(s) else [(None, s)]
def star(p): return alt(cons(p, lambda s: star(p)(s)),
give([]))
def opt(p): return alt(p, give(None))
def plus(p): return cons(p, star(p))
- Python's not Haskell
- Error reporting
- Memoizing
from grammar import *
G = grammar(r"""
Exp => Term [+-] Exp | Term
Term => Factor [*/] Term | Factor
Factor => Funcall | Var | Num | [(] Exp [)]
Funcall => Var [(] Exps [)]
Exps => Exp [,] Exps | Exp
Var => [a-zA-Z_]\w*
Num => [-+]?[0-9]+([.][0-9]*)?
""")
>>> parse('Exp', '2+3', G)
(['Exp', ['Term', ['Factor', ['Num', '2']]], '+', ['Exp', ['Term', ['Factor', ['Num', '3']]]]], '')
- Produces a concrete parse tree
- Error reporting
- Missing 'not' operator
from peglet import *
from operator import *
def funcall(f, *args): return f(*args)
decimal = int
parse = Parser(r"""
exp term \+ exp :add
exp term - exp :sub
exp term
term factor \* term :mul
term factor / term :div
term factor
factor var :eval [(] exps [)] :funcall
factor var
factor num
factor [(] exp [)]
exps exp , exps
exps exp
var ([a-zA-Z_]\w*)
num ([-+]?[0-9]+(?:[.][0-9]*)?) :decimal
""", **globals())
>>> parse('2+3')
(5,)
- A bit over the 1-page target (not counting comments)