Skip to content

Instantly share code, notes, and snippets.

@gdevanla
Last active October 27, 2021 15:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gdevanla/12a52739a9c369c20c41aafba5d4e2d5 to your computer and use it in GitHub Desktop.
Save gdevanla/12a52739a9c369c20c41aafba5d4e2d5 to your computer and use it in GitHub Desktop.
Parser Combinator in Python - Notes
import operator
import typing as tp
"""
a. Structure of Parser
p = (s -> Any object)
Partially consuming parser
p1 = (s -> (a, remaining))
b. Build Small parsers
i. Parse any char
ii. Specific char
iii. Only digits
iv. New abstraction
c. Composing the small parsers
i. compose
ii. bind (may be)
d. Parser Combinators
i. chooseP
ii. Parse and eval a simple expression
iii. Support more complicated expression
"""
import operator
import typing as tp
ParserP = tp.Callable[[str], tp.Tuple[tp.Any, str]]
# A simple parser
example_parser = lambda s: (s[0], s[1:])
class ParserError(Exception):
def __init__(self, msg, content):
super().__init__(f"{msg}: {content}")
# Some helper functions
def parse(p: ParserP, s: str) -> tp.Tuple[tp.Any, str]:
(a, s) = p(s)
return (a, s)
# Tiny parsers
def anyChar() -> ParserP:
def func(s):
return (s[0], s[1:])
return func
# Parser for a specific character
def oneChar(c) -> ParserP:
def func(s):
if s[0] == c:
return (s[0], s[1:])
raise ParserError(f"Unexpected {s[0]}, expecting {c}", s)
return func
# Parse for specific digit
def anyDigit() -> ParserP:
def func(s):
if s[0].isdigit():
return (s[0], s[1:])
raise ParserError(f"Expected digit, got {s[0]}", s)
return func
# Time for some abstraction
def oneCharP(c) -> ParserP:
return satisfy(lambda c1: c == c1)
def anyDigitP() -> ParserP:
return satisfy(lambda c: c.isdigit())
# Generic Predicate Parser
def satisfy(pred_function: tp.Callable[["char"], bool]) -> ParserP:
def func(s):
if not s:
raise ParserError("Empty string", "")
if pred_function(s[0]):
return (s[0], s[1:])
raise ParserError(f"Unexpected condition", s)
return func
# Compose parsers
def compose(p1: ParserP, p2: ParserP) -> ParserP:
def func(s):
(a, s1) = parse(p1, s)
(b, s2) = parse(p2, s1)
return ((a, b), s2)
return func
# Other interesting functions
#
# Parse a string
def strP(es: str) -> ParserP:
def func(s):
p = oneCharP(es[0])
for c in es[1:]:
p = compose(p, oneCharP(c))
return p(s)
return func
# 2 + 3 + 4
# def expression():
# parse digit - we can use anyDigit()
# parse operator - we need to be able to choose different options
# parse digit
# A choose parser combinator
def chooseP(p1: ParserP, p2: ParserP) -> ParserP:
def func(s):
try:
return p1(s)
except ParserError:
return p2(s)
return func
# now lets use chooseP
def mathOp(op):
return satisfy(lambda c: c == op)
def mathOpP() -> ParserP:
plus = mathOp("+")
minus = mathOp("-")
mult = mathOp("*")
div = mathOp("/")
return chooseP(plus, chooseP(minus, chooseP(mult, minus)))
# let start writing our expression
def expression_does_not_work():
def func(s):
(digit1, s1) = parse(anyDigitP(), s)
(op, s2) = parse(mathOpP(), s1) # this does not work
(digit2, s3) = parse(anyDigitP(), s2)
return ((int(digit1), op, int(digit2)), s3)
return func
# Introduce the `bind` function
ParserF = tp.Callable[[tp.Any], ParserP]
def bind(p1: ParserP, pf: ParserF) -> ParserP:
def func(s):
(a, s1) = parse(p1, s)
p2 = pf(a)
(b, s2) = parse(p2, s1)
return (b, s2)
return func
def mathOpP1() -> ParserP:
def f(op):
if op == "+":
return operator.add
elif op == "-":
return operator.sub
elif op == "*":
return operator.mul
elif op == "/":
return operator.floordiv
plus = bind(mathOp("+"), lambda a: lambda s: (f(a), s))
minus = bind(mathOp("-"), lambda a: lambda s: (f(a), s))
mult = bind(mathOp("*"), lambda a: lambda s: (f(a), s))
div = bind(mathOp("/"), lambda a: lambda s: (f(a), s))
return chooseP(plus, chooseP(minus, chooseP(mult, div)))
# let start writing our expression
def expression():
def func(s):
(digit1, s1) = parse(anyDigitP(), s)
(op, s2) = parse(mathOpP1(), s1) # this does not work
(digit2, s3) = parse(anyDigitP(), s2)
return (op(int(digit1), int(digit2)), s3)
return func
# need a way to recurse
def expression_or_digit():
return chooseP(expression_2(), anyDigitP())
def expression_2():
def func(s):
(digit1, s1) = parse(anyDigitP(), s)
(op, s2) = parse(mathOpP1(), s1)
(digit2, s3) = parse(expression_or_digit(), s2)
return (op(int(digit1), int(digit2)), s3)
return func
# need more combinators
# def chainl(op: ParserP, base: ParserP):
# def bob(x):
# return chooseP(grab(x), (lambda s: (x, s)))
# def grab(x):
# def func0(s):
# (oper, s1) = op(s)
# (operand2, s2) = base(s1)
# return (bob(oper(x, operand2)), s2)
# return func0
# return bind(base, bob)
# Revisiting strP
# Parse a string
def strP1(es: str) -> ParserP:
def f2(c):
def f(x):
f1 = lambda xs: lambda s: (x + xs, s)
return bind(oneCharP(c), f1)
return f
def func(s):
p = oneCharP(es[0])
for c in es[1:]:
p = bind(p, f2(c))
return p(s)
return func
# Another simple combinator
def manyP(p: ParserP):
def func(s):
result = []
while True:
try:
(a, s) = parse(p, s)
result.append(a)
if not s:
break
except ParserError:
break
return ("".join(result), s)
return func
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment