Skip to content

Instantly share code, notes, and snippets.

@asukaminato0721
Created December 1, 2023 19:15
Show Gist options
  • Save asukaminato0721/e94b0230678599b33cce25b029ca5f76 to your computer and use it in GitHub Desktop.
Save asukaminato0721/e94b0230678599b33cce25b029ca5f76 to your computer and use it in GitHub Desktop.
"""
learned from https://www.sitepen.com/blog/unlocking-the-power-of-parser-combinators-a-beginners-guide
author: asuka minato
"""
from __future__ import annotations
import string as S
from dataclasses import dataclass, field
from typing import Callable, Dict, Iterable
@dataclass
class Result:
    """Outcome of running a combinator against an input string.

    A failed parse is written as ``Result(False)``; the remaining fields keep
    their empty defaults in that case.
    """

    success: bool  # whether the combinator matched
    value: str = ""  # text produced by the match
    rest: str = ""  # input left over after the match
    # Named sub-matches; default_factory gives every instance its own dict.
    captures: Dict[str, str] = field(default_factory=dict)
# A combinator is any callable that takes an input string and returns a Result.
Combinator = Callable[[str], Result]
@dataclass
class Char:
    """Combinator that matches one specific leading character."""

    c: str  # the character to match

    def __call__(self, other: str) -> Result:
        """Try to consume ``self.c`` from the front of ``other``.

        Returns a successful ``Result`` whose value is ``self.c`` and whose
        rest is ``other`` without its first character, or ``Result(False)``
        when ``other`` is empty or starts with a different character.
        """
        # The truthiness guard keeps other[0] from raising on empty input.
        if other and other[0] == self.c:
            return Result(True, self.c, other[1:])
        return Result(False)
@dataclass
class Either:
    """Ordered choice: yield the result of the first alternative that matches."""

    combinators: Iterable[Combinator]  # alternatives, tried in iteration order

    def __post_init__(self) -> None:
        # A one-shot iterator (e.g. a generator expression) would be consumed
        # by earlier parses, so later calls would silently skip alternatives.
        # Snapshot it once; re-iterable containers such as lists are kept
        # exactly as they were passed in.
        if iter(self.combinators) is self.combinators:
            self.combinators = tuple(self.combinators)

    def __call__(self, s: str) -> Result:
        """Run each alternative on ``s`` and return the first successful result.

        Returns ``Result(False)`` when no alternative matches.
        """
        for alternative in self.combinators:
            result = alternative(s)
            if result.success:
                return result
        return Result(False)
# Self-checks for the two primitive combinators.
assert Char("c")("c").success
assert not Char("c")("b").success
assert Either([Char("a"), Char("b")])("a").success
assert Either([Char("a"), Char("b")])("b").success
assert not Either([Char("a"), Char("b")])("d").success

# Character classes built as a choice over single-character parsers.
digit = Either(list(map(Char, "1234567890")))
hexDigit = Either([digit, *map(Char, "abcdefABCDEF")])

assert digit("1").success
assert not digit("a").success
assert hexDigit("C").success
assert not hexDigit("G").success
@dataclass
class Sequence:
    """Run combinators one after another; all of them must match."""

    combinators: Iterable[Combinator]  # steps, applied in iteration order

    def __post_init__(self) -> None:
        # A one-shot iterator (e.g. a generator expression) would be drained
        # by the first parse, turning every later call into an empty sequence.
        # Snapshot it once; re-iterable containers are kept as passed in.
        if iter(self.combinators) is self.combinators:
            self.combinators = tuple(self.combinators)

    def __call__(self, s: str) -> Result:
        """Feed each step the input left over by the previous one.

        On success the result's value is the concatenation of every step's
        value, rest is what the last step left unconsumed, and captures is the
        merge of all step captures (later steps win on duplicate names).
        Returns ``Result(False)`` as soon as any step fails.
        """
        rest = s
        pieces = []
        captures: Dict[str, str] = {}
        for step in self.combinators:
            result = step(rest)
            if not result.success:
                return Result(False)
            rest = result.rest
            # Collect and join once instead of rebuilding the string and the
            # dict on every step.
            pieces.append(result.value)
            captures.update(result.captures)
        return Result(True, "".join(pieces), rest, captures)
# Sequence concatenates the matched values and leaves the unparsed tail in rest.
assert Sequence([Char("a"), Char("b")])("abcdef") == Result(True, "ab", "cdef")
def string(s: str) -> Sequence:
    """Return a combinator matching the literal text ``s``.

    The literal is matched as a ``Sequence`` of one ``Char`` per character.
    """
    return Sequence([Char(ch) for ch in s])


# A literal matches as a prefix and is rejected on the first differing character.
assert string("abc")("abcdef").success
assert not string("abd")("abcd").success
@dataclass
class nOrMore:
    """Greedy repetition: apply ``comb`` repeatedly, requiring at least ``n`` matches."""

    n: int  # minimum number of successful matches
    comb: Combinator  # combinator to repeat

    def __call__(self, s: str) -> Result:
        """Apply ``comb`` until it fails, then check the match count.

        On success the value is the concatenation of every match, rest is the
        input left after the last match, and captures is the merge of all
        match captures (later matches win). Returns ``Result(False)`` when
        fewer than ``n`` matches were found.
        """
        matches = 0
        rest = s
        pieces = []
        captures: Dict[str, str] = {}
        while True:
            result = self.comb(rest)
            if not result.success:
                break
            matches += 1
            pieces.append(result.value)
            captures.update(result.captures)
            if result.rest == rest:
                # Zero-width success (e.g. an Optional that matched nothing):
                # re-running on the same input would succeed forever. Such a
                # match can be repeated as often as required, so treat the
                # minimum as met and stop instead of looping endlessly.
                matches = max(matches, self.n)
                break
            rest = result.rest
        if matches >= self.n:
            return Result(True, "".join(pieces), rest, captures)
        return Result(False)
@dataclass
class Optional:
    """Make a combinator optional: a miss becomes an empty successful match."""

    c: Combinator  # combinator to attempt

    def __call__(self, s: str) -> Result:
        """Return ``c``'s result when it matches.

        Otherwise return a successful ``Result`` with an empty value and the
        whole of ``s`` left as rest.
        """
        result = self.c(s)
        return result if result.success else Result(True, "", s)
# Composite parsers built from the primitives above.
hexNumber = Sequence([string("0x"), nOrMore(1, hexDigit)])
integer = nOrMore(1, digit)
floatingPoint = Sequence([integer, Char("."), integer])
# floatingPoint is listed first: integer alone would match "3" and stop at ".".
number = Either([floatingPoint, integer])

assert hexNumber("0x1afb").success
print(hexNumber("0x1afb"))
assert number("3.14").success
print(number("3.14"))
@dataclass
class capture:
    """Record the value matched by ``comb`` under ``name`` in the result's captures."""

    name: str  # key under which the matched value is stored
    comb: Combinator  # combinator whose match is recorded
    map_: Callable[[str], str] = lambda x: x  # transform applied to the stored value

    def __call__(self, s: str) -> Result:
        """Run ``comb`` and, on success, add ``map_(value)`` to the captures.

        The new entry overrides any capture of the same name produced inside
        ``comb``. A failed result is returned unchanged.
        """
        result = self.comb(s)
        if not result.success:
            return result
        return Result(
            success=result.success,
            value=result.value,
            rest=result.rest,
            captures={**result.captures, self.name: self.map_(result.value)},
        )
@dataclass
class map_:
    """Post-process the successful results of a combinator."""

    comb: Combinator  # combinator to run
    m: Callable[[Result], Result]  # applied to the result only when it succeeded

    def __call__(self, s: str) -> Result:
        """Return ``m(result)`` when ``comb`` matches ``s``, else the failed result as-is."""
        result = self.comb(s)
        return self.m(result) if result.success else result
# Demo: capture both operands of a simple addition.
plus = Sequence([capture("left", number), Char("+"), capture("right", number)])
print(plus("1+1"))

letter = Either([capture("c", Char(x)) for x in S.ascii_letters])
# The alternatives are passed as a list, not a generator expression: nOrMore
# calls the Either repeatedly, and a generator would be consumed across those
# calls, so only prefixes whose letters happen to be in ascending alphabetical
# order (like "IPX") would parse.
begin = capture("begin", nOrMore(3, Either([Char(x) for x in S.ascii_uppercase])))
number_capture = capture("number", number)
ans = Sequence([begin, Char("-"), number_capture])
print(ans("IPX-177"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment