Skip to content

Instantly share code, notes, and snippets.

@jeacom25b
Last active April 17, 2020 23:33
Show Gist options
  • Save jeacom25b/ab3d6d330ffb6c4cb5f26f664c039acf to your computer and use it in GitHub Desktop.
Save jeacom25b/ab3d6d330ffb6c4cb5f26f664c039acf to your computer and use it in GitHub Desktop.
A parser combinator prototype.
import re
class Color:
"""
colors defined as escape sequences for fancy output.
easy to use but dont work on all terminals
https://en.wikipedia.org/wiki/ANSI_escape_code
"""
@classmethod
def enable(cls):
cls.red = "\u001b[31m"
cls.yellow = "\u001b[38;5;221m"
cls.pink = "\u001b[38;5;213m"
cls.cyan = "\u001b[38;5;38m"
cls.green = "\u001b[38;5;112m"
cls.reset = "\u001b[0m"
@classmethod
def disable(cls):
cls.red = ""
cls.yellow = ""
cls.pink = ""
cls.cyan = ""
cls.green = ""
cls.reset = ""
Color.enable()
def basic_error(index):
return f"Coudn't find match at index {index}"
def parser_ensure(*args):
for arg in args:
if isinstance(arg, str):
yield String(arg).tag(arg)
elif isinstance(arg, BaseParser):
yield arg
else:
raise ValueError(f"{arg} is not a valid parser")
class ParserResult:
def __init__(self, result=None, new_index=0, error=None, tag=None):
self.result = result
self.new_index = new_index
self.error = error
self.tag = tag
def tag_set(self, tag):
self.tag = tag
def result_set(self, r):
self.result = r
def index_set(self, i, j):
self.index = i
def error_set(self, e):
self.error = e
def __getitem__(self, index):
return self.result[index]
def __setitem__(self, index, val):
self.result[index] = val
def deep_repr(self, indent_lvl=0):
tag = "" if self.tag is None else self.tag
indent = " " * indent_lvl
if self.error:
return f"{indent}{Color.red}E: {self.error}{Color.reset}"
elif isinstance(self.result, (list, tuple)):
out_lines = [f"{indent}{Color.green}{tag}:{Color.reset}"]
for item in self.result:
if isinstance(item, ParserResult):
out_lines.append(item.deep_repr(indent_lvl + 4))
else:
out_lines.append(repr(item))
return "\n".join(out_lines)
else:
return f"{indent}{Color.green}{tag}: {Color.pink}[{repr(self.result)}{Color.pink}]{Color.reset}"
def __repr__(self):
return self.deep_repr()
class BaseParser:
def run(self, string, context=None, index=0):
raise NotImplemented()
def sep_by(self, by, min_matches=0, max_matches=float("inf")):
return Separated(self, by=by, min_matches=min_matches, max_matches=max_matches)
def between(self, left, right=None):
if not right:
right = left
return Between(left, self, right)
def around(self, mid):
return Around(self, mid, self)
def to_left_of(self, x):
def tr(r):
r.tag_set(r[0].tag)
r.result_set(r[0].result)
return Sequence(self, x).transform(tr)
def to_right_of(self, x):
def tr(r):
r.tag_set(r[1].tag)
r.result_set(r[1].result)
return Sequence(x, self).transform(tr)
def tag(self, tag):
return Tag(self, tag)
def transform(self, func=lambda x: x):
return Transform(self, func)
def atleast(self, n):
return Many(self, min_matches=n)
def atmost(self, n):
return Many(self, max_matches=n)
def exactly(self, n):
return Many(self, max_matches=n, min_matches=n)
def __getitem__(self, key):
if isinstance(key, tuple):
if len(key) == 2:
a, b = key
if isinstance(a, int) and b == Ellipsis:
return self.atleast(a)
elif isinstance(b, int) and a == Ellipsis:
return self.atmost(b)
elif isinstance(a, int) and isinstance(b, int):
return Many(self, min_matches=a, max_matches=b)
elif isinstance(a, BaseParser):
if isinstance(b, int):
return self.sep_by(a, min_matches=b, max_matches=b)
elif b == Ellipsis:
return self.sep_by(a)
elif len(key) == 3:
a, b, c = key
if isinstance(a, BaseParser):
if isinstance(b, int) and isinstance(c, int):
return self.sep_by(a, min_matches=b, max_matches=c)
elif isinstance(b, int) and c == Ellipsis:
return self.sep_by(a, min_matches=b)
elif isinstance(c, int) and b == Ellipsis:
return self.sep_by(a, max_matches=c)
elif isinstance(key, int):
return self.exactly(key)
elif key == Ellipsis:
return Many(self)
raise ValueError(f"Invalid quantifier {key}")
def __floordiv__(self, other):
return self.tag(other)
def __or__(self, other):
return Either(self, other)
def __add__(self, other):
return Sequence(self, other)
def __radd__(self, other):
return Sequence(other, self)
def __ge__(self, other):
return self.tag(other)
def __rshift__(self, other):
if not isinstance(other, BaseParser):
other = String(other)
return other.to_right_of(self)
def __rrshift__(self, other):
return self.to_right_of(String(other))
def __lshift__(self, other):
return self.to_left_of(other)
def __rlshift__(self, other):
return String(other).to_left_of(self)
class ParserRecursionPromisses:
def __init__(self):
self._definitions = {}
def __setattr__(self, name, val):
if name == "_definitions":
super().__setattr__("_definitions", val)
else:
self._definitions[name] = val
def __getattr__(self, name):
if name == "_definitions":
return object.__getattr__("_definitions")[name]
else:
return _ThePromissedParser(self._definitions, name)
class _ThePromissedParser(BaseParser):
def __init__(self, definitions, parser_name):
self._definitions = definitions
self.parser_name = parser_name
def run(self, string, context=None, index=0, ):
if self.parser_name in self._definitions:
parser = self._definitions[self.parser_name]
if not isinstance(parser, BaseParser):
raise ValueError(f"{parser} is not a parser")
return parser.run(string, context, index)
else:
raise ValueError(
f"Parser {repr(self.parser_name)} promissed but not defined")
class Transform(BaseParser):
def __init__(self, parser, func=lambda x: None):
self.parser, = parser_ensure(parser)
self.func = func
def run(self, string, context=None, index=0):
result = self.parser.run(string, context, index)
if not result.error:
self.func(result)
return result
def __repr__(self):
return f"Transform({self.parser}, {self.func})"
class Tag(BaseParser):
def __init__(self, parser, tag):
self.parser, = parser_ensure(parser)
self.tag = tag
def run(self, string, context=None, index=0):
result = self.parser.run(string, context, index)
if not result.error:
result.tag = self.tag
return result
def __repr__(self):
return f"Tag({self.parser}, {repr(self.tag)})"
class Chain(BaseParser):
def __init__(self, parser, func):
self.parser, = parser_ensure(parser)
self.func = func
def run(self, string, context=None, index=0):
result = self.parser.run(string, context, index)
if result.error:
return result
new_parser = self.func(result)
if isinstance(new_parser, BaseParser):
new_result = new_parser.run(
string, context, result.new_index)
if new_result.error:
return new_result
return ParserResult(result=[result, new_result],
new_index=new_result.new_index,
error=None)
return result
def __repr__(self):
return f"Chain({self.parser}, {self.func})"
class Sequence(BaseParser):
def __init__(self, *args):
self.items = list(parser_ensure(*args))
def run(self, string, context=None, index=0):
matches = []
for item in self.items:
result = item.run(string, context, index)
if not result.error:
matches.append(result)
index = result.new_index
else:
return result
return ParserResult(result=matches,
new_index=index,
error=None)
def __add__(self, other):
return Sequence(*self.items, other)
def __repr__(self):
return f"Sequence({', '.join(map(repr, self.items))})"
class Around(BaseParser):
def __init__(self, left, mid, right):
self.left, self.mid, self.right = parser_ensure(left, mid, right)
def run(self, string, context=None, index=0):
lresult = self.left.run(string, context, index)
if lresult.error:
return lresult
mid_result = self.mid.run(
string, context, lresult.new_index)
if mid_result.error:
return mid_result
rresult = self.right.run(
string, context, mid_result.new_index)
if rresult.error:
return rresult
else:
return ParserResult(result=[lresult, rresult],
new_index=rresult.new_index,
error=None)
def __repr__(self):
return f"Around({self.left}, {self.mid}, {self.right})"
class Between(BaseParser):
def __init__(self, left, mid, right):
self.left, self.mid, self.right = parser_ensure(left, mid, right)
def run(self, string, context=None, index=0):
result = self.left.run(string, context, index)
if result.error:
return result
mid_result = self.mid.run(
string, context, result.new_index)
if mid_result.error:
return mid_result
result = self.right.run(
string, context, mid_result.new_index)
if result.error:
return result
else:
mid_result.new_index = result.new_index
return mid_result
def __repr__(self):
return f"Between({self.left}, {self.mid}, {self.right})"
class Separated(BaseParser):
def __init__(self, parser, by, min_matches=1, max_matches=None):
if max_matches is None:
max_matches = float("inf")
self.parser, self.sep_parser = parser_ensure(parser, by)
self.min_matches = min_matches
self.max_matches = max_matches
def run(self, string, context=None, index=0):
matches = []
while True:
result = self.parser.run(string, context, index)
if not result.error:
matches.append(result)
else:
break
result = self.sep_parser.run(
string, context, result.new_index)
if result.error or len(matches) == self.max_matches:
break
index = result.new_index
if len(matches) < self.min_matches:
return result
else:
return ParserResult(result=matches,
new_index=result.new_index,
error=None)
def __repr__(self):
return f"Separated({self.parser}, {self.sep}, {self.min_matches}, {self.max_matches})"
class Either(BaseParser):
def __init__(self, *args):
self.parsers = []
for parser in parser_ensure(*args):
if type(parser) == Either:
self.parsers.extend(parser.parsers)
else:
self.parsers.append(parser)
def run(self, string, context=None, index=0):
errors = []
for parser in self.parsers:
result = parser.run(string, context, index)
if not result.error:
return result
else:
errors.append(result)
else:
return ParserResult(result=errors,
new_index=index,
error=errors)
def __repr__(self):
return f"Either({', '.join(map(repr, self.parsers))})"
class Many(BaseParser):
def __init__(self, parser, min_matches=0, max_matches=None):
if max_matches is None:
max_matches = float("inf")
self.min_matches = min_matches
self.max_matches = max_matches
self.parser, = parser_ensure(parser)
def run(self, string, context=None, index=0):
matches = []
while True:
result = self.parser.run(string, context, index)
if not result.error:
matches.append(result)
index = result.new_index
else:
if self.min_matches <= len(matches) <= self.max_matches:
return ParserResult(result=matches,
new_index=index,
error=None)
else:
return result
if len(matches) == self.max_matches:
return ParserResult(result=matches,
new_index=index,
error=None)
def __repr__(self):
return f"Many({self.parser}, {self.min_matches}, {self.max_matches})"
class Regex(BaseParser):
def __init__(self, x):
self.regex = re.compile(x)
self.x = x
def run(self, string, context=None, index=0):
match = self.regex.match(string, index)
if match:
return ParserResult(result=match[0],
new_index=len(match[0]) + index,
error=None)
else:
return ParserResult(result=None,
new_index=index,
error=basic_error(index))
def __repr__(self):
return f"Regex({repr(self.x)})"
class String(BaseParser):
def __init__(self, x):
if x == "":
raise ValueError("String(x) arg 1 cannot be empty string")
self.x = x
def run(self, string, context=None, index=0):
string_slice = string[index:index + len(self.x)]
if string_slice == self.x:
return ParserResult(result=self.x,
new_index=index + len(self.x),
error=None)
else:
return ParserResult(result=None,
new_index=index,
error=basic_error(index))
def __repr__(self):
return f"String({repr(self.x)})"
if __name__ == "__main__":
print("Running parsers.py demo \n\n")
id = Regex("[A-Za-z_]+[A-Za-z0-9_]*")
whitespace = Regex("[ \t]*")
comma = Between(whitespace, ",", whitespace)
open_bracket = Between(whitespace, "[", whitespace)
close_bracket = Between(whitespace, "]", whitespace)
number = Regex(r"\d+").tag("number")
pr = ParserRecursionPromisses()
pr.array = Between(open_bracket,
Separated(Either(number,
pr.array),
by=comma),
close_bracket).tag("array")
pr.value = Either(number, pr.array)
pr.assignemnt = Transform(Sequence(id.tag("variable"),
whitespace,
"=",
whitespace,
pr.value
),
lambda r: r.result_set(
[r.result[0], r.result[4]])
).tag("assignemnt")
print(pr.assignemnt.run("asd = [1, [5, 4], 2]"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment