Last active
June 27, 2021 02:36
-
-
Save CurtisFenner/c0807dee1132661ae0dcc7284a9e3f85 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
class ParseResult: | |
""" | |
Represents the result of parsing an input. | |
`object` is the thing that was parsed from the beginning of the input. | |
`rest` is everything that follows the `object` within the input. | |
""" | |
def __init__(self, object, rest): | |
self.object = object | |
self.rest = rest | |
def __str__(self): | |
return "ParseResult(object=" + str(self.object) + ", rest=" + repr(self.rest) + ")" | |
class RegexParser: | |
""" | |
RegexParser uses a regex to parse from an input. | |
The object that is parsed is the regex groups, followed by the entire match, | |
as a tuple. | |
""" | |
def __init__(self, regex): | |
self.regex = regex | |
def parse(self, string: str, context) -> ParseResult: | |
match = re.match(self.regex, string) | |
if not match: | |
return None | |
return ParseResult(match.groups() + (match.group(),), string[match.end():]) | |
class SeqParser: | |
""" | |
SeqParser parses a sequence of several sub-parsers, stored in a dict. | |
The parsed object is a dict, where each sub-parser's result was placed into | |
the corresponding key in the output dict. | |
""" | |
def __init__(self, parsers: dict): | |
self.parsers = parsers | |
def parse(self, string: str, context) -> ParseResult: | |
record = {} | |
rest = string | |
for key, parser in self.parsers.items(): | |
sub_result = parser.parse(rest, context) | |
if not sub_result: | |
return None | |
record[key] = sub_result.object | |
rest = sub_result.rest | |
return ParseResult(record, rest) | |
class ChoiceParser: | |
""" | |
ChoiceParser parses one of several possible sub-parsers. ("ordered choice") | |
The object from the first sub-parser to successfully parse is returned. | |
""" | |
def __init__(self, *choices): | |
self.choices = choices | |
def parse(self, s, context) -> ParseResult: | |
for choice in self.choices: | |
sub_result = choice.parse(s, context) | |
if sub_result: | |
return sub_result | |
return None | |
class NameParser: | |
""" | |
NameParser refers to a parser defined within a containing `GrammarParser`. | |
""" | |
def __init__(self, name): | |
self.name = name | |
def parse(self, s, context) -> ParseResult: | |
return context["names"][self.name].parse(s, context) | |
class StarParser: | |
""" | |
StarParser parses zero-or-more occurences of some sub-parser. | |
("Kleene-star") | |
The list of parsed objects is returned. | |
""" | |
def __init__(self, p): | |
self.p = p | |
def parse(self, string: str, context) -> ParseResult: | |
objects = [] | |
rest = string | |
while True: | |
sub_result = self.p.parse(rest, context) | |
if not sub_result: | |
return ParseResult(objects, rest) | |
objects.append(sub_result.object) | |
rest = sub_result.rest | |
class GrammarParser: | |
""" | |
GrammarParser allows recursive grammars to be created. | |
The `grammar` argument is a dict of parsers. Each key of the dict can be | |
referenced in a `NameParser`. This allows recursive parsing rules. | |
GrammarParser adds a `"names"` key to the `context` dict. This is where the | |
`NameParser` will look for definitions. | |
""" | |
def __init__(self, initial, grammar: dict): | |
self.initial = initial | |
self.grammar = grammar | |
def parse(self, string: str, context: dict): | |
context_with_names_map = {**context, "names": self.grammar} | |
return self.grammar[self.initial].parse(string, context_with_names_map) | |
################################################################################ | |
# Define token parsers | |
intParser = RegexParser("[0-9]+") | |
opParser = RegexParser(r"\s*([/*+-])\s*") | |
openParser = RegexParser(r"\s*\(\s*") | |
closeParser = RegexParser(r"\s*\)\s*") | |
eofParser = RegexParser(r"$") | |
# Define grammar | |
grammar = GrammarParser("program", { | |
"atom": ChoiceParser(intParser, NameParser("parened")), | |
"parened": SeqParser({ | |
"_open": openParser, | |
"e": NameParser("exp"), | |
"_close": closeParser, | |
}), | |
"exp": SeqParser({ | |
"lhs": NameParser("atom"), | |
"ops": StarParser(NameParser("operation")), | |
}), | |
"operation": SeqParser({"op": opParser, "rhs": NameParser("atom")}), | |
"program": SeqParser({"e": NameParser("exp"), "_": eofParser}), | |
}) | |
# Parse an input | |
text = " (1 + 2) * (3 + 4) " | |
ast = grammar.parse(text, {}) | |
print(ast) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment