Skip to content

Instantly share code, notes, and snippets.

@CurtisFenner
Last active June 27, 2021 02:36
Show Gist options
  • Save CurtisFenner/c0807dee1132661ae0dcc7284a9e3f85 to your computer and use it in GitHub Desktop.
Save CurtisFenner/c0807dee1132661ae0dcc7284a9e3f85 to your computer and use it in GitHub Desktop.
import re
class ParseResult:
"""
Represents the result of parsing an input.
`object` is the thing that was parsed from the beginning of the input.
`rest` is everything that follows the `object` within the input.
"""
def __init__(self, object, rest):
self.object = object
self.rest = rest
def __str__(self):
return "ParseResult(object=" + str(self.object) + ", rest=" + repr(self.rest) + ")"
class RegexParser:
"""
RegexParser uses a regex to parse from an input.
The object that is parsed is the regex groups, followed by the entire match,
as a tuple.
"""
def __init__(self, regex):
self.regex = regex
def parse(self, string: str, context) -> ParseResult:
match = re.match(self.regex, string)
if not match:
return None
return ParseResult(match.groups() + (match.group(),), string[match.end():])
class SeqParser:
"""
SeqParser parses a sequence of several sub-parsers, stored in a dict.
The parsed object is a dict, where each sub-parser's result was placed into
the corresponding key in the output dict.
"""
def __init__(self, parsers: dict):
self.parsers = parsers
def parse(self, string: str, context) -> ParseResult:
record = {}
rest = string
for key, parser in self.parsers.items():
sub_result = parser.parse(rest, context)
if not sub_result:
return None
record[key] = sub_result.object
rest = sub_result.rest
return ParseResult(record, rest)
class ChoiceParser:
"""
ChoiceParser parses one of several possible sub-parsers. ("ordered choice")
The object from the first sub-parser to successfully parse is returned.
"""
def __init__(self, *choices):
self.choices = choices
def parse(self, s, context) -> ParseResult:
for choice in self.choices:
sub_result = choice.parse(s, context)
if sub_result:
return sub_result
return None
class NameParser:
"""
NameParser refers to a parser defined within a containing `GrammarParser`.
"""
def __init__(self, name):
self.name = name
def parse(self, s, context) -> ParseResult:
return context["names"][self.name].parse(s, context)
class StarParser:
"""
StarParser parses zero-or-more occurences of some sub-parser.
("Kleene-star")
The list of parsed objects is returned.
"""
def __init__(self, p):
self.p = p
def parse(self, string: str, context) -> ParseResult:
objects = []
rest = string
while True:
sub_result = self.p.parse(rest, context)
if not sub_result:
return ParseResult(objects, rest)
objects.append(sub_result.object)
rest = sub_result.rest
class GrammarParser:
"""
GrammarParser allows recursive grammars to be created.
The `grammar` argument is a dict of parsers. Each key of the dict can be
referenced in a `NameParser`. This allows recursive parsing rules.
GrammarParser adds a `"names"` key to the `context` dict. This is where the
`NameParser` will look for definitions.
"""
def __init__(self, initial, grammar: dict):
self.initial = initial
self.grammar = grammar
def parse(self, string: str, context: dict):
context_with_names_map = {**context, "names": self.grammar}
return self.grammar[self.initial].parse(string, context_with_names_map)
################################################################################
# Define token parsers
intParser = RegexParser("[0-9]+")
opParser = RegexParser(r"\s*([/*+-])\s*")
openParser = RegexParser(r"\s*\(\s*")
closeParser = RegexParser(r"\s*\)\s*")
eofParser = RegexParser(r"$")
# Define grammar
grammar = GrammarParser("program", {
"atom": ChoiceParser(intParser, NameParser("parened")),
"parened": SeqParser({
"_open": openParser,
"e": NameParser("exp"),
"_close": closeParser,
}),
"exp": SeqParser({
"lhs": NameParser("atom"),
"ops": StarParser(NameParser("operation")),
}),
"operation": SeqParser({"op": opParser, "rhs": NameParser("atom")}),
"program": SeqParser({"e": NameParser("exp"), "_": eofParser}),
})
# Parse an input
text = " (1 + 2) * (3 + 4) "
ast = grammar.parse(text, {})
print(ast)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment