Created
February 16, 2024 22:42
-
-
Save philzook58/f26010c6484bc42910da3ab4df0c9bea to your computer and use it in GitHub Desktop.
A simpler sexp parser using regex.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
# Optional leading whitespace followed by exactly one token: an integer
# literal, a symbol, "(" or ")". Each alternative has its own capture group,
# so a match yields a 4-tuple in which only the matched alternative is set.
token_pattern = re.compile(
    r"\s*(?:(\d+)|([A-Za-z\-!=\+\*\_<>]+[A-Za-z0-9\-!=\+\*\_<>]*)|(\()|(\)))"
)


def tokenize(s):
    """Yield one ``(number, symbol, lparen, rparen)`` tuple per token in *s*.

    The entry for the kind of token that matched holds its text; the other
    three entries are ``None``. Whitespace between tokens is skipped.

    Raises:
        SyntaxError: when a non-whitespace character cannot start any token.
            Because this is a generator, the error surfaces while the
            tokens are being consumed, not when ``tokenize`` is called.
    """
    pos = 0
    end = len(s)
    while pos < end:
        # Anchor every match at ``pos``. ``finditer`` would silently skip
        # text the pattern cannot match, dropping input such as "." or "/".
        match = token_pattern.match(s, pos)
        if match is None:
            rest = s[pos:].lstrip()
            if rest:
                raise SyntaxError(
                    f"Unexpected character {rest[0]!r} at offset {end - len(rest)}"
                )
            return  # only trailing whitespace was left
        yield match.groups()
        pos = match.end()
def parse_expression(iterator):
    """Parse the token stream into a nested list of expressions.

    Args:
        iterator: an iterator of ``(number, symbol, lparen, rparen)``
            tuples; the truthy entry decides how the token is handled.

    Returns:
        A list holding every top-level expression, in order. Numbers become
        ``int``, symbols stay ``str``, and each parenthesised group becomes
        a nested list.

    Raises:
        SyntaxError: on a ")" with no matching "(", on end of input while a
            "(" is still open, or on a token with no truthy entry.
    """
    root = []
    # Stack of lists currently being filled; the last entry is the innermost
    # open list. An explicit stack keeps nesting depth independent of the
    # interpreter's recursion limit and makes imbalance detectable.
    open_lists = [root]
    for number, symbol, lparen, rparen in iterator:
        if lparen:
            child = []
            open_lists[-1].append(child)
            open_lists.append(child)
        elif rparen:
            if len(open_lists) == 1:
                raise SyntaxError("Unexpected ')'")
            open_lists.pop()
        elif number:
            open_lists[-1].append(int(number))
        elif symbol:
            open_lists[-1].append(symbol)
        else:
            raise SyntaxError("Unexpected token")
    if len(open_lists) > 1:
        raise SyntaxError("Unexpected end of input")
    return root
def parse_sexp(s):
    """Parse the S-expressions contained in the string *s*.

    Returns:
        The list built by ``parse_expression`` from the tokens of *s*: one
        entry per top-level expression, so a single expression such as
        ``"(a b)"`` comes back wrapped in an outer list.

    Raises:
        SyntaxError: if tokens remain after ``parse_expression`` returns,
            or if tokenizing or parsing itself raises it.
    """
    tokens = tokenize(s)
    result = parse_expression(tokens)
    # If parse_expression returned before the token stream was exhausted,
    # whatever is left is input that was never parsed.
    if next(tokens, None) is not None:
        raise SyntaxError("Trailing tokens")
    return result
# Example usage: parse one definition and print the resulting nested list.
s_expression = "(define square (lambda (x) (* x x)))"
parsed = parse_sexp(s_expression)
print(parsed)

# possibly I could make this a generator that streams?
# Further sample inputs; each is parsed and the result is discarded.
for _sample in (
    "1 2 4 (foo bar) (8 9 (biz baz))",
    "(define-const x!8 Int)",
    """
(define-const x Int)
(assert (= x 3))
(assert (>= x 8))
(check-sat)
(push)
(pop)
()
""",
):
    parse_sexp(_sample)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment