zehnpaard/llast.py

## llast.py
from dataclasses import dataclass

@dataclass
class Alpha:
    s : str

@dataclass
class Num:
    n : int

@dataclass
class Cons:
    car : any
    cdr : any

## llparser.py
import string

import lltokens as t
import llast as a

def lex(s):
    s = s.replace("(", " ( ").replace(")", " ) ").replace(".", " . ")
    for token in s.split():
        match token:
            case "(":
                yield t.Lparen()
            case ")":
                yield t.Rparen()
            case ".":
                yield t.Dot()
            case _ if all((c in string.ascii_letters) for c in token):
                yield t.Alpha(token)
            case _ if all((c in string.digits) for c in token):
                yield t.Num(int(token))
            case _:
                raise ValueError(f"Unable to lex {token}")

def parse(s):
    tokens = list(lex(s))[::-1]
    return parse_exp(tokens)

def parse_exp(tokens):
    match tokens.pop():
        case t.Lparen():
            return parse_list(tokens)
        case t.Num(n):
            return a.Num(n)
        case t.Alpha(s):
            return a.Alpha(s)
        case token:
            raise ValueError(f"Unexpected token {token} at start of expression")

def parse_list(tokens):
    items = []
    tail = a.Alpha("NIL")

    while tokens[-1] not in (t.Dot(), t.Rparen()):
        items.append(parse_exp(tokens))

    if tokens[-1] == t.Dot():
        tokens.pop()
        tail = parse_exp(tokens)
        if tokens[-1] != t.Rparen():
            raise ValueError(f"Unexpected token {tokens[-1]} found at end of dotted list")
    tokens.pop()

    for item in reversed(items):
        tail = a.Cons(item, tail)
    return tail

## lltokens.py
from dataclasses import dataclass

@dataclass
class Lparen:
    pass

@dataclass
class Rparen:
    pass

@dataclass
class Dot:
    pass

@dataclass
class Alpha:
    s : str

@dataclass
class Num:
    n : int

## test_llparser.py
import llparser as p
import lltokens as t
import llast as a

def test_lexer():
    assert list(p.lex("()")) == [t.Lparen(), t.Rparen()]
    assert list(p.lex("(abc)")) == [t.Lparen(), t.Alpha("abc"), t.Rparen()]
    assert list(p.lex("( abc) ")) == [t.Lparen(), t.Alpha("abc"), t.Rparen()]
    assert list(p.lex("(abc 2)")) == [t.Lparen(), t.Alpha("abc"), t.Num(2),  t.Rparen()]
    assert list(p.lex("(abc 2 . ghi )")) == [t.Lparen(), t.Alpha("abc"), t.Num(2),  t.Dot(), t.Alpha("ghi"), t.Rparen()]
    assert list(p.lex("(()()(")) == [t.Lparen(), t.Lparen(), t.Rparen(), t.Lparen(), t.Rparen(), t.Lparen()]

def test_parser():
    assert p.parse("()") == a.Alpha("NIL")
    assert p.parse("(abc)") == a.Cons(a.Alpha("abc"), a.Alpha("NIL"))
    assert p.parse("(abc 2 . ghi )") == a.Cons(a.Alpha("abc"), a.Cons(a.Num(2), a.Alpha("ghi")))
    assert p.parse("(abc 2 . (ghi) )") == a.Cons(a.Alpha("abc"), a.Cons(a.Num(2), a.Cons(a.Alpha("ghi"), a.Alpha("NIL"))))
	from dataclasses import dataclass

	@dataclass
	class Alpha:
	s : str

	@dataclass
	class Num:
	n : int

	@dataclass
	class Cons:
	car : any
	cdr : any
	import string

	import lltokens as t
	import llast as a

	def lex(s):
	s = s.replace("(", " ( ").replace(")", " ) ").replace(".", " . ")
	for token in s.split():
	match token:
	case "(":
	yield t.Lparen()
	case ")":
	yield t.Rparen()
	case ".":
	yield t.Dot()
	case _ if all((c in string.ascii_letters) for c in token):
	yield t.Alpha(token)
	case _ if all((c in string.digits) for c in token):
	yield t.Num(int(token))
	case _:
	raise ValueError(f"Unable to lex {token}")

	def parse(s):
	tokens = list(lex(s))[::-1]
	return parse_exp(tokens)

	def parse_exp(tokens):
	match tokens.pop():
	case t.Lparen():
	return parse_list(tokens)
	case t.Num(n):
	return a.Num(n)
	case t.Alpha(s):
	return a.Alpha(s)
	case token:
	raise ValueError(f"Unexpected token {token} at start of expression")

	def parse_list(tokens):
	items = []
	tail = a.Alpha("NIL")

	while tokens[-1] not in (t.Dot(), t.Rparen()):
	items.append(parse_exp(tokens))

	if tokens[-1] == t.Dot():
	tokens.pop()
	tail = parse_exp(tokens)
	if tokens[-1] != t.Rparen():
	raise ValueError(f"Unexpected token {tokens[-1]} found at end of dotted list")
	tokens.pop()

	for item in reversed(items):
	tail = a.Cons(item, tail)
	return tail
	from dataclasses import dataclass

	@dataclass
	class Lparen:
	pass

	@dataclass
	class Rparen:
	pass

	@dataclass
	class Dot:
	pass

	@dataclass
	class Alpha:
	s : str

	@dataclass
	class Num:
	n : int
	import llparser as p
	import lltokens as t
	import llast as a

	def test_lexer():
	assert list(p.lex("()")) == [t.Lparen(), t.Rparen()]
	assert list(p.lex("(abc)")) == [t.Lparen(), t.Alpha("abc"), t.Rparen()]
	assert list(p.lex("( abc) ")) == [t.Lparen(), t.Alpha("abc"), t.Rparen()]
	assert list(p.lex("(abc 2)")) == [t.Lparen(), t.Alpha("abc"), t.Num(2), t.Rparen()]
	assert list(p.lex("(abc 2 . ghi )")) == [t.Lparen(), t.Alpha("abc"), t.Num(2), t.Dot(), t.Alpha("ghi"), t.Rparen()]
	assert list(p.lex("(()()(")) == [t.Lparen(), t.Lparen(), t.Rparen(), t.Lparen(), t.Rparen(), t.Lparen()]

	def test_parser():
	assert p.parse("()") == a.Alpha("NIL")
	assert p.parse("(abc)") == a.Cons(a.Alpha("abc"), a.Alpha("NIL"))
	assert p.parse("(abc 2 . ghi )") == a.Cons(a.Alpha("abc"), a.Cons(a.Num(2), a.Alpha("ghi")))
	assert p.parse("(abc 2 . (ghi) )") == a.Cons(a.Alpha("abc"), a.Cons(a.Num(2), a.Cons(a.Alpha("ghi"), a.Alpha("NIL"))))