Skip to content

Instantly share code, notes, and snippets.

@zehnpaard
Last active Sep 18, 2021
Embed
What would you like to do?
Python Implementation of Memory-Efficient Packrat Recursive Descent Parser based loosely on Language Implementation Patterns
list : '[' elements ']';
elements : element (',' element)*;
element : NAME | assign | list | list_assign;
assign : NAME '=' NAME;
list_assign : list '=' list;
NAME : ('a'..'z' | 'A'..'Z')+;
from contextlib import contextmanager
class Peekable:
    """Wrap an iterator with a fixed window of k items of lookahead.

    ``stream[i]`` peeks at the i-th upcoming item (0 <= i < k) without
    consuming it; iterating consumes items and slides the window.  Once
    the underlying iterator is exhausted, peeking yields *sentinel* and
    iteration stops.

    NOTE(review): end-of-stream is detected by ``==`` against the
    sentinel, so a stream that legitimately yields a value equal to the
    sentinel (default None) terminates iteration early.
    """

    def __init__(self, input_, k, sentinel=None):
        self.sentinel = sentinel
        self._k = k
        self._stream = iter(input_)
        # Pre-load the k-slot lookahead window, padding with the sentinel.
        self._peek = [next(self._stream, sentinel) for _ in range(k)]

    def __getitem__(self, n):
        # Fix: reject any integer outside the lookahead window.  The
        # original only checked n >= k, so a negative n silently indexed
        # the window from its end instead of failing.
        if isinstance(n, int) and not 0 <= n < self._k:
            raise IndexError(f"Invalid lookahead index {n} on Peekable with k={self._k}")
        return self._peek[n]

    def __iter__(self):
        return self

    def __next__(self):
        if self._peek[0] == self.sentinel:
            raise StopIteration
        res = self._peek[0]
        # Slide the window left and pull one more item from the stream.
        self._peek = self._peek[1:]
        self._peek.append(next(self._stream, self.sentinel))
        return res
class ClearableBuffer:
    """List-like store addressed by absolute index whose consumed prefix
    can be discarded to bound memory use.

    ``_start`` is the absolute index of the first retained item; reads
    use absolute keys, translated into the retained slice.
    """

    def __init__(self):
        self._buffer = []
        self._start = 0

    def __getitem__(self, key):
        # Absolute keys below _start refer to items discarded by clear().
        # NOTE(review): negative keys bypass this check and index the
        # retained slice from its end -- callers here only use keys >= 0.
        if 0 <= key < self._start:
            raise IndexError(f"Invalid access of index {key} for ClearableBuffer starting at {self._start}")
        return self._buffer[key - self._start]

    def __len__(self):
        # Length in absolute terms: discarded prefix plus retained items.
        return self._start + len(self._buffer)

    def __repr__(self):
        return repr(self._buffer)

    def append(self, item):
        self._buffer.append(item)

    def clear(self):
        """Discard everything except the most recently appended item."""
        if self._buffer:
            kept = self._buffer[-1:]
            self._start += len(self._buffer) - 1
            self._buffer = kept


class Backtrackable:
    """Iterator wrapper supporting arbitrary lookahead and backtracking,
    backed by a ClearableBuffer so consumed tokens can be released."""

    def __init__(self, input_, sentinel=None):
        self._stream = iter(input_)
        self.sentinel = sentinel
        self.i = 0                       # absolute position of the next item
        self.buffer = ClearableBuffer()  # lazily filled item storage
        self.backtrack_points = []       # stack of saved positions

    def __getitem__(self, n):
        if not isinstance(n, int):
            raise IndexError(f"Unsupported operation: Indexing into Backtrackable with {n}")
        self._fill(n + 1)
        return self.buffer[self.i + n]

    def __iter__(self):
        return self

    def __next__(self):
        # Once the stream is exhausted this yields the sentinel forever;
        # callers detect end of input via an explicit Eof token instead.
        self._fill(1)
        item = self.buffer[self.i]
        self.i += 1
        return item

    def _fill(self, n):
        # Pull from the stream until n items are buffered past position i.
        while len(self.buffer) < self.i + n:
            self.buffer.append(next(self._stream, self.sentinel))

    @contextmanager
    def backtrack_always(self):
        """Context manager that unconditionally restores the position on
        exit -- used for speculative (lookahead-only) parses."""
        self.backtrack_points.append(self.i)
        try:
            yield
        finally:
            self.i = self.backtrack_points.pop()

    def try_clear(self):
        """Discard consumed items when safe to do so.

        Safe means: no live backtrack points, the position has reached
        the end of the buffered items, and there is actually something
        to discard.  Returns whether a clear happened.
        """
        safe = (
            not self.backtrack_points
            and self.i >= len(self.buffer) - 1
            and len(self.buffer) >= 2
        )
        if safe:
            self.buffer.clear()
        return safe
import string
from utils import Peekable
import xtokens as t
def lex(char_iterable):
    """Tokenize an iterable of characters into a generator of tokens,
    using a single character of lookahead."""
    return _lex(Peekable(char_iterable, 1))
def _lex(stream):
    """Yield tokens from a 1-lookahead character stream.

    Ends with a single Eof token when the stream's sentinel is seen.
    Whitespace is skipped; any other unrecognized character raises
    ValueError.
    """
    while True:
        c = stream[0]
        if c == stream.sentinel:
            yield t.Eof()
            return
        if c == '[':
            next(stream)
            yield t.Lbrack()
        elif c == ']':
            next(stream)
            yield t.Rbrack()
        elif c == '=':
            next(stream)
            yield t.Equal()
        elif c == ',':
            next(stream)
            yield t.Comma()
        elif _is_letter(c):
            # _lex_ident consumes the whole identifier itself.
            yield _lex_ident(stream)
        elif c in string.whitespace:
            next(stream)
        else:
            raise ValueError(f"Invalid character {c}")
def _lex_ident(stream):
    """Consume a maximal run of letters and return it as an Ident token."""
    letters = []
    while True:
        if not _is_letter(stream[0]):
            break
        letters.append(next(stream))
    return t.Ident(''.join(letters))
def _is_letter(c):
return c in string.ascii_letters
import functools
import xtokens as t
import xlexer as xl
from utils import Backtrackable
_caches = []
def _clear_caches():
for cache in _caches:
cache.clear()
def _buffer_cache_clear(func):
@functools.wraps(func)
def f(tokens):
if tokens.try_clear():
_clear_caches()
func(tokens)
return f
def _memo(func):
d = {}
_caches.append(d)
@functools.wraps(func)
def f(tokens):
match d.get(tokens.i, None):
case int(n):
tokens.i = n
case ValueError() as e:
raise e
case None:
current_pos = tokens.i
try:
func(tokens)
d[current_pos] = tokens.i
except ValueError as e:
d[current_pos] = e
raise
return f
def parse(char_stream):
    """Parse a character stream against the list grammar.

    Raises ValueError on invalid input; returns None on success.
    Caches are reset up front so stale entries from a previous parse
    cannot leak into this one.
    """
    _clear_caches()
    token_stream = Backtrackable(xl.lex(char_stream))
    _list(token_stream)
    _match(token_stream, t.Eof)
def _test(tokens, f):
with tokens.backtrack_always():
try:
f(tokens)
except ValueError:
return False
return True
@_buffer_cache_clear
@_memo
def _element(tokens):
    """element : NAME | assign | list | list_assign

    Alternatives are probed speculatively with _test, ordered so that
    longer rules precede their prefixes (assign before NAME, list '='
    list before list); the first that matches is then parsed for real.
    """
    for alternative in (_assign, _name, _assign_list, _list):
        if _test(tokens, alternative):
            alternative(tokens)
            return
    raise ValueError(f"Invalid token {tokens[0]}")
@_buffer_cache_clear
@_memo
def _list(tokens):
    """list : '[' elements ']'"""
    _match(tokens, t.Lbrack)
    _elements(tokens)
    _match(tokens, t.Rbrack)
@_buffer_cache_clear
@_memo
def _elements(tokens):
    """elements : element (',' element)*"""
    _element(tokens)
    while True:
        if tokens[0] != t.Comma():
            break
        next(tokens)
        _element(tokens)
@_buffer_cache_clear
@_memo
def _assign(tokens):
    """assign : NAME '=' NAME"""
    for expected in (t.Ident, t.Equal, t.Ident):
        _match(tokens, expected)
@_buffer_cache_clear
@_memo
def _assign_list(tokens):
    """list_assign : list '=' list"""
    _list(tokens)
    _match(tokens, t.Equal)
    _list(tokens)
@_buffer_cache_clear
@_memo
def _name(tokens):
    """NAME : a single identifier token"""
    _match(tokens, t.Ident)
def _match(tokens, token_type):
if isinstance(tokens[0], token_type):
next(tokens)
else:
raise ValueError(f"Failed to match {tokens[0]} to type {token_type}")
from dataclasses import dataclass
@dataclass
class Lbrack:
    """Token: '['."""

@dataclass
class Rbrack:
    """Token: ']'."""

@dataclass
class Equal:
    """Token: '='."""

@dataclass
class Comma:
    """Token: ','."""

@dataclass
class Eof:
    """Token: end of input."""

@dataclass
class Ident:
    """Token: identifier, carrying its spelling."""
    s: str
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment