Last active
May 25, 2019 00:01
-
-
Save kodo-pp/092699e922912f8e7c737cb9737f99c3 to your computer and use it in GitHub Desktop.
A simple bottom-up "parser". I may use this script in my future projects. Licensed under CC0 (Public Domain)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from silly_parser import * | |
adv = Terminal('ADV') | |
v = Terminal('V') | |
adj = Terminal('ADJ') | |
n = Terminal('N') | |
vp = adv.many().optional() + v | |
np = adj.many().optional() + n | |
grammar = [('VP', vp), ('NP', np)] | |
items = [ | |
('He', 'PRP'), | |
('thought', 'V'), | |
('that', 'ETC'), | |
('he', 'PRP'), | |
('suddenly', 'ADV'), | |
('ran', 'V'), | |
('into', 'IN'), | |
('some', 'ADJ'), | |
('big', 'ADJ'), | |
('trouble', 'N'), | |
] | |
print(parse(grammar, items, mapper=tag, name_mapper=tag)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
identity = lambda x: x | |
tag = lambda x: x.name if isinstance(x, Parsed) else x[1] | |
idnm = lambda x: 'id' | |
class Terminal: | |
is_simple = True | |
def __init__(self, value): | |
self.value = value | |
def __add__(self, other): | |
if other.is_simple: | |
return Concat([self, other]) | |
else: | |
return Concat([self] + other.items) | |
def many(self): | |
return Many(self) | |
def optional(self): | |
return Optional(self) | |
def match(self, items, mapper=identity): | |
if len(items) < 1 or mapper(items[0]) != self.value: | |
return None | |
return 1 | |
class RegexTerminal(Terminal): | |
is_simple = True | |
def __init__(self, value): | |
self.value = value | |
def match(self, items, mapper=identity): | |
if len(items) < 1 or re.match(self.value, mapper(items[0])) is None: | |
return None | |
return 1 | |
class NonTerminal: | |
is_simple = True | |
def __add__(self, other): | |
if other.is_simple: | |
return Concat([self, other]) | |
else: | |
return Concat([self] + other.items) | |
def many(self): | |
return Many(self) | |
def optional(self): | |
return Optional(self) | |
class Many(NonTerminal): | |
def __init__(self, item): | |
self.item = item | |
def match(self, items, mapper=identity): | |
n = 0 | |
while len(items) > 0: | |
k = self.item.match(items, mapper) | |
if k is None: | |
break | |
items = items[k:] | |
n += k | |
return None if n == 0 else n | |
class Optional(NonTerminal): | |
def __init__(self, item): | |
self.item = item | |
def match(self, items, mapper=identity): | |
n = self.item.match(items, mapper) | |
return 0 if n is None else n | |
class Concat(NonTerminal): | |
is_simple = False | |
def __init__(self, items): | |
self.items = items | |
def match(self, items, mapper=identity): | |
n = 0 | |
for it in self.items: | |
k = it.match(items, mapper) | |
if k is None: | |
return None | |
items = items[k:] | |
n += k | |
return n | |
def __add__(self, item): | |
return Concat(self.items + [item]) | |
class Parsed: | |
def __init__(self, name, value): | |
self.name = name | |
self.value = value | |
def __repr__(self): | |
return f'Parsed({self.name}, {self.value})' | |
def parse(grammar, items, mapper=identity, name_mapper=idnm): | |
items = map_parsed(items, mapper, name_mapper) | |
while True: | |
changed = False | |
for i in range(len(items)): | |
res = match(grammar, items[i:], mapper) | |
if res is not None: | |
name, count = res | |
tmp = items[i : i + count] | |
del items[i : i + count] | |
items.insert(i, Parsed(name, tmp)) | |
changed = True | |
break | |
if not changed: | |
return map_parsed(items, mapper, name_mapper) | |
def map_parsed(items, mapper, name_mapper): | |
return [x if isinstance(x, Parsed) else Parsed(name_mapper(x), [x]) for x in items] | |
def match(grammar, items, mapper): | |
for lhs, rhs in grammar: | |
m = rhs.match(items, mapper) | |
if m is not None: | |
return lhs, m |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment