Skip to content

Instantly share code, notes, and snippets.

@kodo-pp
Last active May 25, 2019 00:01
Show Gist options
  • Save kodo-pp/092699e922912f8e7c737cb9737f99c3 to your computer and use it in GitHub Desktop.
Save kodo-pp/092699e922912f8e7c737cb9737f99c3 to your computer and use it in GitHub Desktop.
A simple bottom-up "parser". I may use this script in my future projects. Licensed under CC0 (Public Domain)
from silly_parser import *
adv = Terminal('ADV')
v = Terminal('V')
adj = Terminal('ADJ')
n = Terminal('N')
vp = adv.many().optional() + v
np = adj.many().optional() + n
grammar = [('VP', vp), ('NP', np)]
items = [
('He', 'PRP'),
('thought', 'V'),
('that', 'ETC'),
('he', 'PRP'),
('suddenly', 'ADV'),
('ran', 'V'),
('into', 'IN'),
('some', 'ADJ'),
('big', 'ADJ'),
('trouble', 'N'),
]
print(parse(grammar, items, mapper=tag, name_mapper=tag))
import re
identity = lambda x: x
tag = lambda x: x.name if isinstance(x, Parsed) else x[1]
idnm = lambda x: 'id'
class Terminal:
is_simple = True
def __init__(self, value):
self.value = value
def __add__(self, other):
if other.is_simple:
return Concat([self, other])
else:
return Concat([self] + other.items)
def many(self):
return Many(self)
def optional(self):
return Optional(self)
def match(self, items, mapper=identity):
if len(items) < 1 or mapper(items[0]) != self.value:
return None
return 1
class RegexTerminal(Terminal):
is_simple = True
def __init__(self, value):
self.value = value
def match(self, items, mapper=identity):
if len(items) < 1 or re.match(self.value, mapper(items[0])) is None:
return None
return 1
class NonTerminal:
is_simple = True
def __add__(self, other):
if other.is_simple:
return Concat([self, other])
else:
return Concat([self] + other.items)
def many(self):
return Many(self)
def optional(self):
return Optional(self)
class Many(NonTerminal):
def __init__(self, item):
self.item = item
def match(self, items, mapper=identity):
n = 0
while len(items) > 0:
k = self.item.match(items, mapper)
if k is None:
break
items = items[k:]
n += k
return None if n == 0 else n
class Optional(NonTerminal):
def __init__(self, item):
self.item = item
def match(self, items, mapper=identity):
n = self.item.match(items, mapper)
return 0 if n is None else n
class Concat(NonTerminal):
is_simple = False
def __init__(self, items):
self.items = items
def match(self, items, mapper=identity):
n = 0
for it in self.items:
k = it.match(items, mapper)
if k is None:
return None
items = items[k:]
n += k
return n
def __add__(self, item):
return Concat(self.items + [item])
class Parsed:
def __init__(self, name, value):
self.name = name
self.value = value
def __repr__(self):
return f'Parsed({self.name}, {self.value})'
def parse(grammar, items, mapper=identity, name_mapper=idnm):
items = map_parsed(items, mapper, name_mapper)
while True:
changed = False
for i in range(len(items)):
res = match(grammar, items[i:], mapper)
if res is not None:
name, count = res
tmp = items[i : i + count]
del items[i : i + count]
items.insert(i, Parsed(name, tmp))
changed = True
break
if not changed:
return map_parsed(items, mapper, name_mapper)
def map_parsed(items, mapper, name_mapper):
return [x if isinstance(x, Parsed) else Parsed(name_mapper(x), [x]) for x in items]
def match(grammar, items, mapper):
for lhs, rhs in grammar:
m = rhs.match(items, mapper)
if m is not None:
return lhs, m
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment