Skip to content

Instantly share code, notes, and snippets.

@mtasic85
Last active September 5, 2021 12:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mtasic85/d64bafe70f39314dd5d35af58fb868ba to your computer and use it in GitHub Desktop.
Save mtasic85/d64bafe70f39314dd5d35af58fb868ba to your computer and use it in GitHub Desktop.
Python - PEG - Parsing Combinators - Simple JSON parser v1.1
'''
PEG - Parsing Combinators
Example: Simple JSON parser
'''
from typing import Any, Callable
from string import digits, ascii_letters, whitespace
class JsonParser:
'''
VALUE ::= STRING | NUMBER | OBJECT | ARRAY
OBJECT ::= "{" (PAIR ("," PAID)*)? "}"
PAIR ::= STRING ":" VALUE
ARRAY ::= "[" (VALUE ("," VALUE)*)? "]"
'''
def parse(self, text):
stack = []
pos = 0
input_ = text
def skip_whitespace():
nonlocal pos
while pos < len(input_) and input_[pos] in whitespace:
pos += 1
def parse_stringlet():
nonlocal pos
i = pos
if input_[i] != '"':
return False
pos += 1
i += 1
while i < len(input_) and input_[i] != '"':
i += 1
if input_[i] != '"':
return False
stack.append(input_[pos:i])
pos = i + 1
return True
def parse_number():
nonlocal pos
i = pos
while input_[i] in digits:
i += 1
if i == pos:
return False
stack.append(int(input_[pos:i]))
pos = i
return True
def parse_char(c: str):
nonlocal pos
success = input_[pos] == c
if not success:
return False
pos += 1
return True
class Parser:
def parse(self) -> bool:
raise NotImplementedError()
class StringParser(Parser):
def parse(self):
skip_whitespace()
return parse_stringlet()
class CharParser(Parser):
c: str
def __init__(self, c: str):
self.c = c
def parse(self):
skip_whitespace()
return parse_char(self.c)
class NumberParser(Parser):
def parse(self):
skip_whitespace()
return parse_number()
# Combinators
class Sequence(Parser):
children: list[Parser]
def __init__(self, *children):
self.children = children
def parse(self):
nonlocal pos
pos0 = pos
for child in self.children:
if not child.parse():
pos = pos0
return False
return True
class ForwardReference(Parser):
supplier: Callable
def __init__(self, supplier: Callable):
self.supplier = supplier
def parse(self):
return self.supplier().parse()
class Repetition(Parser):
child: Parser
def __init__(self, child):
self.child = child
def parse(self):
while self.child.parse():
pass
return True
class Optional_(Parser):
child: Parser
def __init__(self, child):
self.child = child
def parse(self):
self.child.parse()
return True
class Choice(Parser):
children: list[Parser]
def __init__(self, *children):
self.children = children
def parse(self):
for child in self.children:
if child.parse():
return True
return False
class ComposeObject(Parser):
child: Parser
def __init__(self, child):
self.child = child
def parse(self):
nonlocal stack
stack0 = len(stack)
success: bool = self.child.parse()
if not success:
return False
o: dict[str, Any] = {}
while len(stack) > stack0:
v: Any = stack.pop()
k: str = stack.pop()
o[k] = v
o = {k: v for k, v in reversed(o.items())}
stack.append(o)
return True
class ComposeArray(Parser):
child: Parser
def __init__(self, child):
self.child = child
def parse(self):
nonlocal stack
stack0 = len(stack)
success: bool = self.child.parse()
if not success:
return False
o: list[Any] = []
while len(stack) > stack0:
v: Any = stack.pop()
o.append(v)
o = list(reversed(o))
stack.append(o)
return True
# PAIR ::= STRING ":" VALUE
pair: Parser = Sequence(
StringParser(),
CharParser(':'),
ForwardReference(lambda: value)
)
# ("," PAID)*)
pair_tails: Parser = Repetition(
Sequence(
CharParser(','),
pair,
)
)
# (PAIR ("," PAID)*)?
pairs: Parser = Optional_(
Sequence(
pair,
pair_tails,
)
)
# OBJECT ::= "{" (PAIR ("," PAID)*)? "}"
object_: Parser = ComposeObject(
Sequence(
CharParser('{'),
pairs,
CharParser('}'),
)
)
# ("," VALUE)*
value_tails: Parser = Repetition(
Sequence(
CharParser(','),
ForwardReference(lambda: value),
)
)
# (VALUE ("," VALUE)*)?
values: Parser = Optional_(
Sequence(
ForwardReference(lambda: value),
value_tails,
)
)
# "[" (VALUE ("," VALUE)*)? "]"
array: Parser = ComposeArray(
Sequence(
CharParser('['),
values,
CharParser(']'),
)
)
# VALUE ::= STRING | NUMBER | OBJECT | ARRAY
value: Parser = Choice(
StringParser(),
NumberParser(),
object_,
array,
)
value.parse()
value = stack.pop()
return value
if __name__ == '__main__':
jp = JsonParser()
text = '''
{
"x": 1,
"y": "2",
"items": [
{"type": "user", "id": 0, "refs": []},
{"type": "user", "id": 1, "refs": [{
"type": "ref"
}]}
]
}
'''
value = jp.parse(text)
# {'x': 1, 'y': '2', 'items': [{'type': 'user', 'id': 0, 'refs': []}, {'type': 'user', 'id': 1, 'refs': [{'type': 'ref'}]}]}
print(value)
@mtasic85
Copy link
Author

mtasic85 commented Sep 4, 2021

Based on great video "7. Parsing Combinators" from Nicolas Laurent.

https://www.youtube.com/watch?v=3Cfq4i6754s&list=PLOech0kWpH8-njQpmSNGSiQBPUvl8v3IM&index=7

More pythonic version coming soon.

@mtasic85
Copy link
Author

mtasic85 commented Sep 5, 2021

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment