# Gist created November 14, 2020 14:11
# Save charles-esterbrook/9ab557d70391fd85ebac2b1a59a326cf to your computer and use it in GitHub Desktop.
# lark-parser #618
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
"""
lark_tree_to_json.py
How can I convert the output of lark().parse() into JSON? #618
https://github.com/lark-parser/lark/issues/618
Lark parser, LALR & Earley, Python
https://github.com/lark-parser/lark
https://lark-parser.readthedocs.io/en/latest/
https://pypi.org/project/lark-parser/
"""
def tree_to_json_str(item):
    """Return the JSON text for a Lark tree as a single string.

    The chunks emitted by ``tree_to_json`` are collected in a list and joined
    once at the end, so the whole document is built in memory.

    Args:
        item: The Lark tree (or token) to serialise.

    Returns:
        The JSON document as a ``str``.
    """
    output = []
    # Serialise the argument itself. The previous version passed the
    # module-level ``tree`` global here and therefore ignored ``item``.
    tree_to_json(item, output.append)
    return ''.join(output)
def tree_to_json(item, write=None):
    """Write a Lark tree as a JSON dictionary.

    Args:
        item: The Lark tree (or token) to serialise.
        write: Callable that receives each chunk of output text. When it is
            ``None`` the chunks are sent to ``sys.stdout.write``.
    """
    sink = sys.stdout.write if write is None else write
    # Level 0: the root node is emitted without indentation.
    _tree_to_json(item, sink, 0)
def _tree_to_json(item, write, level, end='\n'):
    """Recursively write one Lark node as indented JSON text.

    A ``lark.Tree`` becomes ``{"type": <rule name>, "children": [...]}`` and a
    ``lark.Token`` becomes ``{"type", "text", "line", "col"}``.

    Args:
        item: The ``lark.Tree`` or ``lark.Token`` to serialise.
        write: Callable that receives each chunk of output text.
        level: Nesting depth; each level indents by two spaces.
        end: Text written after the node's closing brace. Callers outside
            this function can leave the default; the recursion passes
            ``',\\n'`` for every child except the last one.

    Raises:
        AssertionError: If ``item`` is neither a ``lark.Tree`` nor a
            ``lark.Token``.
    """
    # Imported locally: in this file the module-level ``import json`` only
    # executes after tree_to_json() has already been called once.
    import json

    def quoted(value):
        # json.dumps adds the surrounding quotes and escapes '"', '\\' and
        # control characters, which raw interpolation did not.
        return json.dumps(str(value), ensure_ascii=False)

    indent = '  ' * level
    if isinstance(item, lark.Tree):
        write(f'{indent}{{ "type": {quoted(item.data)}, "children": [\n')
        last = len(item.children) - 1
        for index, child in enumerate(item.children):
            # The separator is attached to the child's own closing brace so
            # no comma or indent ends up alone on a line.
            child_end = ',\n' if index < last else '\n'
            _tree_to_json(child, write, level + 1, child_end)
        write(f'{indent}] }}{end}')
    elif isinstance(item, lark.Token):
        # reminder: Lark Tokens are directly strings
        # token attrs include: line, end_line, column, end_column, pos_in_stream, end_pos
        line = json.dumps(item.line)      # a missing position becomes null, not None
        column = json.dumps(item.column)
        write(
            f'{indent}{{ "type": {quoted(item.type)}, "text": {quoted(item)}, '
            f'"line": {line}, "col": {column} }}{end}'
        )
    else:
        # Raised explicitly so the check survives ``python -O``, which strips
        # ``assert`` statements; the exception type is unchanged.
        raise AssertionError(item)
# Demo / self-test. Statement order matters: 'examples' must be on sys.path
# before ``import calc``, and ``lark`` must be imported before the
# serialiser functions above are called.

# get a grammar
# NOTE(review): 'examples' is a relative path, so this presumably expects to
# be run from the root of a Lark checkout where examples/calc.py lives - confirm.
import sys
sys.path.insert(0, 'examples')
import calc
grammar = calc.calc_grammar

# get some code to parse
code = '3 + 5 * 12'

# parse with lark
import lark
parser = lark.Lark(grammar, parser='lalr')
tree = parser.parse(code)
print(tree.pretty())

# print with tree_to_json
tree_to_json(tree)  # will print to stdout

# build JSON string with tree_to_json_str
json_str = tree_to_json_str(tree)

# test that above JSON string is valid: it must parse, and the root must be
# an 'add' node with exactly two children
import json
parsed_json = json.loads(json_str)
print(parsed_json)
assert isinstance(parsed_json, dict)
assert parsed_json['type'] == 'add'
assert len(parsed_json['children']) == 2
# Sign up for free to join this conversation on GitHub.
# Already have an account? Sign in to comment