Skip to content

Instantly share code, notes, and snippets.

@charles-esterbrook
Created November 14, 2020 14:11
Show Gist options
  • Save charles-esterbrook/9ab557d70391fd85ebac2b1a59a326cf to your computer and use it in GitHub Desktop.
Save charles-esterbrook/9ab557d70391fd85ebac2b1a59a326cf to your computer and use it in GitHub Desktop.
lark-parser #618
"""
lark_tree_to_json.py

Example answer to lark issue #618,
"How can I convert the output of lark().parse() into JSON":
https://github.com/lark-parser/lark/issues/618

Lark is a parser for Python (LALR & Earley):
https://github.com/lark-parser/lark
https://lark-parser.readthedocs.io/en/latest/
https://pypi.org/project/lark-parser/
"""
def tree_to_json_str(item):
    """Return the JSON text for a Lark tree (or token) as one string.

    Args:
        item: The ``lark.Tree`` or ``lark.Token`` to serialize.

    Returns:
        The concatenation of every fragment ``tree_to_json`` writes for
        ``item``.
    """
    # Collect the written fragments in memory and join them once at the end.
    output = []
    # Serialize the argument that was passed in, not a module-level ``tree``
    # global, so the function works for any caller and any tree.
    tree_to_json(item, output.append)
    return ''.join(output)
def tree_to_json(item, write=None):
    """Emit a Lark tree as a JSON dictionary through ``write``.

    ``write`` is called with successive string fragments of the output.
    When it is omitted, the fragments go to ``sys.stdout.write``.
    """
    # Resolve the default lazily so sys.stdout is looked up at call time.
    sink = sys.stdout.write if write is None else write
    _tree_to_json(item, sink, 0)
def _tree_to_json(item, write, level):
    """Recursively write ``item`` as JSON text via ``write``.

    A ``lark.Tree`` becomes ``{ "type": <rule name>, "children": [...] }``;
    a ``lark.Token`` becomes
    ``{ "type": ..., "text": ..., "line": ..., "col": ... }``.

    Args:
        item: The ``lark.Tree`` or ``lark.Token`` to serialize.
        write: Callable that accepts one string fragment at a time.
        level: Nesting depth; each level indents the output by one space.

    Raises:
        AssertionError: If ``item`` is neither a ``lark.Tree`` nor a
            ``lark.Token``.
    """
    # Local import: the module only imports json further down, in its demo
    # section, so bring it into scope here for the escaping below.
    import json

    def encode(value):
        # json.dumps quotes and escapes strings (quotes, backslashes, control
        # characters) and renders None as null, so the output stays valid JSON
        # whatever the token text or position attributes contain.
        return json.dumps(value, ensure_ascii=False)

    indent = ' ' * level
    level += 1
    if isinstance(item, lark.Tree):
        node_type = encode(str(item.data))
        write(f'{indent}{{ "type": {node_type}, "children": [\n')
        sep = ''
        for child in item.children:
            # The separator is emitted before every child except the first,
            # so no trailing comma is ever produced.
            write(indent)
            write(sep)
            _tree_to_json(child, write, level)
            sep = ',\n'
        write(f'{indent}] }}\n')
    elif isinstance(item, lark.Token):
        # reminder: Lark Tokens are directly strings
        # token attrs include: line, end_line, column, end_column, pos_in_stream, end_pos
        token_type = encode(str(item.type))
        text = encode(str(item))
        line = encode(item.line)
        col = encode(item.column)
        write(
            f'{indent}{{ "type": {token_type}, "text": {text}, '
            f'"line": {line}, "col": {col} }}\n'
        )
    else:
        # Explicit raise instead of ``assert False`` so the check is not
        # stripped when Python runs with -O; the exception type is unchanged.
        raise AssertionError(item)
# --- Demo / self-test: runs at module level, in this exact order. ---
# Get a grammar.
# NOTE: `sys` (here) and `lark` (below) are also the module globals that
# tree_to_json and _tree_to_json look up when they are called.
import sys
# Put the relative 'examples' directory first on the import path so that
# `import calc` resolves; presumably this is lark's examples/calc.py, so the
# script must be run from a directory containing it -- TODO confirm.
sys.path.insert(0, 'examples')
import calc
grammar = calc.calc_grammar
# Get some code to parse.
code = '3 + 5 * 12'
# Parse with lark using the LALR parser.
import lark
parser = lark.Lark(grammar, parser='lalr')
tree = parser.parse(code)
print(tree.pretty())
# Print with tree_to_json (no `write` argument given).
tree_to_json(tree) # will print to stdout
# Build the JSON string in memory with tree_to_json_str.
json_str = tree_to_json_str(tree)
# Check that the JSON string above is valid by parsing it back with the
# standard json module.
import json
parsed_json = json.loads(json_str)
print(parsed_json)
# The root is expected to be an 'add' node with two children.
assert isinstance(parsed_json, dict)
assert parsed_json['type'] == 'add'
assert len(parsed_json['children']) == 2
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment