Skip to content

Instantly share code, notes, and snippets.

@zed
Last active March 6, 2017 10:01
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save zed/73f8d6081e67626ca9ed7e2a8a493902 to your computer and use it in GitHub Desktop.
Save zed/73f8d6081e67626ca9ed7e2a8a493902 to your computer and use it in GitHub Desktop.
# Grako grammar for a sequence of `name=json_value` assignments.
# Generate the parser module with:
# $ grako -o kv_parser.py KV.ebnf
@@grammar :: KV
# whitespace inside a string excluding the start/end of the line
@@whitespace :: /((?!^)(?![\r\n])\s)+/
# a comment starts with '#' and extends up to (not including) the line break
@@eol_comments :: /#[^\r\n]*/
@@left_recursion :: False
# input is zero or more assignments
start
=
{assignment} $
;
# an assignment binds a name to a JSON value: name '=' value
assignment
=
name '=' json_value
;
# a name is one or more word characters and does not start with a digit
name
=
/(?!\d)\w+/
;
# a value is a number, a string, an array, an object, or a literal
json_value
=
number | string | json_array | json_object | json_literal
;
# @+: puts values into the list
# ~ prevents other options from being considered even if what follows fails to parse.
# an array is a bracketed, comma-separated, possibly empty list of values
json_array
=
'[' ~ [@+:json_value {',' @+:json_value}] ']'
;
# an object is a braced, comma-separated, possibly empty list of pairs
json_object
=
'{' ~ [@+:pair {',' @+:pair}] '}'
;
# a pair is a string key, a colon, and a value
pair
=
string ':' json_value
;
json_literal
=
'true' | 'false' | 'null'
;
# optional minus sign, integer part without leading zeros,
# optional fraction, optional exponent
number
=
/\-?(0|(?!0)\d+)(\.\d+)?([eE][+-]?\d+)?/
;
# double-quoted text: backslash escapes (one of "\/bfnrt, or u plus four hex
# digits) or any character other than a double quote or a backslash
string
=
'"' ~ ?'(\\(["\\/bfnrt]|u[0-9a-fA-F]{4})|[^"\\])*' '"'
;
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# CAVEAT UTILITOR
#
# This file was automatically generated by Grako.
#
# https://pypi.python.org/pypi/grako/
#
# Any changes you make to it will be overwritten the next time
# the file is generated.
from __future__ import print_function, division, absolute_import, unicode_literals
from grako.buffering import Buffer
from grako.parsing import graken, Parser
from grako.util import re, RE_FLAGS, generic_main # noqa
KEYWORDS = {}
class KVBuffer(Buffer):
    """Input buffer for the KV grammar.

    It only supplies the KV-specific defaults (whitespace pattern and
    end-of-line comment pattern) and forwards everything to ``Buffer``.
    """

    def __init__(
        self,
        text,
        whitespace=re.compile('((?!^)(?![\\r\\n])\\s)+', RE_FLAGS | re.DOTALL),
        nameguard=None,
        comments_re=None,
        eol_comments_re='#[^\\r\\n]*',
        ignorecase=None,
        namechars='',
        **kwargs
    ):
        """Create a buffer over *text*; all options are passed on unchanged."""
        super(KVBuffer, self).__init__(
            text,
            whitespace=whitespace,
            nameguard=nameguard,
            comments_re=comments_re,
            eol_comments_re=eol_comments_re,
            ignorecase=ignorecase,
            namechars=namechars,
            **kwargs
        )
class KVParser(Parser):
    """Parser for the KV grammar, generated by Grako.

    Every ``_<rule>_`` method implements the grammar rule of the same name.
    """

    def __init__(
        self,
        whitespace=re.compile('((?!^)(?![\\r\\n])\\s)+', RE_FLAGS | re.DOTALL),
        nameguard=None,
        comments_re=None,
        eol_comments_re='#[^\\r\\n]*',
        ignorecase=None,
        left_recursion=False,
        parseinfo=True,
        keywords=None,
        namechars='',
        buffer_class=KVBuffer,
        **kwargs
    ):
        """Configure the parser; all options are forwarded to ``Parser``."""
        # Fall back to the module-level keyword table when none is given.
        effective_keywords = KEYWORDS if keywords is None else keywords
        super(KVParser, self).__init__(
            whitespace=whitespace,
            nameguard=nameguard,
            comments_re=comments_re,
            eol_comments_re=eol_comments_re,
            ignorecase=ignorecase,
            left_recursion=left_recursion,
            parseinfo=parseinfo,
            keywords=effective_keywords,
            namechars=namechars,
            buffer_class=buffer_class,
            **kwargs
        )

    @graken()
    def _start_(self):
        """start = {assignment} $ ;"""

        def one_assignment():
            self._assignment_()

        self._closure(one_assignment)
        self._check_eof()

    @graken()
    def _assignment_(self):
        """assignment = name '=' json_value ;"""
        self._name_()
        self._token('=')
        self._json_value_()

    @graken()
    def _name_(self):
        """name: word characters, not starting with a digit."""
        self._pattern(r'(?!\d)\w+')

    @graken()
    def _json_value_(self):
        """json_value = number | string | json_array | json_object | json_literal ;"""
        with self._choice():
            with self._option():
                self._number_()
            with self._option():
                self._string_()
            with self._option():
                self._json_array_()
            with self._option():
                self._json_object_()
            with self._option():
                self._json_literal_()
            # Reached only when none of the options above matched.
            self._error('no available options')

    @graken()
    def _json_array_(self):
        """json_array = '[' ~ [@+:json_value {',' @+:json_value}] ']' ;"""
        self._token('[')
        self._cut()
        with self._optional():
            self._json_value_()
            self.add_last_node_to_name('@')

            def comma_then_value():
                self._token(',')
                self._json_value_()
                self.add_last_node_to_name('@')

            self._closure(comma_then_value)
        self._token(']')

    @graken()
    def _json_object_(self):
        """json_object = '{' ~ [@+:pair {',' @+:pair}] '}' ;"""
        self._token('{')
        self._cut()
        with self._optional():
            self._pair_()
            self.add_last_node_to_name('@')

            def comma_then_pair():
                self._token(',')
                self._pair_()
                self.add_last_node_to_name('@')

            self._closure(comma_then_pair)
        self._token('}')

    @graken()
    def _pair_(self):
        """pair = string ':' json_value ;"""
        self._string_()
        self._token(':')
        self._json_value_()

    @graken()
    def _json_literal_(self):
        """json_literal = 'true' | 'false' | 'null' ;"""
        with self._choice():
            with self._option():
                self._token('true')
            with self._option():
                self._token('false')
            with self._option():
                self._token('null')
            # Reached only when none of the three literals matched.
            self._error('expecting one of: false null true')

    @graken()
    def _number_(self):
        """number: a single regular expression for the JSON number syntax."""
        self._pattern(r'\-?(0|(?!0)\d+)(\.\d+)?([eE][+-]?\d+)?')

    @graken()
    def _string_(self):
        """string: opening quote, cut, escaped body pattern, closing quote."""
        self._token('"')
        self._cut()
        self._pattern(r'(\\(["\\/bfnrt]|u[0-9a-fA-F]{4})|[^"\\])*')
        self._token('"')
class KVSemantics(object):
    """Default semantic actions for the KV grammar.

    There is one method per grammar rule; each returns the AST it is given
    unchanged. Subclasses override the rules they want to transform.
    """

    def start(self, ast):
        """Return the AST of the ``start`` rule as is."""
        return ast

    def assignment(self, ast):
        """Return the AST of the ``assignment`` rule as is."""
        return ast

    def name(self, ast):
        """Return the AST of the ``name`` rule as is."""
        return ast

    def json_value(self, ast):
        """Return the AST of the ``json_value`` rule as is."""
        return ast

    def json_array(self, ast):
        """Return the AST of the ``json_array`` rule as is."""
        return ast

    def json_object(self, ast):
        """Return the AST of the ``json_object`` rule as is."""
        return ast

    def pair(self, ast):
        """Return the AST of the ``pair`` rule as is."""
        return ast

    def json_literal(self, ast):
        """Return the AST of the ``json_literal`` rule as is."""
        return ast

    def number(self, ast):
        """Return the AST of the ``number`` rule as is."""
        return ast

    def string(self, ast):
        """Return the AST of the ``string`` rule as is."""
        return ast
def main(filename, startrule, **kwargs):
    """Parse the contents of *filename* starting at rule *startrule*.

    Extra keyword arguments are handed to ``KVParser.parse``; its result is
    returned.
    """
    with open(filename) as source:
        contents = source.read()
    kv_parser = KVParser()
    return kv_parser.parse(contents, startrule, filename=filename, **kwargs)
if __name__ == '__main__':
    # Command-line entry point: parse the input, then print the resulting
    # AST both as-is and as indented JSON.
    import json
    from grako.util import asjson

    ast = generic_main(main, KVParser, name='KV')

    print('AST:')
    print(ast)
    print()

    print('JSON:')
    print(json.dumps(asjson(ast), indent=2))
    print()
#!/usr/bin/env python
r"""Parse space-separated key=json_value pairs.
>>> from pprint import pprint
>>> pprint(parse(r'a=1 b="2" c=3.123 d=[true, 2, 3]e={"Ke\n":["h=4"],"Key2":{" ":1}}'))
{'a': 1,
 'b': '2',
 'c': 3.123,
 'd': [True, 2, 3],
 'e': {'Ke\n': ['h=4'], 'Key2': {' ': 1}}}
>>> pprint(parse("a=1 b=2 c=3\n"))
{'a': 1, 'b': 2, 'c': 3}
>>> pprint(parse('a=[] B={} c=""d=1E-10'))
{'B': {}, 'a': [], 'c': '', 'd': 1e-10}
"""
import codecs
from kv_parser import KVParser, KVSemantics
# Placeholder used to smuggle JSON null (None) through the AST.
# NOTE(review): presumably needed because a semantic action that returns None
# is not treated as having produced a value -- confirm against Grako.
null = object()


def _denull(value):
    """Return None for the ``null`` placeholder, *value* itself otherwise."""
    return None if value is null else value


class Semantics(KVSemantics):
    """Semantic actions that turn the KV parse tree into plain Python values.

    Objects become dicts, arrays become lists, numbers become int or float,
    strings are unescaped, and true/false/null become True/False/None.
    """

    def start(self, ast):
        """start = { assignment }* $ ;"""
        return dict(ast)

    def assignment(self, ast):
        """assignment = name '=' json_value ;"""
        # NOTE: all names should be unique
        return ast[0], _denull(ast[2])

    def json_object(self, ast):
        """Return a dict built from the (key, value) pairs of the object."""
        if ast == ['{', '}']:  # XXX empty dict
            return {}
        return dict(ast)

    def json_array(self, ast):
        """Return the array as a list, with null placeholders turned into None."""
        # NOTE(review): the array ["[", "]"] compares equal to the empty-list
        # marker below and is therefore returned as [] -- known limitation.
        if ast == ['[', ']']:  # XXX empty list
            return []
        # Replace the placeholder here too; otherwise `a=[null]` would leak
        # the sentinel object instead of None.
        return [_denull(item) for item in ast]

    def pair(self, ast):
        """pair = string ':' json_value ;"""
        return ast[0], _denull(ast[2])

    def number(self, ast):
        r"""json number.

        number = /\-?(0|(?!0)\d+)(\.\d+)?([eE][+-]?\d+)?/ ;

        Returns an int when the text is a plain integer, a float otherwise.
        """
        try:
            return int(ast)
        except ValueError:
            return float(ast)

    def string(self, ast):
        """Return the unescaped text of a string; ``ast[1]`` is the raw body.

        The body is decoded as a JSON string rather than with the Python
        ``unicode-escape`` codec, so non-ASCII characters are kept intact and
        the JSON-only escape for '/' and surrogate pairs are handled.
        """
        import json

        # strict=False: the grammar allows raw control characters (such as a
        # newline) inside a string, which a strict JSON decoder rejects.
        return json.JSONDecoder(strict=False).decode('"' + ast[1] + '"')

    def json_literal(self, ast):
        """Map 'true'/'false'/'null' to True/False/the ``null`` placeholder."""
        return {'true': True, 'false': False, 'null': null}[ast]
def parse(text):
    """Parse *text* of ``key=json_value`` pairs.

    Leading and trailing whitespace is stripped before parsing; the result
    is whatever the ``Semantics`` actions produce for the start rule.
    """
    kv_parser = KVParser()
    return kv_parser.parse(text.strip(), semantics=Semantics())
if __name__ == "__main__":
    # Run the examples embedded in this module's docstrings as tests.
    import doctest

    doctest.testmod()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment