Last active
March 6, 2017 10:01
-
-
Save zed/73f8d6081e67626ca9ed7e2a8a493902 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Generate the parser module with:
#   $ grako -o kv_parser.py KV.ebnf
@@grammar :: KV

# Skippable whitespace: runs of whitespace characters other than CR/LF that
# do not sit at a `^` position.
@@whitespace :: /((?!^)(?![\r\n])\s)+/
@@eol_comments :: /#[^\r\n]*/
@@left_recursion :: False

# The input is zero or more assignments followed by end of input.
start = {assignment} $ ;

assignment = name '=' json_value ;

# A name is a run of word characters that does not begin with a digit.
name = /(?!\d)\w+/ ;

json_value = number | string | json_array | json_object | json_literal ;

# '@+:' appends each matched value to the rule's result list.
# '~' (cut) prevents other options from being considered even if what
# follows fails to parse.
json_array = '[' ~ [@+:json_value {',' @+:json_value}] ']' ;

json_object = '{' ~ [@+:pair {',' @+:pair}] '}' ;

pair = string ':' json_value ;

json_literal = 'true' | 'false' | 'null' ;

number = /\-?(0|(?!0)\d+)(\.\d+)?([eE][+-]?\d+)?/ ;

string = '"' ~ ?'(\\(["\\/bfnrt]|u[0-9a-fA-F]{4})|[^"\\])*' '"' ;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
# CAVEAT UTILITOR | |
# | |
# This file was automatically generated by Grako. | |
# | |
# https://pypi.python.org/pypi/grako/ | |
# | |
# Any changes you make to it will be overwritten the next time | |
# the file is generated. | |
from __future__ import print_function, division, absolute_import, unicode_literals | |
from grako.buffering import Buffer | |
from grako.parsing import graken, Parser | |
from grako.util import re, RE_FLAGS, generic_main # noqa | |
KEYWORDS = {} | |
class KVBuffer(Buffer):
    """Input buffer preconfigured with the KV grammar's lexical settings.

    The defaults mirror the grammar directives: the ``@@whitespace`` pattern
    and the ``#``-to-end-of-line comment pattern. Every argument is forwarded
    unchanged to ``Buffer.__init__``.
    """

    def __init__(
        self,
        text,
        whitespace=re.compile('((?!^)(?![\\r\\n])\\s)+', RE_FLAGS | re.DOTALL),
        nameguard=None,
        comments_re=None,
        eol_comments_re='#[^\\r\\n]*',
        ignorecase=None,
        namechars='',
        **kwargs
    ):
        # Gather the named settings first, then layer any extra keyword
        # arguments on top; the two sets cannot share a key because the
        # named settings are explicit parameters of this method.
        options = dict(
            whitespace=whitespace,
            nameguard=nameguard,
            comments_re=comments_re,
            eol_comments_re=eol_comments_re,
            ignorecase=ignorecase,
            namechars=namechars,
        )
        options.update(kwargs)
        super(KVBuffer, self).__init__(text, **options)
class KVParser(Parser):
    """Parser for the KV grammar (``name=json_value`` assignments).

    Each ``_<rule>_`` method implements the grammar rule of the same name.
    The constructor defaults mirror the grammar directives and are forwarded
    to ``Parser.__init__``.
    """

    def __init__(
        self,
        whitespace=re.compile('((?!^)(?![\\r\\n])\\s)+', RE_FLAGS | re.DOTALL),
        nameguard=None,
        comments_re=None,
        eol_comments_re='#[^\\r\\n]*',
        ignorecase=None,
        left_recursion=False,
        parseinfo=True,
        keywords=None,
        namechars='',
        buffer_class=KVBuffer,
        **kwargs
    ):
        # Named settings and extra keyword arguments cannot share a key,
        # so merging them into one dict is equivalent to passing both.
        options = dict(
            whitespace=whitespace,
            nameguard=nameguard,
            comments_re=comments_re,
            eol_comments_re=eol_comments_re,
            ignorecase=ignorecase,
            left_recursion=left_recursion,
            parseinfo=parseinfo,
            # Fall back to the module-level keyword collection.
            keywords=KEYWORDS if keywords is None else keywords,
            namechars=namechars,
            buffer_class=buffer_class,
        )
        options.update(kwargs)
        super(KVParser, self).__init__(**options)

    @graken()
    def _start_(self):
        """start = {assignment} $ ;"""

        def one_assignment():
            self._assignment_()

        self._closure(one_assignment)
        self._check_eof()

    @graken()
    def _assignment_(self):
        """assignment = name '=' json_value ;"""
        self._name_()
        self._token('=')
        self._json_value_()

    @graken()
    def _name_(self):
        """name: word characters not starting with a digit."""
        self._pattern(r'(?!\d)\w+')

    @graken()
    def _json_value_(self):
        """json_value = number | string | json_array | json_object | json_literal ;"""
        # Alternatives are attempted in grammar order.
        alternatives = (
            self._number_,
            self._string_,
            self._json_array_,
            self._json_object_,
            self._json_literal_,
        )
        with self._choice():
            for alternative in alternatives:
                with self._option():
                    alternative()
            self._error('no available options')

    @graken()
    def _json_array_(self):
        """json_array = '[' ~ [@+:json_value {',' @+:json_value}] ']' ;"""

        def following_value():
            self._token(',')
            self._json_value_()
            self.add_last_node_to_name('@')

        self._token('[')
        self._cut()
        with self._optional():
            self._json_value_()
            self.add_last_node_to_name('@')
            self._closure(following_value)
        self._token(']')

    @graken()
    def _json_object_(self):
        """json_object = '{' ~ [@+:pair {',' @+:pair}] '}' ;"""

        def following_pair():
            self._token(',')
            self._pair_()
            self.add_last_node_to_name('@')

        self._token('{')
        self._cut()
        with self._optional():
            self._pair_()
            self.add_last_node_to_name('@')
            self._closure(following_pair)
        self._token('}')

    @graken()
    def _pair_(self):
        """pair = string ':' json_value ;"""
        self._string_()
        self._token(':')
        self._json_value_()

    @graken()
    def _json_literal_(self):
        """json_literal = 'true' | 'false' | 'null' ;"""
        with self._choice():
            for literal in ('true', 'false', 'null'):
                with self._option():
                    self._token(literal)
            self._error('expecting one of: false null true')

    @graken()
    def _number_(self):
        """number: optional sign, integer part, optional fraction and exponent."""
        self._pattern(r'\-?(0|(?!0)\d+)(\.\d+)?([eE][+-]?\d+)?')

    @graken()
    def _string_(self):
        """string: a double-quoted run of escapes and ordinary characters."""
        self._token('"')
        self._cut()
        self._pattern(r'(\\(["\\/bfnrt]|u[0-9a-fA-F]{4})|[^"\\])*')
        self._token('"')
class KVSemantics(object):
    """Pass-through semantic actions for the KV grammar.

    There is one method per grammar rule, and each returns the AST it
    receives unchanged. Subclasses override individual methods to build
    other values.
    """

    def start(self, ast):
        """Return the AST of the ``start`` rule unchanged."""
        return ast

    def assignment(self, ast):
        """Return the AST of the ``assignment`` rule unchanged."""
        return ast

    def name(self, ast):
        """Return the AST of the ``name`` rule unchanged."""
        return ast

    def json_value(self, ast):
        """Return the AST of the ``json_value`` rule unchanged."""
        return ast

    def json_array(self, ast):
        """Return the AST of the ``json_array`` rule unchanged."""
        return ast

    def json_object(self, ast):
        """Return the AST of the ``json_object`` rule unchanged."""
        return ast

    def pair(self, ast):
        """Return the AST of the ``pair`` rule unchanged."""
        return ast

    def json_literal(self, ast):
        """Return the AST of the ``json_literal`` rule unchanged."""
        return ast

    def number(self, ast):
        """Return the AST of the ``number`` rule unchanged."""
        return ast

    def string(self, ast):
        """Return the AST of the ``string`` rule unchanged."""
        return ast
def main(filename, startrule, **kwargs):
    """Parse the file at ``filename`` with a fresh ``KVParser``.

    The whole file is read into memory, then parsed beginning at rule
    ``startrule``. Extra keyword arguments are forwarded to the parser's
    ``parse`` method, whose result is returned.
    """
    with open(filename) as source:
        contents = source.read()
    return KVParser().parse(contents, startrule, filename=filename, **kwargs)
if __name__ == '__main__':
    # Script entry point: parse the input selected on the command line and
    # show the resulting AST, first as-is and then rendered as JSON.
    import json
    from grako.util import asjson

    tree = generic_main(main, KVParser, name='KV')
    # Heading, value, then a blank line.
    print('AST:', tree, '', sep='\n')
    print('JSON:')
    print(json.dumps(asjson(tree), indent=2), end='\n\n')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
r"""Parse space-separated key=json_value pairs. | |
>>> from pprint import pprint | |
>>> pprint(parse(r'a=1 b="2" c=3.123 d=[true, 2, 3]e={"Ke\n":["h=4"],"Key2":{" ":1}}')) | |
{'a': 1, | |
'b': '2', | |
'c': 3.123, | |
'd': [True, 2, 3], | |
'e': {'Ke\n': ['h=4'], 'Key2': {' ': 1}}} | |
>>> pprint(parse("a=1 b=2 c=3\n")) | |
{'a': 1, 'b': 2, 'c': 3} | |
>>> pprint(parse('a=[] B={} c=""d=1E-10')) | |
{'B': {}, 'a': [], 'c': '', 'd': 1e-10} | |
""" | |
import codecs
import json

from kv_parser import KVParser, KVSemantics
null = object() # placeholder to smuggle None through AST | |
class Semantics(KVSemantics):
    """Semantic actions that turn a KV parse into plain Python values.

    Assignments become a dict, JSON objects become dicts, JSON arrays become
    lists, numbers become ``int``/``float`` and strings become ``str``.

    JSON ``null`` is carried through the AST as the module-level ``null``
    placeholder and is swapped for ``None`` wherever a value is stored in a
    container (assignment, pair, array element).
    """

    @staticmethod
    def _unwrap(value):
        """Return ``None`` for the ``null`` placeholder, else ``value`` itself."""
        return None if value is null else value

    def start(self, ast):
        """start = { assignment }* $ ;

        Build a dict from the ``(name, value)`` tuples made by ``assignment``.
        """
        return dict(ast)

    def assignment(self, ast):
        """assignment = name '=' json_value ;

        Return a ``(name, value)`` tuple.
        """
        # NOTE: all names should be unique; ``start`` feeds these tuples to
        # ``dict``, so a repeated name keeps only its last value.
        return ast[0], self._unwrap(ast[2])

    def json_object(self, ast):
        """Return a dict built from the ``(key, value)`` tuples of ``pair``."""
        if ast == ['{', '}']:  # XXX empty dict: only the two brace tokens
            return {}
        return dict(ast)

    def json_array(self, ast):
        """Return the array elements as a list, with JSON null as ``None``."""
        # NOTE(review): an array holding exactly the two strings "[" and "]"
        # looks indistinguishable from the empty-array case below -- confirm.
        if ast == ['[', ']']:  # XXX empty list: only the two bracket tokens
            return []
        # Elements may still be the ``null`` placeholder; convert them here so
        # the placeholder object never leaks into the result.
        return [self._unwrap(item) for item in ast]

    def pair(self, ast):
        """pair = string ':' json_value ;

        Return a ``(key, value)`` tuple.
        """
        return ast[0], self._unwrap(ast[2])

    def number(self, ast):
        r"""json number.

        number = /\-?(0|(?!0)\d+)(\.\d+)?([eE][+-]?\d+)?/ ;

        Return an ``int`` when the text is a plain integer, else a ``float``.
        """
        try:
            return int(ast)
        except ValueError:
            return float(ast)

    def string(self, ast):
        """Decode the body of a double-quoted string (``ast[1]``).

        The body follows JSON string syntax, so it is decoded with the JSON
        decoder rather than the ``unicode-escape`` codec. That codec corrupts
        non-ASCII characters, does not decode the JSON escape ``\\/`` and does
        not combine surrogate pairs written as two ``\\uXXXX`` escapes.
        """
        # strict=False: the grammar allows raw control characters (such as a
        # literal newline) inside a string, which strict JSON rejects.
        return json.loads('"' + ast[1] + '"', strict=False)

    def json_literal(self, ast):
        """Map ``true``/``false``/``null`` to ``True``/``False``/placeholder."""
        return {'true': True, 'false': False, 'null': null}[ast]
def parse(text):
    """Parse ``text`` (``name=json_value`` assignments) into a dict.

    Leading and trailing whitespace is stripped before parsing; the result
    is whatever ``Semantics.start`` produces for the input.
    """
    parser = KVParser()
    stripped = text.strip()
    return parser.parse(stripped, semantics=Semantics())
if __name__ == "__main__":
    # Running the module as a script executes its doctests.
    from doctest import testmod

    testmod()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment