Created
December 10, 2014 12:27
-
-
Save bbc2/be2abbefa0000816278c to your computer and use it in GitHub Desktop.
Naive JSON parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import re | |
import sys | |
# Characters accepted as the four hexadecimal digits of a ``\uXXXX`` escape.
HEX_DIGITS = '0123456789abcdefABCDEF'
# JSON number: optional minus sign, an integer part without leading zeros,
# then an optional fraction and an optional exponent.  The digit classes are
# spelled ``[0-9]`` rather than ``\d`` because ``\d`` also matches non-ASCII
# Unicode decimal digits on Python 3 text patterns, which JSON does not allow.
NUMBER_RE = re.compile(
    r'-?(?:0|[1-9][0-9]*)(?:\.[0-9]+)?(?:[eE][+-]?[0-9]+)?')
class ParseError(Exception):
    """Signal that the input does not match the expected JSON grammar.

    Every parsing function in this module raises it with the input that
    was left unconsumed at the point of failure as its only argument.
    """
def w(s):
    """Remove leading JSON whitespace characters of a string.

    Only the four characters JSON treats as insignificant whitespace
    (space, horizontal tab, line feed, carriage return) are removed.  A
    bare ``lstrip()`` would also drop form feeds, non-breaking spaces and
    every other Unicode whitespace character, silently accepting input
    that is not valid JSON.
    """
    return s.lstrip(' \t\n\r')
def n(s):
    """Peek at the next significant character of a string.

    Leading whitespace is skipped with ``w``; the first remaining
    character is returned, or ``None`` when nothing remains.  The input
    itself is not consumed.
    """
    stripped = w(s)
    return stripped[0] if stripped else None
def chars(c, s):
    """Consume the literal text ``c`` from the front of ``s``.

    Leading whitespace of ``s`` is skipped first.  Returns what follows
    ``c``; raises ``ParseError`` (carrying the whitespace-stripped input)
    when the input does not start with ``c``.
    """
    rest = w(s)
    if not rest.startswith(c):
        raise ParseError(rest)
    return rest[len(c):]
def hex4(s):
    """Consume exactly four hexadecimal digits from the front of ``s``.

    Used for the digits of a ``\\uXXXX`` string escape.  Returns the input
    that follows the four digits; raises ``ParseError`` (carrying ``s``)
    when fewer than four characters remain or any of them is not in
    ``HEX_DIGITS``.

    No whitespace is skipped: the digits must immediately follow the
    ``u``, otherwise an invalid escape such as ``\\u 0041`` would be
    accepted.
    """
    digits = s[:4]
    if len(digits) == 4 and all(c in HEX_DIGITS for c in digits):
        return s[4:]
    raise ParseError(s)
def number(s):
    """Consume a JSON number from the front of ``s``.

    Leading whitespace is skipped, then ``NUMBER_RE`` is matched at the
    start of what remains.  Returns the input following the matched
    number; raises ``ParseError`` (carrying the stripped input) when no
    number is found there.
    """
    rest = w(s)
    found = NUMBER_RE.match(rest)
    if found is None:
        raise ParseError(rest)
    # The match is anchored at position 0, so its end offset is exactly
    # the length of the number text to drop.
    return rest[found.end():]
def in_array(s):
    """Consume the comma-separated values inside a non-empty array.

    ``s`` starts just after the opening ``[``.  Returns the input starting
    at (and still including) the closing ``]``, possibly preceded by
    whitespace; the caller consumes the bracket itself.

    Raises ``ParseError`` when a value is malformed or when a value is
    followed by anything other than ``,`` or ``]``.

    The elements are walked in a loop rather than with one recursive call
    per element, so a long array cannot exhaust the interpreter's
    recursion limit.
    """
    while True:
        # ``value`` skips leading whitespace on its own.
        s = value(s)
        separator = n(s)
        if separator == ']':
            return s
        if separator != ',':
            raise ParseError(s)
        s = chars(',', s)
def array(s):
    """Consume a complete JSON array, brackets included.

    Returns the input that follows the closing ``]``.  An empty array
    (``[`` directly followed by ``]``, whitespace aside) skips the element
    parser entirely.
    """
    rest = chars('[', s)
    if n(rest) == ']':
        return chars(']', rest)
    return chars(']', in_array(rest))
def in_string(s):
    """Consume the contents of a JSON string.

    ``s`` starts just after the opening quote.  Returns the input starting
    at (and still including) the closing ``"``; the caller consumes the
    quote itself.

    Raises ``ParseError`` when:

    * the input ends before a closing quote is found,
    * a backslash is followed by something other than one of
      ``"``, ``\\``, ``/``, ``b``, ``f``, ``n``, ``r``, ``t`` or ``u``
      (including the end of input),
    * a ``\\u`` escape is rejected by ``hex4``,
    * a raw control character (below U+0020) appears unescaped.

    The contents are scanned character by character with an index:

    * nothing inside the string is treated as skippable whitespace, so an
      escape such as ``\\ n`` is not mistaken for ``\\n``;
    * there is no recursion per character, so long strings cannot exhaust
      the interpreter's recursion limit.
    """
    pos = 0
    end = len(s)
    while pos < end:
        c = s[pos]
        if c == '"':
            return s[pos:]
        if c == '\\':
            # Empty when the backslash is the last character of the input.
            escape = s[pos + 1:pos + 2]
            if escape == 'u':
                # ``hex4`` returns what follows the digits; convert that
                # remainder back into an index into ``s``.
                pos = end - len(hex4(s[pos + 2:]))
            elif escape and escape in '"\\/bfnrt':
                pos += 2
            else:
                raise ParseError(s[pos + 1:])
        elif c < ' ':
            # Control characters must be written as escapes in JSON.
            raise ParseError(s[pos:])
        else:
            pos += 1
    # Ran out of input without seeing the closing quote.
    raise ParseError('')
def string(s):
    """Consume a complete JSON string, quotes included.

    Leading whitespace is skipped by ``chars``.  Returns the input that
    follows the closing quote; ``ParseError`` propagates from the helpers
    when the string is malformed.
    """
    body = chars('"', s)
    closing = in_string(body)
    return chars('"', closing)
def value(s):
    """Consume one JSON value of any kind from the front of ``s``.

    The first non-whitespace character selects the parser: string, number,
    object, array, or one of the literals ``true``, ``false`` and ``null``.
    Returns the input that follows the value.

    Raises ``ParseError`` when the input is exhausted or when its first
    character cannot start a value.
    """
    s = w(s)
    first = n(s)
    if first is None:
        # Testing ``None in '-0123456789'`` would raise TypeError, so a
        # truncated document is reported as a parse error explicitly.
        raise ParseError(s)
    if first == '"':
        return string(s)
    if first in '-0123456789':
        return number(s)
    if first == '{':
        return obj(s)
    if first == '[':
        return array(s)
    if first == 't':
        return chars('true', s)
    if first == 'f':
        return chars('false', s)
    if first == 'n':
        return chars('null', s)
    raise ParseError(s)
def binding(s):
    """Consume one object member: a string key, a ``:``, then a value.

    Returns the input that follows the value.  Whitespace around each of
    the three parts is skipped by the helpers that consume them.
    """
    after_key = string(s)
    after_colon = chars(':', after_key)
    return value(after_colon)
def in_obj(s):
    """Consume the comma-separated members inside a non-empty object.

    ``s`` starts just after the opening ``{``.  Returns the input starting
    at (and still including) the closing ``}``, possibly preceded by
    whitespace; the caller consumes the brace itself.

    Raises ``ParseError`` when a member is malformed or when a member is
    followed by anything other than ``,`` or ``}``.

    The members are walked in a loop rather than with one recursive call
    per member, so a large object cannot exhaust the interpreter's
    recursion limit.
    """
    while True:
        # ``binding`` skips leading whitespace on its own.
        s = binding(s)
        delimiter = n(s)
        if delimiter == '}':
            return s
        if delimiter != ',':
            raise ParseError(s)
        s = chars(',', s)
def obj(s):
    """Consume a complete JSON object, braces included.

    Returns the input that follows the closing ``}``.  The member parser
    only runs when the object is not empty.
    """
    rest = chars('{', s)
    if n(rest) != '}':
        rest = in_obj(rest)
    return chars('}', rest)
def empty(s):
    """Check that nothing but whitespace remains in ``s``.

    Returns the (empty) stripped input; raises ``ParseError`` carrying the
    leftover text otherwise.
    """
    leftover = w(s)
    if leftover:
        raise ParseError(leftover)
    return leftover
def parse(s):
    """Validate that ``s`` is exactly one JSON object.

    The document must consist of a single object, optionally surrounded by
    whitespace.  Returns the empty string on success and lets
    ``ParseError`` propagate on failure; no data structure is built.
    """
    return empty(obj(s))
if __name__ == '__main__':
    # Validate the whole of standard input as one JSON document; an
    # uncaught ParseError ends the script with a traceback.
    parse(sys.stdin.read())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment