Skip to content

Instantly share code, notes, and snippets.

@ls0f ls0f/parse.py
Last active Jul 2, 2016

Embed
What would you like to do?
#coding:utf-8
import re
class Parse(object):
    """Small recursive-descent JSON parser.

    Usage: ``Parse().parse(text)``; the parsed document is returned and also
    stored on ``self.js``.  Objects become ``dict``, arrays ``list``, numbers
    ``int``/``float`` and the literals ``true``/``false``/``null`` become
    ``True``/``False``/``None``.

    String values are returned as the raw text found between the quotes:
    escape sequences are validated but NOT decoded, so the text can be
    written back between quotes unchanged.

    Any syntax error raises ``ValueError`` (a subclass of ``Exception``).
    """

    def __init__(self):
        self.i = -1        # index of the last consumed character
        self.text = ""     # document currently being parsed
        self.js = None     # result of the last successful parse()

    def next_char(self):
        """Return the next unconsumed character, or '' at end of input."""
        # Slicing (rather than indexing) never raises past the end.
        return self.text[self.i + 1:self.i + 2]

    def next_next_char(self):
        """Return the character after the next one, or '' if there is none."""
        return self.text[self.i + 2:self.i + 3]

    def consume(self, l=1):
        """Consume and return the next ``l`` characters.

        Raises ValueError if fewer than ``l`` characters remain.
        """
        start = self.i + 1
        chunk = self.text[start:start + l]
        if len(chunk) < l:
            raise ValueError("unexpected end of input")
        self.i += l
        return chunk

    def accept(self, c):
        """Consume the next character, which must equal ``c``."""
        if self.next_char() != c:
            self.err()
        self.consume()
        return True

    def accept_multi(self, multi):
        """Consume the exact character sequence ``multi``."""
        for c in multi:
            self.accept(c)

    def accept_regexp(self, l, r):
        """Consume the next ``l`` characters, which must match regexp ``r``."""
        chunk = self.text[self.i + 1:self.i + 1 + l]
        # Check before consuming so the error points at the offending text.
        if len(chunk) != l or re.search(r, chunk) is None:
            self.err()
        self.consume(l)
        return True

    def consume_blank(self):
        """Skip any run of whitespace."""
        # next_char() is '' at end of input, which never matches \s.
        while re.search(r"\s", self.next_char()):
            self.consume()

    def err(self):
        """Raise ValueError describing the current position."""
        pos = max(self.i, 0)  # self.i is -1 before anything is consumed
        raise ValueError("unexpected char '%s', nearly '%s' " % (self.next_char(), self.text[pos:pos + 20]))

    def parse(self, text):
        """Parse ``text``, whose top-level value must be an array or object.

        Stores the result on ``self.js`` and returns it.  Raises ValueError
        on malformed input, including non-whitespace after the document.
        """
        self.text = text
        self.i = -1
        self.js = None
        self.consume_blank()
        first = self.next_char()
        if first == '[':
            result = self.parse_array()
        elif first == '{':
            result = self.parse_object()
        else:
            self.err()
        self.consume_blank()
        if self.next_char() != '':
            self.err()
        self.js = result
        return result

    def parse_object(self):
        """Parse ``{ "key": value, ... }`` into a dict (last duplicate wins)."""
        d = dict()
        self.consume_blank()
        self.accept('{')
        self.consume_blank()
        if self.next_char() == '"':
            k, v = self.parse_pair()
            d[k] = v
            while self.next_char() == ',':
                self.accept(',')
                k, v = self.parse_pair()
                d[k] = v
        self.accept('}')
        return d

    def parse_pair(self):
        """Parse one ``"key": value`` member and return ``(key, value)``."""
        self.consume_blank()
        k = self.parse_string()
        self.accept(":")
        v = self.parse_value()
        self.consume_blank()
        return k, v

    def parse_string(self):
        """Parse a quoted string and return its raw (still escaped) content.

        Trailing whitespace after the closing quote is skipped.
        """
        self.accept('"')
        start = self.i + 1
        while True:
            c = self.next_char()
            if c == '':
                self.err()  # unterminated string
            if c == '"':
                break
            self.consume()
            if c == '\\':
                esc = self.next_char()
                if esc == 'u':
                    self.parse_unicode()
                elif esc != '' and esc in '"\\/bfnrt':
                    self.consume()
                else:
                    self.err()
        end = self.i + 1
        self.accept('"')
        self.consume_blank()
        return self.text[start:end]

    def parse_value(self):
        """Parse any JSON value, skipping whitespace on both sides."""
        self.consume_blank()
        c = self.next_char()
        if c == '"':
            v = self.parse_string()
        elif c == '-' or '0' <= c <= '9':
            v = self.parse_number()
        elif c == '{':
            v = self.parse_object()
        elif c == '[':
            v = self.parse_array()
        elif c == 't':
            self.accept_multi("true")
            v = True
        elif c == 'f':
            self.accept_multi("false")
            v = False
        elif c == 'n':
            self.accept_multi("null")
            v = None
        else:
            self.err()
        self.consume_blank()
        return v

    def parse_array(self):
        """Parse ``[ value, ... ]`` into a list."""
        items = []
        self.accept('[')
        self.consume_blank()
        # Parse the element list exactly once; anything left over that is
        # not ']' (e.g. a missing comma) is rejected by accept() below.
        if self.next_char() != ']':
            items = self.parse_element()
        self.accept(']')
        self.consume_blank()
        return items

    def parse_element(self):
        """Parse one or more comma-separated values and return them as a list."""
        ele = [self.parse_value()]
        while self.next_char() == ',':
            self.accept(',')
            ele.append(self.parse_value())
        self.consume_blank()
        return ele

    def parse_unicode(self):
        """Consume the ``uXXXX`` part of a unicode escape (after the backslash)."""
        self.accept('u')
        self.accept_regexp(4, r"[0-9a-fA-F]{4}")

    def parse_number(self):
        """Parse a JSON number; return float if it has a fraction or exponent, else int."""
        start = self.i
        self.parse_int()
        is_float = False
        if self.next_char() == '.':
            self.parse_frac()
            is_float = True
        # Tuple membership: '' (end of input) must not count as a match.
        if self.next_char() in ('e', 'E'):
            self.parse_exp()
            is_float = True
        token = self.text[start + 1:self.i + 1]
        if is_float:
            return float(token)
        return int(token)

    def parse_int(self):
        """Consume the integer part: optional '-', then '0' or a digit run."""
        if self.next_char() == '-':
            self.consume()
        if self.next_char() == '0':
            # JSON forbids leading zeros, so a '0' stands alone.
            self.consume()
        else:
            self.parse_digits()

    def parse_frac(self):
        """Consume '.' followed by at least one digit."""
        self.accept('.')
        self.parse_digits()

    def parse_exp(self):
        """Consume 'e' or 'E', an optional sign, and at least one digit."""
        self.accept_regexp(1, r'e|E')
        if self.next_char() in ('+', '-'):
            self.consume()
        self.parse_digits()

    def parse_digits(self):
        """Consume one or more ASCII digits."""
        self.accept_regexp(1, '[0-9]')
        # Range comparison is False for '' and for non-ASCII digit characters.
        while '0' <= self.next_char() <= '9':
            self.consume()
if __name__ == "__main__":
    # parse.py is a library module; running it directly does nothing.
    pass
#coding:utf-8
import sys
import parse
class Pretty(object):
    """Pretty-printer that writes a parsed JSON structure to stdout.

    Nested lists and dicts are indented by four spaces per level.  String
    values and keys are written between double quotes as-is (no escaping is
    applied).
    """

    def __init__(self):
        self.buffer = []          # stack of [depth, closer] for open containers
        self.indent = 4 * ' '     # text emitted once per nesting level
        self.on_line = False      # True right after a key: the value continues the line

    def pretty(self, js):
        """Pretty-print ``js`` (list, dict, str, number, bool or None)."""
        self.buffer = []
        self.on_line = False
        self.pretty_json(js, 0)
        self.clear_buffer()

    def ptr(self, d, str, newlines=True):
        """Write ``str`` at depth ``d``.

        With ``newlines=False`` the text is followed by a space instead of a
        newline, and the next call continues on the same line without indent.
        """
        out = sys.stdout  # looked up per call so stdout redirection is honoured
        if newlines is False:
            out.write("%s%s " % (self.indent * d, str))
            self.on_line = True
        elif self.on_line:
            out.write("%s\n" % (str, ))
            self.on_line = False
        else:
            out.write("%s%s\n" % (self.indent * d, str))

    def pretty_json(self, js, d=0, comma=False):
        """Print one value at depth ``d``; append ',' when ``comma`` is true."""
        tail = ',' if comma else ''
        if isinstance(js, list):
            self.pretty_list(js, d, comma)
        elif isinstance(js, dict):
            self.pretty_map(js, d, comma)
        elif isinstance(js, str):
            self.ptr(d, '"%s"%s' % (js, tail))
        elif js is None:
            self.ptr(d, '%s%s' % ("null", tail))
        elif isinstance(js, bool):
            # Checked before the generic branch so True prints as 'true'.
            self.ptr(d, '%s%s' % ("true" if js else "false", tail))
        else:
            self.ptr(d, '%s%s' % (js, tail))

    def pretty_list(self, ls, d, comma=False):
        """Print list ``ls`` at depth ``d``; ``comma`` adds ',' after ']'."""
        self.ptr(d, "[")
        self.buffer.append([d, ']' + (',' if comma else '')])
        last = len(ls) - 1
        for i, js in enumerate(ls):
            self.pretty_json(js, d+1, i != last)
        # Close this list right after its own items, not at the end of output.
        self._close_container()

    def pretty_map(self, map, d, comma=False):
        """Print dict ``map`` at depth ``d``; ``comma`` adds ',' after '}'."""
        self.ptr(d, "{")
        self.buffer.append([d, '}' + (',' if comma else '')])
        last = len(map) - 1
        for i, (k, v) in enumerate(map.items()):
            self.pretty_key(k, d+1)
            self.pretty_json(v, d+1, i != last)
        self._close_container()

    def pretty_key(self, k, d):
        """Print ``"k":`` at depth ``d`` and stay on the same line."""
        self.ptr(d, '"%s":' % (k, ), False)

    def _close_container(self):
        """Pop the innermost open container and print its closing bracket."""
        d, p = self.buffer.pop(-1)
        self.ptr(d, "%s" % (p, ))

    def clear_buffer(self):
        """Print the closers of any containers still open, innermost first."""
        while self.buffer:
            self._close_container()
def test():
    """Smoke test: pretty-print two hard-coded nested sample structures."""
    innermost = {"a": "1", "b": [1, 2, 3]}
    sample_map = {"a": 1, "b": [1, 2, "c", {"f": innermost}]}
    sample_list = [{"a": 1, "b": 3}, 3, 4, [[[[1, 2, 3]]]]]
    # A fresh printer is used for each sample, map first and list second.
    for sample in (sample_map, sample_list):
        Pretty().pretty(sample)
if __name__ == "__main__":
    # Read the whole JSON document from stdin, parse it, then pretty-print it.
    source = sys.stdin.read()
    parser = parse.Parse()
    parser.parse(source)
    Pretty().pretty(parser.js)
@ls0f

This comment has been minimized.

Copy link
Owner Author

ls0f commented Jul 1, 2016

@ls0f

This comment has been minimized.

Copy link
Owner Author

ls0f commented Jul 1, 2016

prettyjson

@ls0f

This comment has been minimized.

Copy link
Owner Author

ls0f commented Jul 1, 2016

curl https://raw.githubusercontent.com/antlr/grammars-v4/master/json/examples/example1.json 2>/dev/null | python prettyjson.py

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.