Last active
October 5, 2019 20:41
-
-
Save rsiemens/559dbf3ca0bed4e600f3b6df057a9731 to your computer and use it in GitHub Desktop.
A toy recursive decent parser for a JSON subset called "JSON Lite"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Grammar | |
JSON = "{" FIELD "}" | |
FIELD = ε | FIELDS | |
FIELDS = STRING ":" VALUE ("," FIELDS)* | |
VALUE = STRING | JSON | |
STRING = "\"" PRINTABLE "\"" | |
PRINTABLE = (a | .. | z | A | .. | Z | 0 | .. | 9)* | |
""" | |
import json | |
from string import ascii_letters, digits | |
from unittest import TestCase | |
class ParsingException(Exception): | |
def __init__(self, expected): | |
super().__init__(f'Invalid token, expected {expected}') | |
class JsonLiteParser: | |
def __init__(self, input, whitespace=(' ', '\n', '\t')): | |
self.whitespace = whitespace | |
self.input = input.strip() | |
self.cursor = 0 | |
@classmethod | |
def parse(cls, input): | |
parser = cls(input) | |
parser.json() | |
if parser.current != None: | |
raise ParsingException('EOF') | |
@property | |
def current(self): | |
try: | |
return self.input[self.cursor] | |
except IndexError: | |
return None | |
def next(self): | |
self.cursor += 1 | |
# skip whitespace | |
while self.current in self.whitespace: | |
self.cursor += 1 | |
def match(self, expected_token): | |
# Match current token to the expected_token and advance the | |
# cursor else error. | |
if self.current != expected_token: | |
raise ParsingException(expected_token) | |
self.next() | |
def json(self): | |
self.match('{') | |
self.field() | |
self.match('}') | |
def field(self): | |
while self.current == '"': | |
self.string() | |
self.match(':') | |
if self.current == '"': | |
self.string() | |
elif self.current == '{': | |
self.json() | |
else: | |
raise ParsingException('" or {') | |
if self.current == ',': | |
self.next() | |
if self.current != '"': | |
raise ParsingException('"') | |
def string(self): | |
self.match('"') | |
while self.current in ascii_letters + digits: | |
self.next() | |
self.match('"') | |
class JsonLiteParseTestCases(TestCase): | |
def test_parse_ok(self): | |
valid_inputs = [ | |
'{}', | |
'{"name": "Ryan"}', | |
'{"name": "Ryan", "position": "Backend Engineer"}', | |
'{"location": {"city": "Fresno", "state": "California"}}', | |
""" | |
{ | |
"name": "Ryan", | |
"position": "Backend Engineer", | |
"location": { | |
"city": "Fresno", | |
"state": "California" | |
} | |
} | |
""", | |
'{"empty": {}}', | |
'{"": ""}', | |
'{"": {"": {"": {"": ""}}}, "very": "fun"}', | |
] | |
for valid_input in valid_inputs: | |
JsonLiteParser.parse(valid_input) | |
json.loads(valid_input) | |
def test_parse_errors(self): | |
invalid_inputs = [ | |
'"key": "value"}', # missing opening { | |
'{"key"}', # missing `: "value"` | |
'{"key": }', # missing value | |
'{: "value"}', # missing a key | |
'{"key": "value",}', # missing key value pair after comma | |
'{"key": "value", {}}', # invalid key, objects can only be values | |
'{key: "value"}', # key needs quotes around it | |
'{"key": value}', # value needs quotes around it | |
'{"key": {"another_key": "value"}', # missing closing } | |
'{"key": "value"} extra', # Not all of input is valid | |
] | |
for invalid_input in invalid_inputs: | |
with self.assertRaises(ParsingException, msg=invalid_input): | |
JsonLiteParser.parse(invalid_input) | |
for invalid_input in invalid_inputs: | |
with self.assertRaises(json.JSONDecodeError): | |
json.loads(invalid_input) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment