Skip to content

Instantly share code, notes, and snippets.

@rsiemens
Last active October 5, 2019 20:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rsiemens/559dbf3ca0bed4e600f3b6df057a9731 to your computer and use it in GitHub Desktop.
Save rsiemens/559dbf3ca0bed4e600f3b6df057a9731 to your computer and use it in GitHub Desktop.
A toy recursive decent parser for a JSON subset called "JSON Lite"
"""
Grammar
JSON = "{" FIELD "}"
FIELD = ε | FIELDS
FIELDS = STRING ":" VALUE ("," FIELDS)*
VALUE = STRING | JSON
STRING = "\"" PRINTABLE "\""
PRINTABLE = (a | .. | z | A | .. | Z | 0 | .. | 9)*
"""
import json
from string import ascii_letters, digits
from unittest import TestCase
class ParsingException(Exception):
def __init__(self, expected):
super().__init__(f'Invalid token, expected {expected}')
class JsonLiteParser:
def __init__(self, input, whitespace=(' ', '\n', '\t')):
self.whitespace = whitespace
self.input = input.strip()
self.cursor = 0
@classmethod
def parse(cls, input):
parser = cls(input)
parser.json()
if parser.current != None:
raise ParsingException('EOF')
@property
def current(self):
try:
return self.input[self.cursor]
except IndexError:
return None
def next(self):
self.cursor += 1
# skip whitespace
while self.current in self.whitespace:
self.cursor += 1
def match(self, expected_token):
# Match current token to the expected_token and advance the
# cursor else error.
if self.current != expected_token:
raise ParsingException(expected_token)
self.next()
def json(self):
self.match('{')
self.field()
self.match('}')
def field(self):
while self.current == '"':
self.string()
self.match(':')
if self.current == '"':
self.string()
elif self.current == '{':
self.json()
else:
raise ParsingException('" or {')
if self.current == ',':
self.next()
if self.current != '"':
raise ParsingException('"')
def string(self):
self.match('"')
while self.current in ascii_letters + digits:
self.next()
self.match('"')
class JsonLiteParseTestCases(TestCase):
def test_parse_ok(self):
valid_inputs = [
'{}',
'{"name": "Ryan"}',
'{"name": "Ryan", "position": "Backend Engineer"}',
'{"location": {"city": "Fresno", "state": "California"}}',
"""
{
"name": "Ryan",
"position": "Backend Engineer",
"location": {
"city": "Fresno",
"state": "California"
}
}
""",
'{"empty": {}}',
'{"": ""}',
'{"": {"": {"": {"": ""}}}, "very": "fun"}',
]
for valid_input in valid_inputs:
JsonLiteParser.parse(valid_input)
json.loads(valid_input)
def test_parse_errors(self):
invalid_inputs = [
'"key": "value"}', # missing opening {
'{"key"}', # missing `: "value"`
'{"key": }', # missing value
'{: "value"}', # missing a key
'{"key": "value",}', # missing key value pair after comma
'{"key": "value", {}}', # invalid key, objects can only be values
'{key: "value"}', # key needs quotes around it
'{"key": value}', # value needs quotes around it
'{"key": {"another_key": "value"}', # missing closing }
'{"key": "value"} extra', # Not all of input is valid
]
for invalid_input in invalid_inputs:
with self.assertRaises(ParsingException, msg=invalid_input):
JsonLiteParser.parse(invalid_input)
for invalid_input in invalid_inputs:
with self.assertRaises(json.JSONDecodeError):
json.loads(invalid_input)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment