Last active
August 29, 2015 14:14
-
-
Save andfoy/3c7acb7945b6d44c4951 to your computer and use it in GitHub Desktop.
Minimal JSON object parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re

# Pattern for an object key as it appears immediately before ':' -- an
# optional leading comma (left over from the previous member), then a
# double-quoted identifier made of letters, digits, '_' and '-', not starting
# with a digit or '-'.
# Raw strings keep the backslash in "\w" from being read as a (invalid)
# string escape; the resulting pattern text is unchanged.
KEY_REGEX = r'^[,]?["][a-zA-Z_][\w-]*["]$'

# Pattern for a string value: anything enclosed in double quotes, optionally
# followed by a single trailing comma.
VALUE_REGEX = r'^["].*["][,]?$'

# (opening, closing) delimiter pairs used when searching for a matching
# bracket.
CURLY_PAIR = ('{', '}')
SQ_PAIR = ('[', ']')
class JSONException(Exception):
    """Error raised for unusable input or malformed JSON text.

    The payload passed to the constructor is kept on ``value`` and is what
    ``str()`` of the exception returns.
    """

    def __init__(self, value):
        """Store *value* as the error payload.

        The payload is also forwarded to ``Exception.__init__`` so that
        ``args`` is populated; without it, copying or pickling the exception
        fails because reconstruction calls the class with ``*args``.
        """
        super(JSONException, self).__init__(value)
        self.value = value

    def __str__(self):
        """Return the payload converted to a string."""
        return str(self.value)
class JSON(object):
    """Minimal parser for compact JSON objects.

    The parser works character by character and understands only:

    * objects whose keys match ``KEY_REGEX``,
    * string values matching ``VALUE_REGEX``,
    * nested objects, and
    * arrays, whose elements are each parsed as an object.

    Keys and string values are stored exactly as written, *including* their
    surrounding double quotes. Whitespace between tokens is not skipped, and
    brackets or commas inside string values are not treated specially.

    Attributes:
        contents: the raw first line read from the input.
        representation: the dict produced by parsing ``contents``.
    """

    def __init__(self, f):
        """Read the first line of *f* and parse it.

        Args:
            f: either an open file-like object (anything with ``readline``;
                it is closed after reading) or a ``str`` path to open.

        Raises:
            JSONException: if *f* is neither of the above, or the text is
                malformed.
        """
        self.contents = ''
        if hasattr(f, 'readline'):
            # Close the caller's handle even if reading fails.
            try:
                self.contents = self._first_line(f)
            finally:
                f.close()
        elif isinstance(f, str):
            with open(f, 'rb') as fp:
                self.contents = self._first_line(fp)
        else:
            raise JSONException("Input file not valid, only str and file types are accepted")
        self.representation = self.parse(self.contents, {})

    @staticmethod
    def _first_line(fp):
        """Return the first line of *fp* as text ('' for an empty file)."""
        line = fp.readline()
        # On Python 3 a binary handle yields bytes, which the character-based
        # parser cannot process; on Python 2 bytes is str and this is a no-op.
        if isinstance(line, bytes) and not isinstance(line, str):
            line = line.decode('utf-8')
        return line

    def preProcess(self, s):
        """Return *s* without leading and trailing space characters.

        Only the space character is removed; tabs and newlines are kept.
        """
        return s.strip(' ')

    def fileSyntax(self, content):
        """Return True when curly and square brackets are balanced by count.

        This is a coarse check: it compares only the number of opening and
        closing brackets of each kind. It does not verify ordering or
        nesting, and it counts brackets that appear inside string values.
        """
        curly_ok = content.count('{') == content.count('}')
        square_ok = content.count('[') == content.count(']')
        return square_ok and curly_ok

    def findBrace(self, content, pos, symPair):
        """Find the bracket that closes the one opened at index *pos*.

        Args:
            content: text to scan.
            pos: index of the opening bracket; scanning starts right after.
            symPair: ``(opening, closing)`` characters to match.

        Returns:
            Index of the matching closing character, or -1 if none is found.
        """
        opening, closing = symPair
        depth = 1
        for index in range(pos + 1, len(content)):
            char = content[index]
            if char == opening:
                depth += 1
            elif char == closing:
                depth -= 1
            if depth == 0:
                return index
        return -1

    def splitByComma(self, content):
        """Split *content* on commas that are not nested inside brackets.

        Separator commas are dropped. An empty *content* yields ``['']``.
        If the brackets in *content* do not return to depth zero, the text
        after the last top-level comma is not included in the result.
        """
        depth = 0
        elements = []
        current = []
        for char in content:
            if char in ('{', '['):
                depth += 1
            elif char in (']', '}'):
                depth -= 1
            if char == ',' and depth == 0:
                elements.append(''.join(current))
                current = []
            else:
                current.append(char)
        if depth == 0:
            elements.append(''.join(current))
        return elements

    def parse(self, content, d=None):
        """Parse *content* and store its members in *d*.

        Args:
            content: JSON text; leading/trailing spaces are stripped first.
            d: dict to fill. A fresh dict is created when omitted, so
                separate calls never share state.

        Returns:
            The filled dict. Keys and string values keep their double quotes;
            nested objects become dicts and arrays become lists of dicts.

        Raises:
            JSONException: if brackets are unbalanced, a key or string value
                does not match its pattern, or the text ends unexpectedly.
        """
        if d is None:
            d = {}
        content = self.preProcess(content)
        if not self.fileSyntax(content):
            raise JSONException("The file syntax is malformed %s" % (content))

        size = len(content)
        accum = ''
        key = None
        pos = 0
        while pos < size:
            if content[pos] == '{':
                # An opening brace starts a fresh token; skip past it.
                accum = ''
                pos += 1
                if pos >= size:
                    # fileSyntax only counts brackets, so text such as '}{'
                    # gets here with nothing after the brace.
                    raise JSONException("The file syntax is malformed %s" % (content))
            accum += content[pos]

            if accum[-1] == ':' and key is None:
                if len(re.findall(KEY_REGEX, accum[:-1])) != 1:
                    raise JSONException("The file syntax is malformed: %s" % (accum))
                key = accum[:-1]
                accum = ''
                if key[0] == ',':
                    # Drop the separator left over from the previous member.
                    key = key[1:]
                if pos + 1 >= size:
                    raise JSONException("The file syntax is malformed %s" % (content))
                following = content[pos + 1]
                if following == '{':
                    levelEnd = self.findBrace(content, pos + 1, CURLY_PAIR)
                    if levelEnd == -1:
                        # Without this guard pos would be reset to -1 and the
                        # loop would restart from the beginning forever.
                        raise JSONException("The file syntax is malformed %s" % (content))
                    d[key] = self.parse(content[pos + 1:levelEnd + 1], {})
                    key = None
                    pos = levelEnd
                elif following == '[':
                    levelEnd = self.findBrace(content, pos + 1, SQ_PAIR)
                    if levelEnd == -1:
                        raise JSONException("The file syntax is malformed %s" % (content))
                    array = content[pos + 2:levelEnd]
                    if self.preProcess(array):
                        d[key] = [self.parse(e, {}) for e in self.splitByComma(array)]
                    else:
                        # An empty array has no elements to parse.
                        d[key] = []
                    key = None
                    pos = levelEnd
                # Otherwise a plain value follows; keep accumulating it.

            if accum and accum[-1] in (',', '}') and key is not None:
                # A comma or closing brace terminates the pending value.
                if len(re.findall(VALUE_REGEX, accum[:-1])) != 1:
                    raise JSONException("The file syntax is malformed: %s" % (accum))
                d[key] = accum[:-1]
                key = None
                accum = ''
            pos += 1
        return d
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment