Skip to content

Instantly share code, notes, and snippets.

@skliarpawlo
Created July 16, 2015 12:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save skliarpawlo/a225a5fb952190271d51 to your computer and use it in GitHub Desktop.
Save skliarpawlo/a225a5fb952190271d51 to your computer and use it in GitHub Desktop.
Parse the first JSON object or array found in a piece of HTML/script
def _get_json_from_string(s):
"""Parses first found json object from string (piece of html).
>>> _get_json_from_string('{"a": "b"}')
{u'a': u'b'}
>>> _get_json_from_string('dskljasd{"a": "b"}asdkljasd')
{u'a': u'b'}
>>> _get_json_from_string('require(asd=[{"a": "b"}, {"c":"d"}]spamspamspam)')
[{u'a': u'b'}, {u'c': u'd'}]
>>> _get_json_from_string('{"a": [1,2,{"b":"d[1,2,3]sd"}]}')
{u'a': [1, 2, {u'b': u'd[1,2,3]sd'}]}
>>> _get_json_from_string('xxxxxx')
>>> _get_json_from_string('{{}')
Traceback (most recent call last):
...
RuntimeError: Incorrect json format: not all brances closed
>>> _get_json_from_string('{]{}')
Traceback (most recent call last):
...
RuntimeError: Incorrect json format
"""
stack = []
start_idx = 0
while start_idx < len(s) and not s[start_idx] in {'{', '['}:
start_idx += 1
if start_idx == len(s):
return None
end_idx = start_idx
open_close_map = {
'{': '}',
'[': ']',
}
in_quotes = False
slashed_next = False
for c in s[start_idx:]:
end_idx += 1
if in_quotes:
if c == '"' and not slashed_next:
in_quotes = False
slashed_next = False
if c == '\\' and not slashed_next:
slashed_next = True
else:
slashed_next = False
elif c == '"':
in_quotes = True
elif c in {'[', '{'}:
stack.append(c)
elif c in {']', '}'}:
opener = stack.pop()
if open_close_map[opener] != c:
raise RuntimeError('Incorrect json format')
if not stack:
break
if stack:
raise RuntimeError('Incorrect json format: not all brances closed')
return json.loads(s[start_idx:end_idx])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment