Skip to content

Instantly share code, notes, and snippets.

@magical
Last active August 29, 2015 14:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save magical/2b7bc306369b81f033ed to your computer and use it in GitHub Desktop.
Save magical/2b7bc306369b81f033ed to your computer and use it in GitHub Desktop.
import re
import time
# Pattern used by sb_json_regex to find a line that ends while a double-quoted
# string is still open, so that the line's trailing newline can be rewritten.
#
# Compiled with re.VERBOSE (whitespace and line breaks inside the pattern are
# ignored) and re.MULTILINE (``^`` and ``$`` anchor at every line boundary).
#
# The recurring sub-expression  (?:[^\n\\"]|(?:\\{2})*\\"|\\[^\n"])*  consumes
# text that contains no unescaped quote. Its alternatives are:
#   * any character other than newline, backslash or double quote;
#   * an even run of backslashes followed by backslash + quote (escaped quote);
#   * a backslash followed by any character other than newline or quote.
#
# Overall shape, from ``^`` to ``$``:
#   1. zero or more repetitions of *two* quote-terminated segments, i.e. an
#      even number of unescaped quotes;
#   2. one more quote-free segment, a single ``"``, and a final quote-free
#      segment, bringing the total of unescaped quotes on the line to an odd
#      number;
#   3. the newline that ends the line (outside group 1).
#
# Group 1 is the whole line without its newline; groups 2 and 3 are the text
# before and after the last unpaired quote.
quotefix_re = re.compile(
r'''
^(
(?:
(?:
(?:[^\n\\"]|(?:\\{2})*\\"|\\[^\n"])*
(?:\\{2})*
"
){2}
)*
((?:[^\n\\"]|(?:\\{2})*\\"|\\[^\n"])*)
"
((?:[^\n\\"]|(?:\\{2})*\\"|\\[^\n"])*)
)$\n
''',
flags=re.MULTILINE | re.VERBOSE
)
def sb_json_regex(raw):
    """Escape newlines found inside double-quoted strings, using ``quotefix_re``.

    Each pass rewrites only the first match: the matched line is kept and its
    trailing newline is replaced by the two characters backslash + ``n``.
    Passes repeat until a pass makes no substitution, because joining two
    lines changes what the pattern sees on the next pass.

    Args:
        raw: Text to process.

    Returns:
        The text after no further match of ``quotefix_re`` remains.
    """
    replaced = 1
    while replaced:
        # count=1 is deliberate: only the first match is rewritten per pass.
        raw, replaced = quotefix_re.subn(r'\1\\n', raw, count=1)
    return raw
def sb_json_boring(s):
    """Escape newlines inside double-quoted strings with a one-pass scan.

    Characters outside a string are copied unchanged. Inside a string, a
    newline is emitted as the two characters backslash + ``n``; a backslash
    causes the following character to be copied verbatim, whatever it is.

    Args:
        s: Text to process.

    Returns:
        A new string with in-string newlines escaped.
    """
    pieces = []
    in_string = False  # currently between an opening and closing quote
    escaped = False    # previous in-string character was a backslash

    for ch in s:
        if escaped:
            # The character after a backslash is copied as-is.
            pieces.append(ch)
            escaped = False
            continue

        if not in_string:
            pieces.append(ch)
            in_string = ch == '"'
            continue

        if ch == '\n':
            pieces.append('\\n')
            continue

        pieces.append(ch)
        if ch == '"':
            in_string = False
        elif ch == '\\':
            escaped = True

    return ''.join(pieces)
def timeit(f, name, s):
    """Call ``f(s)`` once and print how long the call took.

    The elapsed time is measured with ``time.perf_counter()``, a monotonic
    high-resolution clock intended for timing short durations, rather than
    the adjustable wall clock ``time.time()``.

    Args:
        f: Callable taking the single argument ``s``; its result is discarded.
        name: Label printed at the start of the report line.
        s: Input passed to ``f``. Only its first 20 characters (followed by
            ``...``) are shown in the report when it is longer than that.

    Returns:
        None. One line is written to standard output.
    """
    start = time.perf_counter()
    f(s)
    elapsed = time.perf_counter() - start

    # Keep the report line short for long inputs.
    preview = s if len(s) <= 20 else s[:20] + "..."
    print("%s: s=%r len(s)=%d: %fs" % (name, preview, len(s), elapsed))
def do(s):
    """Time both implementations on ``s`` and check that they agree.

    Prints one timing line for the regex version and one for the scanning
    version, then asserts that the two produce the same output.

    Args:
        s: Input text handed to both implementations.
    """
    timeit(sb_json_regex, "regex", s)
    timeit(sb_json_boring, "boring", s)

    via_regex = sb_json_regex(s)
    via_scan = sb_json_boring(s)
    assert via_regex == via_scan
# Driver: run the timing/agreement check on a handful of inputs.

# No strings at all.
do("{}")
# Strings without newlines or escapes.
do("""{"hello": "world"}""")
# Raw literal: the key holds an escaped quote, an escaped backslash and the
# two-character sequence backslash + n (not a real newline).
do(r"""{"\"\\ \n": ""}""")
# Larger input: 100 repetitions of an entry whose key and value both contain
# real newline characters.
do("{\n" + """ "this\nis an example": "of a very\nlong json file\nwith embedded newlines",\n""" * 100 + """ "the": "end" } """)
regex: s='{}' len(s)=2: 0.000076s
boring: s='{}' len(s)=2: 0.000005s
regex: s='{"hello": "world"}' len(s)=18: 0.000034s
boring: s='{"hello": "world"}' len(s)=18: 0.000013s
regex: s='{"\\"\\\\ \\n": ""}' len(s)=15: 0.000026s
boring: s='{"\\"\\\\ \\n": ""}' len(s)=15: 0.000011s
regex: s='{\n "this\nis an exa...' len(s)=7618: 0.855799s
boring: s='{\n "this\nis an exa...' len(s)=7618: 0.003821s
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment