Skip to content

Instantly share code, notes, and snippets.

@liftoff
Last active January 29, 2016 19:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save liftoff/261bf0b75c9884259d86 to your computer and use it in GitHub Desktop.
Save liftoff/261bf0b75c9884259d86 to your computer and use it in GitHub Desktop.
Clean up JSON-like data before decoding as JSON
import os, re, sys
# Tokenizer for JSON-like text: each alternative matches either a comment or a
# complete quoted string.  Strings are matched so that comment markers inside
# them are consumed as part of the string instead of being seen as comments.
comments_re = re.compile(
    r'//.*?$'                  # single-line comment, up to the end of the line
    r'|/\*.*?\*/'              # block comment, may span several lines
    r"|\'(?:\\.|[^\\\'])*\'"   # single-quoted string, honouring backslash escapes
    r'|"(?:\\.|[^\\"])*"',     # double-quoted string, honouring backslash escapes
    flags=re.MULTILINE | re.DOTALL,
)
# Matches a comma (group 1) followed by optional whitespace and a closing '}'.
# The lookahead only succeeds when the rest of the text, up to the end, is made
# of escape pairs, complete double-quoted strings and non-quote characters --
# i.e. the candidate comma is not sitting inside a double-quoted string.
trailing_commas_re = re.compile(
    r'(,)\s*}'
    r'(?=([^"\\]*(\\.|"([^"\\]*\\.)*[^"\\]*"))*[^"]*$)'
)
def remove_comments(json_like):
    r"""
    Removes C-style comments from *json_like* and returns the result.

    Both ``// ...`` single-line comments and ``/* ... */`` block comments are
    replaced with the empty string.  Quoted strings (single or double quotes)
    are returned untouched, so comment markers inside a string are preserved.
    Only the comment text itself is removed; whitespace and newlines around it
    are kept.  Example::

        >>> test_json = '''{
        ... "foo": "bar", // This is a single-line comment
        ... "baz": "blah" /* Multi-line
        ... Comment */
        ... }'''
        >>> remove_comments(test_json)
        '{\n"foo": "bar", \n"baz": "blah" \n}'
    """
    def strip_comment(match):
        token = match.group(0)
        # comments_re matches only comments and quoted strings; every comment
        # alternative starts with '/', every string alternative with a quote.
        return "" if token.startswith("/") else token

    return comments_re.sub(strip_comment, json_like)
# Matches either a complete double-quoted string (so that commas inside strings
# are skipped over) or a comma, plus any whitespace after it, that is directly
# followed by a closing '}' or ']'.  The closing bracket itself is not consumed.
_string_or_trailing_comma_re = re.compile(
    r'"(?:\\.|[^\\"])*"|,\s*(?=[}\]])',
    re.DOTALL,
)


def remove_trailing_commas(json_like):
    """
    Removes trailing commas from *json_like* and returns the result.

    A trailing comma is a comma followed only by whitespace before a closing
    ``}`` or ``]``.  The comma and that whitespace are removed.  Commas inside
    double-quoted strings are never touched.  Example::

        >>> remove_trailing_commas('{"foo":"bar","baz":"blah",}')
        '{"foo":"bar","baz":"blah"}'
        >>> remove_trailing_commas('{"list":[1,2,],}')
        '{"list":[1,2]}'
    """
    def drop_comma(match):
        token = match.group(0)
        # The pattern yields only strings (start with '"') or trailing commas
        # (start with ','); strings pass through unchanged.
        return "" if token.startswith(",") else token

    return _string_or_trailing_comma_re.sub(drop_comma, json_like)
# Example usage: strip the comments first, then the trailing commas, leaving
# text that can be handed to a JSON decoder.
with open('some_file.json') as f:
    almost_json = remove_comments(f.read())
proper_json = remove_trailing_commas(almost_json)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment