Skip to content

Instantly share code, notes, and snippets.

@kdeloach
Last active November 3, 2015 20:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kdeloach/510a29c0aeefc108a7fc to your computer and use it in GitHub Desktop.
Save kdeloach/510a29c0aeefc108a7fc to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
"""
Coalesce a single file containing multiple JS arrays (undelimited) into
one giant JS array.
Ex. "[{...}]\n[{...}]" becomes "[{...},{...}]"
This assumes each JS array contains a list of JS object literals.
Usage:
> coalesce.py broken.json > fixed.json
"""
import os
import sys
import re
# Source: http://stackoverflow.com/questions/3862010/is-there-a-generator-version-of-string-split-in-python
def itersplit(s, sep=None):
    """Lazily split ``s``, yielding one piece at a time.

    Intended as a generator counterpart of ``str.split``:

    * ``sep=None`` splits on runs of whitespace (regex ``\\s+``) and never
      yields empty pieces, so leading/trailing whitespace is ignored.
    * An explicit ``sep`` is matched literally; empty pieces between
      adjacent separators, or at either end of ``s``, are yielded.

    Raises ``ValueError`` (on first iteration) when ``sep`` is empty,
    as ``str.split`` does.
    """
    if sep is None:
        pattern = re.compile(r'\s+')
    elif not sep:
        # An empty pattern matches at every position without consuming any
        # input, so the scan below would never advance and would yield ''
        # forever.
        raise ValueError('empty separator')
    else:
        pattern = re.compile(re.escape(sep))

    # Only the whitespace mode suppresses empty pieces.
    keep_empty = sep is not None
    pos = 0
    while True:
        m = pattern.search(s, pos)
        if not m:
            # No more separators: emit the tail of the string.
            if pos < len(s) or keep_empty:
                yield s[pos:]
            break
        if pos < m.start() or keep_empty:
            yield s[pos:m.start()]
        # Resume scanning right after the separator just consumed.
        pos = m.end()
def coalesce(lines, out):
    """Merge the JSON objects found in ``lines`` into one JS array on ``out``.

    ``lines`` is any iterable of text lines (an open file works and is
    consumed lazily); ``out`` is any object with a ``write`` method.
    Each line is expected to hold at most one object literal, optionally
    wrapped in or followed by ``[``, ``]`` and ``,``. Lines that do not
    start with ``{`` and end with ``}`` after that punctuation is removed
    are skipped.

    The objects are written as ``[obj1\\n,obj2\\n,obj3]`` with no trailing
    newline. Returns the number of objects written.
    """
    count = 0
    out.write('[')
    for line in lines:
        # File iteration keeps the line terminator; drop it (including a
        # stray '\r' from CRLF input) before trimming array punctuation.
        line = line.rstrip('\r\n').strip(',[]')
        # Keep only rows holding a whole JSON object. This also filters
        # out rows that are empty once stripped.
        if not (line.startswith('{') and line.endswith('}')):
            continue
        if count:
            out.write('\n,')
        out.write(line)
        count += 1
    out.write(']')
    return count


def main(argv):
    """Coalesce the file named by ``argv[1]`` onto stdout.

    Returns a process exit status: 0 on success, 2 when the input file
    argument is missing.
    """
    if len(argv) < 2:
        sys.stderr.write('Usage: coalesce.py broken.json > fixed.json\n')
        return 2
    # Stream the file line by line instead of loading it whole, and make
    # sure the handle is closed when done.
    with open(argv[1], 'r') as source:
        coalesce(source, sys.stdout)
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment