Created
January 8, 2017 00:55
-
-
Save mahmoud/dd5394625b202a66a4b15e4028eec75b to your computer and use it in GitHub Desktop.
For when JSONL doesn't go right: re-splits JSON objects that were written back-to-back (`}{`) into one object per line.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
def _iter_json_values(text):
    """Yield every JSON value stored back-to-back in *text*.

    Values may be directly adjacent (``{...}{...}``) or separated by
    whitespace/newlines.  Parsing is delegated to
    ``json.JSONDecoder.raw_decode``, which reports where each value ends,
    so a ``}{`` sequence inside a string literal is never mistaken for a
    boundary between two objects.

    Raises:
        ValueError: if the text at some offset is not a valid JSON value.
    """
    decoder = json.JSONDecoder()
    pos = 0
    length = len(text)
    while True:
        # raw_decode() does not skip leading whitespace on its own.
        while pos < length and text[pos].isspace():
            pos += 1
        if pos >= length:
            return
        try:
            value, pos = decoder.raw_decode(text, pos)
        except ValueError as exc:
            # ``pos`` is unchanged here because the tuple assignment above
            # never happened; report it so the bad spot can be located.
            raise ValueError(
                'could not decode a JSON value at offset %d: %s' % (pos, exc))
        yield value


def main(in_path='briefings.jsonl', out_path='briefings.real.jsonl'):
    """Rewrite a file of concatenated JSON values as proper JSON Lines.

    Reads *in_path*, extracts each JSON value in order, and writes it to
    *out_path* as one line of JSON with sorted keys.  A progress message
    with the object's ``title`` (``None`` when absent) is printed for
    every value found.

    Args:
        in_path: path of the malformed input file (UTF-8, optional BOM).
        out_path: path of the JSON Lines file to create/overwrite.

    Raises:
        ValueError: if the input contains text that is not valid JSON.
            Values decoded before the bad spot have already been written.
    """
    # Read as bytes and decode explicitly so the result does not depend on
    # the platform's default text encoding.
    with open(in_path, 'rb') as in_f:
        text = in_f.read().decode('utf-8-sig')

    with open(out_path, 'wb') as out_f:
        for obj in _iter_json_values(text):
            title = obj.get('title') if isinstance(obj, dict) else None
            print('found an object %s' % (title,))
            # json.dumps() escapes non-ASCII by default, so the line is
            # pure ASCII and can be written to the binary handle as-is.
            line = json.dumps(obj, sort_keys=True)
            out_f.write(line.encode('ascii') + b'\n')
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment