Skip to content

Instantly share code, notes, and snippets.

@YaYaB
Last active January 10, 2020 11:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save YaYaB/ea60604bb655c953750ef58573e1d1cb to your computer and use it in GitHub Desktop.
Save YaYaB/ea60604bb655c953750ef58573e1d1cb to your computer and use it in GitHub Desktop.
Transform a JSON document into an LJSON document (one JSON value per line)
import json
import os
from glob import glob
# Path of the input: either a single JSON document or a folder that
# contains only JSON documents.
input_doc = "TODO"

# The output file reuses the input path without its final extension.
# os.path.splitext only inspects the last path component, so an input with
# no extension (typically a folder) keeps its full name, and dots in parent
# directory names are left alone. normpath drops a trailing separator so
# that "folder/" yields "folder_output.ljson" rather than a file created
# inside the folder being read.
output_ljson = "{}_output.ljson".format(
    os.path.splitext(os.path.normpath(input_doc))[0]
)
def transform_input_to_dumpable(input_doc):
    """Load a JSON file and return its content as a list of elements.

    Args:
        input_doc: Path of the JSON file to read.

    Returns:
        The parsed document itself when its top level is a JSON array;
        otherwise a one-element list wrapping the parsed value (object,
        string, number, boolean or null), so the caller can always iterate
        over the result and dump one element per line.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the content is not valid UTF-8 or not valid JSON
            (raised as UnicodeDecodeError or json.JSONDecodeError).
    """
    # Read explicitly as UTF-8 instead of relying on the locale default.
    with open(input_doc, encoding="utf-8") as f:
        content = json.load(f)
    # Wrap every non-array value, not only objects: iterating a bare string
    # would yield its characters, and a bare number cannot be iterated.
    if not isinstance(content, list):
        content = [content]
    return content
# Parse the JSON content and write it to the output file, one element per line.
input_docs = [input_doc]  # by default, treat the input as a single file

# If the input is a directory, convert every regular file directly inside it.
# The list is built BEFORE the output file is opened so that an output path
# located inside the folder is never picked up as an (empty) input.
if os.path.isdir(input_doc):
    output_path = os.path.abspath(output_ljson)
    # glob returns entries in arbitrary order; sort for reproducible output.
    # Sub-directories are skipped because they cannot be opened as files.
    input_docs = sorted(
        path
        for path in glob(os.path.join(input_doc, "*"))
        if os.path.isfile(path) and os.path.abspath(path) != output_path
    )

with open(output_ljson, 'w') as f:
    # Parse every JSON file and dump each of its elements on its own line.
    for doc in input_docs:
        for elem in transform_input_to_dumpable(doc):
            f.write(json.dumps(elem) + "\n")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment