Skip to content

Instantly share code, notes, and snippets.

@zelaznik
Last active October 20, 2021 16:32
Show Gist options
  • Save zelaznik/968f74ce4b7f32d5fd46360a406b032f to your computer and use it in GitHub Desktop.
Save zelaznik/968f74ce4b7f32d5fd46360a406b032f to your computer and use it in GitHub Desktop.
Convert JSON to CSV in Python
#!/usr/bin/python3
import csv
import io
import json
import pdb
import sys
from collections import OrderedDict
def serialize_cell(cell):
    """Return a value suitable for a single CSV cell.

    Dicts and lists are encoded as JSON text with ``json.dumps``; every
    other value is handed back unchanged.
    """
    is_container = isinstance(cell, (dict, list))
    return json.dumps(cell) if is_container else cell
def post_process(row, prefix=()):
    """Flatten a nested mapping into a single-level ``OrderedDict``.

    Args:
        row: Mapping whose values may themselves be dicts.
        prefix: Tuple of the parent keys leading to ``row``; empty at the
            top level.

    Returns:
        An ``OrderedDict`` whose keys are the parent keys and the leaf key
        joined with ``"."``. Non-dict values are passed through
        ``serialize_cell``. A nested dict with no entries contributes no
        columns.
    """
    flattened = OrderedDict()
    for name, item in row.items():
        path = prefix + (name,)
        if not isinstance(item, dict):
            column = ".".join(path)
            flattened[column] = serialize_cell(item)
            continue
        # Nested object: recurse and merge its already-flattened columns.
        flattened.update(post_process(item, path))
    return flattened
def get_master_keyset(rows):
    """Return every key found in any of ``rows`` as a sorted tuple.

    Args:
        rows: Iterable of mappings (or other iterables of keys).

    Returns:
        A tuple of the distinct keys in ascending order; empty when no
        row contributes a key.
    """
    every_key = {key for row in rows for key in row}
    return tuple(sorted(every_key))
def json_string_to_csv(raw):
    """Convert a JSON array of objects into CSV text.

    Each object becomes one CSV record. Nested objects are flattened by
    ``post_process`` into dotted column names.

    Column order: columns whose name is exactly a top-level key come first,
    in the input's key order; flattened nested columns follow, grouped by
    the position of their parent top-level key and then sorted by name.

    Args:
        raw: JSON text holding an array of objects.

    Returns:
        The CSV text (header plus one record per object), or ``""`` when
        the array is empty.

    Raises:
        RuntimeError: If the rows do not all share the same top-level key
            sequence (the comparison is order-sensitive).
        json.JSONDecodeError: If ``raw`` is not valid JSON.
    """
    as_json = json.loads(raw, object_pairs_hook=OrderedDict)
    if isinstance(as_json, list) and not as_json:
        # No rows means no columns; without this guard the single-element
        # unpacking below fails with an opaque ValueError.
        return ""

    key_sets = {tuple(row) for row in as_json}
    if len(key_sets) > 1:
        raise RuntimeError("Inconsistent keys across rows")
    (key_set,) = key_sets

    processed = [post_process(row) for row in as_json]
    master_keyset = get_master_keyset(processed)

    # Position of every top-level key, used to group flattened columns.
    position = {name: idx for idx, name in enumerate(key_set)}

    def orderer(key):
        prefix = key.split(".")[0]
        if prefix not in position:
            # The owning top-level key itself contains a ".", so the text
            # before the first dot is not a real key. Use the longest
            # top-level key that this column equals or extends.
            owners = [
                name
                for name in key_set
                if key == name or key.startswith(name + ".")
            ]
            prefix = max(owners, key=len)
        return (key not in position, position[prefix], key)

    sorted_keyset = tuple(sorted(master_keyset, key=orderer))

    buffer = io.StringIO()
    dict_writer = csv.DictWriter(buffer, sorted_keyset)
    dict_writer.writeheader()
    dict_writer.writerows(processed)
    return buffer.getvalue()
def main():
    """Read JSON text from stdin and write the equivalent CSV to stdout."""
    raw_as_json = sys.stdin.read()
    raw_as_csv = json_string_to_csv(raw_as_json)
    # The CSV writer already terminates every record, so print() would
    # append one more newline and leave a spurious blank line at the end.
    sys.stdout.write(raw_as_csv)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment