Skip to content

Instantly share code, notes, and snippets.

@tankala
Last active March 2, 2022 08:31
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tankala/2f0ac895e063ddaef239b9dca16f11c8 to your computer and use it in GitHub Desktop.
Save tankala/2f0ac895e063ddaef239b9dca16f11c8 to your computer and use it in GitHub Desktop.
Write/Convert Nested JSON data to CSV for all keys or specific keys. For more details read my article https://blog.tanka.la/2020/03/29/write-convert-nested-json-data-to-csv-for-specific-subset-keysheaders/
[
{
"_id": "5e7f55e2f065ef934a048d32",
"index": 0,
"guid": "88647c07-bf2a-42c3-8e23-cec1ed731f2f",
"isActive": false,
"balance": "$1,254.74",
"picture": "http://placehold.it/32x32",
"age": 40,
"eyeColor": "green",
"name": "Laverne Robles",
"gender": "female",
"company": "COMVEY",
"email": "lavernerobles@comvey.com",
"phone": "+1 (898) 413-3041",
"address": "783 Livingston Street, Hillsboro, District Of Columbia, 1897",
"about": "Anim ut sunt consectetur irure irure sit consectetur et laboris minim. Et tempor excepteur aute consequat in sunt aliquip in. Proident non et reprehenderit consequat ea adipisicing. Nisi excepteur Lorem irure pariatur fugiat occaecat labore officia. Quis consectetur commodo velit aliquip cupidatat et esse.\r\n",
"registered": "2019-04-13T02:41:28 -06:-30",
"latitude": 85.172936,
"longitude": 40.103047,
"tags": [
"anim",
"cupidatat",
"excepteur",
"ea",
"do",
"ipsum",
"consectetur"
],
"friends": [
{
"1": "Dawn Lynch"
},
{
"2": "Rhodes Pacheco"
}
],
"greeting": "Hello, Laverne Robles! You have 4 unread messages.",
"favoriteFruit": "banana"
},
{
"_id": "5e7f55e272c1af3e99b6bf78",
"index": 1,
"guid": "0cb01bc1-c468-4bd4-9d1f-fcf52538f095",
"isActive": false,
"balance": "$2,697.51",
"picture": "http://placehold.it/32x32",
"age": 23,
"eyeColor": "brown",
"name": "Holder Hickman",
"gender": "male",
"company": "XINWARE",
"email": "holderhickman@xinware.com",
"phone": "+1 (934) 547-3070",
"address": "671 Claver Place, Robinette, Vermont, 710",
"about": "Ea et do qui aliqua laborum sunt mollit enim dolore aliqua. Elit eiusmod nulla reprehenderit aliqua irure consectetur culpa. Laborum cillum dolore culpa Lorem consequat sunt officia ut id dolore reprehenderit sint et.\r\n",
"registered": "2016-05-10T08:20:36 -06:-30",
"latitude": 10.192942,
"longitude": 74.879352,
"tags": [
"laboris",
"laborum",
"ullamco",
"laborum",
"occaecat",
"ad",
"mollit"
],
"friends": [
{
"1": "Ryan Ellis"
}
],
"greeting": "Hello, Holder Hickman! You have 5 unread messages.",
"favoriteFruit": "strawberry"
},
{
"_id": "5e7f55e235816416478ec377",
"index": 2,
"guid": "247a0687-b1f8-4b38-86b2-0eacb3fdab28",
"isActive": true,
"balance": "$2,269.84",
"picture": "http://placehold.it/32x32",
"age": 37,
"eyeColor": "blue",
"name": "Minnie Woods",
"gender": "female",
"company": "EPLOSION",
"email": "minniewoods@eplosion.com",
"phone": "+1 (806) 460-3043",
"address": "197 Ocean Court, Blue, Federated States Of Micronesia, 7113",
"about": "Ex officia occaecat incididunt commodo Lorem. Minim amet fugiat labore qui. Eu culpa culpa magna voluptate aute est tempor amet velit id est est. Laborum proident esse excepteur voluptate reprehenderit. Laborum ullamco commodo qui Lorem veniam id reprehenderit.\r\n",
"registered": "2018-06-18T06:37:45 -06:-30",
"latitude": 80.884237,
"longitude": -171.336369,
"tags": [
"Lorem",
"laborum",
"mollit",
"consequat",
"dolore",
"nostrud",
"anim"
],
"friends": [
{
"1": "Jolene Franco"
},
{
"2": "Gardner Boyle"
},
{
"3": "Alisa Carlson"
}
],
"greeting": "Hello, Minnie Woods! You have 2 unread messages.",
"favoriteFruit": "banana"
},
{
"_id": "5e7f55e222ba00311b60b946",
"index": 3,
"guid": "f80d3b7f-28c0-47a6-ad53-6d4d28671a29",
"isActive": false,
"balance": "$3,801.17",
"picture": "http://placehold.it/32x32",
"age": 37,
"name": "Ford Melton",
"gender": "male",
"company": "DIGIFAD",
"email": "fordmelton@digifad.com",
"phone": "+1 (881) 494-2384",
"address": "650 Nassau Avenue, Shasta, Georgia, 7589",
"about": "Consequat sint id sunt cupidatat quis proident. Elit sit ipsum aliqua amet. Anim qui incididunt culpa anim eu culpa irure deserunt non. Sint occaecat deserunt non minim ut in id voluptate ullamco laborum consequat sunt excepteur ex.\r\n",
"registered": "2019-03-04T02:35:04 -06:-30",
"latitude": -25.65117,
"longitude": 115.060646,
"tags": [
"anim",
"magna",
"deserunt",
"eu",
"cupidatat",
"ullamco",
"duis"
],
"friends": [
{
"1": "Thornton Gilmore"
},
{
"2": "Diana Everett"
},
{
"3": "Le Reynolds"
}
],
"greeting": "Hello, Ford Melton! You have 5 unread messages.",
"favoriteFruit": "apple"
},
{
"_id": "5e7f55e26964f0e4a752656c",
"index": 4,
"guid": "9161a683-9640-4472-a5b9-a673f7ec4c68",
"isActive": true,
"balance": "$3,823.16",
"picture": "http://placehold.it/32x32",
"age": 35,
"name": "Foley Lucas",
"gender": "male",
"company": "PHOLIO",
"email": "foleylucas@pholio.com",
"phone": "+1 (824) 468-3510",
"address": "426 Kingsland Avenue, Cucumber, Puerto Rico, 540",
"about": "Enim dolore aliquip proident et excepteur consectetur non minim excepteur qui. Labore quis minim aute laborum ad. Dolore velit sit officia nostrud tempor ullamco magna non reprehenderit consectetur sint nulla. Eiusmod nisi ad exercitation nulla velit fugiat. Tempor laborum elit veniam aliqua incididunt quis magna ex adipisicing magna. Culpa mollit voluptate dolor labore nisi Lorem incididunt. Sit labore qui labore esse eu nulla consectetur exercitation duis et in excepteur commodo.\r\n",
"registered": "2018-09-21T08:17:37 -06:-30",
"latitude": 77.763657,
"longitude": 85.236121,
"tags": [
"aliqua",
"velit",
"mollit",
"aute",
"dolor",
"exercitation",
"dolore"
],
"greeting": "Hello, Foley Lucas! You have 9 unread messages.",
"favoriteFruit": "strawberry"
}
]
import json # For JSON loading
import csv # For CSV dict writer
def get_leaves(item, key=None, key_prefix=""):
"""
This function converts nested dictionary structure to flat
"""
if isinstance(item, dict):
leaves = {}
"""Iterates the dictionary and go to leaf node after that calls to get_leaves function recursively to go to leaves level"""
for item_key in item.keys():
"""Some times leaves and parents or some other leaves might have same key that's why adding leave node key to distinguish"""
temp_key_prefix = (
item_key if (key_prefix == "") else (key_prefix + "_" + str(item_key))
)
leaves.update(get_leaves(item[item_key], item_key, temp_key_prefix))
return leaves
elif isinstance(item, list):
leaves = {}
elements = []
"""Iterates the list and go to leaf node after that if it is leave then simply add value to current key's list or
calls to get_leaves function recursively to go to leaves level"""
for element in item:
if isinstance(element, dict) or isinstance(element, list):
leaves.update(get_leaves(element, key, key_prefix))
else:
elements.append(element)
if len(elements) > 0:
leaves[key] = elements
return leaves
else:
return {key_prefix: item}
with open("data.json") as f_input, open("output.csv", "w", newline="") as f_output:
json_data = json.load(f_input, strict=False)
"""'First parse all entries to get the unique fieldnames why because already we have file in RAM level and
if we put each dictionary after parsing in list or some data structure it will crash your system due to memory constraint
that's why first we will get the keys first then we convert each dictionary and put it to CSV"""
fieldnames = set()
for entry in json_data:
fieldnames.update(get_leaves(entry).keys())
csv_output = csv.DictWriter(f_output, delimiter=";", fieldnames=sorted(fieldnames))
csv_output.writeheader()
csv_output.writerows(get_leaves(entry) for entry in json_data)
import json # For JSON loading
import csv # For CSV dict writer
def get_leaves(item, key=None, key_prefix=""):
"""
This function converts nested dictionary structure to flat
"""
if isinstance(item, dict):
leaves = {}
"""Iterates the dictionary and go to leaf node after that calls to get_leaves function recursively to go to leaves level"""
for item_key in item.keys():
"""Some times leaves and parents or some other leaves might have same key that's why adding leave node key to distinguish"""
temp_key_prefix = (
item_key if (key_prefix == "") else (key_prefix + "_" + str(item_key))
)
leaves.update(get_leaves(item[item_key], item_key, temp_key_prefix))
return leaves
elif isinstance(item, list):
leaves = {}
elements = []
"""Iterates the list and go to leaf node after that if it is leave then simply add value to current key's list or
calls to get_leaves function recursively to go to leaves level"""
for element in item:
if isinstance(element, dict) or isinstance(element, list):
leaves.update(get_leaves(element, key, key_prefix))
else:
elements.append(element)
if len(elements) > 0:
leaves[key] = elements
return leaves
else:
return {key_prefix: item}
field_names = [
"age",
"balance",
"company",
"eyeColor",
"favoriteFruit",
"friends_1",
"friends_2",
"friends_3",
"gender",
"greeting",
"isActive",
"latitude",
"longitude",
"registered",
"tags"
]
with open("data.json") as f_input, open("output.csv", "w", newline="") as f_output:
json_data = json.load(f_input, strict=False)
'''If dictionary have keys more than we mentioned in fieldnames it will give error
to ignore the keys which we don't have we need to use extrasaction="ignore"'''
csv_output = csv.DictWriter(f_output, delimiter=";", fieldnames=field_names, extrasaction="ignore")
csv_output.writeheader()
csv_output.writerows(get_leaves(entry) for entry in json_data)
@tankala
Copy link
Author

tankala commented Mar 2, 2022

Looking for a generic solution to convert nested JSON file to CSV in Python

Here you want to run for Data field right. You need to tweak the code take the Data field value and proceed further.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment