Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
This script reads n numbers of json files present in a folder and then extract certain data from each file and write in a csv file.
"""json_to_csv.py
This script reads n numbers of json files present in a folder and then extract certain data from each file and write in a csv file.
The folder contains the python script i.e. json_to_csv.py, output.csv and another folder descriptions containing all the json files.
"""
import os
import json
import csv
def get_list_of_json_files():
"""Returns the list of filenames of all the Json files present in the folder
Parameter
---------
directory : str
'descriptions' in this case
Returns
-------
list_of_files: list
List of the filenames of all the json files
"""
list_of_files = os.listdir('descriptions') # creates list of all the files in the folder
return list_of_files
def create_list_from_json(jsonfile):
"""Returns a list of the extracted items from json file in the same order we need it.
Parameter
_________
jsonfile : json
The json file containing the data
Returns
-------
one_sample_list : list
The list of the extracted items needed for the final csv
"""
with open(jsonfile) as f:
data = json.load(f)
data_list = [] # create an empty list
# append the items to the list in the same order.
data_list.append(data['_id'])
data_list.append(data['_modelType'])
data_list.append(data['creator']['_id'])
data_list.append(data['creator']['name'])
data_list.append(data['dataset']['_accessLevel'])
data_list.append(data['dataset']['_id'])
data_list.append(data['dataset']['description'])
data_list.append(data['dataset']['name'])
data_list.append(data['meta']['acquisition']['image_type'])
data_list.append(data['meta']['acquisition']['pixelsX'])
data_list.append(data['meta']['acquisition']['pixelsY'])
data_list.append(data['meta']['clinical']['age_approx'])
data_list.append(data['meta']['clinical']['benign_malignant'])
data_list.append(data['meta']['clinical']['diagnosis'])
data_list.append(data['meta']['clinical']['diagnosis_confirm_type'])
data_list.append(data['meta']['clinical']['melanocytic'])
data_list.append(data['meta']['clinical']['sex'])
data_list.append(data['meta']['unstructured']['diagnosis'])
# In few json files, the race was not there so using KeyError exception to add '' at the place
try:
data_list.append(data['meta']['unstructured']['race'])
except KeyError:
data_list.append("") # will add an empty string in case race is not there.
data_list.append(data['name'])
return data_list
def write_csv():
"""Creates the desired csv file
Parameters
__________
list_of_files : file
The list created by get_list_of_json_files() method
result.csv : csv
The csv file containing the header only
Returns
_______
result.csv : csv
The desired csv file
"""
list_of_files = get_list_of_json_files()
for file in list_of_files:
row = create_list_from_json(f'descriptions/{file}') # create the row to be added to csv for each file (json-file)
with open('output.csv', 'a') as c:
writer = csv.writer(c)
writer.writerow(row)
c.close()
if __name__ == '__main__':
write_csv()
@pedrodiazl

This comment has been minimized.

Copy link

pedrodiazl commented Jun 18, 2019

Hello, thanks very much for this helpful guide.
When I run this script on Python, I get the following error:
TypeError: list indices must be integers or slices, not str
How can I index through the objects without using the index numbers?
Thanks!

@iamsaaj201

This comment has been minimized.

Copy link

iamsaaj201 commented Nov 6, 2019

I am also getting below error. Can you please suggest.
"TypeError: list indices must be integers, not str"

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.