Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
This script reads all the JSON files in a folder, extracts certain data from each file, and writes it to a CSV file.
"""json_to_csv.py
This script reads every JSON file in a folder, extracts selected fields from each one, and writes them as rows to a CSV file.
The working folder must contain this script (json_to_csv.py), output.csv, and a subfolder named descriptions that holds all the JSON files.
"""
import os
import json
import csv
def get_list_of_json_files(directory='descriptions'):
    """Return the names of all JSON files located directly inside a folder.

    Parameters
    ----------
    directory : str, optional
        Folder to scan. Defaults to 'descriptions', so calling the function
        with no argument scans the same folder as before.

    Returns
    -------
    list_of_files : list of str
        Bare file names (not full paths) of the regular files in
        ``directory`` whose name ends in '.json' (case-insensitive),
        sorted alphabetically.

    Raises
    ------
    OSError
        Propagated from ``os.listdir`` when ``directory`` cannot be listed
        (for example ``FileNotFoundError`` if it does not exist).
    """
    # os.listdir returns entries in arbitrary order and includes everything
    # in the folder; keep only real .json files and sort them so the result
    # is deterministic from run to run.
    list_of_files = sorted(
        name
        for name in os.listdir(directory)
        if name.lower().endswith('.json')
        and os.path.isfile(os.path.join(directory, name))
    )
    return list_of_files
def create_list_from_json(jsonfile):
    """Extract the CSV row for one JSON description file.

    Parameters
    ----------
    jsonfile : str
        Path of the JSON file to read. Its top-level value must be a JSON
        object containing the keys accessed below.

    Returns
    -------
    data_list : list
        The 20 extracted values, in CSV column order: _id, _modelType,
        creator (_id, name), dataset (_accessLevel, _id, description, name),
        meta.acquisition (image_type, pixelsX, pixelsY), meta.clinical
        (age_approx, benign_malignant, diagnosis, diagnosis_confirm_type,
        melanocytic, sex), meta.unstructured (diagnosis, race), name.

    Raises
    ------
    TypeError
        If the top-level JSON value is not an object (e.g. the file holds
        a list of records instead of a single record).
    KeyError
        If any required key is missing. Only 'race' is optional.
    """
    # JSON text is UTF-8; do not rely on the platform's default encoding,
    # which would mis-decode non-ASCII characters on some systems.
    with open(jsonfile, encoding='utf-8') as f:
        data = json.load(f)

    # Indexing a list with a string key fails with the cryptic
    # "list indices must be integers" error; report the real problem.
    if not isinstance(data, dict):
        raise TypeError(
            f'{jsonfile}: expected a JSON object at the top level, '
            f'got {type(data).__name__}'
        )

    creator = data['creator']
    dataset = data['dataset']
    meta = data['meta']
    acquisition = meta['acquisition']
    clinical = meta['clinical']
    unstructured = meta['unstructured']

    # The order of the items below is the column order of the CSV.
    return [
        data['_id'],
        data['_modelType'],
        creator['_id'],
        creator['name'],
        dataset['_accessLevel'],
        dataset['_id'],
        dataset['description'],
        dataset['name'],
        acquisition['image_type'],
        acquisition['pixelsX'],
        acquisition['pixelsY'],
        clinical['age_approx'],
        clinical['benign_malignant'],
        clinical['diagnosis'],
        clinical['diagnosis_confirm_type'],
        clinical['melanocytic'],
        clinical['sex'],
        unstructured['diagnosis'],
        # A few files have no race entry; use an empty cell for those.
        unstructured.get('race', ''),
        data['name'],
    ]
def write_csv():
    """Append one row per file in the 'descriptions' folder to output.csv.

    The file names come from get_list_of_json_files() and each row is built
    by create_list_from_json(). output.csv is opened in append mode, so any
    content it already holds (such as a header row) is kept; this function
    writes no header itself.

    Returns
    -------
    None
        The result is the rows appended to output.csv in the current
        working directory.
    """
    list_of_files = get_list_of_json_files()
    # Open the output once for the whole run instead of once per row.
    # newline='' is what the csv module requires; without it, extra blank
    # lines appear between rows on platforms that translate '\n'.
    with open('output.csv', 'a', newline='', encoding='utf-8') as c:
        writer = csv.writer(c)
        for filename in list_of_files:
            # Build the row to be added to the csv for this json file.
            row = create_list_from_json(os.path.join('descriptions', filename))
            writer.writerow(row)
# Script entry point: build the CSV only when this file is run directly,
# not when it is imported as a module.
if __name__ == "__main__":
    write_csv()
@pedrodiazl

This comment has been minimized.

Copy link

pedrodiazl commented Jun 18, 2019

Hello, thanks very much for this helpful guide.
When I run this script on Python, I get the following error:
TypeError: list indices must be integers or slices, not str
How can I index through the objects without using the index numbers?
Thanks!

@iamsaaj201

This comment has been minimized.

Copy link

iamsaaj201 commented Nov 6, 2019

I am also getting below error. Can you please suggest.
"TypeError: list indices must be integers, not str"

@gauthamkorada

This comment has been minimized.

Copy link

gauthamkorada commented Feb 13, 2020

Hello, I am seeing the same error - TypeError: list indices must be integers or slices, not str
Can you please suggest how to fix this error ?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.