Skip to content

Instantly share code, notes, and snippets.

@ajparsons
Created April 15, 2020 11:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ajparsons/1f6a2cd3ffc455543cfb92099323ac6d to your computer and use it in GitHub Desktop.
Save ajparsons/1f6a2cd3ffc455543cfb92099323ac6d to your computer and use it in GitHub Desktop.
# Roundtrip json bods to xlsx
import os
import openpyxl
import json
from tempfile import TemporaryDirectory
from flattentool import flatten, unflatten
def split_xlsx(xlsx_source, dest_folder):
statement_types = ["entity",
"person",
"ownershipOrControl"]
print (xlsx_source)
book = openpyxl.load_workbook(xlsx_source)
for s in statement_types:
prefix = s + "_"
new = openpyxl.Workbook()
for sheet in book.worksheets:
if prefix in sheet.title:
new_title = sheet.title[len(prefix):]
new_sheet = new.create_sheet(new_title)
for row in sheet:
for cell in row:
new_sheet[cell.coordinate].value = cell.value
new.save(os.path.join(dest_folder, s + ".xlsx"))
def fix_boolean(data):
if isinstance(data, list):
return [fix_boolean(x) for x in data]
elif isinstance(data, dict):
return {x: fix_boolean(y) for x, y in data.items()}
elif isinstance(data, str):
if data == "True":
return True
elif data == "False":
return False
else:
return data
else:
return data
def create_xlsx(bods_source, xlsx_destination=None):
if xlsx_destination is None:
folder = os.path.split(bods_source)[0]
file_name = os.path.splitext(os.path.split(bods_source)[1])[0]
xlsx_destination = os.path.join(folder, file_name + ".xlsx")
statement_types = ["entity",
"person",
"ownershipOrControl"]
final = openpyxl.Workbook()
with TemporaryDirectory() as temp_folder:
for s in statement_types:
dest = os.path.join(temp_folder, "{0}.xlsx".format(s))
flatten(input_name=bods_source,
output_format="xlsx",
output_name=dest,
root_is_list=True,
id_name="statementID",
sheet_prefix=s + "_",
filter_field="statementType",
filter_value=s + "Statement")
book = openpyxl.load_workbook(dest)
for sheet in book.worksheets:
new_sheet = final.create_sheet(sheet.title)
for row in sheet:
for cell in row:
new_sheet[cell.coordinate].value = cell.value
final.remove(final["Sheet"])
final.active = 0
final.save(xlsx_destination)
def json_from_xlsx(xlsx_source, json_destination=None):
statement_types = ["entity",
"person",
"ownershipOrControl"]
if json_destination is None:
folder = os.path.split(xlsx_source)[0]
file_name = os.path.splitext(os.path.split(xlsx_source)[1])[0]
json_destination = os.path.join(folder, file_name + ".json")
data = []
with TemporaryDirectory() as temp_folder:
split_xlsx(xlsx_source, temp_folder)
for s in statement_types:
segment_filename = os.path.join(temp_folder, s + ".xlsx")
segement_destination = os.path.join(temp_folder, s + ".json")
unflatten(input_name=segment_filename,
input_format="xlsx",
root_is_list=True,
id_name="statementID",
output_name=segement_destination
)
with open(segement_destination) as json_file:
data.extend(json.load(json_file))
data = fix_boolean(data)
with open(json_destination, 'w') as outfile:
json.dump(data, outfile, indent=4)
if __name__ == "__main__":
folder = ""
source_file = r"chiron.json"
# create_xlsx(os.path.join(folder, "chiron.json"))
json_from_xlsx(os.path.join(folder, "chiron_flatten.xlsx"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment