ajparsons/json_bods_flatten.py

## json_bods_flatten.py
# Roundtrip json bods to xlsx

import os
import openpyxl
import json

from tempfile import TemporaryDirectory
from flattentool import flatten, unflatten


def split_xlsx(xlsx_source, dest_folder):
    statement_types = ["entity",
                       "person",
                       "ownershipOrControl"]

    print (xlsx_source)
    book = openpyxl.load_workbook(xlsx_source)

    for s in statement_types:
        prefix = s + "_"
        new = openpyxl.Workbook()
        for sheet in book.worksheets:
            if prefix in sheet.title:
                new_title = sheet.title[len(prefix):]
                new_sheet = new.create_sheet(new_title)
                for row in sheet:
                    for cell in row:
                        new_sheet[cell.coordinate].value = cell.value
        new.save(os.path.join(dest_folder, s + ".xlsx"))


def fix_boolean(data):
    if isinstance(data, list):
        return [fix_boolean(x) for x in data]
    elif isinstance(data, dict):
        return {x: fix_boolean(y) for x, y in data.items()}
    elif isinstance(data, str):
        if data == "True":
            return True
        elif data == "False":
            return False
        else:
            return data
    else:
        return data


def create_xlsx(bods_source, xlsx_destination=None):

    if xlsx_destination is None:
        folder = os.path.split(bods_source)[0]
        file_name = os.path.splitext(os.path.split(bods_source)[1])[0]
        xlsx_destination = os.path.join(folder, file_name + ".xlsx")

    statement_types = ["entity",
                       "person",
                       "ownershipOrControl"]

    final = openpyxl.Workbook()

    with TemporaryDirectory() as temp_folder:
        for s in statement_types:

            dest = os.path.join(temp_folder, "{0}.xlsx".format(s))

            flatten(input_name=bods_source,
                    output_format="xlsx",
                    output_name=dest,
                    root_is_list=True,
                    id_name="statementID",
                    sheet_prefix=s + "_",
                    filter_field="statementType",
                    filter_value=s + "Statement")

            book = openpyxl.load_workbook(dest)
            for sheet in book.worksheets:
                new_sheet = final.create_sheet(sheet.title)
                for row in sheet:
                    for cell in row:
                        new_sheet[cell.coordinate].value = cell.value

    final.remove(final["Sheet"])
    final.active = 0
    final.save(xlsx_destination)


def json_from_xlsx(xlsx_source, json_destination=None):
    statement_types = ["entity",
                       "person",
                       "ownershipOrControl"]

    if json_destination is None:
        folder = os.path.split(xlsx_source)[0]
        file_name = os.path.splitext(os.path.split(xlsx_source)[1])[0]
        json_destination = os.path.join(folder, file_name + ".json")

    data = []

    with TemporaryDirectory() as temp_folder:
        split_xlsx(xlsx_source, temp_folder)
        for s in statement_types:
            segment_filename = os.path.join(temp_folder, s + ".xlsx")
            segement_destination = os.path.join(temp_folder, s + ".json")
            unflatten(input_name=segment_filename,
                      input_format="xlsx",
                      root_is_list=True,
                      id_name="statementID",
                      output_name=segement_destination
                      )

            with open(segement_destination) as json_file:
                data.extend(json.load(json_file))

    data = fix_boolean(data)
    with open(json_destination, 'w') as outfile:
        json.dump(data, outfile, indent=4)


if __name__ == "__main__":
    folder = ""
    source_file = r"chiron.json"
    # create_xlsx(os.path.join(folder, "chiron.json"))
    json_from_xlsx(os.path.join(folder, "chiron_flatten.xlsx"))
	# Roundtrip json bods to xlsx

	import os
	import openpyxl
	import json

	from tempfile import TemporaryDirectory
	from flattentool import flatten, unflatten


	def split_xlsx(xlsx_source, dest_folder):
	statement_types = ["entity",
	"person",
	"ownershipOrControl"]

	print (xlsx_source)
	book = openpyxl.load_workbook(xlsx_source)

	for s in statement_types:
	prefix = s + "_"
	new = openpyxl.Workbook()
	for sheet in book.worksheets:
	if prefix in sheet.title:
	new_title = sheet.title[len(prefix):]
	new_sheet = new.create_sheet(new_title)
	for row in sheet:
	for cell in row:
	new_sheet[cell.coordinate].value = cell.value
	new.save(os.path.join(dest_folder, s + ".xlsx"))


	def fix_boolean(data):
	if isinstance(data, list):
	return [fix_boolean(x) for x in data]
	elif isinstance(data, dict):
	return {x: fix_boolean(y) for x, y in data.items()}
	elif isinstance(data, str):
	if data == "True":
	return True
	elif data == "False":
	return False
	else:
	return data
	else:
	return data


	def create_xlsx(bods_source, xlsx_destination=None):

	if xlsx_destination is None:
	folder = os.path.split(bods_source)[0]
	file_name = os.path.splitext(os.path.split(bods_source)[1])[0]
	xlsx_destination = os.path.join(folder, file_name + ".xlsx")

	statement_types = ["entity",
	"person",
	"ownershipOrControl"]

	final = openpyxl.Workbook()

	with TemporaryDirectory() as temp_folder:
	for s in statement_types:

	dest = os.path.join(temp_folder, "{0}.xlsx".format(s))

	flatten(input_name=bods_source,
	output_format="xlsx",
	output_name=dest,
	root_is_list=True,
	id_name="statementID",
	sheet_prefix=s + "_",
	filter_field="statementType",
	filter_value=s + "Statement")

	book = openpyxl.load_workbook(dest)
	for sheet in book.worksheets:
	new_sheet = final.create_sheet(sheet.title)
	for row in sheet:
	for cell in row:
	new_sheet[cell.coordinate].value = cell.value

	final.remove(final["Sheet"])
	final.active = 0
	final.save(xlsx_destination)


	def json_from_xlsx(xlsx_source, json_destination=None):
	statement_types = ["entity",
	"person",
	"ownershipOrControl"]

	if json_destination is None:
	folder = os.path.split(xlsx_source)[0]
	file_name = os.path.splitext(os.path.split(xlsx_source)[1])[0]
	json_destination = os.path.join(folder, file_name + ".json")

	data = []

	with TemporaryDirectory() as temp_folder:
	split_xlsx(xlsx_source, temp_folder)
	for s in statement_types:
	segment_filename = os.path.join(temp_folder, s + ".xlsx")
	segement_destination = os.path.join(temp_folder, s + ".json")
	unflatten(input_name=segment_filename,
	input_format="xlsx",
	root_is_list=True,
	id_name="statementID",
	output_name=segement_destination
	)

	with open(segement_destination) as json_file:
	data.extend(json.load(json_file))

	data = fix_boolean(data)
	with open(json_destination, 'w') as outfile:
	json.dump(data, outfile, indent=4)


	if __name__ == "__main__":
	folder = ""
	source_file = r"chiron.json"
	# create_xlsx(os.path.join(folder, "chiron.json"))
	json_from_xlsx(os.path.join(folder, "chiron_flatten.xlsx"))