sudomakecoffee/parse-xml-to-json.py

## parse-xml-to-json.py
#!/usr/bin/env python
# General process
# 1. Parse any command-line arguments
# 2. Parse XML into memory
# 3. Separate items into lists based on type
#   a. Add item type to each item record
# 4. Merge lists and sort
# 5. Write uber-list to JSON file
from typing import Final
import xml.etree.ElementTree as et
import argparse
import json

# Sentinel registered as the argparse `const` for -i/--inputfile; parse_arg
# treats it the same as a missing value and substitutes the default.
INPUT_FILE_NOT_SPECIFIED: Final = "input_file_not_specified"
# Help text registered for the -i/--inputfile option.
INPUT_FILE_HELP_TEXT: Final = "path to XML input file"
# Input path used when no usable -i/--inputfile value was supplied.
INPUT_FILE_DEFAULT: Final = "items.xml"

# Sentinel registered as the argparse `const` for -o/--outputfile; parse_arg
# treats it the same as a missing value and substitutes the default.
OUTPUT_FILE_NOT_SPECIFIED: Final = "output_file_not_specified"
# Help text registered for the -o/--outputfile option.
OUTPUT_FILE_HELP_TEXT: Final = "path to JSON output file"
# Output path used when no usable -o/--outputfile value was supplied.
OUTPUT_FILE_DEFAULT: Final = "items.json"

def parse_arg(parsed_arg: object, value_not_specified: str, default_value: object):
    """Return the parsed argument, or a default when no usable value was given.

    Args:
        parsed_arg: Value obtained from the argument parser; ``None`` is
            treated as "not supplied".
        value_not_specified: Sentinel that also means "no value supplied".
        default_value: Value returned in either of those cases.

    Returns:
        ``default_value`` if ``parsed_arg`` is ``None`` or equals
        ``value_not_specified``; otherwise ``parsed_arg`` unchanged (an empty
        string is passed through as-is).
    """
    if parsed_arg is None or parsed_arg == value_not_specified:
        return default_value
    return parsed_arg

def parse_argument_list():
    """Builds the command-line parser for this script and returns the parsed arguments"""
    # (short flag, long flag, sentinel stored when the flag has no value, help text)
    option_specs = (
        ("-i", "--inputfile", INPUT_FILE_NOT_SPECIFIED, INPUT_FILE_HELP_TEXT),
        ("-o", "--outputfile", OUTPUT_FILE_NOT_SPECIFIED, OUTPUT_FILE_HELP_TEXT),
    )

    arg_parser = argparse.ArgumentParser()
    for short_flag, long_flag, sentinel, help_text in option_specs:
        arg_parser.add_argument(
            short_flag,
            long_flag,
            nargs="?",
            type=str,
            const=sentinel,
            help=help_text,
        )

    return arg_parser.parse_args()

def project_attribute(items: list, new_attrib: str, new_value: object) -> list:
    """Tag every element with an extra attribute and collect the attribute dicts.

    Args:
        items: Elements to process; each must expose a mutable ``attrib``
            mapping (e.g. ``xml.etree.ElementTree.Element``).
        new_attrib: Name of the attribute to set on each element.
        new_value: Value stored under ``new_attrib``.

    Returns:
        A list holding each element's ``attrib`` dict, in input order. The
        dicts are the elements' own mappings rather than copies, so the
        elements passed in are modified in place.
    """
    records = []
    for element in items:
        record = element.attrib
        record[new_attrib] = new_value
        records.append(record)
    return records

def sort_key(x: dict) -> int:
    """Sort key for item records: the record's "id" entry converted to an integer"""
    item_id = x["id"]
    return int(item_id)

if __name__ == '__main__':
    # Resolve the input/output paths from the command line, falling back to the defaults
    cli_args = parse_argument_list()
    input_file = parse_arg(cli_args.inputfile, INPUT_FILE_NOT_SPECIFIED, INPUT_FILE_DEFAULT)
    output_file = parse_arg(cli_args.outputfile, OUTPUT_FILE_NOT_SPECIFIED, OUTPUT_FILE_DEFAULT)

    # Load the XML document and take its root node
    root = et.parse(input_file).getroot()

    # Gather the records of each item type in turn (active, familiar, passive),
    # tagging every record with the type it came from
    all_items = []
    for item_type in ("active", "familiar", "passive"):
        all_items.extend(project_attribute(root.findall(item_type), "itemtype", item_type))

    # Order the combined list by numeric item ID
    all_items.sort(key=sort_key)

    # Write the combined list to the output file as pretty-printed JSON
    with open(output_file, "w", encoding="utf-8") as out_stream:
        out_stream.write(json.dumps(all_items, indent=2))
#end __main__

## test-items.xml
<items>
	<passive cache="damage" description="DMG up" gfx="Collectibles_007_BloodOfTheMartyr.png" id="7" name="Blood of the Martyr" />
	<familiar description="Friends 'till the end" gfx="Collectibles_008_BrotherBobby.png" id="8" name="Brother Bobby" tags="baby" />
	<passive description="Fly love" gfx="Collectibles_009_Skatole.png" id="9" name="Skatole" tags="fly poop" />
	<active cache="flying" description="Temporary flight" gfx="Collectibles_033_TheBible.png" id="33" maxcharges="4" name="The Bible" tags="angel book" />
	<active description="Reusable bomb buddy" gfx="Collectibles_037_MrBoom.png" id="37" maxcharges="2" name="Mr. Boom" />
</items>
	#!/usr/bin/env python
	# General process
	# 1. Parse any command-line arguments
	# 2. Parse XML into memory
	# 3. Separate items into lists based on type
	# a. Add item type to each item record
	# 4. Merge lists and sort
	# 5. Write uber-list to JSON file
	from typing import Final
	import xml.etree.ElementTree as et
	import argparse
	import json

	# Sentinel registered as the argparse `const` for -i/--inputfile; parse_arg
	# treats it the same as a missing value and substitutes the default.
	INPUT_FILE_NOT_SPECIFIED: Final = "input_file_not_specified"
	# Help text registered for the -i/--inputfile option.
	INPUT_FILE_HELP_TEXT: Final = "path to XML input file"
	# Input path used when no usable -i/--inputfile value was supplied.
	INPUT_FILE_DEFAULT: Final = "items.xml"

	# Sentinel registered as the argparse `const` for -o/--outputfile; parse_arg
	# treats it the same as a missing value and substitutes the default.
	OUTPUT_FILE_NOT_SPECIFIED: Final = "output_file_not_specified"
	# Help text registered for the -o/--outputfile option.
	OUTPUT_FILE_HELP_TEXT: Final = "path to JSON output file"
	# Output path used when no usable -o/--outputfile value was supplied.
	OUTPUT_FILE_DEFAULT: Final = "items.json"

	def parse_arg(parsed_arg: object, value_not_specified: str, default_value: object):
	    """Return the parsed argument, or a default when no usable value was given.

	    Args:
	        parsed_arg: Value obtained from the argument parser; ``None`` is
	            treated as "not supplied".
	        value_not_specified: Sentinel that also means "no value supplied".
	        default_value: Value returned in either of those cases.

	    Returns:
	        ``default_value`` if ``parsed_arg`` is ``None`` or equals
	        ``value_not_specified``; otherwise ``parsed_arg`` unchanged.
	    """
	    if parsed_arg is None or parsed_arg == value_not_specified:
	        return default_value
	    return parsed_arg

	def parse_argument_list():
	    """Constructs an argument parser and parses any command line values that were provided.

	    Both options are optional and take at most one value (``nargs="?"``);
	    when a flag is given without a value, its ``*_NOT_SPECIFIED`` sentinel
	    is stored instead.

	    Returns:
	        The namespace produced by ``parse_args()``, with ``inputfile`` and
	        ``outputfile`` attributes.
	    """
	    parser = argparse.ArgumentParser()
	    parser.add_argument("-i", "--inputfile", nargs="?", type=str, const=INPUT_FILE_NOT_SPECIFIED, help=INPUT_FILE_HELP_TEXT)
	    parser.add_argument("-o", "--outputfile", nargs="?", type=str, const=OUTPUT_FILE_NOT_SPECIFIED, help=OUTPUT_FILE_HELP_TEXT)

	    return parser.parse_args()

	def project_attribute(items: list, new_attrib: str, new_value: object) -> list:
	    """Set one attribute on every element and return the attribute dicts.

	    Args:
	        items: Elements to process; each must expose a mutable ``attrib``
	            mapping (e.g. ``xml.etree.ElementTree.Element``).
	        new_attrib: Name of the attribute to set on each element.
	        new_value: Value stored under ``new_attrib``.

	    Returns:
	        A list of each element's ``attrib`` dict, in input order. These are
	        the elements' own mappings, so the inputs are modified in place.
	    """
	    mutated_items = []
	    for item in items:
	        item.attrib[new_attrib] = new_value
	        mutated_items.append(item.attrib)
	    return mutated_items

	def sort_key(x: dict) -> int:
	    """Return the record's "id" entry as an integer, for use as a list.sort key"""
	    return int(x["id"])

	if __name__ == '__main__':
	    # Parse command line args and/or get defaults
	    parsed_args = parse_argument_list()
	    input_file = parse_arg(parsed_args.inputfile, INPUT_FILE_NOT_SPECIFIED, INPUT_FILE_DEFAULT)
	    output_file = parse_arg(parsed_args.outputfile, OUTPUT_FILE_NOT_SPECIFIED, OUTPUT_FILE_DEFAULT)

	    # Parse input file into memory, and get the root node
	    tree = et.parse(input_file)
	    items = tree.getroot()

	    # Separate each item type into its own list, and add the itemtype attribute to each record
	    actives = project_attribute(items.findall("active"), "itemtype", "active")
	    familiars = project_attribute(items.findall("familiar"), "itemtype", "familiar")
	    passives = project_attribute(items.findall("passive"), "itemtype", "passive")

	    # Append the lists together, then sort by item ID
	    all_items = actives + familiars + passives
	    all_items.sort(key=sort_key)

	    # Write complete list to JSON file, pretty-printed
	    with open(output_file, "w", encoding="utf-8") as file:
	        text_data = json.dumps(all_items, indent=2)
	        file.write(text_data)
	#end __main__
	<items>
	<passive cache="damage" description="DMG up" gfx="Collectibles_007_BloodOfTheMartyr.png" id="7" name="Blood of the Martyr" />
	<familiar description="Friends 'till the end" gfx="Collectibles_008_BrotherBobby.png" id="8" name="Brother Bobby" tags="baby" />
	<passive description="Fly love" gfx="Collectibles_009_Skatole.png" id="9" name="Skatole" tags="fly poop" />
	<active cache="flying" description="Temporary flight" gfx="Collectibles_033_TheBible.png" id="33" maxcharges="4" name="The Bible" tags="angel book" />
	<active description="Reusable bomb buddy" gfx="Collectibles_037_MrBoom.png" id="37" maxcharges="2" name="Mr. Boom" />
	</items>