Skip to content

Instantly share code, notes, and snippets.

@riconroy
Last active March 21, 2016 16:13
Show Gist options
  • Save riconroy/bafc6c64e74a4fda0945 to your computer and use it in GitHub Desktop.
Save riconroy/bafc6c64e74a4fda0945 to your computer and use it in GitHub Desktop.
This recursive Python routine converts an XML file to JSON, with the added requirement that repeated sibling tags with the same name become a list (array). See the example in the listing below.
# example XML:
# <maximum-temperatures>
# <temperature year="2012" units="C">37</temperature>
# <temperature year="2013" units="C">38</temperature>
# </maximum-temperatures>
#
# would output:
#
# "maximum-temperatures": {
# "temperature": [
# {
# "year": "2012",
# "units": "C",
# "value": "37"
# },
# {
# "year": "2013",
# "units": "C",
# "value": "38"
# }
# ]
# }
def createInnerDictionary(element):
    """Recursively convert an ElementTree element into a plain dictionary.

    Example: <myTag name="foo" zone="bar">myText</myTag>
      - element.attrib is the attribute mapping: {"name": "foo", "zone": "bar"}
      - element.tag is the name of the element: "myTag"
      - element.text is the text of the element: "myText"

    The result starts from a copy of the element's attributes. If the
    element has non-blank text, it is stored stripped under the key 'value'.
    Each child is then stored under its tag name:
      - a child with no children and no attributes -> its raw ``text``
        (not stripped; whatever ElementTree reports, including None)
      - a child with no children but with attributes -> a copy of its
        attributes, plus 'value' when it has non-blank text
      - a child with children -> the result of a recursive call
    When a tag name is already present in the result, the entries are
    collected into a list in document order.

    NOTE: attributes and child tags share one namespace in the result, so an
    attribute whose name equals a child's tag (or a child tagged 'value')
    is merged into the same key/list rather than kept separate.

    The input element and its descendants are not modified.

    :param element: an ``xml.etree.ElementTree.Element`` (or compatible).
    :return: a dict built as described above.
    """
    # Copy the attributes: Element.attrib is the tree's own live dict, and
    # writing 'value'/child keys into it would corrupt the parsed document.
    out_dict = dict(element.attrib)

    # if one exists, add the (stripped) text of the main element
    own_text = (element.text or '').strip()
    if own_text:
        out_dict['value'] = own_text

    for child in element:
        if len(child) == 0:
            # the child has no children
            if child.attrib:
                # copy for the same reason as above
                element_to_add = dict(child.attrib)
                child_text = (child.text or '').strip()
                if child_text:
                    element_to_add['value'] = child_text
            else:
                # bare leaf: keep the text exactly as parsed (may be None)
                element_to_add = child.text
        else:
            # the child does have children
            element_to_add = createInnerDictionary(child)

        if child.tag in out_dict:
            # repeated key: make sure the slot is a list, then append
            existing = out_dict[child.tag]
            if isinstance(existing, list):
                existing.append(element_to_add)
            else:
                out_dict[child.tag] = [existing, element_to_add]
        else:
            out_dict[child.tag] = element_to_add

    return out_dict
# our 'main'
import json
try:
import xml.etree.cElementTree as etree
except ImportError:
import xml.etree.ElementTree as etree
# here we assume the input file is local (relative to the working directory)
tree = etree.parse('myInputFile.xml')
root = tree.getroot()
# create a dictionary from the XML file
my_dict = createInnerDictionary(root)
# print(...) with a single argument behaves the same on Python 2 and 3;
# the bare `print x` statement form is a SyntaxError on Python 3
print(json.dumps(my_dict, sort_keys=True, indent=4))
# done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment