Created
February 9, 2017 05:47
-
-
Save mgd020/14e7b5ce65f0c67606111cda1959781f to your computer and use it in GitHub Desktop.
Flatten an element tree node into an xpath ordered dict.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import absolute_import, division, print_function, unicode_literals | |
from collections import OrderedDict | |
def flatten_xml(node):
    """
    Flatten an element tree node into an ordered dict keyed by relative xpath.

    Keys are paths relative to ``node``:

    * ``''`` holds the text of ``node`` itself (only when it has no children
      and its text is non-empty),
    * ``'@name'`` holds the value of attribute ``name``,
    * ``'tag'`` / ``'tag/...'`` address a child whose tag is unique among its
      siblings,
    * ``'tag[i]'`` / ``'tag[i]/...'`` (1-based) address children that share a
      tag with at least one sibling.

    Text of an element that also has children is not included.

    :param node: an ElementTree-style element (iterable over its children,
        with ``tag``, ``text`` and ``attrib``).
    :return: ``OrderedDict`` mapping path to text/attribute value. The node's
        own text and attributes come first, then children grouped by tag in
        order of each tag's first appearance.
    """
    output = OrderedDict()
    # Group flattened children by tag. An OrderedDict keeps the tags in
    # document order on every Python version.
    children = OrderedDict()

    # len() is the explicit "has children" test; Element truthiness is
    # deprecated in ElementTree.
    if len(node):
        for child in node:
            children.setdefault(child.tag, []).append(flatten_xml(child))
    elif node.text:
        output[''] = node.text

    # .items() works on both Python 2 and 3 (.iteritems() is Python 2 only).
    for attrib, value in node.attrib.items():
        output['@{}'.format(attrib)] = value

    for tag, child_xpaths in children.items():
        indexed = len(child_xpaths) > 1
        for i, child_xpath in enumerate(child_xpaths, 1):
            # Pass the tag as a format *argument*, never as the template:
            # namespaced tags look like '{uri}local' and would otherwise be
            # parsed as replacement fields.
            child_path = '{}[{}]'.format(tag, i) if indexed else tag
            for key, value in child_xpath.items():
                # An empty key is the child's own text, so it maps to the
                # child's path itself.
                full_key = '{}/{}'.format(child_path, key) if key else child_path
                output[full_key] = value

    return output
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment