Skip to content

Instantly share code, notes, and snippets.

@ZhanruiLiang
Created January 20, 2014 17:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ZhanruiLiang/8524482 to your computer and use it in GitHub Desktop.
Save ZhanruiLiang/8524482 to your computer and use it in GitHub Desktop.
Convert XML to a human-readable format with proper visual indentation and without closing tags or angle brackets.
<?xml version="1.0" ?>
<project>
<tag1 a1="v1" a2="v2" a3="v3">
<ctag1>
hello
</ctag1>
<ctag2>
world
</ctag2>
</tag1>
<tag2>
bravo!
</tag2>
</project>
project:
tag1[a1 = v1, a2 = v2, a3 = v3]:
ctag1: hello
ctag2: world
tag2: bravo!
#!/usr/bin/env python
import lxml.etree as E
# Selects how convert_node renders attributes: when true they are listed,
# sorted, inside brackets on the tag line; when false each attribute is
# emitted on its own line prefixed with '@'.
INLINE_ATTRS = True
def strip_tag(tag):
    """
    Return ``tag`` without its ``{namespace}`` qualifier.

    Everything up to and including the first '}' that follows the first '{'
    is removed. A tag with no '{', or with a '{' that is never closed, is
    returned unchanged.
    """
    _, opening, after_brace = tag.partition('{')
    if not opening:
        return tag
    _, closing, local_name = after_brace.partition('}')
    # An unterminated '{' leaves the tag as it was.
    return local_name if closing else tag
def convert_node(node, indents):
    """
    Yield the lines of a human-readable rendering of ``node``, recursively.

    ``node`` is an element; ``indents`` is its nesting depth, used to build
    the leading indent of every line emitted for it. Any ``{namespace}``
    qualifier is removed from the tag name.

    A node with no attributes, no child elements and at most one non-blank
    line of text is rendered on a single line as ``tag: text``. Otherwise the
    tag line comes first, then the children (one level deeper), then the
    node's own non-blank text lines.

    With INLINE_ATTRS true, attributes are listed, sorted, on the tag line.
    For example <point x="2" y="3"> Good point </point> is rendered as
        point[x = 2, y = 3]:
         Good point
    With INLINE_ATTRS false, each attribute gets its own '@' line:
        point:
         @x: 2
         @y: 3
         Good point

    Comments, processing instructions and entities are skipped. Only
    ``node.text`` is rendered; text following a child element (its tail)
    is not.
    """
    indent = ' ' * indents
    tag = strip_tag(node.tag)

    # Keep only the non-blank lines of the node's own text, stripped.
    stripped = (line.strip() for line in (node.text or '').split('\n'))
    contents = [line for line in stripped if line]

    # Iterating an lxml element also yields comments, processing
    # instructions and entities; their ``tag`` is a factory function rather
    # than a string, so passing it to strip_tag would raise AttributeError.
    children = [child for child in node if isinstance(child.tag, str)]

    # If the whole node is short enough, we display it inline.
    if not node.attrib and not children and len(contents) <= 1:
        line = contents[0] if contents else ''
        yield '{indent}{tag}: {line}'.format(indent=indent, tag=tag, line=line)
        return

    if INLINE_ATTRS:
        # Display tag name and attributes on the first line.
        attrs = ', '.join(sorted(
            '{} = {}'.format(attr, value.replace('\n', ' '))
            for attr, value in node.attrib.items()))
        template = '{indent}{tag}[{attrs}]:' if attrs else '{indent}{tag}:'
        yield template.format(indent=indent, tag=tag, attrs=attrs)
    else:
        # Display tag name on a line and attributes on other lines.
        yield '{indent}{tag}:'.format(indent=indent, tag=tag)
        for attr, value in node.attrib.items():
            value = ' '.join(map(str.strip, value.split('\n')))
            yield '{indent} @{attr}: {value}'.format(
                indent=indent, attr=attr, value=value)

    # Display children
    for child in children:
        yield from convert_node(child, indents + 1)

    # Display node text
    for line in contents:
        yield '{indent} {line}'.format(indent=indent, line=line)
def convert(input):
    """
    Parse an XML document and return its human-readable rendering.

    ``input`` is either a path (``str``) or an object with a ``read()``
    method returning the document as ``str`` or ``bytes``. A file opened
    here from a path is always closed, even if reading fails; a file object
    supplied by the caller is left open.

    Returns the lines produced by convert_node for the root element, joined
    with newlines.
    """
    if isinstance(input, str):
        # Read raw bytes so the parser honours the encoding declared in the
        # document instead of the locale's default text encoding.
        with open(input, 'rb') as infile:
            data = infile.read()
    else:
        data = input.read()
    if isinstance(data, str):
        # Text is handed to the parser as UTF-8 bytes rather than as str.
        data = data.encode('utf-8')
    root = E.fromstring(data)
    return '\n'.join(convert_node(root, 0))
if __name__ == '__main__':
    # Command-line entry point: convert the XML file named by the first
    # argument and write the result to the file named by the second
    # argument, or to standard output when no second argument is given.
    import sys

    if len(sys.argv) < 2:
        sys.exit('usage: {} INPUT.xml [OUTPUT]'.format(sys.argv[0]))

    with open(sys.argv[1], 'r') as infile:
        text = convert(infile)

    # The output file is opened only after a successful conversion, so a
    # parse error does not leave behind a truncated file.
    if len(sys.argv) >= 3:
        with open(sys.argv[2], 'w') as outfile:
            outfile.write(text)
    else:
        sys.stdout.write(text)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment