Skip to content

Instantly share code, notes, and snippets.

@mahmoud
Forked from anonymous/gist:3079509
Created July 10, 2012 07:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mahmoud/3081857 to your computer and use it in GitHub Desktop.
Save mahmoud/3081857 to your computer and use it in GitHub Desktop.
Pretty printing XML in Python
from StringIO import StringIO
import xml.etree.cElementTree as ET
def pretty_xml(xml_str, indent=" "):
    """
    A very simple, hopefully not simplistic, XML pretty printer.
    Concept courtesy Mark Williams.

    ``xml_str`` is either a string of XML or a file-like object (anything
    with a ``read`` method). ``indent`` is the string repeated once per
    nesting level. Returns the re-indented document as one string in which
    every closing tag is followed by a newline.

    Layout rules:

    * An element whose only content is text stays on one line
      (``<b>x</b>``).
    * Any other element, including an empty one, gets a newline after its
      opening tag and its closing tag on a line of its own.
    * Whitespace-only text and tails are treated as formatting and
      discarded, so already-indented input does not accumulate extra
      indentation.
    * All other text, tail text and attribute values are written back
      unchanged apart from XML escaping.

    Only element nodes are emitted (no XML declaration), and tag names are
    written exactly as ElementTree reports them in ``elem.tag``.
    Exceptions raised by the parser for malformed input propagate to the
    caller.
    """
    # Function-scope import keeps this block self-contained.
    from xml.sax.saxutils import escape, quoteattr

    if hasattr(xml_str, "read"):  # ElementTree uses file-like objects
        source = xml_str
    else:
        source = StringIO(xml_str)  # cStringIO doesn't support UTF-8

    # Parse the whole document before serializing: while a document is
    # still being parsed incrementally, an element's text and tail are not
    # guaranteed to be populated when its 'start' event is reported.
    root = ET.parse(source).getroot()

    pieces = []

    def visible(text):
        # Missing or whitespace-only text counts as "no text".
        return text if text and text.strip() else ""

    def wraps(elem):
        # True when the closing tag of elem belongs on its own line.
        return len(elem) > 0 or not visible(elem.text)

    def open_tag(elem, depth):
        attrs = "".join(
            " {0}={1}".format(key, quoteattr(value))
            for key, value in elem.items()
        )
        pieces.extend([
            indent * depth,
            "<{0}{1}>".format(elem.tag, attrs),
            escape(visible(elem.text)),
        ])
        if wraps(elem):
            pieces.append("\n")

    def close_tag(elem, depth):
        if wraps(elem):
            pieces.append(indent * depth)
        pieces.extend([
            "</{0}>".format(elem.tag),
            escape(visible(elem.tail)),
            "\n",
        ])

    # Walk the tree with an explicit stack of (element, child iterator)
    # pairs; len(stack) is the nesting depth of the next child to open.
    open_tag(root, 0)
    stack = [(root, iter(root))]
    while stack:
        elem, children = stack[-1]
        child = next(children, None)
        if child is None:
            stack.pop()
            close_tag(elem, len(stack))
        else:
            open_tag(child, len(stack))
            stack.append((child, iter(child)))
    return "".join(pieces)
"""
The function above is about twice as fast as the following, with comparable output
formats. The one below might be slightly better, as far as looks and accuracy are concerned.
"""
import re
from xml.dom.minidom import parseString
# Matches text that sits on a line of its own between a '>' and the next
# '</', capturing the text without the surrounding newline/indent padding.
# Raw strings keep the regex escapes (\s, \n) out of Python's own string
# escape processing.
_xml_re = re.compile(r'>\n\s+([^<>\s].*?)\n\s+</', re.DOTALL)


def pretty_xml_old(xml_str, indent=" ", max_newlines=20):
    """
    Pretty-print ``xml_str`` with ``xml.dom.minidom``.

    ``indent`` is passed to ``toprettyxml`` as the per-level indentation
    string. Input that already contains ``max_newlines`` or more newline
    characters is returned unchanged without being parsed; otherwise the
    document is parsed, serialized with ``toprettyxml`` and any text that
    ended up on its own line between an opening and a closing tag is
    pulled back onto the same line as its tags.

    Exceptions raised by ``parseString`` for malformed input propagate to
    the caller.
    """
    # avoid re-prettifying large amounts of xml that is fine
    if xml_str.count("\n") >= max_newlines:
        return xml_str
    pxml = parseString(xml_str).toprettyxml(indent)
    return _xml_re.sub(r'>\g<1></', pxml)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment