Skip to content

Instantly share code, notes, and snippets.

Last active Jan 9, 2020
What would you like to do?
Python script to pretty print XML files
import os
import re
import HTMLParser as parser
import xml.dom.minidom as minidom
import sys
# Pretty-print the XML file named by the first command-line argument.
#
# The file's HTML entities are unescaped first, the result is parsed and
# re-serialised with minidom, and the whitespace minidom inserts around
# "<!"-style sections (e.g. CDATA) and blank lines are stripped before the
# result is written to standard output.

# Read the file name from the command line. Only this lookup is guarded so
# that an IndexError raised anywhere else is not misreported as a missing
# argument.
try:
    filename = sys.argv[1]
except IndexError:
    print("You must specify a file name!")
else:
    if os.path.isfile(filename) and os.access(filename, os.R_OK):
        # Open the file in read-only mode; the context manager guarantees
        # the handle is closed even if reading fails.
        with open(filename, 'r') as handle:
            raw = handle.read()

        # Decode HTML entities before parsing.
        # NOTE(review): this also turns "&lt;" / "&amp;" into literal
        # markup characters, so the input is presumably entity-escaped
        # XML -- confirm against the files this script is used on.
        unescaped = parser.HTMLParser().unescape(raw)

        # Prettify the XML.
        pretty = minidom.parseString(unescaped).toprettyxml()

        # minidom adds extra whitespace before/after CDATA sections;
        # collapse it so the section stays attached to its neighbours.
        pretty = re.sub(r'>\s+<!', '><!', pretty)
        pretty = re.sub(r']>\s+<', ']><', pretty)

        # Remove empty lines while keeping the original line endings
        # (the attribution link for this idiom was lost from the source).
        # print(...) with a single argument behaves the same under the
        # Python 2 print statement this script was written for.
        print("".join(
            [line for line in pretty.strip().splitlines(True) if line.strip()]
        ))
    else:
        print("File is missing or is not readable!")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment