Skip to content

Instantly share code, notes, and snippets.

@mgd020
Last active June 8, 2017 23:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mgd020/0c026bc51a37be3268dd9fcc6589f668 to your computer and use it in GitHub Desktop.
Save mgd020/0c026bc51a37be3268dd9fcc6589f668 to your computer and use it in GitHub Desktop.
Convert between XML and JSON
from __future__ import absolute_import, division, print_function, unicode_literals
from xml import sax
from xml.sax import saxutils
class XMLParser(sax.handler.ContentHandler):
    """
    SAX content handler that builds json compatible data from XML events.

    Each element becomes a dict seeded with its attributes. Child elements
    are stored under their tag name as a list of dicts, and character data
    is stored under cdata_key.

    cdata_key: the key for storing character data. If falsy none is stored.
    """

    def __init__(self, cdata_key):
        # Call the base initialiser explicitly; this works whether or not the
        # base class supports super().
        sax.handler.ContentHandler.__init__(self)
        self.cdata_key = cdata_key
        self.root = {}
        # Stack of currently open elements; the root dict sits at the bottom
        # and receives the document element.
        self.elem = [self.root]

    def startElement(self, name, attrs):  # NOQA
        """Create the dict for a new element and make it the current one."""
        elem = dict(attrs.items())
        self.elem[-1].setdefault(name, []).append(elem)
        self.elem.append(elem)

    def endElement(self, name):  # NOQA
        """Close the current element and finalise its character data."""
        elem = self.elem.pop()
        key = self.cdata_key
        if not key or key not in elem:
            return
        # .items() instead of .iteritems() so this runs on Python 2 and 3.
        has_children = any(
            other != key and isinstance(value, list)
            for other, value in elem.items()
        )
        if has_children:
            # Text collected alongside child elements is discarded.
            del elem[key]
        else:
            # characters() may be called several times per text node.
            elem[key] = ''.join(elem[key])

    def characters(self, content):
        """Collect a chunk of character data for the current element."""
        if self.cdata_key:
            self.elem[-1].setdefault(self.cdata_key, []).append(content)
def xml_to_json(stream, cdata_key):
    """
    Parse an XML stream into json compatible data.

    stream: the input stream.
    cdata_key: the key under which character data is stored. If None no
        character data is stored.

    Returns the data dict built by the parser.
    """
    parser = XMLParser(cdata_key)
    sax.parse(stream, parser)
    return parser.root
class XMLGenerator(object):
    """
    Adds indent and emptyElement to xml.sax.saxutils.XMLGenerator.

    stream: the output stream to write to.
    encoding: XML encoding. If None the saxutils default is used.
    indent: the number of spaces to indent. None or 0 means no indent or newlines.

    Any attribute not defined here is looked up on the wrapped saxutils
    generator.
    """

    def __init__(self, stream, encoding=None, indent=None):
        args = [stream]
        if encoding is not None:
            args.append(encoding)
        self.stream = stream
        self.generator = saxutils.XMLGenerator(*args)
        self.indent_level = 0
        # Normalise None to 0 so the indent arithmetic in startElement and
        # endElement never adds None to an int.
        self.indent_width = indent or 0
        # True when the last thing written was a child element, meaning the
        # next end tag belongs on its own line.
        self.nested_elements = False

    def __getattr__(self, name):
        # Only reached for attributes missing on this wrapper.
        return getattr(self.generator, name)

    def endDocument(self):  # NOQA
        """Finish the document and terminate the output with a newline."""
        self.generator.endDocument()
        self.stream.write('\n')

    def startElement(self, name, attrs):  # NOQA
        """Write a start tag, on a new line unless at the top level."""
        if self.indent_level:
            self.newline()
        self.generator.startElement(name, attrs)
        self.indent_level += self.indent_width
        self.nested_elements = False

    def emptyElement(self, name, attrs):  # NOQA
        """Write a self-closing tag directly to the stream."""
        self.newline()
        # .items() instead of .iteritems() so this runs on Python 2 and 3.
        attr_str = ''.join(
            ' {}={}'.format(key, saxutils.quoteattr(value))
            for key, value in attrs.items()
        )
        # A space precedes the closing slash only when indenting.
        self.stream.write('<{}{}{}/>'.format(name, attr_str, ' ' if self.indent_width else ''))
        self.nested_elements = True

    def endElement(self, name):  # NOQA
        """Write an end tag, on its own line if the element had child elements."""
        self.indent_level -= self.indent_width
        if self.nested_elements:
            self.newline()
        else:
            # The parent of this element now contains a child element.
            self.nested_elements = True
        self.generator.endElement(name)

    def characters(self, content):
        """Write character data; the following end tag stays on the same line."""
        self.generator.characters(content)
        self.nested_elements = False

    def newline(self):
        """Write a newline plus the current indentation, if indenting."""
        if not self.indent_width:
            return
        self.stream.write('\n' + ' ' * self.indent_level)
def json_to_xml(data, stream, cdata_key, encoding=None, indent=None, empty=True):
    """
    Write XML to stream based on json compatible data.

    data: a dict with the same format that xml_to_json produces. Only the
        first key, and the first item of its list, is used as the root element.
    stream: the output stream to write to.
    cdata_key: the key used to store CDATA. It is not written for elements
        that have child elements.
    encoding: XML encoding
    indent: the number of spaces to indent. None or 0 means no indent or newlines.
    empty: enable empty elements in the XML

    Returns None. Nothing is written when data is empty.
    """
    if not data:
        return
    # next(iter(...)) instead of items()[0]: dict views are not indexable on
    # Python 3.
    root_name, root_values = next(iter(data.items()))
    # Explicit stack instead of recursion: (name, dict) tuples are elements
    # still to be opened, bare names are end tags waiting to be written.
    pending = [(root_name, root_values[0])]
    generator = XMLGenerator(stream, encoding, indent)
    generator.startDocument()
    while pending:
        item = pending.pop()
        if not isinstance(item, tuple):
            generator.endElement(item)
            continue
        name, content = item
        attrs = {}
        cdata = None
        children = []
        for key, value in content.items():
            if key == cdata_key:
                cdata = value
            elif isinstance(value, list):
                # A list value holds the child elements with tag name `key`.
                children.extend((key, child) for child in value)
            else:
                attrs[key] = value
        if empty and not cdata and not children:
            generator.emptyElement(name, attrs)
            continue
        generator.startElement(name, attrs)
        # The end tag is popped once all children pushed above it are done.
        pending.append(name)
        if children:
            # Push reversed so children pop in dict iteration order.
            pending.extend(reversed(children))
        elif cdata:
            generator.characters(cdata)
    generator.endDocument()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment