Skip to content

Instantly share code, notes, and snippets.

@yuya-takeyama
Created May 2, 2010 17:29
Show Gist options
  • Save yuya-takeyama/387300 to your computer and use it in GitHub Desktop.
Save yuya-takeyama/387300 to your computer and use it in GitHub Desktop.
Quoted from "Python Cookbook, 2nd Edition" (O'Reilly Japan). I modified it a little.
parser = Xml2Obj()
root = parser.parse("""<?xml version="1.0"?><parent id="top"><child1 name="paul">Text goes here</child1><child2 name="fred">More text</child2></parent>""")
# These have the same meaning.
print root.getElements('child1')[0].getData()
# => Text goes here
print root.child1[0].getData()
# => Text goes here
# -*- coding: utf-8 -*-
"""
Most of this program is quoted from "Python Cookbook, 2nd Edition", p.479 (O'Reilly Japan).
Only the method Element.__getattr__ was written by me.
I also changed it to encode strings as UTF-8.
"""
from xml.parsers import expat
class Element(object):
    """One XML element: its tag name, attributes, text and child elements.

    Direct children can be looked up by tag name either with
    ``getElements(name)`` or through attribute access (``element.tagname``);
    both return a list of the matching children.
    """

    def __init__(self, name, attributes):
        """Create an element with no text and no children.

        name: the element's tag name.
        attributes: mapping of attribute name to attribute value.
        """
        self.name = name
        self.attributes = attributes
        self.cdata = ''
        self.children = []

    def __getattr__(self, key):
        """Return the direct children whose tag name equals *key*.

        Python calls this only when normal attribute lookup has failed, so
        real attributes and methods of the element always take precedence
        over child tags of the same name.

        Raises AttributeError for special (``__dunder__``) names and for the
        instance's own attribute names.
        """
        # Special-method probes (copy, pickle, hasattr(x, '__iter__'), ...)
        # must see "missing", not a list of children, or they try to call
        # or iterate the list.
        if key.startswith('__') and key.endswith('__'):
            raise AttributeError(key)
        # These are set in __init__, so reaching here means the instance is
        # not initialised yet (e.g. mid-unpickle); falling through would
        # recurse forever via self.children.
        if key in ('name', 'attributes', 'cdata', 'children'):
            raise AttributeError(key)
        return self.getElements(key)

    def addChild(self, element):
        """Append *element* to this element's list of children."""
        self.children.append(element)

    def getAttribute(self, key):
        """Return the value of attribute *key*, or None if it is absent."""
        return self.attributes.get(key)

    def getData(self):
        """Return the character data accumulated for this element."""
        return self.cdata

    def getElements(self, name=''):
        """Return a list of direct children, optionally filtered by tag name.

        name: tag name to match; when empty, all children are returned.
        The result is always a new list, so callers may modify it freely.
        """
        if name:
            return [child for child in self.children if child.name == name]
        return list(self.children)
class Xml2Obj(object):
    """Build a tree of Element objects from an XML document using expat.

    The instance acts as the set of expat callbacks: StartElement,
    EndElement and CharacterData maintain a stack of currently open
    elements while parse() drives the parser.
    """

    def __init__(self):
        """Start with no parsed root and an empty stack of open elements."""
        self.root = None
        self.nodeStack = []

    @staticmethod
    def _native(text):
        """Return *text* as the interpreter's native ``str`` type.

        Text that is already ``str`` is returned unchanged; anything else
        (Python 2 ``unicode``) is encoded to UTF-8.  Encoding
        unconditionally would turn Python 3 text into ``bytes``, which can
        neither be appended to ``cdata`` nor compared with ``str`` tag names.
        """
        if isinstance(text, str):
            return text
        return text.encode("utf-8")

    def StartElement(self, name, attributes):
        """expat callback: open a new element and attach it to the tree.

        name: tag name of the element being opened.
        attributes: mapping of attribute names to values.
        """
        # Keys are kept exactly as expat supplied them; only values are
        # converted.
        attributes = dict(
            (key, self._native(value)) for key, value in attributes.items()
        )
        element = Element(self._native(name), attributes)
        if self.nodeStack:
            # The innermost open element is the parent of the new one.
            self.nodeStack[-1].addChild(element)
        else:
            # Nothing is open yet, so this is the document element.
            self.root = element
        self.nodeStack.append(element)

    def EndElement(self, name):
        """expat callback: close the innermost open element."""
        self.nodeStack.pop()

    def CharacterData(self, data):
        """expat callback: append text to the innermost open element.

        Chunks consisting only of whitespace are ignored.
        """
        if data.strip():
            self.nodeStack[-1].cdata += self._native(data)

    def parse(self, string):
        """Parse the XML document *string* and return its root Element.

        Errors raised by the expat parser propagate to the caller.
        """
        # Reset per-document state so that an earlier parse (in particular
        # one that failed part-way and left elements on the stack) cannot
        # leak into this one.
        self.root = None
        self.nodeStack = []
        parser = expat.ParserCreate()
        parser.StartElementHandler = self.StartElement
        parser.EndElementHandler = self.EndElement
        parser.CharacterDataHandler = self.CharacterData
        parser.Parse(string, True)
        return self.root
# -*- coding: utf-8 -*-
import unittest
from xml2obj import Element, Xml2Obj
class Xml2ObjTestCase(unittest.TestCase):
    """Tests for Xml2Obj/Element using an ASCII and a Japanese document.

    ``assertTrue`` is used instead of the ``assert_`` alias, which is
    deprecated and no longer exists in recent Python versions.
    """

    def setUp(self):
        """Parse the ASCII and the Japanese sample documents."""
        self.parser = Xml2Obj()
        self.root = self.parser.parse("""<?xml version="1.0"?><parent id="top"><child1 name="paul">Text goes here</child1><child2 name="fred">More text</child2></parent>""")
        self.rootJp = self.parser.parse("""<?xml version="1.0"?><parent id="最上位"><child1 name="イチロー">日本語の文字列</child1><child2 name="ジロー">もうひとつ</child2></parent>""")

    def testRootIsElement(self):
        """parse() returns an Element."""
        self.assertTrue(isinstance(self.root, Element))

    def testRootIsElementJp(self):
        """parse() returns an Element for the Japanese document."""
        self.assertTrue(isinstance(self.rootJp, Element))

    def testChildIsList(self):
        """Child lookup by method and by attribute both return a list."""
        self.assertTrue(isinstance(self.root.getElements('child1'), list))
        self.assertTrue(isinstance(self.root.child1, list))

    def testChildIsListJp(self):
        """Same as testChildIsList, for the Japanese document."""
        self.assertTrue(isinstance(self.rootJp.getElements('child1'), list))
        self.assertTrue(isinstance(self.rootJp.child1, list))

    def testGetDataIsExact(self):
        """getData() returns the element text (lookup via getElements)."""
        self.assertEqual(self.root.getElements('child1')[0].getData(), "Text goes here")
        self.assertEqual(self.root.getElements('child2')[0].getData(), "More text")

    def testGetDataIsExactJp(self):
        """Same as testGetDataIsExact, for the Japanese document."""
        self.assertEqual(self.rootJp.getElements('child1')[0].getData(), "日本語の文字列")
        self.assertEqual(self.rootJp.getElements('child2')[0].getData(), "もうひとつ")

    def testGetDataByGetattrIsExact(self):
        """getData() returns the element text (lookup via attribute access)."""
        self.assertEqual(self.root.child1[0].getData(), "Text goes here")
        self.assertEqual(self.root.child2[0].getData(), "More text")

    def testGetDataByGetattrIsExactJp(self):
        """Same as testGetDataByGetattrIsExact, for the Japanese document."""
        self.assertEqual(self.rootJp.child1[0].getData(), "日本語の文字列")
        self.assertEqual(self.rootJp.child2[0].getData(), "もうひとつ")

    def testGetAttributeIsExact(self):
        """getAttribute() returns the attribute values from the document."""
        self.assertEqual(self.root.getAttribute('id'), "top")
        self.assertEqual(self.root.child1[0].getAttribute('name'), "paul")
        self.assertEqual(self.root.child2[0].getAttribute('name'), "fred")

    def testGetAttributeIsExactJp(self):
        """Same as testGetAttributeIsExact, for the Japanese document."""
        self.assertEqual(self.rootJp.getAttribute('id'), "最上位")
        self.assertEqual(self.rootJp.child1[0].getAttribute('name'), "イチロー")
        self.assertEqual(self.rootJp.child2[0].getAttribute('name'), "ジロー")

    def testAddChildIsExact(self):
        """An element added with addChild() is found by both lookup styles."""
        newChild = self.parser.parse("""<?xml version="1.0"?><child3 name="john">I am a new one</child3>""")
        self.root.addChild(newChild)
        self.assertEqual(self.root.getElements('child3')[0].getData(), "I am a new one")
        self.assertEqual(self.root.child3[0].getData(), "I am a new one")
        self.assertEqual(self.root.child3[0].getAttribute('name'), "john")

    def testAddChildIsExactJp(self):
        """Same as testAddChildIsExact, with a Japanese child element."""
        newChild = self.parser.parse("""<?xml version="1.0"?><child3 name="サブロー">新要素</child3>""")
        self.root.addChild(newChild)
        self.assertEqual(self.root.getElements('child3')[0].getData(), "新要素")
        self.assertEqual(self.root.child3[0].getData(), "新要素")
        self.assertEqual(self.root.child3[0].getAttribute('name'), "サブロー")
# Run the tests only when this file is executed as a script; an unguarded
# unittest.main() would run the suite and exit the interpreter as soon as
# the module is imported (for example by another test runner).
if __name__ == "__main__":
    unittest.main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment