Created
May 2, 2010 17:29
-
-
Save yuya-takeyama/387300 to your computer and use it in GitHub Desktop.
Quoted from "Python Cookbook, 2nd Edition" (O'Reilly Japan). I modified it a little.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Usage example. Assumes Xml2Obj is already in scope
# (the test file imports it with `from xml2obj import Xml2Obj`).
parser = Xml2Obj()
root = parser.parse("""<?xml version="1.0"?><parent id="top"><child1 name="paul">Text goes here</child1><child2 name="fred">More text</child2></parent>""")
# The two lookups below are equivalent: attribute access on an element
# is a shortcut for getElements() with the same tag name.
print(root.getElements('child1')[0].getData())
# => Text goes here
print(root.child1[0].getData())
# => Text goes here
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
""" | |
Much of this program is quoted from "Python Cookbook, 2nd Edition" p.479 (O'Reilly Japan). | |
Only the method Element.__getattr__ was written by me;
I also changed the code to encode strings as UTF-8.
""" | |
from xml.parsers import expat | |
class Element(object):
    """One XML element: tag name, attributes, text and child elements.

    Children can be fetched with ``getElements(name)`` or, as a shortcut,
    through attribute access: ``element.child1`` returns the same list as
    ``element.getElements('child1')``.
    """

    def __init__(self, name, attributes):
        """Create an element.

        name       -- tag name of the element
        attributes -- mapping of attribute names to attribute values
        """
        self.name = name
        self.attributes = attributes
        self.cdata = ''      # text content accumulated for this element
        self.children = []   # child elements, in insertion order

    def __getattr__(self, key):
        """Return the list of children whose tag name is ``key``.

        Python calls this only when normal attribute lookup fails, so the
        real attributes and methods are never shadowed.

        Raises AttributeError for dunder names and for the instance's own
        fields. Without this, protocol probes such as ``__deepcopy__`` or
        ``__setstate__`` (used by copy/pickle) would receive a list, and a
        lookup of ``children`` on an uninitialised instance would recurse
        forever through getElements().
        """
        if key.startswith('__') and key.endswith('__'):
            raise AttributeError(key)
        if key in ('name', 'attributes', 'cdata', 'children'):
            raise AttributeError(key)
        return self.getElements(key)

    def addChild(self, element):
        """Append ``element`` to this element's children."""
        self.children.append(element)

    def getAttribute(self, key):
        """Return the value of attribute ``key``, or None if it is absent."""
        return self.attributes.get(key)

    def getData(self):
        """Return the text content collected for this element."""
        return self.cdata

    def getElements(self, name=''):
        """Return child elements as a new list.

        With a non-empty ``name`` only children with that tag name are
        returned; with the default empty name all children are returned
        (previously this case fell through and returned None).
        """
        if name:
            return [child for child in self.children if child.name == name]
        return list(self.children)
class Xml2Obj(object):
    """Build a tree of Element objects from an XML document using expat."""

    def __init__(self):
        self.root = None      # root Element of the most recent parse
        self.nodeStack = []   # currently open elements, innermost last

    @staticmethod
    def _toNativeStr(text):
        """Return ``text`` as the interpreter's native ``str`` type.

        On Python 2 a unicode string is encoded to a UTF-8 byte string.
        On Python 3 the text is returned unchanged, because encoding it
        would produce ``bytes`` that cannot be appended to the ``str``
        held in ``Element.cdata``.
        """
        if str is bytes and not isinstance(text, str):
            return text.encode("utf-8")
        return text

    def StartElement(self, name, attributes):
        """expat start-tag handler: create an Element and push it."""
        convert = self._toNativeStr
        attributes = dict((key, convert(attributes[key])) for key in attributes)
        element = Element(convert(name), attributes)
        if self.nodeStack:
            # Nested element: attach it to the innermost open element.
            parent = self.nodeStack[-1]
            parent.addChild(element)
        else:
            # Nothing is open yet, so this is the document root.
            self.root = element
        self.nodeStack.append(element)

    def EndElement(self, name):
        """expat end-tag handler: close the innermost open element."""
        # Pop the element itself off the stack of open elements.
        self.nodeStack.pop()

    def CharacterData(self, data):
        """expat text handler: append text to the innermost open element.

        Chunks that consist only of whitespace are ignored.
        """
        if data.strip():
            element = self.nodeStack[-1]
            element.cdata += self._toNativeStr(data)

    def parse(self, string):
        """Parse the XML document ``string`` and return its root Element.

        Errors raised by expat propagate to the caller.
        """
        # Start from a clean state so that nodes left on the stack by an
        # earlier parse that failed part-way cannot leak into this one.
        self.root = None
        self.nodeStack = []
        parser = expat.ParserCreate()
        parser.StartElementHandler = self.StartElement
        parser.EndElementHandler = self.EndElement
        parser.CharacterDataHandler = self.CharacterData
        parser.Parse(string, True)
        return self.root
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
import unittest | |
from xml2obj import Element, Xml2Obj | |
class Xml2ObjTestCase(unittest.TestCase):
    """Tests for Xml2Obj and Element with ASCII and Japanese documents."""

    def setUp(self):
        """Parse one ASCII and one Japanese document with the same parser."""
        self.parser = Xml2Obj()
        self.root = self.parser.parse("""<?xml version="1.0"?><parent id="top"><child1 name="paul">Text goes here</child1><child2 name="fred">More text</child2></parent>""")
        self.rootJp = self.parser.parse("""<?xml version="1.0"?><parent id="最上位"><child1 name="イチロー">日本語の文字列</child1><child2 name="ジロー">もうひとつ</child2></parent>""")

    # assertTrue is used throughout instead of the deprecated alias
    # assert_, which is no longer available in recent Python versions.

    def testRootIsElement(self):
        """parse() returns an Element."""
        self.assertTrue(isinstance(self.root, Element))

    def testRootIsElementJp(self):
        """parse() returns an Element for the Japanese document."""
        self.assertTrue(isinstance(self.rootJp, Element))

    def testChildIsList(self):
        """Both child lookup styles return a list."""
        self.assertTrue(isinstance(self.root.getElements('child1'), list))
        self.assertTrue(isinstance(self.root.child1, list))

    def testChildIsListJp(self):
        """Both child lookup styles return a list (Japanese document)."""
        self.assertTrue(isinstance(self.rootJp.getElements('child1'), list))
        self.assertTrue(isinstance(self.rootJp.child1, list))

    def testGetDataIsExact(self):
        """getData() returns the element text via getElements()."""
        self.assertEqual(self.root.getElements('child1')[0].getData(), "Text goes here")
        self.assertEqual(self.root.getElements('child2')[0].getData(), "More text")

    def testGetDataIsExactJp(self):
        """getData() returns Japanese element text via getElements()."""
        self.assertEqual(self.rootJp.getElements('child1')[0].getData(), "日本語の文字列")
        self.assertEqual(self.rootJp.getElements('child2')[0].getData(), "もうひとつ")

    def testGetDataByGetattrIsExact(self):
        """getData() returns the element text via attribute access."""
        self.assertEqual(self.root.child1[0].getData(), "Text goes here")
        self.assertEqual(self.root.child2[0].getData(), "More text")

    def testGetDataByGetattrIsExactJp(self):
        """getData() returns Japanese element text via attribute access."""
        self.assertEqual(self.rootJp.child1[0].getData(), "日本語の文字列")
        self.assertEqual(self.rootJp.child2[0].getData(), "もうひとつ")

    def testGetAttributeIsExact(self):
        """getAttribute() returns XML attribute values."""
        self.assertEqual(self.root.getAttribute('id'), "top")
        self.assertEqual(self.root.child1[0].getAttribute('name'), "paul")
        self.assertEqual(self.root.child2[0].getAttribute('name'), "fred")

    def testGetAttributeIsExactJp(self):
        """getAttribute() returns Japanese XML attribute values."""
        self.assertEqual(self.rootJp.getAttribute('id'), "最上位")
        self.assertEqual(self.rootJp.child1[0].getAttribute('name'), "イチロー")
        self.assertEqual(self.rootJp.child2[0].getAttribute('name'), "ジロー")

    def testAddChildIsExact(self):
        """An element parsed separately can be attached with addChild()."""
        newChild = self.parser.parse("""<?xml version="1.0"?><child3 name="john">I am a new one</child3>""")
        self.root.addChild(newChild)
        self.assertEqual(self.root.getElements('child3')[0].getData(), "I am a new one")
        self.assertEqual(self.root.child3[0].getData(), "I am a new one")
        self.assertEqual(self.root.child3[0].getAttribute('name'), "john")

    def testAddChildIsExactJp(self):
        """A Japanese element parsed separately can be attached with addChild()."""
        newChild = self.parser.parse("""<?xml version="1.0"?><child3 name="サブロー">新要素</child3>""")
        self.root.addChild(newChild)
        self.assertEqual(self.root.getElements('child3')[0].getData(), "新要素")
        self.assertEqual(self.root.child3[0].getData(), "新要素")
        self.assertEqual(self.root.child3[0].getAttribute('name'), "サブロー")
# Run the test suite only when this file is executed as a script, so that
# importing the module does not start (and exit after) a test run.
if __name__ == "__main__":
    unittest.main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment