Skip to content

Instantly share code, notes, and snippets.

@dopuskh3
Created October 27, 2009 08:46
Show Gist options
  • Save dopuskh3/219419 to your computer and use it in GitHub Desktop.
Save dopuskh3/219419 to your computer and use it in GitHub Desktop.
import sys
from lxml import etree
# Cardinality markers for entries of ``xmlStor.properties``.
CARD_MULT = 1    # the property maps to every node matched by its xpath
CARD_SINGLE = 0  # the property maps to the first matched node only


class xmlStor:
    """Expose parts of an lxml tree as plain Python attributes.

    Subclasses fill ``properties`` with ``name -> spec`` entries, where the
    spec is a dict with the keys:

    * ``'xpath'``     (required) expression selecting the backing node(s);
    * ``'attribute'`` (optional) read/write this XML attribute instead of
      the node text;
    * ``'card'``      (optional) ``CARD_SINGLE`` (default) or ``CARD_MULT``.

    Reading ``obj.<name>`` evaluates the xpath against ``obj.tree`` and
    returns the text/attribute of the first node (``CARD_SINGLE``) or a list
    with one entry per node (``CARD_MULT``).  Assigning ``obj.<name>`` writes
    the value(s) back into the tree.  Two names are special: ``tree`` holds
    the parsed document and ``blob`` is its serialized form.

    Access is best-effort: an unknown name, an xpath without matches or any
    error while evaluating it makes a read return ``None`` and turns a write
    into a silent no-op.
    """

    properties = {}

    def __init__(self):
        self.tree = None
        # Normalize the (class-level, shared) specs so that every one of
        # them carries an explicit cardinality.
        for spec in self.properties.values():
            spec.setdefault('card', CARD_SINGLE)

    def _serialize(self):
        """Return the tree serialized by ``etree.tostring``, or None."""
        tree = self.__dict__.get("tree")
        if tree is None:
            return None
        return etree.tostring(tree)

    @property
    def blob(self):
        """Serialized form of ``tree`` (None while no tree is loaded)."""
        return self._serialize()

    # No ``@blob.setter`` here: assignments to ``blob`` are routed to
    # setBlob() by __setattr__ below.
    def setBlob(self, blob):
        """Parse ``blob`` with ``etree.fromstring`` and store it as the tree."""
        self.tree = etree.fromstring(blob)

    def _checkproperty(self, name):
        """ Check if a valid property with the given name exists """
        spec = self.properties.get(name)
        return spec is not None and 'xpath' in spec

    def _read_node(self, node, spec):
        """Return the attribute named by the spec (u"" if absent) or the node text."""
        if 'attribute' in spec:
            return node.get(spec['attribute'], u"")
        return node.text

    def _write_node(self, node, spec, value):
        """Store ``value`` in the attribute named by the spec or in the node text."""
        if 'attribute' in spec:
            node.set(spec['attribute'], value)
        else:
            node.text = value

    def __getattr__(self, name):
        """Resolve ``name`` through ``properties``; return None when unavailable.

        Only called when regular attribute lookup fails.
        """
        # Protocol lookups (__repr__, __nonzero__, __getstate__, ...) must
        # fail normally; answering them with None hands the interpreter a
        # non-callable "method".
        if name.startswith("__") and name.endswith("__"):
            raise AttributeError(name)
        if name == "tree":
            # Missing only when __init__ was bypassed.
            return self.__dict__.get("tree")
        if name == "blob":
            # Reached only if the ``blob`` property itself raised
            # AttributeError; serialize directly instead of re-entering it.
            return self._serialize()
        if not self._checkproperty(name):
            # this property does not exist
            return None
        spec = self.properties[name]
        try:
            nodes = self.tree.xpath(spec['xpath'])
            if not nodes:
                return None
            if spec.get('card', CARD_SINGLE) == CARD_MULT:
                return [self._read_node(node, spec) for node in nodes]
            return self._read_node(nodes[0], spec)
        except Exception:
            # Deliberate best-effort: no tree, a bad xpath or a non-node
            # result all read as "no value".
            return None

    def __setattr__(self, name, value):
        """Write ``value`` into the node(s) backing property ``name``.

        For a ``CARD_MULT`` property given a list, the values are written to
        the matched nodes in order.  Surplus values create new elements with
        the tag of the last matched node, appended to that node's parent.
        When the list is shorter than the match, the remaining nodes are
        left untouched.  In every other case the value goes to the first
        matched node.  Names other than ``blob``, ``tree`` and the declared
        properties are ignored, as are writes whose xpath matches nothing.
        """
        if name == "blob":
            self.setBlob(value)
            return
        if name == "tree":
            # Go through __dict__ directly to avoid re-entering __setattr__.
            self.__dict__["tree"] = value
            return
        if not self._checkproperty(name):
            # this property does not exist
            return
        spec = self.properties[name]
        try:
            nodes = self.tree.xpath(spec['xpath'])
            if not nodes:
                return
            multiple = spec.get('card', CARD_SINGLE) == CARD_MULT
            if not (multiple and isinstance(value, list)):
                self._write_node(nodes[0], spec, value)
                return
            # zip() stops at the shorter sequence, so exactly the nodes that
            # have a matching value are updated.
            for node, item in zip(nodes, value):
                self._write_node(node, spec, item)
            surplus = value[len(nodes):]
            if surplus:
                last = nodes[-1]
                parent = last.getparent()
                if parent is not None:
                    for item in surplus:
                        self._write_node(etree.SubElement(parent, last.tag),
                                         spec, item)
        except Exception:
            # Deliberate best-effort: a failed write leaves the tree as is.
            return
class testBlob(xmlStor):
    """Sample mapping used by the demo script below.

    ``update``, ``status`` and ``uriLink`` read the text of the first
    matching element, ``uuid`` reads the ``id`` attribute of the product
    ``div`` and ``links`` collects the ``href`` of every ``a`` element.
    """

    properties = dict(
        update=dict(xpath="//span[@class='update']"),
        uuid=dict(xpath="//div[@class='product']", attribute="id"),
        status=dict(xpath="//span[@class='status']"),
        links=dict(xpath="//a", card=CARD_MULT, attribute="href"),
        uriLink=dict(xpath="//a[@class='uri']"),
    )
# Demo: parse the document named on the command line, wrap every
# <div class="product"> in a testBlob, tweak a few properties, print them and
# write the serialized blobs to out.html.
if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.exit("usage: %s FILE" % sys.argv[0])
    t = etree.parse(sys.argv[1])
    print("Parsed")
    blobs = []
    for b in t.xpath("//div[@class='product']"):
        print("---------------")
        blob = testBlob()
        blob.blob = etree.tostring(b)
        # Property reads return None when nothing matches, so guard the
        # concatenation instead of crashing on products without a uri link.
        uri = blob.uriLink
        if uri is not None:
            blob.uuid = "urn:uuid:" + uri
        # Each read of ``links`` builds a fresh list: it has to be assigned
        # back for the appended entry to reach the tree.
        links = blob.links or []
        links.append("http://foobar")
        blob.links = links
        blobs.append(blob)
    for b in blobs:
        print("--")
        print("""
Update: %s
uuid: %s
uriLink: %s
status: %s
links: %s""" % (b.update, b.uuid, b.uriLink, b.status, str(b.links)))
    # Binary mode: etree.tostring() returns a byte string.  ``with`` closes
    # the file even if a write fails.
    with open("out.html", "wb") as f:
        for i in blobs:
            f.write(i.blob)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment