# redapple/parslepy_xml.py

## parslepy_xml.py
import lxml.etree
import parslepy
import urllib2
import pprint

# Example script: download the iTunes "top albums" Atom feed, extract a
# structured summary of every entry with a parslepy parselet, and
# pretty-print the extracted data.

# --- Download and parse the feed ---------------------------------------------
xml_parser = lxml.etree.XMLParser()
url = 'https://itunes.apple.com/us/rss/topalbums/limit=10/explicit=true/xml'
req = urllib2.Request(url)
response = urllib2.urlopen(req)
try:
    # The tree is fully built in memory here, so the response can be closed
    # as soon as parsing returns.
    root = lxml.etree.parse(response, parser=xml_parser).getroot()
finally:
    # Always release the HTTP connection, even when parsing raises.
    response.close()

# --- Namespace prefixes used by the XPath expressions below -------------------
xsh = parslepy.XPathSelectorHandler(
    namespaces={
        'atom': 'http://www.w3.org/2005/Atom',
        'im': 'http://itunes.apple.com/rss'
    })

# --- Extraction rules ---------------------------------------------------------
# NOTE(review): per parslepy's rule language, a key written as "name(xpath)"
# scopes its nested rules to the nodes matched by that XPath, and wrapping a
# value in a list collects every match instead of only the first -- confirm
# against the parslepy documentation.
rules = {
    "entries(//atom:feed/atom:entry)": [
        {
            "title": "atom:title",
            "name": "im:name",
            "id": "atom:id/@im:id",
            "artist(im:artist)": {
                "name": ".",
                "href": "@href"
            },
            "images(im:image)": [{
                "height": "@height",
                "url": "."
            }],
            "releasedate": "im:releaseDate"
        }
    ]
}

# --- Run the extraction and show the result -----------------------------------
parselet = parslepy.Parselet(rules, selector_handler=xsh)
pprint.pprint(parselet.extract(root))