Skip to content

Instantly share code, notes, and snippets.

@pmolchanov
Created January 19, 2016 07:42
Show Gist options
  • Save pmolchanov/6f1289a3ca6821b5c967 to your computer and use it in GitHub Desktop.
Save pmolchanov/6f1289a3ca6821b5c967 to your computer and use it in GitHub Desktop.
Parse Craigslist RSS Feed
from lxml import etree
def parseCraigslistRssFeed(url):
    """Parse an RSS feed and return its items as a list of dicts.

    ``url`` is handed straight to ``etree.parse``, so anything that
    function accepts as a source works here.

    Only ``item`` elements that are direct children of the document root
    are read; element names are matched in any namespace (``{*}``).

    Each returned dict has the keys:

    * ``'title'``  - text of the item's ``title`` child
    * ``'desc'``   - text of the item's ``description`` child
    * ``'date'``   - text of the item's ``date`` child
    * ``'link'``   - text of the item's ``link`` child
    * ``'imgsrc'`` - ``resource`` attribute of the item's ``enclosure``
      child

    Any value is ``None`` when the corresponding element (or attribute)
    is absent, so one incomplete item does not abort the whole parse.
    """

    def child_text(parent, tag):
        # Explicit ``is None`` check: an element without children is
        # falsy, so plain truthiness testing would misfire here.
        node = parent.find('{*}' + tag)
        return None if node is None else node.text

    listings = []
    for item in etree.parse(url).getroot().findall('{*}item'):
        enclosure = item.find('{*}enclosure')
        imgsrc = None
        if enclosure is not None:
            imgsrc = enclosure.attrib.get('resource')
        listings.append({
            'title': child_text(item, 'title'),
            'desc': child_text(item, 'description'),
            'date': child_text(item, 'date'),
            'link': child_text(item, 'link'),
            'imgsrc': imgsrc,
        })
    return listings
# Example run: fetch the Miami "sga" search feed for "surfboard" and print
# the parsed listings.
print(
    parseCraigslistRssFeed(
        'http://miami.craigslist.org/search/sga?format=rss&is_paid=all&query=surfboard&search_distance_type=mi'
    )
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment