Skip to content

Instantly share code, notes, and snippets.

@spiermar
Last active December 15, 2015 06:39
Show Gist options
  • Save spiermar/5217827 to your computer and use it in GitHub Desktop.
Save spiermar/5217827 to your computer and use it in GitHub Desktop.
import urllib2
import xml.etree.ElementTree as ET
from xml.dom import minidom
# Module-level accumulator: get_posts() appends one <item> element per post
# to this <results> root; the driver script serializes it once paging is done.
results = ET.Element('results')


def _fetch(url, attempts=3):
    """Return the raw body at *url*, or None if every attempt fails."""
    for _ in range(attempts):
        try:
            response = urllib2.urlopen(url)
            try:
                return response.read()
            finally:
                # urllib2 responses are not context managers on Python 2.
                response.close()
        except Exception:
            # Best-effort scraper: any fetch error just triggers a retry.
            # Unlike a bare ``except:``, this lets Ctrl-C (KeyboardInterrupt)
            # and SystemExit propagate.
            continue
    return None


def _child_text(node, tag):
    """Return the text of *node*'s first *tag* child, or '' if absent/empty."""
    child = node.find(tag)
    if child is None or child.text is None:
        return ''
    return child.text


def _image_ref(description):
    """Return what follows the first ``img src=`` up to the next ``>``.

    Returns '' when the marker is absent. The value is not unquoted or
    otherwise normalized.
    """
    marker = 'img src='
    start = description.find(marker)
    if start == -1:
        return ''
    start += len(marker)
    end = description.find('>', start)
    if end == -1:
        end = len(description)
    return description[start:end]


def get_posts(url):
    """Fetch one RSS page and append its posts to the global ``results``.

    For every ``<item>`` in the feed an ``<item>`` element is appended to
    ``results`` with these attributes:

    * ``user``  - the part of ``<title>`` before the first ``' - '`` (the
      whole title when there is no separator),
    * ``link``  - text of ``<link>``,
    * ``date``  - text of ``<pubDate>``,
    * ``image`` - the ``img src=`` value found in ``<description>``.

    Missing or empty child elements produce empty attribute values instead
    of raising.

    Args:
        url: Address of the RSS page to download.

    Returns:
        The text after the last ``/`` in the final item's link (callers use
        it as the paging cursor for the next request), or None when the page
        cannot be downloaded after three attempts, is not well-formed XML,
        contains no items, or yields an empty cursor.
    """
    body = _fetch(url)
    if body is None:
        return None

    try:
        root = ET.fromstring(body)
    except ET.ParseError:
        return None

    items = root.findall('.//item')
    if not items:
        # Nothing to record and no last item to derive a cursor from.
        return None

    for item in items:
        # partition() keeps the full title when ' - ' is absent; slicing with
        # the -1 returned by find() would silently drop the last character.
        user = _child_text(item, 'title').partition(' - ')[0]
        entry = ET.Element('item')
        entry.set('user', user)
        entry.set('link', _child_text(item, 'link'))
        entry.set('date', _child_text(item, 'pubDate'))
        entry.set('image', _image_ref(_child_text(item, 'description')))
        results.append(entry)

    cursor = _child_text(items[-1], 'link').rpartition('/')[2]
    # An empty cursor cannot advance paging, so report it as a stop signal.
    return cursor or None
# Walk the venue feed page by page. Each get_posts() call returns a cursor
# that is passed back as the ``max`` query parameter of the next request.
FEED_URL = 'http://spots.io/venue/74027012.rss?max='
MAX_PAGES = 800  # hard upper bound on the number of requests

cursor = ''  # empty on the first request
for _ in range(MAX_PAGES):
    next_cursor = get_posts(FEED_URL + cursor)
    # None means get_posts() gave up on this page. An unchanged cursor means
    # the next request would be identical to this one and would only add
    # duplicate items.
    if next_cursor is None or next_cursor == cursor:
        break
    cursor = next_cursor

# Single-argument call form: prints the same text as the ``print`` statement
# on Python 2.
print(minidom.parseString(ET.tostring(results)).toprettyxml(indent=" "))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment