Skip to content

Instantly share code, notes, and snippets.

@statico
Created June 26, 2011 08:08
Show Gist options
  • Save statico/1047390 to your computer and use it in GitHub Desktop.
Save statico/1047390 to your computer and use it in GitHub Desktop.
Replacing Yahoo! Pipes with Python
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import sys
import datetime
import htmllib
import urllib
import PyRSS2Gen
from BeautifulSoup import BeautifulSoup
def get(url):
    """Fetch *url* and return the raw response body.

    The response object is always closed, even if reading fails, so the
    underlying connection is not left open.
    """
    response = urllib.urlopen(url)
    try:
        return response.read()
    finally:
        response.close()
def unescape(s):
p = htmllib.HTMLParser(None)
p.save_bgn()
p.feed(s)
return p.save_end()
# Don't print if not on a tty.
if not sys.stdout.isatty():
from StringIO import StringIO
sys.stdout = StringIO()
# Grab original feed.
FEED_URL = 'http://feeds2.feedburner.com/html5games-de?format=xml'
print 'Fetching', FEED_URL
feed = BeautifulSoup(get(FEED_URL))
# Iterate over latest N items.
items = []
for item in feed.findAll('item')[0:5]:
href = item.contents[2] # <link> is broken on feedburner
print 'Fetching', href
page = BeautifulSoup(get(href))
img = page.find('img',
**{'class': 'attachment-single-post-thumbnail wp-post-image'})
print 'Image:', img
items.append(PyRSS2Gen.RSSItem(
title=item.find('title').string,
link=href,
description='<a href="%s">%s</a><br/>%s' % (
href, img, item.find('description').string\
.replace('&lt;', '<').replace('&gt;', '>')),
guid=PyRSS2Gen.Guid(href),
pubDate=None,
))
rss = PyRSS2Gen.RSS2(
title='html5games.de with images',
link='http://www.html5games.de/',
description='Fast täglich gibt es neue kostenlose Online Spiel in HTML5.',
lastBuildDate=datetime.datetime.utcnow(),
items=items)
# foo.py -> foo.rss
filename = os.path.basename(sys.argv[0]).replace('.py', '.rss')
path = os.path.join(os.path.dirname(sys.argv[0]), filename)
print 'Writing', path
rss.write_xml(open(path, 'w'), encoding='utf-8')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment