Skip to content

Instantly share code, notes, and snippets.

@jepler
Created March 19, 2013 03:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jepler/5193404 to your computer and use it in GitHub Desktop.
Save jepler/5193404 to your computer and use it in GitHub Desktop.
Python code to get the title, link, and my comment from a NewsBlur blurblog feed
import functools
import os
import re
import sys
import xml.etree.ElementTree

import BeautifulSoup
def exc_wrap(f):
    """Wrap *f* in a pass-through function that re-raises any exception.

    The wrapper forwards all positional and keyword arguments to *f* and
    returns its result unchanged.  The ``except`` clause only re-raises, so
    the decorator does not alter behaviour.
    NOTE(review): presumably kept as a single place to hook in diagnostics
    for the feed accessors -- confirm before removing.

    ``functools.wraps`` preserves the wrapped function's ``__name__`` and
    ``__doc__`` so decorated functions stay identifiable in tracebacks.
    """
    @functools.wraps(f)
    def inner(*args, **kw):
        try:
            return f(*args, **kw)
        except Exception:
            # Nothing is handled here: the exception propagates untouched.
            raise
    return inner
@exc_wrap
def get_title(e):
    """Return the text of the Atom ``title`` child of element *e*.

    The lookup uses ``findtext``, so the result is ``None`` when *e* has no
    ``title`` child.
    """
    title_tag = '{http://www.w3.org/2005/Atom}title'
    return e.findtext(title_tag)
@exc_wrap
def get_link(e):
    """Return the ``href`` attribute of the Atom ``link`` child of *e*.

    Returns ``None`` when *e* has no ``link`` child or when that child has
    no ``href`` attribute.
    """
    link = e.find('{http://www.w3.org/2005/Atom}link')
    # Compare against None explicitly: an Element without children is falsy,
    # so a plain truth test would reject a perfectly valid <link/> element.
    if link is None:
        return None
    return link.get('href')
def normtext(el):
    """Return the text of *el* and its descendants as one normalized string.

    The ``.text`` of every element in the subtree is collected in document
    order and joined with single spaces; surrounding whitespace is stripped
    and every run of whitespace is collapsed to one space.  Only ``.text``
    is read: ``.tail`` text that follows a child element is not included.

    Side effect: a trace line naming *el* is written to stderr on each call.
    """
    # sys.stderr.write works on Python 2 and 3 alike, unlike the
    # ``print >>stream`` statement.
    sys.stderr.write("normtext %s\n" % (el,))
    # Element.iter() replaces getiterator(), which newer ElementTree
    # releases no longer provide.
    pieces = [node.text or '' for node in el.iter()]
    return re.sub(r"\s+", " ", " ".join(pieces).strip())
def bsparse(text):
    """Parse *text* with BeautifulSoup and rebuild it as an ElementTree tree.

    The soup is walked recursively and replayed into a ``TreeBuilder``:
    tags become elements (name and attributes copied), string nodes become
    character data, and comment nodes are dropped.  The soup object itself
    is replayed as the outermost element, so the returned root wraps the
    parsed markup.  HTML entities are converted while parsing.
    """
    builder = xml.etree.ElementTree.TreeBuilder()

    def walk(node):
        # Tags: open the element, replay the children, close it again.
        if not isinstance(node, BeautifulSoup.NavigableString):
            builder.start(node.name, dict(node.attrs))
            for child in node:
                walk(child)
            builder.end(node.name)
        # Strings: keep everything except comments.
        elif not isinstance(node, BeautifulSoup.Comment):
            builder.data(node)

    parsed = BeautifulSoup.BeautifulSoup(
        text,
        convertEntities=BeautifulSoup.BeautifulSoup.HTML_ENTITIES)
    walk(parsed)
    return builder.close()
@exc_wrap
def get_comment(e):
    """Return the sharer's comment embedded in the Atom ``summary`` of *e*.

    The summary text is parsed as HTML via ``bsparse``.  A comment is
    recognised only when the first element of the parsed markup is a
    ``table`` with ``cellpadding="0"``, the exact inline style checked
    below, and exactly two children, the first of which normalizes to
    ``'jepler shared this story'``.  The normalized text of the second
    child is then returned; in every other case the result is ``None``.

    Side effects: trace lines are written to stderr, and the parsed tree is
    stored in the module-level name ``gb`` once it is known to be non-empty.
    """
    global gb

    summary = e.find('{http://www.w3.org/2005/Atom}summary')
    # An entry without a summary, or with an empty one, carries no comment.
    if summary is None or summary.text is None:
        return None

    b = summary.text
    sys.stderr.write("<<< %s\n" % (repr(b),))
    b = bsparse(b)
    sys.stderr.write(">>> %s\n" % (b,))

    # Explicit child count instead of the deprecated Element truth test.
    if len(b) == 0:
        return None
    gb = b  # kept so the last parsed tree can be inspected after the call

    table = b[0]
    if table.tag != 'table':
        return None
    if table.get('cellpadding') != "0":
        return None
    if table.get('style') != 'border: 1px solid #E0E0E0; margin: 0; padding: 0; background-color: #F0F0F0':
        return None

    # list(element) replaces getchildren(), which newer ElementTree
    # releases no longer provide.
    rows = list(table)
    if len(rows) != 2:
        return None

    t0 = normtext(rows[0])
    t1 = normtext(rows[1])
    if t0 != 'jepler shared this story':
        return None
    return t1
def parse_rss(u):
    """Yield ``(title, link, comment)`` for each Atom entry in *u*.

    *u* is the feed document itself: it is handed to
    ``ElementTree.fromstring``, so it must be XML text rather than a URL.
    ``entry`` elements are looked up with ``findall`` on the root.  An entry
    is skipped when its title, link and comment are all empty or ``None``.

    Side effect: the ``repr`` of each entry's comment is written to stderr.
    """
    feed = xml.etree.ElementTree.fromstring(u)
    for entry in feed.findall('{http://www.w3.org/2005/Atom}entry'):
        t = get_title(entry)
        l = get_link(entry)
        c = get_comment(entry)
        # sys.stderr.write works on Python 2 and 3 alike, unlike the
        # ``print >>stream`` statement.
        sys.stderr.write("%s\n" % (repr(c),))
        if t or l or c:
            yield t, l, c
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment