Skip to content

Instantly share code, notes, and snippets.

@lcruz
Last active August 29, 2015 14:08
Show Gist options
  • Save lcruz/09fa3eae327611826b49 to your computer and use it in GitHub Desktop.
Save lcruz/09fa3eae327611826b49 to your computer and use it in GitHub Desktop.
Leer feed
"""
Este es un caso particular
"""
import feedparser
import json
from bs4 import BeautifulSoup
def find_picture(summary):
    """Return the ``src`` of the first ``<img>`` tag in an HTML fragment.

    Args:
        summary: HTML markup of a feed entry summary. May be empty or None.

    Returns:
        The value of the ``src`` attribute of the first ``<img>`` element,
        or ``None`` when there is no markup, no image, or the image has no
        ``src`` attribute.
    """
    if not summary:
        return None
    # Name the parser explicitly: without it bs4 picks whichever parser
    # happens to be installed, which emits a warning and can produce a
    # different tree on different machines.
    soup = BeautifulSoup(summary, "html.parser")
    image = soup.find("img")
    return image.get("src") if image is not None else None
def get_feeds():
    """Fetch the calendario-odonto feed and serialize its entries as JSON.

    Returns:
        A JSON string encoding a list with one object per feed entry. Each
        object holds the entry's title, link and summary, plus the image
        URL that ``find_picture`` extracts from the summary.
    """
    parsed = feedparser.parse("http://feeds.feedburner.com/calendario-odonto")
    entries = []
    for entry in parsed['entries']:
        entries.append({
            "title": entry.title,
            "link": entry.link,
            "summary": entry.summary,
            "image": find_picture(entry.summary),
        })
    return json.dumps(entries)
"""
Este es un caso generalizado para obtener todos los feeds
"""
import feedparser
import json
from bs4 import BeautifulSoup
# URL template for a feed; get_feeds() substitutes the feed name for "%s".
URL_FEED = "http://feeds.feedburner.com/%s"
def find_picture(summary):
    """Extract the URL of the first image found in an entry summary.

    Args:
        summary: HTML markup of a feed entry summary. May be empty or None.

    Returns:
        The ``src`` attribute of the first ``<img>`` element, or ``None``
        when the markup is empty, contains no image, or the image carries
        no ``src`` attribute.
    """
    if not summary:
        return None
    # An explicit parser keeps results identical across environments;
    # leaving it out makes bs4 guess one and emit a warning.
    soup = BeautifulSoup(summary, "html.parser")
    image = soup.find("img")
    return image.get("src") if image is not None else None
def find_text(summary):
    """Return the text content of the first ``<div>`` in an entry summary.

    Args:
        summary: HTML markup of a feed entry summary. May be empty or None.

    Returns:
        The text of the first ``<div>`` element with all tags stripped, or
        ``None`` when the markup is empty or contains no ``<div>``.
    """
    if not summary:
        return None
    # An explicit parser keeps results identical across environments;
    # leaving it out makes bs4 guess one and emit a warning.
    soup = BeautifulSoup(summary, "html.parser")
    container = soup.find("div")
    return container.get_text() if container is not None else None
def find_categories(summary):
    """Return the category names linked inside an entry summary.

    Looks up the first ``<div>`` matching the class string
    ``"element element-itemcategory last"`` and collects the text of every
    ``<a>`` element inside it.

    Args:
        summary: HTML markup of a feed entry summary. May be empty or None.

    Returns:
        A list of strings, one per link found in the category ``<div>``.
        The list is empty when the markup is empty or has no such ``<div>``.
    """
    if not summary:
        return []
    # An explicit parser keeps results identical across environments;
    # leaving it out makes bs4 guess one and emit a warning.
    soup = BeautifulSoup(summary, "html.parser")
    container = soup.find("div", {"class": "element element-itemcategory last"})
    if container is None:
        # Entries without a category block used to raise AttributeError.
        return []
    # Build a real list: on Python 3 ``map`` is a lazy iterator that can be
    # consumed only once and cannot be serialized by ``json.dumps``.
    return [anchor.get_text() for anchor in container.find_all("a")]
def get_feeds(feed, f):
    """Download a feed and convert each of its entries with ``f``.

    Args:
        feed: Value substituted into ``URL_FEED`` to build the feed URL.
        f: Callable applied to every parsed entry.

    Returns:
        A list holding the result of ``f`` for each entry, in feed order.
    """
    feed_url = URL_FEED % feed
    parsed = feedparser.parse(feed_url)
    results = []
    for entry in parsed['entries']:
        results.append(f(entry))
    return results
# Example calls: each lambda maps one parsed entry to a plain dict.

# calendario-odonto: title, link, raw summary and first image URL.
get_feeds(
    "calendario-odonto",
    lambda entry: {
        "title": entry.title,
        "link": entry.link,
        "summary": entry.summary,
        "image": find_picture(entry.summary),
    },
)

# escuela/arquitectura: same fields plus the category names.
get_feeds(
    "escuela/arquitectura",
    lambda entry: {
        "title": entry.title,
        "link": entry.link,
        "summary": entry.summary,
        "image": find_picture(entry.summary),
        "categories": find_categories(entry.summary),
    },
)

# AnunciosUft: the summary is reduced to the text of its first <div>.
get_feeds(
    "AnunciosUft",
    lambda entry: {
        "title": entry.title,
        "link": entry.link,
        "summary": find_text(entry.summary),
    },
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment