Last active
August 29, 2015 14:08
-
-
Save lcruz/09fa3eae327611826b49 to your computer and use it in GitHub Desktop.
Leer feed
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
This is the specific case: a single hard-coded feed.
""" | |
import feedparser | |
import json | |
from bs4 import BeautifulSoup | |
def find_picture(summary):
    """Return the ``src`` of the first ``<img>`` found in *summary*.

    :param summary: HTML fragment (the entry summary) to search.
    :returns: the value of the image's ``src`` attribute, or ``None`` when
        *summary* is empty, contains no ``<img>`` tag, or the tag has no
        ``src`` attribute.
    """
    if not summary:
        # Nothing to parse; avoids handing None to BeautifulSoup.
        return None
    # Name the parser explicitly: without it bs4 picks whichever parser is
    # installed (and warns), so results can differ between environments.
    soup = BeautifulSoup(summary, "html.parser")
    image = soup.find("img")
    return image.get("src") if image else None
def get_feeds():
    """Parse the calendario-odonto feed and return its entries as JSON.

    :returns: a JSON string encoding a list with one object per feed entry,
        each holding the entry's ``title``, ``link`` and ``summary`` plus an
        ``image`` value obtained by passing the summary to ``find_picture``.
    """
    parsed = feedparser.parse("http://feeds.feedburner.com/calendario-odonto")
    items = []
    for entry in parsed['entries']:
        items.append({
            "title": entry.title,
            "link": entry.link,
            "summary": entry.summary,
            "image": find_picture(entry.summary),
        })
    return json.dumps(items)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
This is the generalized case, for fetching any of the feeds.
""" | |
import feedparser | |
import json | |
from bs4 import BeautifulSoup | |
# URL template for a feed; the ``%s`` placeholder is filled with the feed
# name by get_feeds().
URL_FEED = "http://feeds.feedburner.com/%s"
def find_picture(summary):
    """Return the ``src`` of the first ``<img>`` found in *summary*.

    :param summary: HTML fragment (the entry summary) to search.
    :returns: the value of the image's ``src`` attribute, or ``None`` when
        *summary* is empty, contains no ``<img>`` tag, or the tag has no
        ``src`` attribute.
    """
    if not summary:
        # Nothing to parse; avoids handing None to BeautifulSoup.
        return None
    # Name the parser explicitly: without it bs4 picks whichever parser is
    # installed (and warns), so results can differ between environments.
    soup = BeautifulSoup(summary, "html.parser")
    image = soup.find("img")
    return image.get("src") if image else None
def find_text(summary):
    """Return the text content of the first ``<div>`` found in *summary*.

    :param summary: HTML fragment (the entry summary) to search.
    :returns: the text of the first ``<div>`` with markup stripped, or
        ``None`` when *summary* is empty or contains no ``<div>``.
    """
    if not summary:
        # Nothing to parse; avoids handing None to BeautifulSoup.
        return None
    # Name the parser explicitly: without it bs4 picks whichever parser is
    # installed (and warns), so results can differ between environments.
    soup = BeautifulSoup(summary, "html.parser")
    text = soup.find("div")
    return text.get_text() if text else None
def find_categories(summary):
    """Return the category names listed in *summary*.

    The categories are the texts of the ``<a>`` elements inside the ``<div>``
    whose class attribute is ``element element-itemcategory last``.

    :param summary: HTML fragment (the entry summary) to search.
    :returns: a list of category strings; empty when *summary* is empty or
        does not contain the category ``<div>``.
    """
    if not summary:
        return []
    # Name the parser explicitly: without it bs4 picks whichever parser is
    # installed (and warns), so results can differ between environments.
    soup = BeautifulSoup(summary, "html.parser")
    container = soup.find("div", {"class": "element element-itemcategory last"})
    if container is None:
        # Entries without the category block used to raise AttributeError.
        return []
    # Build a real list: map() is lazy on Python 3, so its result is neither
    # indexable nor JSON-serializable.
    return [link.get_text() for link in container.find_all("a")]
def get_feeds(feed, f):
    """Parse a feed and convert each of its entries with *f*.

    :param feed: value substituted into ``URL_FEED`` to build the feed URL.
    :param f: callable applied to every parsed entry; its return value
        becomes the corresponding item of the result.
    :returns: a list with the result of ``f`` for each entry, in feed order.
    """
    parsed = feedparser.parse(URL_FEED % feed)
    converted = []
    for entry in parsed['entries']:
        converted.append(f(entry))
    return converted
# Example calls, one per feed; each passes its own entry converter and the
# returned lists are not stored.

# Raw summary plus the first image URL.
get_feeds(
    "calendario-odonto",
    lambda entry: {
        "title": entry.title,
        "link": entry.link,
        "summary": entry.summary,
        "image": find_picture(entry.summary),
    },
)

# Raw summary, first image URL and the category names.
get_feeds(
    "escuela/arquitectura",
    lambda entry: {
        "title": entry.title,
        "link": entry.link,
        "summary": entry.summary,
        "image": find_picture(entry.summary),
        "categories": find_categories(entry.summary),
    },
)

# Summary reduced to the text of its first <div>.
get_feeds(
    "AnunciosUft",
    lambda entry: {
        "title": entry.title,
        "link": entry.link,
        "summary": find_text(entry.summary),
    },
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment