Created
June 20, 2013 08:56
-
-
Save kjoconnor/5821280 to your computer and use it in GitHub Desktop.
Grab icons from RSS feed URLs via the awesome getfavicon.appspot.com service
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import logging | |
import requests | |
import tldextract | |
from collections import deque | |
from urllib import quote_plus | |
from urlparse import urlparse, urlunparse | |
# Icon URL that _replace_default_icon() places in the `defaulticon` query
# parameter of the getfavicon URL.
DEFAULT_ICON = (
    "http://mozorg.cdn.mozilla.net/media/img/"
    "trademarks/feed-icon-14x14.png"
)
def _getfavicon_url(url): | |
return "http://g.etfv.co/{url}?defaulticon=none"\ | |
.format(url=quote_plus(url)) | |
def _replace_default_icon(url):
    """Return *url* with its query string set to ``defaulticon=DEFAULT_ICON``.

    Whatever query string *url* carried is overwritten; the scheme, host,
    path, params and fragment are passed through unchanged.
    """
    scheme, netloc, path, params, _old_query, fragment = urlparse(url)
    icon_query = u'defaulticon={default_icon}'.format(
        default_icon=DEFAULT_ICON
    )
    return urlunparse((scheme, netloc, path, params, icon_query, fragment))
def get_icon(url, timeout=10):
    """Return a getfavicon icon URL for *url*, or None if none is found.

    The getfavicon service is queried for *url*. On HTTP 200 the response
    URL is returned with its query replaced by ``defaulticon=DEFAULT_ICON``.
    On HTTP 204 the left-most label of the host name is dropped and the
    lookup is retried, until only the public suffix remains. Any other
    status, an unparsable URL, or a failed request yields None.

    Args:
        url: Feed (or site) URL to find an icon for.
        timeout: Seconds to wait for the getfavicon service before giving
            up; passed straight to ``requests.get``.

    Returns:
        The icon URL as a string, or None when no icon could be obtained.
    """
    try:
        parsed_url = list(urlparse(url))
    except (ValueError, AttributeError, TypeError):
        # Without a parsed URL there is no host name to walk up, so stop
        # here instead of querying the service and failing later on the
        # undefined `parsed_url`.
        logging.info("Couldn't parse URL %s, skipping icon update.", url)
        return None

    try:
        r = requests.get(_getfavicon_url(url), timeout=timeout)
    except requests.RequestException:
        # Treat an unreachable or hanging service like a broken one.
        logging.info("getfavicon request failed for %s, skipping.", url)
        return None

    if r.status_code == 200:
        return _replace_default_icon(r.url)

    if r.status_code != 204:
        # getfavicon service is broken
        return None

    # Try and go higher up the FQDN chain to see if we can get an icon:
    # drop everything up to and including the first dot of the host.
    _, _, parent_host = parsed_url[1].partition('.')
    parsed_url[1] = parent_host
    new_url = urlunparse(parsed_url)

    # Index 2 of the tldextract result is the public suffix; indexing is
    # used because the attribute name differs between tldextract versions.
    if tldextract.extract(new_url)[2] == parent_host:
        # We've popped down to the TLD, give up
        return None

    return get_icon(new_url, timeout=timeout)
if __name__ == "__main__":
    # Manual smoke test: look up an icon for a handful of real feeds and
    # print a per-feed result followed by a summary.
    sample_feeds = [
        'http://rss.slashdot.org/Slashdot/slashdot',
        'http://feeds.joystiq.com/weblogsinc/joystiq',
        'http://www.jwz.org/blog/feed/',
        'http://feeds.feedburner.com/Ksplice',
        'http://blog.xkcd.com/feed/',
        'http://xkcd.com/rss.xml',
    ]

    found = 0
    missing = []

    for feed in sample_feeds:
        icon_url = get_icon(feed)
        if icon_url is None:
            missing.append(feed)
            print("No icon for %s" % feed)
            continue
        print("Got %s for %s" % (icon_url, feed))
        found += 1

    print("Successes: %s" % found)
    print("Failures: %s" % missing)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This works on about 88% of feeds I test.
Notes: