Skip to content

Instantly share code, notes, and snippets.

@kjoconnor
Created June 20, 2013 08:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kjoconnor/5821280 to your computer and use it in GitHub Desktop.
Save kjoconnor/5821280 to your computer and use it in GitHub Desktop.
Grab icons from RSS feed URLs via the awesome getfavicon.appspot.com service
import logging
import requests
import tldextract
from collections import deque
from urllib import quote_plus
from urlparse import urlparse, urlunparse
# Fallback icon URL written into the `defaulticon` query parameter by
# _replace_default_icon().
DEFAULT_ICON = (
    "http://mozorg.cdn.mozilla.net/media/img/"
    "trademarks/feed-icon-14x14.png"
)
def _getfavicon_url(url):
return "http://g.etfv.co/{url}?defaulticon=none"\
.format(url=quote_plus(url))
def _replace_default_icon(url):
    """Return *url* with its query string swapped for the default icon.

    Whatever query *url* carries is discarded and replaced by
    ``defaulticon=<DEFAULT_ICON>``; scheme, host, path, params and fragment
    are passed through unchanged.
    """
    scheme, netloc, path, params, _old_query, fragment = urlparse(url)
    new_query = u'defaulticon={default_icon}'.format(
        default_icon=DEFAULT_ICON
    )
    return urlunparse([scheme, netloc, path, params, new_query, fragment])
def get_icon(url, timeout=10):
    """Return a getfavicon icon URL for *url*, or ``None`` if none is found.

    The getfavicon service is queried for *url*.  On HTTP 200 the final
    response URL is returned with its query replaced by the default-icon
    parameter.  On HTTP 204 (treated here as "no icon for this host") the
    left-most label of the host name is dropped and the lookup is retried
    on the parent host, until only the public suffix would remain.  Any
    other status, an unparseable URL, or a network failure yields ``None``.

    :param url: feed or site URL to find an icon for.
    :param timeout: seconds passed to ``requests.get`` as its timeout.
    :returns: an icon URL string, or ``None``.
    """
    try:
        parsed_url = urlparse(url)
    except (AttributeError, TypeError, ValueError):
        logging.info(
            "Couldn't parse URL {url}, skipping icon update."
            .format(url=url)
        )
        # Nothing below can work without a parsed URL, so stop here rather
        # than falling through with `parsed_url` unbound.
        return None

    try:
        r = requests.get(_getfavicon_url(url), timeout=timeout)
    except requests.RequestException:
        # An unreachable service is reported the same way as a service
        # that answers with an unexpected status: no icon.
        logging.info(
            "Couldn't reach getfavicon for {url}, skipping icon update."
            .format(url=url)
        )
        return None

    if r.status_code == 200:
        return _replace_default_icon(r.url)

    if r.status_code != 204:
        # getfavicon service is broken
        return None

    # Try and go higher up the FQDN chain to see if we can get an icon
    parent_host = parsed_url[1].partition('.')[2]
    if not parent_host:
        # The host has no parent label (single-label host, or a URL with
        # no scheme and therefore an empty netloc).  Rebuilding the URL
        # would reproduce the same input and recurse forever.
        return None

    parent_parts = list(parsed_url)
    parent_parts[1] = parent_host
    parent_url = urlunparse(parent_parts)

    if tldextract.extract(parent_url)[2] == parent_host:
        # We've popped down to the TLD, give up
        return None

    return get_icon(parent_url, timeout=timeout)
if __name__ == "__main__":
    # Smoke test: look up an icon for a handful of real feeds and report
    # how many lookups produced an icon URL.
    feed_urls = [
        'http://rss.slashdot.org/Slashdot/slashdot',
        'http://feeds.joystiq.com/weblogsinc/joystiq',
        'http://www.jwz.org/blog/feed/',
        'http://feeds.feedburner.com/Ksplice',
        'http://blog.xkcd.com/feed/',
        'http://xkcd.com/rss.xml',
    ]

    successes = 0
    failures = []

    for feed_url in feed_urls:
        icon_url = get_icon(feed_url)
        if icon_url is None:
            failures.append(feed_url)
            print("No icon for %s" % feed_url)
            continue
        # A single parenthesised argument prints the same text whether
        # `print` is a statement or a function.
        print("Got %s for %s" % (icon_url, feed_url))
        successes += 1

    print("Successes: %s" % successes)
    print("Failures: %s" % failures)
@kjoconnor
Copy link
Author

This works on about 88% of feeds I test.

Notes:

  • getfavicon doesn't seem to like redirects
  • For FeedBurner or other RSS hosting services, it may make more sense to try the HtmlUrl element of the RSS feed instead of the XmlUrl, as all of them will just come back with the FeedBurner icon, which is pretty boring.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment