Skip to content

Instantly share code, notes, and snippets.

@acceptable-security
Last active December 17, 2015 22:29
Show Gist options
  • Save acceptable-security/5682592 to your computer and use it in GitHub Desktop.
Save acceptable-security/5682592 to your computer and use it in GitHub Desktop.
Downloads every single octocat image in the Octocats RSS feed. Requires feedparser and Beautiful Soup 4; written in/for Python 2.7 (2.7.4).
import feedparser
import urllib
from bs4 import *
from urllib2 import urlopen
def urlexists(url):
    """Return True if *url* can be opened with ``urlopen``, else False.

    Args:
        url: The URL string to probe.

    Returns:
        True when ``urlopen(url)`` succeeds, False when it raises any
        ordinary exception (malformed URL, HTTP error, network failure...).
    """
    try:
        response = urlopen(url)
    except Exception:
        # Deliberately broad: any failure to open means "does not exist".
        # Unlike a bare ``except:``, this lets KeyboardInterrupt and
        # SystemExit propagate so the script can still be interrupted.
        return False
    # Only reachability matters; release the handle instead of leaking it.
    response.close()
    return True
def getimgfromsummary(summary):
    """Return the ``src`` of the first ``<img>`` tag found in *summary*.

    Args:
        summary: HTML fragment to search (the script passes each feed
            entry's ``summary`` field).

    Returns:
        The ``src`` attribute value of the first ``<img>`` tag, or ``None``
        when the fragment contains no ``<img>`` tag or the tag has no
        ``src`` attribute.
    """
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed on the machine.
    soup = BeautifulSoup(summary, "html.parser")
    image = soup.img
    if image is None:
        # No image in this fragment; report that rather than failing with
        # an AttributeError on None.
        return None
    return image.get('src')
def pngorjpg(fname, base_url="http://octodex.github.com/images/"):
    """Pick the ``.png`` or ``.jpg`` filename for an image base name.

    Tries to open ``base_url + fname + ".png"``. If that succeeds the PNG
    name is returned; on any failure the JPG name is returned instead.
    The JPG URL itself is never checked.

    Args:
        fname: Image name without extension.
        base_url: URL prefix the image name is appended to. Defaults to
            the octodex image directory.

    Returns:
        ``fname + ".png"`` if the PNG URL could be opened, otherwise
        ``fname + ".jpg"``.
    """
    try:
        response = urlopen(base_url + fname + ".png")
    except Exception:
        # Any failure to open the PNG falls back to JPG. ``Exception``
        # (not a bare ``except:``) keeps Ctrl-C / SystemExit working.
        return fname + ".jpg"
    # Only the probe result matters; close the handle instead of leaking it.
    response.close()
    return fname + ".png"
def downloadurl(url, file):
    """Fetch *url* and store it at the local path *file*.

    Thin wrapper around ``urllib.urlretrieve``; returns nothing.
    """
    urllib.urlretrieve(url, filename=file)
# Walk the Octocats feed and save every entry's image into the current
# working directory, named after the entry title.
url = "http://feeds.feedburner.com/Octocats"
feed = feedparser.parse(url)
items = feed["items"]

print("indexing")
for entry in items:
    image_url = getimgfromsummary(entry["summary"])
    # Reuse whatever follows the last dot of the image URL as the extension.
    extension = image_url.rsplit('.', 1)[-1]
    title = entry["title"]

    # Local filename: lowercased title, a trailing " v2" becomes "_v2",
    # and all remaining spaces are removed.
    stem = title.lower()
    if stem.endswith(" v2"):
        stem = stem.partition(" v2")[0] + "_v2"
    target = stem.replace(" ", "") + "." + extension

    downloadurl(image_url, target)
    print("Indexed " + title)
print("Done indexing")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment