Last active
December 17, 2015 22:29
-
-
Save acceptable-security/5682592 to your computer and use it in GitHub Desktop.
Downloads every octocat image listed in the Octocat RSS feed. Requires feedparser and Beautiful Soup 4; written for Python 2.7 (2.7.4).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import feedparser | |
import urllib | |
from bs4 import * | |
from urllib2 import urlopen | |
def urlexists(url):
    """Return True if *url* can be opened, False otherwise.

    url -- the URL to probe, passed unchanged to ``urlopen``.

    Any ordinary exception raised while opening the URL is reported as
    False rather than propagated.
    """
    try:
        response = urlopen(url)
    except Exception:
        # Best-effort probe: every ordinary failure means "not reachable".
        # Unlike a bare ``except:``, this still lets KeyboardInterrupt and
        # SystemExit through, so the script can be interrupted.
        return False
    # Only the fact that the open succeeded matters; close the response so
    # the underlying connection is not leaked.
    response.close()
    return True
def getimgfromsummary(summary):
    """Return the ``src`` of the first ``<img>`` tag in an HTML fragment.

    summary -- HTML markup to search (the caller passes a feed entry's
               summary).

    Returns the value of the tag's ``src`` attribute, or None when the
    markup contains no ``<img>`` tag or the tag has no ``src`` attribute.
    """
    # Name the parser explicitly: without it Beautiful Soup picks whichever
    # parser happens to be installed (and warns about it), so the same
    # markup could be parsed differently on different machines.
    soup = BeautifulSoup(summary, "html.parser")
    image = soup.img
    if image is None:
        # No <img> present; report that as None instead of failing with an
        # AttributeError raised on None.
        return None
    return image.get('src')
def pngorjpg(fname, base_url="http://octodex.github.com/images/"):
    """Append an image extension to *fname*, preferring ``.png``.

    fname    -- image name without an extension.
    base_url -- URL prefix the image is looked up under; defaults to the
                octodex image directory.

    Returns ``fname + ".png"`` when ``base_url + fname + ".png"`` can be
    opened, otherwise ``fname + ".jpg"``. The ``.jpg`` variant is a
    fallback and is not itself checked.
    """
    # Reuse the module's urlexists() probe instead of repeating its
    # open-and-catch logic here.
    if urlexists(base_url + fname + ".png"):
        return fname + ".png"
    return fname + ".jpg"
def downloadurl(url, file):
    """Retrieve *url* and store it at the local path *file*.

    url  -- address of the resource to fetch.
    file -- destination path on disk (the name shadows the Python 2
            builtin but is kept so existing callers keep working).
    """
    urllib.urlretrieve(url, filename=file)
def _local_filename(title, image_url):
    """Build the local file name for a feed entry from its title and image URL."""
    # Whatever follows the last dot of the URL is used as the extension.
    ext = image_url.rsplit('.', 1)[-1]
    fname = title.lower()
    if fname.endswith(" v2"):
        # Strip only the trailing " v2". Splitting on the first occurrence
        # would truncate a title that contains " v2" more than once.
        fname = fname[:-len(" v2")] + "_v2"
    fname = fname.replace(" ", "")
    return fname + "." + ext


# Parse the Octocat feed and download the image of every entry. File names
# are relative, so images are written relative to the working directory.
# Single-argument print(...) calls behave exactly like the Python 2 print
# statement while also being valid Python 3 syntax.
url = "http://feeds.feedburner.com/Octocats"
feed = feedparser.parse(url)
items = feed["items"]
print("indexing")
for item in items:
    name = item["title"]
    iurl = getimgfromsummary(item["summary"])
    if not iurl:
        # getimgfromsummary() can return None (e.g. an <img> tag without a
        # src attribute); there is nothing to download for such an entry.
        print("Skipping " + name + " (no image found)")
        continue
    # The image URL is taken straight from the feed entry rather than
    # being rebuilt from the title.
    fname = _local_filename(name, iurl)
    downloadurl(iurl, fname)
    print("Indexed " + name)
print("Done indexing")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment