
Fetches Mila Kunis pics from http://reddit.com/r/milakunis. It doesn't use the Reddit API yet; I'll work on that later. I just wanted to download a few pics fast, so I threw this together. It downloaded 28 pics and then got restricted by Reddit. I'll also add a request header later (a sketch of that is below the script).

getKunisPics.py
Python
from BeautifulSoup import BeautifulSoup
import urllib2
import os
import re

def getLinks(URL, pics=None):
    # Collect imgur links from a subreddit listing page, following the
    # "next" pagination link recursively.
    if pics is None:
        pics = []
    print len(pics), "pics yet."
    print "fetching", URL
    try:
        page = urllib2.urlopen(URL)
    except urllib2.HTTPError:
        print "Reddit restricts bots"
        print "Couldn't fetch"
        print URL
        print "Returning all the pics. Hope it works."
        print len(pics)
        return pics
    print "page fetched"
    soup = BeautifulSoup(page.read())

    links = soup.findAll("a")
    for link in links:
        pic_url = link.get("href")
        if pic_url and "http://i.imgur.com/" in pic_url:
            pics.append(pic_url)
    pics = list(set(pics))  # drop duplicate links
    next_link = soup.find("a", rel="nofollow next")
    if next_link:
        next_url = next_link.get("href")
        if next_url:
            pics = getLinks(next_url, pics)
    return pics
def fetchPics(pics, default_dir):
    # Download each collected imgur link and save it under default_dir.
    print len(pics), "pics"
    for pic in pics:
        data = urllib2.urlopen(pic)
        filename = re.split("/", pic)[-1]
        filename = os.path.join(default_dir, "Mila_Kunis_" + filename)
        print "downloading and saving to", filename
        f = open(filename, "wb")
        f.write(data.read())
        f.close()

URLs = ["http://www.reddit.com/r/MilaKunis/",
        "http://www.reddit.com/r/MilaKunis/search?q=wallpaper&restrict_sr=on"]
default_dir = os.path.join(os.path.expanduser("~"), "Pictures")
for URL in URLs:
    pics = getLinks(URL)
    fetchPics(pics, default_dir)
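
Since the script runs into Reddit's bot restrictions, here is a minimal sketch of the header change mentioned in the description: sending a descriptive User-Agent with each request via urllib2.Request. The helper name and the User-Agent string are just placeholders, not anything Reddit prescribes.

import urllib2

def openWithHeader(URL):
    # Sketch: attach a descriptive User-Agent so the request is less
    # likely to be rejected as an anonymous bot. Placeholder string.
    req = urllib2.Request(URL, headers={"User-Agent": "getKunisPics script (personal use)"})
    return urllib2.urlopen(req)

The calls to urllib2.urlopen(URL) in getLinks and urllib2.urlopen(pic) in fetchPics could then be swapped for openWithHeader(...) once that's in place.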
