@jayrambhia
Created February 8, 2013 23:32
Fetches Mila Kunis pics from http://reddit.com/r/milakunis. Doesn't use the Reddit API; I'll work on that later. I wanted to download a few pics fast, so I threw this together. It got 28 pics before reddit restricted the bot. I'll also add a header later (there's a sketch of that below the script).
from BeautifulSoup import BeautifulSoup  # BeautifulSoup 3 (Python 2)
import urllib2
import os

def getLinks(URL, pics=None):
    # Default to None rather than []: a mutable default list would be
    # shared across separate top-level calls.
    if pics is None:
        pics = []
    print len(pics), "pics yet."
    print "fetching", URL
    try:
        page = urllib2.urlopen(URL)
    except urllib2.HTTPError:
        print "Reddit restricts bots"
        print "Couldn't fetch", URL
        print "Returning all the pics. Hope it works."
        print len(pics)
        return pics
    print "page fetched"
    soup = BeautifulSoup(page.read())
    # Collect every anchor that points at an imgur image.
    for link in soup.findAll("a"):
        pic_url = link.get("href")
        if pic_url and "http://i.imgur.com/" in pic_url:
            pics.append(pic_url)
    pics = list(set(pics))
    # Follow reddit's "next" pagination link, if any. Return the
    # recursive result; the original dropped it after rebinding pics.
    next_link = soup.find("a", rel="nofollow next")
    if next_link:
        next_url = next_link.get("href")
        if next_url:
            return getLinks(next_url, pics)
    return pics

def fetchPics(pics, default_dir):
    print len(pics), "pics"
    for pic in pics:
        # The original called an undefined `opener`; urllib2.urlopen
        # does the job directly.
        data = urllib2.urlopen(pic)
        filename = "Mila_Kunis_" + pic.split("/")[-1]
        filename = os.path.join(default_dir, filename)
        print "downloading and saving to", filename
        f = open(filename, "wb")
        f.write(data.read())
        f.close()

#URL = "http://www.reddit.com/r/MilaKunis/search?q=wallpaper&restrict_sr=on"
URLs = ["http://www.reddit.com/r/MilaKunis/",
        "http://www.reddit.com/r/MilaKunis/search?q=wallpaper&restrict_sr=on"]
default_dir = os.path.join(os.path.expanduser("~"), "Pictures")
for URL in URLs:
    pics = getLinks(URL)
    fetchPics(pics, default_dir)
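
About that missing header: reddit is known to throttle urllib2's default user agent, which is likely what stopped the run after 28 pics. A minimal sketch of the fix, assuming a descriptive User-Agent string is all reddit wants; the openURL helper and the agent string are illustrative, not part of the original script:

import urllib2

def openURL(URL):
    # Hypothetical helper: identify the bot with its own User-Agent
    # instead of urllib2's default, which reddit rejects.
    request = urllib2.Request(URL, headers={"User-Agent": "mila-pics-fetcher/0.1"})
    return urllib2.urlopen(request)

Swapping openURL in for the bare urllib2.urlopen calls in getLinks and fetchPics, plus a short time.sleep between requests, should get past the 28-pic wall; reddit's API rules ask bots to identify themselves with a unique User-Agent anyway.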