@ncweinhold · Created April 17, 2011
Quick concurrent f7u12 image downloader - learning gevent
import gevent
from gevent import monkey

# Patch blocking stdlib modules (sockets in particular) before urllib2 is
# imported, so its network calls become cooperative and can run concurrently.
monkey.patch_all()

import os
import urllib2

from BeautifulSoup import BeautifulSoup

front_page = "http://www.reddit.com/r/fffffffuuuuuuuuuuuu/"
def get_links(url):
    """Given a url corresponding to the particular reddit page, parse the html getting all imgur links"""
    page = urllib2.urlopen(url).read()
    html = BeautifulSoup(page)
    # The trailing space in "thumbnail " is deliberate: BeautifulSoup 3
    # matches the class attribute string exactly as it appears in the markup.
    links = html.findAll(attrs={"class": "thumbnail "})
    return links
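# For reference, with the newer bs4 package the equivalent lookup would look
# like the sketch below (assuming bs4 is installed; bs4 matches individual
# class names rather than the exact attribute string). Not used by this script.
#
#     from bs4 import BeautifulSoup
#     html = BeautifulSoup(page, "html.parser")
#     links = html.find_all("a", class_="thumbnail")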
def download_file(url):
    """Given a url referring to an image on imgur, download this to a local file"""
    # Links often point at an imgur page rather than the image itself;
    # appending .png gives a direct image URL (an imgur convention).
    if not url.endswith(('.png', '.jpg', '.gif')):
        url = url + ".png"
    filename = os.path.basename(url)
    f = open(filename, "wb")
    img = urllib2.urlopen(url)
    # Copy the response to disk in 1 KB chunks.
    while True:
        data = img.read(1024)
        if not data:
            break
        f.write(data)
    f.close()
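# Note: the chunked read/write loop above is equivalent to the stdlib call
#     shutil.copyfileobj(img, f)
# (after "import shutil"); the explicit loop is kept here for clarity.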
if __name__ == "__main__":
    # One greenlet per thumbnail link; joinall waits up to 15 seconds
    # for the downloads to finish.
    imgs = get_links(front_page)
    jobs = [gevent.spawn(download_file, img['href']) for img in imgs]
    gevent.joinall(jobs, timeout=15)
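# A minimal variant (a sketch, not used above): gevent.pool.Pool caps how many
# greenlets run at once, which is kinder to imgur than spawning one greenlet
# per link. Pool, spawn, and join are standard gevent APIs; the pool size of 8
# is an arbitrary choice.
#
#     from gevent.pool import Pool
#
#     pool = Pool(8)
#     for img in imgs:
#         pool.spawn(download_file, img['href'])
#     pool.join()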