Skip to content

Instantly share code, notes, and snippets.

@yejianye
Created June 25, 2015 20:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save yejianye/2140e7158aa8cae48874 to your computer and use it in GitHub Desktop.
Save yejianye/2140e7158aa8cae48874 to your computer and use it in GitHub Desktop.
from gevent import monkey
monkey.patch_all()
import requests
import gevent
import traceback
from gevent.pool import Pool
def imap(urls, size, break_time=None):
    """Fetch every URL in ``urls`` concurrently and iterate over the bodies.

    Each URL is downloaded with ``requests.get`` by a worker function that is
    mapped over ``urls`` through a ``gevent.pool.Pool``.  A download that
    raises a ``requests`` exception is retried until it succeeds; there is no
    retry limit, so a permanently failing URL is retried forever.

    Args:
        urls: Iterable of URL strings to fetch.
        size: Size passed to ``gevent.pool.Pool``.
        break_time: Optional pause, passed to ``gevent.sleep``, between a
            failed attempt and the next retry.  A falsy value (``None`` or
            ``0``) retries immediately.

    Returns:
        The iterator produced by ``Pool.imap_unordered``; it yields the
        ``content`` of each response, not necessarily in input order.
    """
    # Built once: the headers are identical for every request and every retry.
    headers = {
        'User-agent': 'Mozilla/5.0 (Macintosh; Intel '
                      'Mac OS X 10_10_3) AppleWebKit'
                      '/537.36 (KHTML, like Gecko) '
                      'Chrome/43.0.2357.124 Safari/537.36',
    }
    pool = Pool(size)

    def send(url):
        """Download ``url``, retrying on request errors, and return its body."""
        # Parenthesised single-argument print works on Python 2 and Python 3.
        print(url)
        while True:
            try:
                response = requests.get(url, timeout=10, headers=headers)
                return response.content
            except requests.RequestException:
                # RequestException is the base class of ConnectionError and
                # HTTPError, so catching it alone covers all three.
                traceback.print_exc()
                if break_time:
                    gevent.sleep(break_time)

    return pool.imap_unordered(send, urls)
def urls(amount, prefix='https://www.baidu.com/?a='):
    """Generate ``amount`` test URLs of the form ``prefix + str(i)``.

    Args:
        amount: Number of URLs to produce; indices run from 0 to amount - 1.
        prefix: Text placed before the index.  Defaults to the Baidu URL the
            script originally hard-coded; pass e.g. 'http://s.taobao.com/?a='
            to target a different site.

    Yields:
        One URL string per index, in increasing index order.
    """
    # range() rather than xrange() so the generator also runs on Python 3
    # (on Python 2 this builds the index list up front).
    for i in range(amount):
        yield prefix + str(i)
# Driver: fetch 1000 generated URLs through a pool of size 20 and print the
# length of each response body as it becomes available.
for body in imap(urls(1000), size=20):
    # Parenthesised single-argument print gives the same output on Python 2
    # and is valid syntax on Python 3.
    print(len(body))
print('Finished')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment