Skip to content

Instantly share code, notes, and snippets.

@markchadwick
Created March 21, 2013 17:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save markchadwick/5215126 to your computer and use it in GitHub Desktop.
Save markchadwick/5215126 to your computer and use it in GitHub Desktop.
import pycurl
import sys
import time
from cStringIO import StringIO
def _release_handle(multi, curls, curl):
    """Detach a finished curl handle from the multi and free its resources."""
    multi.remove_handle(curl)
    curls.remove(curl)
    curl.close()


def fetch(requets, concurrent=5):
    """Fetch many URLs concurrently through a single pycurl multi handle.

    Args:
        requets: Iterable of ``(url, payload)`` pairs.  ``payload`` is an
            opaque value handed back alongside the response so the caller
            can match results to requests.  (The misspelled parameter name
            is kept so existing keyword callers continue to work.)
        concurrent: Maximum number of transfers kept in flight at once.

    Yields:
        ``('ok', (payload, body))`` for every transfer that completed, where
        ``body`` is the accumulated response body, and ``('fail', url)`` for
        every transfer that curl reported as failed.

    Raises:
        ValueError: If ``concurrent`` is smaller than 1 (nothing could ever
            be scheduled, so the loop would never finish).
    """
    if concurrent < 1:
        raise ValueError('concurrent must be at least 1')

    pending = iter(requets)
    multi = pycurl.CurlMulti()
    # Every in-flight handle must stay referenced from Python, otherwise it
    # is garbage collected while the multi is still using it.  The list also
    # doubles as the in-flight counter for the concurrency cap.
    curls = []
    unscheduled_reqs = True

    try:
        while unscheduled_reqs or curls:
            # Top the multi up until the concurrency cap is reached or the
            # request source runs dry.
            while unscheduled_reqs and len(curls) < concurrent:
                try:
                    url, payload = next(pending)
                except StopIteration:
                    unscheduled_reqs = False
                    break
                body = StringIO()
                curl = pycurl.Curl()
                curl.setopt(pycurl.URL, url)
                curl.setopt(pycurl.WRITEFUNCTION, body.write)
                # Stash per-request state on the handle so it can be
                # recovered when the transfer is reported back.
                curl.body = body
                curl.payload = payload
                curl.url = url
                curls.append(curl)
                multi.add_handle(curl)

            if not curls:
                break

            # Perform any curl requests that need to happen.
            ret = pycurl.E_CALL_MULTI_PERFORM
            while ret == pycurl.E_CALL_MULTI_PERFORM:
                ret, num_handles = multi.perform()

            # Deal with any completed or failed curls resolved so far.  The
            # values to yield are read *before* the handle is closed, and
            # the handle is released *before* yielding so nothing leaks if
            # the consumer stops iterating.
            completed = False
            while True:
                num_q, ok_list, err_list = multi.info_read()
                for c in ok_list:
                    result = (c.payload, c.body.getvalue())
                    _release_handle(multi, curls, c)
                    completed = True
                    yield 'ok', result
                for c, errno, errmsg in err_list:
                    failed_url = c.url
                    _release_handle(multi, curls, c)
                    completed = True
                    yield 'fail', failed_url
                if not num_q:
                    break

            # Nothing finished this round: wait at most one second for
            # socket activity.  If something did finish, loop straight back
            # so the freed slots are refilled without delay.
            if curls and not completed:
                multi.select(1.0)
    finally:
        # Runs on normal exhaustion, on errors, and when the consumer
        # abandons the generator early.
        for c in list(curls):
            _release_handle(multi, curls, c)
        multi.close()
def main(count, url):
    """Fetch ``url`` ``count`` times and report the achieved request rate.

    Args:
        count: Number of requests to issue.
        url: URL requested by every one of them.

    Prints the last result produced by ``fetch`` (if there was one) followed
    by the throughput in requests per second.
    """
    # Single-argument print calls behave the same with or without
    # ``print_function``, so the output is unchanged on Python 2.
    print('Getting %s from %s' % (count, url))
    requests = ((url, 'req-%s' % i) for i in range(count))

    start = time.time()
    last = None
    for last in fetch(requests, concurrent=100):
        pass
    delta = time.time() - start

    # With count == 0 the loop never runs, so there is no result to show.
    if last is not None:
        print('%s %s' % last)

    # A coarse clock can report zero elapsed time for very short runs.
    rate = count / delta if delta > 0 else 0.0
    print('%.02f req/s' % rate)
if __name__ == '__main__':
    # Expect exactly two arguments: the request count and the target URL.
    # sys.exit() with a string prints it to stderr and exits with status 1.
    if len(sys.argv) != 3:
        sys.exit('usage: %s COUNT URL' % sys.argv[0])
    count = int(sys.argv[1])
    url = sys.argv[2]
    sys.exit(main(count, url))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment