Created
March 21, 2013 17:54
-
-
Save markchadwick/5215126 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pycurl | |
import sys | |
import time | |
from cStringIO import StringIO | |
def fetch(requets, concurrent=5):
    """Fetch many URLs concurrently through one ``pycurl.CurlMulti``.

    This is a generator; results are yielded as transfers finish, not in
    request order.

    Args:
        requets: Iterable of ``(url, payload)`` pairs.  ``payload`` is an
            opaque value handed back alongside the response body.  (The
            misspelled parameter name is kept for backward compatibility.)
        concurrent: Maximum number of transfers allowed in flight at once.

    Yields:
        ``('ok', (payload, body))`` for every transfer reported as complete,
        where ``body`` is everything written to that transfer's buffer, and
        ``('fail', url)`` for every transfer reported as failed.

    Raises:
        ValueError: If ``concurrent`` is smaller than 1 (nothing could ever
            be scheduled, so the loop would never finish).
    """
    if concurrent < 1:
        raise ValueError('concurrent must be at least 1')

    # Accept any iterable, not only objects that expose a .next() method.
    pending = iter(requets)
    multi = pycurl.CurlMulti()

    # Keep a reference to every in-flight handle.  The original author noted
    # that untracked handles get garbage collected and silently disappear.
    # The list length also serves as the in-flight count for the cap.
    curls = []
    unscheduled_reqs = True

    def release(handle):
        # Detach a finished handle from the multi and free its resources.
        multi.remove_handle(handle)
        curls.remove(handle)
        handle.close()

    try:
        while unscheduled_reqs or curls:
            # Top up to the concurrency cap with requests not yet started.
            while unscheduled_reqs and len(curls) < concurrent:
                try:
                    url, payload = next(pending)
                except StopIteration:
                    unscheduled_reqs = False
                    break

                body = StringIO()
                curl = pycurl.Curl()
                curl.setopt(pycurl.URL, url)
                curl.setopt(pycurl.WRITEFUNCTION, body.write)
                # Stash per-request state on the handle so it can be
                # recovered when info_read() hands the handle back.
                curl.url = url
                curl.body = body
                curl.payload = payload
                curls.append(curl)
                multi.add_handle(curl)

            # Perform any curl requests that need to happen.
            ret = pycurl.E_CALL_MULTI_PERFORM
            while ret == pycurl.E_CALL_MULTI_PERFORM:
                ret, _running = multi.perform()

            # Wait at most one second for a file descriptor to become
            # ready.  Restart the loop if select() reports -1.
            if multi.select(1.0) == -1:
                continue

            # Finally, deal with any completed or failed transfers that were
            # resolved in this pass.  Each handle is released *before* the
            # yield so that a consumer abandoning the generator mid-way
            # cannot leave a finished handle attached to the multi.
            while True:
                num_q, ok_list, err_list = multi.info_read()
                for c in ok_list:
                    result = (c.payload, c.body.getvalue())
                    release(c)
                    yield 'ok', result
                for c, _errno, _errmsg in err_list:
                    failed_url = c.url
                    release(c)
                    yield 'fail', failed_url
                if not num_q:
                    break
    finally:
        # Runs on normal exhaustion, on error, and when the generator is
        # closed early: free whatever is still in flight.
        for c in list(curls):
            multi.remove_handle(c)
            c.close()
        del curls[:]
        multi.close()
def main(count, url):
    """Request *url* ``count`` times through ``fetch`` and report throughput.

    Every result yielded by ``fetch`` is printed as it arrives; once all
    results are in, the overall rate (``count`` divided by the elapsed
    seconds) is printed.
    """
    print('Getting %s from %s' % (count, url))
    # Every job targets the same URL; the payload is a per-request label.
    jobs = ((url, 'req-%s' % index) for index in range(count))
    started_at = time.time()
    for status, result in fetch(jobs, concurrent=100):
        print('%s %s' % (status, result))
    elapsed = time.time() - started_at
    print('%.02f req/s' % (count / elapsed))
if __name__ == '__main__':
    # Command line: <script> COUNT URL
    count, url = int(sys.argv[1]), sys.argv[2]
    sys.exit(main(count, url))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment