Skip to content

Instantly share code, notes, and snippets.

@140am
Created June 11, 2014 21:59
Show Gist options
  • Save 140am/c78754cb116868b91dfd to your computer and use it in GitHub Desktop.
""" Python HTTP client implementations benchmark
# Bandwidth Throughput
INFO:__main__:TEST: pycurl
INFO:__main__:pycurl: Returned status in 17 ms
INFO:__main__:Completed read of 1527248057 bytes in 15410 ms at 756 Mbps
INFO:__main__:TEST: urllib2
INFO:__main__:urllib2: Returned status in 18 ms
INFO:__main__:Completed read of 1527248057 bytes in 14554 ms at 800 Mbps
INFO:__main__:TEST: urllib3
INFO:__main__:urllib3: Returned status in 3 ms
INFO:__main__:Completed read of 1527248057 bytes in 15352 ms at 758 Mbps
INFO:__main__:TEST: requests
INFO:__main__:requests: Returned status in 35 ms
INFO:__main__:Completed read of 1527248057 bytes in 14534 ms at 801 Mbps
# CPU / Memory Usage
-- curl
32219 cdn 20 0 169396 7664 3584 S 25.6 0.0 0:02.52 python
-- urllib2
32281 cdn 20 0 189632 9428 3920 S 29.5 0.0 0:06.09 python
-- urllib3
33218 cdn 20 0 202200 11476 3852 S 27.6 0.0 0:02.44 python
-- urllib3 + gevent
33286 cdn 20 0 285612 12764 4260 R 80.7 0.0 0:06.58 python
-- requests
32219 cdn 20 0 213052 13892 4268 S 37.4 0.0 0:16.52 python
"""
import time
import os
import pycurl
import cStringIO
import asyncore
import socket
import logging
logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)
class HTTPClient(asyncore.dispatcher):
    """Minimal asynchronous HTTP/1.1 GET client for the asyncore benchmark.

    Connects to ``host`` on port 80, sends a GET for ``path``, and counts
    received bytes in ``self.cc``; the payload itself is discarded (only
    throughput is measured).
    """

    def __init__(self, host, path):
        asyncore.dispatcher.__init__(self)
        self.create_socket(socket.AF_INET, socket.SOCK_STREAM)
        self.connect((host, 80))
        # BUG FIX: HTTP requires CRLF ("\r\n") line terminators for every
        # header line (RFC 7230 sec. 3). The original used bare "\n" for all
        # but the final line, which strict servers reject.
        self.buffer = (
            'GET %s HTTP/1.1\r\n'
            'Accept: */*\r\n'
            'Accept-Encoding: gzip, deflate, compress\r\n'
            'Host: %s\r\n'
            'User-Agent: Beluga CDNode/0.9.0\r\n\r\n'
        ) % (path, host)
        self.cc = 0  # cumulative count of response bytes received

    def handle_connect(self):
        pass

    def handle_close(self):
        self.close()

    def handle_read(self):
        # Read up to 64 KiB per readiness event; only the length is kept.
        buf = self.recv(1024 * 64)
        self.cc += len(buf)
        return buf

    def writable(self):
        # Remain write-interested until the entire request has been sent.
        return (len(self.buffer) > 0)

    def handle_write(self):
        sent = self.send(self.buffer)
        self.buffer = self.buffer[sent:]
def print_stat(time_s, file_size):
    """Log transfer size, elapsed time and throughput.

    :param time_s: transfer start time in milliseconds (``time.time() * 1000``)
    :param file_size: number of bytes transferred
    """
    runtime = (time.time() * 1000) - time_s
    # BUG FIX: guard against ZeroDivisionError for instantaneous transfers
    # (cached response, coarse clock, or clock skew).
    if runtime <= 0:
        runtime = 1
    # bytes -> bits per second, then scaled by 1024*1024 ("Mbps" here is
    # mebibit-based, matching the figures quoted in the module docstring).
    download_rate = (file_size / (runtime / 1000)) * 8
    download_rate = download_rate / 1024 / 1024
    log.info('Completed read of %i bytes in %i ms at %i Mbps' % (
        file_size, runtime, download_rate
    ))
if __name__ == "__main__":
    # Full candidate list; the next assignment narrows the run to a single
    # client (debug leftover -- edit this line to select which tests execute).
    test = ['pycurl', 'requests', 'urllib2', 'urllib3']
    test = ['urllib3']
    file_input = 'http://pcdn.adam.gs/test.mp4'
    file_output = 'test.mp4'
    CHUNK_SIZE = 1024 * 64  # 64 KiB read size used by every streaming test

    # ---- geventhttpclient: CON concurrent greenlets share one client ----
    if test and 'geventhttpclient' in test:
        log.info('TEST: geventhttpclient')

        def fetch_page(http, url):
            # One greenlet: issue a GET and drain the body, counting bytes.
            # Closes over CHUNK_SIZE and gevent from the enclosing scope.
            log.info('gevent greenlet')
            file_size = 0
            time_s = time.time() * 1000
            response = http.get(url.request_uri)
            assert response.status_code == 200
            log.info('geventhttpclient: Returned status in %i ms' % (
                (time.time() * 1000) - time_s
            ))
            chunk = response.read(CHUNK_SIZE)
            while chunk:
                file_size += len(chunk)
                chunk = response.read(CHUNK_SIZE)
                gevent.sleep(0)  # yield so the other greenlets can run
            print_stat(time_s, file_size)

        import gevent
        import gevent.pool
        from geventhttpclient.url import URL
        url = URL(file_input)
        from geventhttpclient import HTTPClient as GeventHTTPClient
        http = GeventHTTPClient.from_url(url, concurrency=10)
        CON = 2  # number of concurrent download greenlets
        pool = gevent.pool.Pool(CON)
        for i in range(CON):
            log.info('connecting to: %s' % url)
            pool.spawn(fetch_page, http, url)
        """
        with open(file_output, 'w') as fp:
            data = response.read(CHUNK_SIZE)
            while data:
                fp.write(data)
                data = response.read(CHUNK_SIZE)
        """
        pool.join()
        http.close()
        time.sleep(1)

    # ---- pycurl: libcurl with header/body callbacks ---------------------
    if test and 'pycurl' in test:
        log.info('TEST: pycurl')
        time_s = time.time() * 1000
        file_size = 0
        temp_buffer_head = cStringIO.StringIO()
        temp_buffer_body = cStringIO.StringIO()
        printed_header = None  # flag: time-to-first-header logged yet?

        def temp_write_header(header):
            # HEADERFUNCTION callback: log time-to-first-header once, then
            # accumulate the raw header lines in temp_buffer_head.
            global printed_header
            if not printed_header:
                printed_header = True
                log.info('pycurl: Returned status in %i ms' % (
                    (time.time() * 1000) - time_s
                ))
            # look for first CRLF after header response
            if header.find('\r\n\r\n') != -1:
                log.warn('HEADER DONE')
            temp_buffer_head.write(header)
            return len(header)

        def temp_write_func(chunk):
            # WRITEFUNCTION callback: discard the body, report bytes consumed
            # (returning len(chunk) tells libcurl the data was handled).
            #log.info('READ CHUNK: %i' % len(chunk))
            return len(chunk)

        def print_progress(download_t, download_d, upload_t, upload_d):
            # Optional PROGRESSFUNCTION callback (disabled below).
            log.info(
                "Total to download %d bytes, have %d bytes so far" % (
                    download_t, download_d
                ))

        curl = pycurl.Curl()
        curl.setopt(pycurl.URL, file_input)
        curl.setopt(pycurl.CONNECTTIMEOUT, 10)
        curl.setopt(pycurl.TIMEOUT, 300)
        curl.setopt(pycurl.HTTPHEADER, ["Accept:"])  # suppress default Accept
        #curl.setopt(pycurl.WRITEHEADER, temp_buffer_head)
        #curl.setopt(pycurl.WRITEDATA, temp_buffer_body)
        curl.setopt(pycurl.HEADERFUNCTION, temp_write_header)
        curl.setopt(pycurl.WRITEFUNCTION, temp_write_func)
        curl.setopt(curl.NOPROGRESS, 1)
        #curl.setopt(curl.PROGRESSFUNCTION, print_progress)
        curl.setopt(pycurl.FOLLOWLOCATION, 1)
        curl.setopt(pycurl.MAXREDIRS, 5)
        #curl.setopt(curl.NOBODY, True)
        curl.setopt(curl.USERAGENT, "Mozilla/5.0 (compatible; pycurl)")
        curl.perform()  # blocks until the transfer completes
        file_size = curl.getinfo(curl.CONTENT_LENGTH_DOWNLOAD)
        log.info("HTTP-code: %s" % curl.getinfo(curl.HTTP_CODE))
        log.info("Total-time: %s" % curl.getinfo(curl.TOTAL_TIME))
        log.info("Document size: %d bytes" % curl.getinfo(curl.SIZE_DOWNLOAD))
        log.info("Effective URL: %s" % curl.getinfo(curl.EFFECTIVE_URL))
        log.info("Content-type: %s" % curl.getinfo(curl.CONTENT_TYPE))
        log.info("Namelookup-time: %s" % curl.getinfo(curl.NAMELOOKUP_TIME))
        log.info("Redirect-time: %s" % curl.getinfo(curl.REDIRECT_TIME))
        log.info("Redirect-count: %s" % curl.getinfo(curl.REDIRECT_COUNT))
        epoch = curl.getinfo(curl.INFO_FILETIME)
        log.info("Filetime: %d (%s)" % (epoch, time.ctime(epoch)))
        temp_buffer_head.flush()
        log.info('HEADER: %s' % temp_buffer_head.getvalue())
        curl.close()
        temp_buffer_head.close()
        temp_buffer_body.close()
        print_stat(time_s, file_size)
        time.sleep(1)

    # ---- urllib2 over gevent monkey-patched sockets ----------------------
    if test and 'urllib2' in test:
        log.info('TEST: urllib2')
        time_s = time.time() * 1000
        file_size = 0
        import gevent
        from gevent import monkey
        # patch_all() must run before urllib2 is imported so its sockets are
        # gevent-cooperative.
        monkey.patch_all()
        #import geventhttpclient.httplib
        #geventhttpclient.httplib.patch()
        import urllib2
        req = urllib2.urlopen(file_input)
        log.info('urllib2: Returned status in %i ms' % (
            (time.time() * 1000) - time_s
        ))
        while True:
            chunk = req.read(CHUNK_SIZE)
            if not chunk: break
            file_size += len(chunk)
            #output.write(chunk)
            gevent.sleep(0)
        print_stat(time_s, file_size)
        time.sleep(1)

    # ---- urllib3 with a PoolManager --------------------------------------
    if test and 'urllib3' in test:
        log.info('TEST: urllib3')
        file_size = 0
        from gevent import monkey
        monkey.patch_all()
        import urllib3
        # LRU of 10 connections
        # (num_pools is the number of per-host pools PoolManager keeps)
        http = urllib3.PoolManager(
            num_pools=10
        )
        # HEAD request first -- presumably to warm up the connection so the
        # GET timing below excludes setup cost; TODO confirm intent.
        req = http.request(
            method='HEAD',
            url=file_input,
            preload_content=False
        )
        time_s = time.time() * 1000
        req = http.request(
            method='GET',
            url=file_input,
            preload_content=False
        )
        log.info('urllib3: Returned status in %i ms' % (
            (time.time() * 1000) - time_s
        ))
        for chunk in req.stream():
            if not chunk: break
            file_size += len(chunk)
            #output.write(chunk)
        req.release_conn()  # return the connection to the pool
        print_stat(time_s, file_size)
        time.sleep(1)

    # ---- asyncore: uses the HTTPClient class defined above ---------------
    if test and 'asyncore' in test:
        log.info('TEST: asyncore')
        time_s = time.time() * 1000
        file_size = 0
        import urlparse
        url_obj = urlparse.urlparse(file_input)
        client = HTTPClient(url_obj.netloc, url_obj.path)
        asyncore.loop()  # blocks until the connection closes
        # NOTE(review): loop() runs the entire transfer, so unlike the other
        # tests this "Returned status" time includes the full download.
        log.info('asyncore: Returned status in %i ms' % (
            (time.time() * 1000) - time_s
        ))
        print_stat(time_s, client.cc)
        time.sleep(1)

    # ---- requests: streaming iter_content --------------------------------
    if test and 'requests' in test:
        log.info('TEST: requests')
        import requests
        time_s = time.time() * 1000
        file_size = 0
        r = requests.get(file_input, stream=True)
        log.info('requests: Returned status in %i ms' % (
            (time.time() * 1000) - time_s
        ))
        for chunk in r.iter_content(chunk_size=CHUNK_SIZE):
            if chunk:
                file_size += len(chunk)
        print_stat(time_s, file_size)
        time.sleep(1)

    ##
    ## write tests

    # ---- urllib.urlretrieve: download straight to disk -------------------
    if test and 'urlretrieve' in test:
        log.info('TEST: urlretrieve')
        time_s = time.time() * 1000
        import urllib
        urllib.urlretrieve(file_input, file_output)
        file_size = os.stat(file_output).st_size
        print_stat(time_s, file_size)
        time.sleep(1)

    # ---- urllib2 with chunked writes to disk -----------------------------
    if test and 'urllib2-disk' in test:
        log.info('TEST: urllib2-disk')
        time_s = time.time() * 1000
        import urllib2
        req = urllib2.urlopen(file_input)
        with open(file_output, 'wb') as fp:
            while True:
                chunk = req.read(CHUNK_SIZE)
                if not chunk: break
                fp.write(chunk)
        file_size = os.stat(file_output).st_size
        print_stat(time_s, file_size)
        time.sleep(1)

    # ---- shutil.copyfileobj: stdlib buffered copy to disk ----------------
    if test and 'shutil' in test:
        log.info('TEST: shutil')
        time_s = time.time() * 1000
        import shutil
        import urllib2
        req = urllib2.urlopen(file_input)
        with open(file_output, 'wb') as fp:
            shutil.copyfileobj(req, fp)
        file_size = os.stat(file_output).st_size
        print_stat(time_s, file_size)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment