Created
June 11, 2014 21:59
-
-
Save 140am/c78754cb116868b91dfd to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" Python HTTP client implementations benchmark | |
# Bandwidth Throughput | |
INFO:__main__:TEST: pycurl | |
INFO:__main__:pycurl: Returned status in 17 ms | |
INFO:__main__:Completed read of 1527248057 bytes in 15410 ms at 756 Mbps | |
INFO:__main__:TEST: urllib2 | |
INFO:__main__:urllib2: Returned status in 18 ms | |
INFO:__main__:Completed read of 1527248057 bytes in 14554 ms at 800 Mbps | |
INFO:__main__:TEST: urllib3 | |
INFO:__main__:urllib3: Returned status in 3 ms | |
INFO:__main__:Completed read of 1527248057 bytes in 15352 ms at 758 Mbps | |
INFO:__main__:TEST: requests | |
INFO:__main__:requests: Returned status in 35 ms | |
INFO:__main__:Completed read of 1527248057 bytes in 14534 ms at 801 Mbps | |
# CPU / Memory Usage | |
-- curl | |
32219 cdn 20 0 169396 7664 3584 S 25.6 0.0 0:02.52 python | |
-- urllib2 | |
32281 cdn 20 0 189632 9428 3920 S 29.5 0.0 0:06.09 python | |
-- urllib3 | |
33218 cdn 20 0 202200 11476 3852 S 27.6 0.0 0:02.44 python | |
-- urllib3 + gevent | |
33286 cdn 20 0 285612 12764 4260 R 80.7 0.0 0:06.58 python | |
-- requests | |
32219 cdn 20 0 213052 13892 4268 S 37.4 0.0 0:16.52 python | |
""" | |
import time
import os
import pycurl
import cStringIO   # Python 2 only -- this script targets Python 2.x
import asyncore    # removed from the stdlib in Python 3.12
import socket
import logging

# Benchmark output goes through the logging module at INFO level.
logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)
class HTTPClient(asyncore.dispatcher):
    """Minimal async HTTP GET client used by the 'asyncore' benchmark.

    Connects to ``host`` on port 80, sends a single GET request for
    ``path``, and counts every byte received in ``self.cc``. The
    response is never parsed -- header bytes are included in the count
    and the body is discarded.
    """

    def __init__(self, host, path):
        asyncore.dispatcher.__init__(self)
        self.create_socket(socket.AF_INET, socket.SOCK_STREAM)
        self.connect((host, 80))
        # FIX: HTTP/1.1 requires CRLF ("\r\n") terminators on the request
        # line and on every header (RFC 7230 section 3). The original used
        # bare "\n" for all but the last header, which strict servers may
        # reject or mis-parse.
        self.buffer = (
            'GET %s HTTP/1.1\r\n'
            'Accept: */*\r\n'
            'Accept-Encoding: gzip, deflate, compress\r\n'
            'Host: %s\r\n'
            'User-Agent: Beluga CDNode/0.9.0\r\n\r\n'
        ) % (path, host)
        self.cc = 0  # total bytes read off the socket so far

    def handle_connect(self):
        pass

    def handle_close(self):
        self.close()

    def handle_read(self):
        # Read up to 64 KiB; data is counted and then discarded.
        buf = self.recv(1024 * 64)
        self.cc += len(buf)
        return buf

    def writable(self):
        # Stay write-interested only until the request is fully sent.
        return (len(self.buffer) > 0)

    def handle_write(self):
        # send() may accept fewer bytes than offered; keep the remainder.
        sent = self.send(self.buffer)
        self.buffer = self.buffer[sent:]
def print_stat(time_s, file_size):
    """Log transfer size, elapsed time, and throughput.

    :param time_s: transfer start timestamp in milliseconds
                   (``time.time() * 1000``)
    :param file_size: number of bytes transferred
    :returns: None (result is logged, not returned)
    """
    runtime = (time.time() * 1000) - time_s
    # FIX: guard against a zero (or negative) elapsed time -- coarse
    # clocks or an instantaneous transfer raised ZeroDivisionError here.
    if runtime <= 0:
        runtime = 1e-3
    # bytes/s -> bits/s, then to Mbit/s using binary mega (1024*1024),
    # matching the figures quoted in the module docstring.
    download_rate = (file_size / (runtime / 1000)) * 8
    download_rate = download_rate / 1024 / 1024
    logging.getLogger(__name__).info(
        'Completed read of %i bytes in %i ms at %i Mbps' % (
            file_size, runtime, download_rate
        ))
if __name__ == "__main__":
    # Candidate client libraries to benchmark.
    test = ['pycurl', 'requests', 'urllib2', 'urllib3']
    # NOTE(review): this overrides the full list above, so ONLY urllib3
    # actually runs -- looks like a debugging leftover; confirm before
    # relying on the full suite executing.
    test = ['urllib3']
    file_input = 'http://pcdn.adam.gs/test.mp4'  # remote file to download
    file_output = 'test.mp4'                     # local path for the disk-write tests
    CHUNK_SIZE = 1024 * 64                       # 64 KiB read size
    # --- geventhttpclient: concurrent greenlet downloads ------------------
    if test and 'geventhttpclient' in test:
        log.info('TEST: geventhttpclient')

        def fetch_page(http, url):
            # Runs in a greenlet: issue one GET, stream the body, and
            # yield to the gevent hub after every chunk.
            log.info('gevent greenlet')
            file_size = 0
            time_s = time.time() * 1000
            response = http.get(url.request_uri)
            assert response.status_code == 200
            log.info('geventhttpclient: Returned status in %i ms' % (
                (time.time() * 1000) - time_s
            ))
            chunk = response.read(CHUNK_SIZE)
            while chunk:
                file_size += len(chunk)
                chunk = response.read(CHUNK_SIZE)
                gevent.sleep(0)  # cooperative yield between chunks
            print_stat(time_s, file_size)

        # Imports are deferred so the third-party dependency is only
        # required when this test is selected.
        import gevent
        import gevent.pool
        from geventhttpclient.url import URL
        url = URL(file_input)
        from geventhttpclient import HTTPClient as GeventHTTPClient
        http = GeventHTTPClient.from_url(url, concurrency=10)
        CON = 2  # number of concurrent greenlets
        pool = gevent.pool.Pool(CON)
        for i in range(CON):
            log.info('connecting to: %s' % url)
            pool.spawn(fetch_page, http, url)
        """
        with open(file_output, 'w') as fp:
            data = response.read(CHUNK_SIZE)
            while data:
                fp.write(data)
                data = response.read(CHUNK_SIZE)
        """
        pool.join()
        http.close()
        time.sleep(1)
    # --- pycurl: libcurl bindings; callbacks count and discard the body ---
    if test and 'pycurl' in test:
        log.info('TEST: pycurl')
        time_s = time.time() * 1000
        file_size = 0
        temp_buffer_head = cStringIO.StringIO()  # accumulates response headers
        temp_buffer_body = cStringIO.StringIO()  # unused: body callback discards data
        printed_header = None

        def temp_write_header(header):
            # Header callback: log time-to-first-header once, then buffer
            # every header line. Returning len() tells libcurl the data
            # was consumed.
            global printed_header
            if not printed_header:
                printed_header = True
                log.info('pycurl: Returned status in %i ms' % (
                    (time.time() * 1000) - time_s
                ))
            # look for first CRLF after header response
            if header.find('\r\n\r\n') != -1:
                log.warn('HEADER DONE')
            temp_buffer_head.write(header)
            return len(header)

        def temp_write_func(chunk):
            # Body callback: count-only; data is discarded.
            #log.info('READ CHUNK: %i' % len(chunk))
            return len(chunk)

        def print_progress(download_t, download_d, upload_t, upload_d):
            # Optional progress callback (wired up only if the commented
            # PROGRESSFUNCTION setopt below is enabled).
            log.info(
                "Total to download %d bytes, have %d bytes so far" % (
                    download_t, download_d
                ))

        curl = pycurl.Curl()
        curl.setopt(pycurl.URL, file_input)
        curl.setopt(pycurl.CONNECTTIMEOUT, 10)
        curl.setopt(pycurl.TIMEOUT, 300)
        curl.setopt(pycurl.HTTPHEADER, ["Accept:"])  # suppress libcurl's default Accept header
        #curl.setopt(pycurl.WRITEHEADER, temp_buffer_head)
        #curl.setopt(pycurl.WRITEDATA, temp_buffer_body)
        curl.setopt(pycurl.HEADERFUNCTION, temp_write_header)
        curl.setopt(pycurl.WRITEFUNCTION, temp_write_func)
        curl.setopt(curl.NOPROGRESS, 1)
        #curl.setopt(curl.PROGRESSFUNCTION, print_progress)
        curl.setopt(pycurl.FOLLOWLOCATION, 1)
        curl.setopt(pycurl.MAXREDIRS, 5)
        #curl.setopt(curl.NOBODY, True)
        curl.setopt(curl.USERAGENT, "Mozilla/5.0 (compatible; pycurl)")
        curl.perform()
        # NOTE(review): CONTENT_LENGTH_DOWNLOAD is -1 when the server sends
        # no Content-Length header; the byte count logged by print_stat
        # below would then be wrong -- confirm the test server always sends it.
        file_size = curl.getinfo(curl.CONTENT_LENGTH_DOWNLOAD)
        log.info("HTTP-code: %s" % curl.getinfo(curl.HTTP_CODE))
        log.info("Total-time: %s" % curl.getinfo(curl.TOTAL_TIME))
        log.info("Document size: %d bytes" % curl.getinfo(curl.SIZE_DOWNLOAD))
        log.info("Effective URL: %s" % curl.getinfo(curl.EFFECTIVE_URL))
        log.info("Content-type: %s" % curl.getinfo(curl.CONTENT_TYPE))
        log.info("Namelookup-time: %s" % curl.getinfo(curl.NAMELOOKUP_TIME))
        log.info("Redirect-time: %s" % curl.getinfo(curl.REDIRECT_TIME))
        log.info("Redirect-count: %s" % curl.getinfo(curl.REDIRECT_COUNT))
        epoch = curl.getinfo(curl.INFO_FILETIME)
        log.info("Filetime: %d (%s)" % (epoch, time.ctime(epoch)))
        temp_buffer_head.flush()
        log.info('HEADER: %s' % temp_buffer_head.getvalue())
        curl.close()
        temp_buffer_head.close()
        temp_buffer_body.close()
        print_stat(time_s, file_size)
        time.sleep(1)
    # --- urllib2 with gevent-patched sockets ------------------------------
    if test and 'urllib2' in test:
        log.info('TEST: urllib2')
        time_s = time.time() * 1000
        file_size = 0
        import gevent
        from gevent import monkey
        # NOTE(review): patch_all() replaces sockets process-wide, so every
        # test that runs after this one is also affected.
        monkey.patch_all()
        #import geventhttpclient.httplib
        #geventhttpclient.httplib.patch()
        import urllib2
        req = urllib2.urlopen(file_input)
        log.info('urllib2: Returned status in %i ms' % (
            (time.time() * 1000) - time_s
        ))
        # Stream the body in CHUNK_SIZE reads, counting but discarding it.
        while True:
            chunk = req.read(CHUNK_SIZE)
            if not chunk: break
            file_size += len(chunk)
            #output.write(chunk)
            gevent.sleep(0)  # cooperative yield between chunks
        print_stat(time_s, file_size)
        time.sleep(1)
    # --- urllib3: pooled connections, streamed body -----------------------
    if test and 'urllib3' in test:
        log.info('TEST: urllib3')
        file_size = 0
        from gevent import monkey
        monkey.patch_all()
        import urllib3
        # LRU of 10 connections
        http = urllib3.PoolManager(
            num_pools=10
        )
        # Warm-up HEAD request, presumably so the timed GET below reuses an
        # already-established connection.
        # NOTE(review): this response is never read or released, so it holds
        # a pooled connection -- verify that is intentional.
        req = http.request(
            method='HEAD',
            url=file_input,
            preload_content=False
        )
        time_s = time.time() * 1000
        req = http.request(
            method='GET',
            url=file_input,
            preload_content=False
        )
        log.info('urllib3: Returned status in %i ms' % (
            (time.time() * 1000) - time_s
        ))
        # stream() with no argument uses urllib3's default chunk size
        # (not this script's CHUNK_SIZE).
        for chunk in req.stream():
            if not chunk: break
            file_size += len(chunk)
            #output.write(chunk)
        req.release_conn()
        print_stat(time_s, file_size)
        time.sleep(1)
    # --- asyncore: raw-socket HTTPClient defined above --------------------
    if test and 'asyncore' in test:
        log.info('TEST: asyncore')
        time_s = time.time() * 1000
        file_size = 0
        import urlparse
        url_obj = urlparse.urlparse(file_input)
        client = HTTPClient(url_obj.netloc, url_obj.path)
        # Blocks until the connection closes; client.cc accumulates the
        # byte count (response headers included, since nothing is parsed).
        asyncore.loop()
        # NOTE(review): unlike the other tests, this logs the TOTAL transfer
        # time, not time-to-first-status -- the label is misleading.
        log.info('asyncore: Returned status in %i ms' % (
            (time.time() * 1000) - time_s
        ))
        print_stat(time_s, client.cc)
        time.sleep(1)
    # --- requests: streamed GET -------------------------------------------
    if test and 'requests' in test:
        log.info('TEST: requests')
        import requests
        time_s = time.time() * 1000
        file_size = 0
        r = requests.get(file_input, stream=True)
        log.info('requests: Returned status in %i ms' % (
            (time.time() * 1000) - time_s
        ))
        for chunk in r.iter_content(chunk_size=CHUNK_SIZE):
            if chunk:  # filter out keep-alive chunks
                file_size += len(chunk)
        print_stat(time_s, file_size)
        time.sleep(1)
    ##
    ## write tests -- the remaining tests also persist the download to disk
    if test and 'urlretrieve' in test:
        log.info('TEST: urlretrieve')
        time_s = time.time() * 1000
        import urllib
        urllib.urlretrieve(file_input, file_output)
        # Byte count is taken from the file on disk.
        file_size = os.stat(file_output).st_size
        print_stat(time_s, file_size)
        time.sleep(1)
    # --- urllib2 + manual chunked write to disk ---------------------------
    if test and 'urllib2-disk' in test:
        log.info('TEST: urllib2-disk')
        time_s = time.time() * 1000
        import urllib2
        req = urllib2.urlopen(file_input)
        with open(file_output, 'wb') as fp:
            while True:
                chunk = req.read(CHUNK_SIZE)
                if not chunk: break
                fp.write(chunk)
        # Byte count is taken from the file on disk.
        file_size = os.stat(file_output).st_size
        print_stat(time_s, file_size)
        time.sleep(1)
    # --- urllib2 + shutil.copyfileobj to disk -----------------------------
    if test and 'shutil' in test:
        log.info('TEST: shutil')
        time_s = time.time() * 1000
        import shutil
        import urllib2
        req = urllib2.urlopen(file_input)
        with open(file_output, 'wb') as fp:
            # copyfileobj streams in fixed-size chunks (its own default,
            # not this script's CHUNK_SIZE).
            shutil.copyfileobj(req, fp)
        # Byte count is taken from the file on disk.
        file_size = os.stat(file_output).st_size
        print_stat(time_s, file_size)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.