Created
December 25, 2010 13:14
-
-
Save amcgregor/754869 to your computer and use it in GitHub Desktop.
A comparison of two methods to canonicalize (to byte strings) a list of 2-tuples containing byte strings and unicode strings.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
"""Compare two algorithms that canonicalize a list of tuples.

Each header is a ``(name, value)`` 2-tuple; canonicalizing means encoding any
unicode name or value to a byte string using ISO-8859-1.

The figures below are the averages printed by this script (three ``timeit``
runs per measurement, in seconds).

NOTE: ``bar`` rewrites the list it is given, and the fixture lists are
imported once from ``__main__``, so after its first timed call the list is
already all bytes.  The near-identical "substitution" timings across the
Good/Mixed/Bad cases below are consistent with that.

Python 2.7.0:

    Good headers:
        List creation and iteration: 2.98474693298
        List iteration and substitution: 3.3568974336
        11.0861445122% penalty

    Mixed headers:
        List creation and iteration: 7.56983908017
        List iteration and substitution: 3.39938569069
        55.0930256947% improvement

    Bad headers:
        List creation and iteration: 10.04327027
        List iteration and substitution: 3.36879269282
        66.4572136142% improvement

Python 3.1.3:

    Good headers:
        List creation and iteration: 3.96421766281
        List iteration and substitution: 4.36530995369
        9.18817438242% penalty

    Mixed headers:
        List creation and iteration: 11.3370667299
        List iteration and substitution: 4.5164826711
        60.1618057058% improvement

    Bad headers:
        List creation and iteration: 16.6681366762
        List iteration and substitution: 4.34285736084
        73.9451538872% improvement
"""
# The docstring must come first to become ``__doc__``; a future statement is
# still valid directly after the module docstring.
from __future__ import unicode_literals, print_function
import sys

# Python 3.x compatibility: expose the same two names on both major versions
# so the rest of the module can use ``unicode`` and a lazy ``range``.
if sys.version_info >= (3, 0):
    # Python 3 has no separate ``unicode`` type; ``str`` is the text type.
    unicode = str
else:
    # On Python 2 use the lazy ``xrange`` in place of the list-building range.
    range = xrange
# Fixture: every name and value is already a byte string, so nothing needs
# to be encoded.
headers_good = [
    (b'Content-Type', b"text/plain"),
    (b'Content-Length', b'27'),
    (b'Content-MD5', b"1234567890123456789012"),
]
# Fixture: a mix of byte strings and text strings (un-prefixed literals are
# text because the module enables ``unicode_literals``).  One pair needs only
# its value encoded, one needs nothing, and one needs both parts encoded.
headers_mixed = [
    (b'Content-Type', "text/plain"),
    (b'Content-Length', b'27'),
    ('Content-MD5', "1234567890123456789012"),
]
# Fixture: every name and value is a text string (un-prefixed literals are
# text because the module enables ``unicode_literals``), so every part of
# every pair has to be encoded.
headers_bad = [
    ('Content-Type', "text/plain"),
    ('Content-Length', '27'),
    ('Content-MD5', "1234567890123456789012"),
]
def foo(headers):
    """Return a new list of header pairs with all text encoded to bytes.

    This is the "list creation and iteration" strategy: the input is left
    untouched and a fresh list is built.

    Every pair from ``headers`` appears in the result, in order.  Parts that
    are ``unicode`` are encoded as ISO-8859-1; parts of any other type are
    carried over unchanged.  (Previously a pair that needed no encoding hit a
    ``continue`` before the ``append`` and was silently dropped from the
    result.)

    :param headers: iterable of ``(name, value)`` 2-tuples.
    :returns: a new list of ``(name, value)`` tuples.
    :raises UnicodeEncodeError: if a text part contains a character that
        ISO-8859-1 cannot represent.
    """
    canonical = []

    for name, value in headers:
        if isinstance(name, unicode):
            name = name.encode('iso-8859-1')

        if isinstance(value, unicode):
            value = value.encode('iso-8859-1')

        # Append unconditionally so already-canonical pairs are kept.
        canonical.append((name, value))

    return canonical
def bar(headers):
    """Encode text header parts to bytes in place and return the same list.

    This is the "list iteration and substitution" strategy: only the slots
    whose pair contains a ``unicode`` name or value are replaced; pairs that
    are already canonical are left as the very same tuple objects.

    :param headers: mutable list of ``(name, value)`` 2-tuples; it is
        modified in place.
    :returns: the ``headers`` list that was passed in.
    :raises UnicodeEncodeError: if a text part contains a character that
        ISO-8859-1 cannot represent.
    """
    # Assigning to an existing index while enumerating is safe: the list
    # never changes length.
    for index, (name, value) in enumerate(headers):
        name_is_text = isinstance(name, unicode)
        value_is_text = isinstance(value, unicode)

        if not (name_is_text or value_is_text):
            continue  # Already canonical; keep the original tuple.

        if name_is_text:
            name = name.encode('iso-8859-1')

        if value_is_text:
            value = value.encode('iso-8859-1')

        headers[index] = (name, value)

    return headers
def avgresult(c, count=3.0):
    """Call ``c`` ``count`` times and return the mean of its results.

    The number of calls now follows ``count``; it used to be fixed at three
    regardless of the divisor, which gave a wrong mean for any other value.

    :param c: zero-argument callable returning a number.
    :param count: how many samples to take; truncated to an integer.
    :returns: the arithmetic mean of the samples, as a float.
    :raises ValueError: if ``count`` truncates to less than one.
    """
    runs = int(count)

    if runs < 1:
        raise ValueError("count must be at least 1")

    # Divide by a float so integer samples do not floor-divide on Python 2.
    return sum(c() for _ in range(runs)) / float(runs)
def result(t1, t2):
    """Print how the second timing compares with the first, as a percentage.

    The percentage is ``abs(1 - faster / slower) * 100``.  The line ends in
    " improvement" when ``t2`` is the smaller timing and " penalty"
    otherwise (including when both are equal).

    The label is chosen by comparing the timings directly rather than by
    dividing ``t1 / t2``, so a zero ``t2`` no longer raises
    ``ZeroDivisionError``.  Two zero timings are reported as a 0.0% change.

    :param t1: baseline timing (expected to be non-negative).
    :param t2: timing being compared against the baseline.
    :returns: ``None``; the comparison is written to standard output.
    """
    if t2 > t1:
        faster, slower = t1, t2
    else:
        faster, slower = t2, t1

    # ``slower`` is zero only when both timings are zero: no difference.
    percent = abs(1.0 - faster / slower) * 100 if slower else 0.0
    label = " improvement" if t1 > t2 else " penalty"

    print(percent, '%', label, sep='')
if __name__ == '__main__':
    # Benchmark driver: time ``foo`` and ``bar`` against each fixture list
    # and print the averaged timings plus their relative difference.
    from timeit import Timer

    def _average_time(func_name, fixture_name):
        """Return the averaged ``timeit`` result for one function/fixture."""
        # ``bar`` rewrites its argument in place and the fixture is imported
        # only once, so timing it on the fixture itself would measure the
        # already-canonical path after the very first call.  Each call gets a
        # fresh shallow copy instead; ``foo`` is given the same copy so the
        # extra cost is identical on both sides of the comparison.
        statement = "%s(list(%s))" % (func_name, fixture_name)
        setup = "from __main__ import %s, %s" % (func_name, fixture_name)
        return avgresult(lambda: Timer(statement, setup).timeit())

    print("Running each test three times and averaging the result;\nplease wait, this could take some time.")

    cases = (
        ("Good", "headers_good"),
        ("Mixed", "headers_mixed"),
        ("Bad", "headers_bad"),
    )

    for label, fixture_name in cases:
        print("\n%s headers:" % label)

        t1 = _average_time("foo", fixture_name)
        print("List creation and iteration:", t1)

        t2 = _average_time("bar", fixture_name)
        print("List iteration and substitution:", t2)

        result(t1, t2)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment