Skip to content

Instantly share code, notes, and snippets.

@amcgregor
Created December 25, 2010 13:14
Show Gist options
  • Save amcgregor/754869 to your computer and use it in GitHub Desktop.
Save amcgregor/754869 to your computer and use it in GitHub Desktop.
A comparison of two methods to canonicalize (to byte strings) a list of 2-tuples containing byte strings and unicode strings.
#!/usr/bin/env python
"""Compare two algorithms that canonicalize a list of tuples.

The docstring must come before the ``from __future__`` import; a string
placed after it is just a discarded expression and the module ends up
with no ``__doc__``.

Recorded results:

Python 2.7.0:
    Good headers:
        List creation and iteration: 2.98474693298
        List iteration and substitution: 3.3568974336
        11.0861445122% penalty
    Mixed headers:
        List creation and iteration: 7.56983908017
        List iteration and substitution: 3.39938569069
        55.0930256947% improvement
    Bad headers:
        List creation and iteration: 10.04327027
        List iteration and substitution: 3.36879269282
        66.4572136142% improvement

Python 3.1.3:
    Good headers:
        List creation and iteration: 3.96421766281
        List iteration and substitution: 4.36530995369
        9.18817438242% penalty
    Mixed headers:
        List creation and iteration: 11.3370667299
        List iteration and substitution: 4.5164826711
        60.1618057058% improvement
    Bad headers:
        List creation and iteration: 16.6681366762
        List iteration and substitution: 4.34285736084
        73.9451538872% improvement
"""
from __future__ import unicode_literals, print_function
import sys
# Version shim so the rest of the module can use the names ``unicode`` and
# ``range`` on either major Python version.
if sys.version_info < (3, 0):
    # Python 2: make range() refer to the lazy xrange.
    range = xrange
else:
    # Python 3: str is the text type; alias it under the Python 2 name.
    unicode = str
# Benchmark fixtures: three header lists that differ only in how many of
# their names/values are byte strings versus text strings.
# Note: bar() modifies the list it is given in place.

# Every name and value is already a byte string.
headers_good = [
    (b'Content-Type', b'text/plain'),
    (b'Content-Length', b'27'),
    (b'Content-MD5', b'1234567890123456789012'),
]

# A mixture of byte-string and text-string names/values.
headers_mixed = [
    (b'Content-Type', 'text/plain'),
    (b'Content-Length', b'27'),
    ('Content-MD5', '1234567890123456789012'),
]

# Every name and value is a text string.
headers_bad = [
    ('Content-Type', 'text/plain'),
    ('Content-Length', '27'),
    ('Content-MD5', '1234567890123456789012'),
]
def foo(headers):
    """Return a new list of ``(name, value)`` pairs with text encoded to bytes.

    Each name or value that is a ``unicode`` (text) string is encoded as
    ISO-8859-1; anything else is passed through unchanged. The input list
    is not modified.

    The earlier version skipped (``continue``) pairs in which neither
    element needed encoding *before* appending them, so already-canonical
    headers were silently dropped from the result. Every pair is now
    appended, so the output always has one entry per input entry.

    :param headers: iterable of 2-tuples ``(name, value)``.
    :returns: a new list of 2-tuples.
    :raises UnicodeEncodeError: if a text string contains characters that
        cannot be represented in ISO-8859-1.
    """
    headers_ = list()

    for name, value in headers:
        if isinstance(name, unicode):
            name = name.encode('iso-8859-1')

        if isinstance(value, unicode):
            value = value.encode('iso-8859-1')

        headers_.append((name, value))

    return headers_
def bar(headers):
    """Encode text names/values of ``headers`` to bytes, in place.

    Walks the list and, for every ``(name, value)`` pair in which at least
    one element is a ``unicode`` (text) string, replaces that list entry
    with a new tuple whose text elements are encoded as ISO-8859-1. Pairs
    that need no encoding are left untouched.

    :param headers: mutable list of 2-tuples ``(name, value)``; modified
        in place.
    :returns: the same list object that was passed in.
    :raises UnicodeEncodeError: if a text string contains characters that
        cannot be represented in ISO-8859-1.
    """
    # enumerate() yields the index needed for the write-back without the
    # range(len(...)) + indexing round trip. Replacing an element does not
    # change the list's length, so iterating while assigning is safe.
    for i, (name, value) in enumerate(headers):
        if not isinstance(name, unicode) and not isinstance(value, unicode):
            continue  # Already canonical; keep the existing tuple.

        if isinstance(name, unicode):
            name = name.encode('iso-8859-1')

        if isinstance(value, unicode):
            value = value.encode('iso-8859-1')

        headers[i] = (name, value)

    return headers
def avgresult(c, count=3.0):
    """Call ``c`` ``count`` times and return the mean of its return values.

    The earlier version always ran ``c`` three times but divided by
    ``count``, so any ``count`` other than 3 produced a wrong average.
    The number of runs now follows ``count``.

    :param c: zero-argument callable returning a number.
    :param count: number of runs; truncated to an integer. The default of
        3.0 behaves exactly as before.
    :returns: the arithmetic mean as a float.
    :raises ZeroDivisionError: if ``count`` truncates to zero.
    """
    runs = int(count)
    # Divide by a float so the result is a true mean even for integer
    # samples on Python 2, where "/" between ints floors.
    return sum([c() for _ in range(runs)]) / float(runs)
def result(t1, t2):
    """Print the relative difference between two timings as a percentage.

    The percentage is ``abs(1 - smaller / larger) * 100``. The line is
    labelled " improvement" when ``t1`` is greater than ``t2`` (the second
    timing is the faster one) and " penalty" otherwise, including when
    the two are equal.

    The label was previously derived from ``t1 / t2``, which raised
    ZeroDivisionError for ``t2 == 0``; a direct comparison gives the same
    answer for positive timings without dividing. Two zero timings are
    reported as a 0% difference.

    :param t1: first timing.
    :param t2: second timing.
    :returns: None; the summary is written with ``print``.
    """
    if t2 > t1:
        smaller, larger = t1, t2
    else:
        smaller, larger = t2, t1

    # larger == 0 only when both timings are zero: treat as "no difference".
    ratio = float(smaller) / larger if larger else 1.0
    percent = abs(1.0 - ratio) * 100
    label = " improvement" if t1 > t2 else " penalty"

    print(percent, '%', label, sep='')
if __name__ == '__main__':
    from timeit import Timer

    def measure(func, data):
        """Return the averaged timeit() duration of ``func`` over ``data``.

        ``func`` and ``data`` are the *names* of a module-level function
        and header list; timeit imports them from ``__main__``.

        bar() rewrites the list it receives, and timeit runs its setup
        only once per timeit() call. Passing the shared list directly
        would therefore leave nothing to convert after the first
        iteration. Each call gets a fresh shallow copy instead, and foo()
        gets the same treatment so the copy cost is identical on both
        sides of the comparison.
        """
        stmt = "%s(list(%s))" % (func, data)
        setup = "from __main__ import %s, %s" % (func, data)
        return avgresult(lambda: Timer(stmt, setup).timeit())

    print("Running each test three times and averaging the result;\nplease wait, this could take some time.")

    cases = (
        ("Good", "headers_good"),
        ("Mixed", "headers_mixed"),
        ("Bad", "headers_bad"),
    )

    for label, data in cases:
        print("\n%s headers:" % label)

        t1 = measure("foo", data)
        print("List creation and iteration:", t1)

        t2 = measure("bar", data)
        print("List iteration and substitution:", t2)

        result(t1, t2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment