Created
January 23, 2009 16:49
-
-
Save zed/51074 to your computer and use it in GitHub Desktop.
Generate performance plot
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
*.db | |
*.xy | |
*.png | |
*.pyc | |
*.pyo |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
http://stackoverflow.com/questions/489999/python-convert-list-of-ints-to-one-number#490031 | |
""" | |
import random | |
from itertools import imap | |
FUNS = [] | |
def test_fun(fun): | |
FUNS.append(fun) | |
return fun | |
@test_fun | |
def _map(nums): | |
return int(''.join(map(str, nums))) | |
@test_fun | |
def _imap(nums): | |
return int(''.join(imap(str, nums))) | |
@test_fun | |
def _interpolation(nums): | |
return int(''.join('%d' % num for num in nums)) | |
@test_fun | |
def _genexp(nums): | |
return int(''.join(str(num) for num in nums)) | |
@test_fun | |
def _sum(nums): | |
return sum(digit * 10 ** (len(nums) - 1 - i) | |
for i, digit in enumerate(nums)) | |
@test_fun | |
def _reduce(nums): | |
return int(reduce(lambda x, y: x + str(y), nums, '')) | |
@test_fun | |
def _builtins(nums): | |
return int(filter(str.isdigit, repr(nums))) | |
@test_fun | |
def _accumulator(nums): | |
tot = 0 | |
for num in nums: | |
tot *= 10 | |
tot += num | |
return tot | |
def randrange1_10(digit_count): | |
return [random.randrange(1, 10) for i in xrange(digit_count)] | |
if __name__ == '__main__': | |
from subprocess import check_call | |
import sys | |
assert_results = [fun([1, 2, 3]) for fun in FUNS] | |
assert all(result == 123 for result in assert_results) | |
check_call(["python", "make-figures.py"] + | |
["--sort-function=cdleary." + f.__name__ for f in FUNS] + | |
["--sequence-creator=cdleary.randrange1_10"] + | |
sys.argv[1:]) | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
"""Generate performance graphs. See | |
http://stackoverflow.com/questions/464960/code-golf-combining-multiple-sorted-lists-into-a-single-sorted-list#464967 | |
""" | |
from __future__ import with_statement | |
from optparse import OptionParser, make_option | |
import collections | |
import copy | |
import glob | |
import itertools | |
import math | |
import operator | |
import os | |
import pickle | |
import re | |
import sys | |
import tempfile | |
import timeit | |
# numpy, matplotlib used only for plotting | |
import numpy | |
import pylab | |
from merge_funcs import (merge_26, sort_builtin, merge_alabaster, sort_deestan, | |
make_sequence_creator, sorted_random, ones) | |
def eval_dottedname(dottedname): | |
""" | |
>>> eval_dottedname("os.path.join") #doctest: +ELLIPSIS | |
<function join at 0x...> | |
>>> eval_dottedname("merge_funcs.sort_builtin") #doctest: +ELLIPSIS | |
<function sort_builtin at 0x...> | |
>>> eval_dottedname("merge_funcs") #doctest: +ELLIPSIS | |
<module 'merge_funcs' from '...'> | |
""" | |
return reduce(getattr, dottedname.split(".")[1:], __import__(dottedname.partition(".")[0])) | |
def usec(function, arguments, readonly_args=False): | |
""" | |
Given `function` and `arguments` return a time (in microseconds) it takes to make the call. | |
Note: There is an overhead in executing a function that does nothing. | |
http://www.rosettacode.org/wiki/Query_Performance#Python | |
>>> 5 > usec(nothing, [], True) > 0 | |
True | |
>>> 5 > usec(identity, [1], True) > 0 | |
True | |
""" | |
funcname = function.__name__ | |
fd, pickle_fn = tempfile.mkstemp(suffix='.pickle', prefix='usec', text=True) | |
pickle.dump((function, arguments), os.fdopen(fd, 'w')) | |
del function | |
del arguments # free memory | |
timer = timeit.Timer(stmt='%(funcname)s(*args)' % vars(), | |
setup='import pickle; %(funcname)s, args = pickle.load(open(%(pickle_fn)r))' % vars()) | |
try: | |
try: | |
seconds, N = 0, 1 | |
while seconds < 0.2: | |
seconds = min(timer.repeat(repeat=3, number=N)) | |
N *= 10 | |
if not readonly_args: | |
break | |
microseconds = round(1000000 * seconds / (N / 10), 1) # per loop | |
return microseconds | |
except RuntimeError, err: | |
#XXX for `merge_alabaster` | |
if "maximum recursion depth exceeded" in err.message: | |
print >> sys.stderr, err.message | |
return 0 | |
else: | |
raise | |
except: | |
timer.print_exc(file=sys.stderr) | |
raise | |
def nothing(*args): pass | |
def identity(x): return x | |
def writedat(filename, x, y, xprecision=3, yprecision=5): | |
""" | |
Write two equal-sized numerical arrays `x' and `y' to a two-column text file named `filename'. | |
The first column of the file contains values from an `x'-array | |
with a given `xprecision', the second -- values from `y'-array | |
with `yprecision'. | |
http://www.rosettacode.org/wiki/Write_float_arrays_to_a_text_file#Python | |
Example usage | |
>>> import math | |
>>> x = [1, 2, 3, 1e11] | |
>>> y = map(math.sqrt, x) | |
>>> y | |
[1.0, 1.4142135623730951, 1.7320508075688772, 316227.76601683791] | |
>>> writedat('sqrt.dat', x, y) | |
written 'sqrt.dat' | |
>>> # check | |
... | |
>>> for line in open('sqrt.dat'): #doctest:+NORMALIZE_WHITESPACE | |
... print line | |
... | |
1 1 | |
2 1.4142 | |
3 1.7321 | |
1e+011 3.1623e+005 | |
>>> import os | |
>>> os.remove('sqrt.dat') | |
""" | |
with open(filename,'w') as f: | |
for a, b in itertools.izip(x, y): | |
print >> f, "%.*g\t%.*g" % (xprecision, a, yprecision, b) | |
print "written '%s'" % filename | |
def write_timings(npoints, maxN, sort_functions, sequence_creators, prefix=''): | |
""" | |
http://www.rosettacode.org/wiki/Measure_relative_performance_of_sorting_algorithms_implementations#Write_timings | |
""" | |
maxexp = math.log(maxN, 2) | |
step = (maxexp - 1) / npoints | |
Ns = [2**e for e in range(1, int(maxexp + 1.5), int(step + .5) or 1)] | |
for sort in sort_functions: | |
for make_seq in sequence_creators: | |
L = make_seq(10) | |
Lcopy = copy.deepcopy(L) | |
assert L == Lcopy | |
_ = sort(L) | |
_ = sort(L) | |
Ts = map(lambda n: usec(sort, (make_seq(n),), readonly_args=(L == Lcopy)), Ns) | |
writedat('%s%s-%s-%d-%d.xy' % (prefix, sort.__name__, make_seq.__name__, | |
len(Ns), max(Ns)), Ns, Ts) | |
def plotdd(dictplotdict,prefix=''): | |
"""See ``plot_timings()`` below.""" | |
symbols = list('o^v<>s+xDd1234hHp|_') | |
colors = list('bgrcmyk') # split string on distinct characters | |
for npoints, plotdict in dictplotdict.iteritems(): | |
for ttle, lst in plotdict.iteritems(): | |
pylab.hold(False) | |
for i, (label, polynom, x, y) in enumerate(sorted(lst,key=operator.itemgetter(0))): | |
# dots | |
pylab.plot(x, y, colors[i % len(colors)] + symbols[i % len(symbols)], | |
# legend | |
label='%.2g %s + %.2g %s' % (polynom[1], polynom.variable, | |
polynom[0], label)) | |
pylab.hold(True) # plot on the same graph | |
y = numpy.polyval(polynom, x) | |
print polynom, label | |
# lines | |
pylab.plot(x, y, colors[i % len(colors)], label= '_nolegend_') | |
pylab.legend(loc='upper left') | |
pylab.xlabel(polynom.variable) | |
pylab.ylabel('log2( time in microseconds )') | |
pylab.title(ttle, verticalalignment='bottom') | |
figname = '%(prefix)s_%(npoints)03d%(ttle)s' % vars() | |
pylab.savefig(figname+'.png') | |
pylab.savefig(figname+'.pdf') | |
print figname | |
def plot_timings(prefix=''): | |
""" | |
http://www.rosettacode.org/wiki/Measure_relative_performance_of_sorting_algorithms_implementations#Plot_timings | |
""" | |
makedict = lambda: collections.defaultdict(lambda: collections.defaultdict(list)) | |
df = makedict() | |
ds = makedict() | |
# populate plot dictionaries | |
for filename in glob.glob('%s*.xy' % prefix): | |
m = re.match(r'%s([^-]+)-([^-]+)-(\d+)-(\d+)\.xy' % prefix, filename) | |
print filename | |
assert m, filename | |
funcname, seqname, npoints, maxN = m.groups() | |
npoints, maxN = int(npoints), int(maxN) | |
Ns, Ts = numpy.loadtxt(filename, dtype='f', unpack=True) | |
# Ns -- sequences lengths | |
# Ts -- corresponding times | |
assert len(Ns) == len(Ts) == npoints | |
# find approximating polynom in a form log(y) = a*log(x) + b | |
logsafe = numpy.logical_and(Ns>0, Ts>0) | |
Ts = numpy.log2(Ts[logsafe]) | |
Ns = numpy.log2(Ns[logsafe]) | |
coeffs = numpy.polyfit(Ns, Ts, deg=1) | |
poly = numpy.poly1d(coeffs, variable='log2(N)') | |
# | |
df[npoints][funcname].append((seqname, poly, Ns, Ts)) | |
ds[npoints][seqname].append((funcname, poly, Ns, Ts)) | |
# actual plotting | |
plotdd(df, prefix) | |
plotdd(ds, prefix) # see ``plotdd()`` above | |
def dottednames2funcs(dottednames): | |
return [eval_dottedname(f if "." in f else "%s.%s" % (__name__, f)) | |
for f in dottednames] | |
def main(args=None): | |
""" Based on: | |
http://www.rosettacode.org/wiki/Measure_relative_performance_of_sorting_algorithms_implementations#Figures:_log2.28_time_in_microseconds_.29_vs._log2.28_sequence_length_.29 | |
""" | |
# process args | |
if args is None: | |
args = sys.argv[1:] | |
default_sort_functions = "merge_26 sort_builtin merge_funcs.merge_alabaster".split() | |
p = OptionParser(description=__doc__, option_list=[ | |
make_option("--plot-only", action="store_true", | |
help="don't recompute *.xy files, just plot them"), | |
make_option("-p", "--prefix", | |
help="a prefix for *.xy files (default: '%default')"), | |
make_option("--npoints", type="int", | |
help="desirable number of sample point on the graph"), | |
make_option("--nsublists", type="int", | |
help="number of sublists (default: %default)"), | |
make_option("--maxn", type="int", | |
help="maximum size of sublists"), | |
make_option("-s", "--sort-function", action="append", | |
help=("a sort function to plot, to add more functions to comparison" | |
" specify the option multiple times" | |
" (default: %s)" % default_sort_functions)), | |
make_option("--sequence-creator", action="append", | |
help=("a sequence creator function accepts N return a sequence" | |
"(default: special case to test sorting multiple sorted lists)")), | |
]) | |
p.set_defaults(plot_only=False, prefix='', | |
npoints=100, nsublists=10, maxn=2**10) | |
options, args = p.parse_args(args) | |
if args: | |
p.error("script does not accept arguments, but given %r" % args) | |
# measure performance and write acquired data to *.xy files | |
M = options.nsublists # number of sublists | |
if not options.sequence_creator: | |
sequence_creators = [make_sequence_creator(f, M) | |
for f in (sorted_random(2**31-1), ones, range)] | |
else: | |
sequence_creators = dottednames2funcs(options.sequence_creator) | |
if not options.sort_function: | |
options.sort_function = default_sort_functions | |
sort_functions = dottednames2funcs(options.sort_function) | |
if not options.plot_only: | |
write_timings(npoints=options.npoints, maxN=options.maxn, | |
sort_functions=sort_functions, | |
sequence_creators=sequence_creators, | |
prefix=options.prefix) | |
# plot *.xy files | |
plot_timings(options.prefix) | |
if __name__ == '__main__': | |
import doctest; doctest.testmod() | |
sys.exit(main(sys.argv[1:])) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
""" | |
http://stackoverflow.com/questions/464960/code-golf-combining-multiple-sorted-lists-into-a-single-sorted-list#464967 | |
""" | |
from heapq import heappop, heapreplace, heapify | |
import copy | |
import random | |
def merge_26(lists): | |
""" | |
See `heapq.merge()` | |
http://svn.python.org/view/python/trunk/Lib/heapq.py?view=markup | |
>>> merge_26([[1,3,5,7], [0,2,4,8], [5,10,15,20], [], [25]]) | |
[0, 1, 2, 3, 4, 5, 5, 7, 8, 10, 15, 20, 25] | |
""" | |
_heappop, _heapreplace, _StopIteration = heappop, heapreplace, StopIteration | |
h = [] | |
h_append = h.append | |
for itnum, it in enumerate(map(iter, lists)): | |
try: | |
next = it.next | |
h_append([next(), itnum, next]) | |
except _StopIteration: | |
pass | |
heapify(h) | |
result = [] | |
r_append = result.append | |
while 1: | |
try: | |
while 1: | |
v, itnum, next = s = h[0] # raises IndexError when h is empty | |
r_append(v) | |
s[0] = next() # raises StopIteration when exhausted | |
_heapreplace(h, s) # restore heap condition | |
except _StopIteration: | |
_heappop(h) # remove empty iterator | |
except IndexError: | |
return result | |
def sort_builtin(lists): | |
L = sum(lists, []) | |
L.sort() | |
return L | |
def m(c,l): | |
""" | |
http://stackoverflow.com/questions/464960/code-golf-combining-multiple-sorted-lists-into-a-single-sorted-list#464967 | |
>>> m([], [[1,4], [2,6], [3,5]]) | |
[1, 2, 3, 4, 5, 6] | |
>>> L = [[1]*10 for _ in range(100)] | |
>>> RL = 1007 | |
>>> import sys | |
>>> rl = sys.getrecursionlimit() | |
>>> if rl < RL: | |
... len(m([], copy.deepcopy(L))) | |
Traceback (most recent call last): | |
... | |
RuntimeError: maximum recursion depth exceeded | |
>>> sys.setrecursionlimit(RL) | |
>>> len(m([], copy.deepcopy(L))) | |
1000 | |
>>> sys.setrecursionlimit(rl) | |
""" | |
try: | |
c += [l[min((m[0], i) for i,m in enumerate(l) if m)[1]].pop(0)] | |
return m(c,l) | |
except ValueError: | |
return c | |
def merge_alabaster(lists): | |
return m([], lists) | |
def sort_deestan(i): | |
""" | |
http://stackoverflow.com/questions/464960/code-golf-combining-multiple-sorted-lists-into-a-single-sorted-list#465044 | |
""" | |
y=[];x=sum(i,[]) | |
while x:n=min(x);y+=[n];x.remove(n) | |
return y | |
merge_funcs = (merge_26, sort_builtin, merge_alabaster, sort_deestan) | |
def test_mergefuncs(): | |
""" | |
>>> test_data = ( | |
... ([[1,3,5,7], [0,2,4,8], [5,10,15,20], [], [25]], | |
... [0, 1, 2, 3, 4, 5, 5, 7, 8, 10, 15, 20, 25]), | |
... ([[1]*10 for _ in range(99)], | |
... [1]*990), | |
... ) | |
>>> ok = True | |
>>> for f in merge_funcs: #doctest:+ELLIPSIS | |
... for input_lists, expected in test_data: | |
... print '-', | |
... r = f(copy.deepcopy(input_lists)) | |
... if r != expected: | |
... print(f.__name__, input_lists, r, expected) | |
... ok = False | |
... for c in sequence_creators: | |
... print ':', | |
... L = c(20) | |
... r = f(copy.deepcopy(L)) | |
... if r != merge_26(L): | |
... print(f.__name__, L, r, merge_26(L)) | |
... ok = False | |
- - : : : - - : : : - - : : : - - : : : | |
>>> if ok: | |
... print("ok") | |
... | |
ok | |
""" | |
pass | |
################################################################################ | |
def make_sequence_creator(func, M): | |
f = lambda n: [func(n) for m in xrange(M)] | |
f.__name__ = func.__name__ | |
return f | |
def ones(n): | |
return [1]*n | |
def sorted_random(max_): | |
def f(n): | |
L = random.sample(xrange(max_), n) | |
L.sort() | |
return L | |
f.__name__ = "sorted_random_%d" % max_ | |
return f | |
M = 10 # number of lists | |
sequence_creators = [make_sequence_creator(f, M) for f in (sorted_random(2**31-1), ones, range)] | |
################################################################################ | |
if __name__ == '__main__': | |
import doctest; doctest.testmod() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment