Skip to content

Instantly share code, notes, and snippets.

@zed
Created January 23, 2009 16:49
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save zed/51074 to your computer and use it in GitHub Desktop.
Save zed/51074 to your computer and use it in GitHub Desktop.
Generate performance plot
*.db
*.xy
*.pdf
*.png
*.pyc
*.pyo
"""
http://stackoverflow.com/questions/489999/python-convert-list-of-ints-to-one-number#490031
"""
import random
from itertools import imap
FUNS = []
def test_fun(fun):
FUNS.append(fun)
return fun
@test_fun
def _map(nums):
return int(''.join(map(str, nums)))
@test_fun
def _imap(nums):
return int(''.join(imap(str, nums)))
@test_fun
def _interpolation(nums):
return int(''.join('%d' % num for num in nums))
@test_fun
def _genexp(nums):
return int(''.join(str(num) for num in nums))
@test_fun
def _sum(nums):
return sum(digit * 10 ** (len(nums) - 1 - i)
for i, digit in enumerate(nums))
@test_fun
def _reduce(nums):
return int(reduce(lambda x, y: x + str(y), nums, ''))
@test_fun
def _builtins(nums):
return int(filter(str.isdigit, repr(nums)))
@test_fun
def _accumulator(nums):
tot = 0
for num in nums:
tot *= 10
tot += num
return tot
def randrange1_10(digit_count):
return [random.randrange(1, 10) for i in xrange(digit_count)]
if __name__ == '__main__':
from subprocess import check_call
import sys
assert_results = [fun([1, 2, 3]) for fun in FUNS]
assert all(result == 123 for result in assert_results)
check_call(["python", "make-figures.py"] +
["--sort-function=cdleary." + f.__name__ for f in FUNS] +
["--sequence-creator=cdleary.randrange1_10"] +
sys.argv[1:])
#!/usr/bin/env python
"""Generate performance graphs. See
http://stackoverflow.com/questions/464960/code-golf-combining-multiple-sorted-lists-into-a-single-sorted-list#464967
"""
from __future__ import with_statement
from optparse import OptionParser, make_option
import collections
import copy
import glob
import itertools
import math
import operator
import os
import pickle
import re
import sys
import tempfile
import timeit
# numpy, matplotlib used only for plotting
import numpy
import pylab
from merge_funcs import (merge_26, sort_builtin, merge_alabaster, sort_deestan,
make_sequence_creator, sorted_random, ones)
def eval_dottedname(dottedname):
"""
>>> eval_dottedname("os.path.join") #doctest: +ELLIPSIS
<function join at 0x...>
>>> eval_dottedname("merge_funcs.sort_builtin") #doctest: +ELLIPSIS
<function sort_builtin at 0x...>
>>> eval_dottedname("merge_funcs") #doctest: +ELLIPSIS
<module 'merge_funcs' from '...'>
"""
return reduce(getattr, dottedname.split(".")[1:], __import__(dottedname.partition(".")[0]))
def usec(function, arguments, readonly_args=False):
"""
Given `function` and `arguments` return a time (in microseconds) it takes to make the call.
Note: There is an overhead in executing a function that does nothing.
http://www.rosettacode.org/wiki/Query_Performance#Python
>>> 5 > usec(nothing, [], True) > 0
True
>>> 5 > usec(identity, [1], True) > 0
True
"""
funcname = function.__name__
fd, pickle_fn = tempfile.mkstemp(suffix='.pickle', prefix='usec', text=True)
pickle.dump((function, arguments), os.fdopen(fd, 'w'))
del function
del arguments # free memory
timer = timeit.Timer(stmt='%(funcname)s(*args)' % vars(),
setup='import pickle; %(funcname)s, args = pickle.load(open(%(pickle_fn)r))' % vars())
try:
try:
seconds, N = 0, 1
while seconds < 0.2:
seconds = min(timer.repeat(repeat=3, number=N))
N *= 10
if not readonly_args:
break
microseconds = round(1000000 * seconds / (N / 10), 1) # per loop
return microseconds
except RuntimeError, err:
#XXX for `merge_alabaster`
if "maximum recursion depth exceeded" in err.message:
print >> sys.stderr, err.message
return 0
else:
raise
except:
timer.print_exc(file=sys.stderr)
raise
def nothing(*args): pass
def identity(x): return x
def writedat(filename, x, y, xprecision=3, yprecision=5):
"""
Write two equal-sized numerical arrays `x' and `y' to a two-column text file named `filename'.
The first column of the file contains values from an `x'-array
with a given `xprecision', the second -- values from `y'-array
with `yprecision'.
http://www.rosettacode.org/wiki/Write_float_arrays_to_a_text_file#Python
Example usage
>>> import math
>>> x = [1, 2, 3, 1e11]
>>> y = map(math.sqrt, x)
>>> y
[1.0, 1.4142135623730951, 1.7320508075688772, 316227.76601683791]
>>> writedat('sqrt.dat', x, y)
written 'sqrt.dat'
>>> # check
...
>>> for line in open('sqrt.dat'): #doctest:+NORMALIZE_WHITESPACE
... print line
...
1 1
2 1.4142
3 1.7321
1e+011 3.1623e+005
>>> import os
>>> os.remove('sqrt.dat')
"""
with open(filename,'w') as f:
for a, b in itertools.izip(x, y):
print >> f, "%.*g\t%.*g" % (xprecision, a, yprecision, b)
print "written '%s'" % filename
def write_timings(npoints, maxN, sort_functions, sequence_creators, prefix=''):
"""
http://www.rosettacode.org/wiki/Measure_relative_performance_of_sorting_algorithms_implementations#Write_timings
"""
maxexp = math.log(maxN, 2)
step = (maxexp - 1) / npoints
Ns = [2**e for e in range(1, int(maxexp + 1.5), int(step + .5) or 1)]
for sort in sort_functions:
for make_seq in sequence_creators:
L = make_seq(10)
Lcopy = copy.deepcopy(L)
assert L == Lcopy
_ = sort(L)
_ = sort(L)
Ts = map(lambda n: usec(sort, (make_seq(n),), readonly_args=(L == Lcopy)), Ns)
writedat('%s%s-%s-%d-%d.xy' % (prefix, sort.__name__, make_seq.__name__,
len(Ns), max(Ns)), Ns, Ts)
def plotdd(dictplotdict,prefix=''):
"""See ``plot_timings()`` below."""
symbols = list('o^v<>s+xDd1234hHp|_')
colors = list('bgrcmyk') # split string on distinct characters
for npoints, plotdict in dictplotdict.iteritems():
for ttle, lst in plotdict.iteritems():
pylab.hold(False)
for i, (label, polynom, x, y) in enumerate(sorted(lst,key=operator.itemgetter(0))):
# dots
pylab.plot(x, y, colors[i % len(colors)] + symbols[i % len(symbols)],
# legend
label='%.2g %s + %.2g %s' % (polynom[1], polynom.variable,
polynom[0], label))
pylab.hold(True) # plot on the same graph
y = numpy.polyval(polynom, x)
print polynom, label
# lines
pylab.plot(x, y, colors[i % len(colors)], label= '_nolegend_')
pylab.legend(loc='upper left')
pylab.xlabel(polynom.variable)
pylab.ylabel('log2( time in microseconds )')
pylab.title(ttle, verticalalignment='bottom')
figname = '%(prefix)s_%(npoints)03d%(ttle)s' % vars()
pylab.savefig(figname+'.png')
pylab.savefig(figname+'.pdf')
print figname
def plot_timings(prefix=''):
"""
http://www.rosettacode.org/wiki/Measure_relative_performance_of_sorting_algorithms_implementations#Plot_timings
"""
makedict = lambda: collections.defaultdict(lambda: collections.defaultdict(list))
df = makedict()
ds = makedict()
# populate plot dictionaries
for filename in glob.glob('%s*.xy' % prefix):
m = re.match(r'%s([^-]+)-([^-]+)-(\d+)-(\d+)\.xy' % prefix, filename)
print filename
assert m, filename
funcname, seqname, npoints, maxN = m.groups()
npoints, maxN = int(npoints), int(maxN)
Ns, Ts = numpy.loadtxt(filename, dtype='f', unpack=True)
# Ns -- sequences lengths
# Ts -- corresponding times
assert len(Ns) == len(Ts) == npoints
# find approximating polynom in a form log(y) = a*log(x) + b
logsafe = numpy.logical_and(Ns>0, Ts>0)
Ts = numpy.log2(Ts[logsafe])
Ns = numpy.log2(Ns[logsafe])
coeffs = numpy.polyfit(Ns, Ts, deg=1)
poly = numpy.poly1d(coeffs, variable='log2(N)')
#
df[npoints][funcname].append((seqname, poly, Ns, Ts))
ds[npoints][seqname].append((funcname, poly, Ns, Ts))
# actual plotting
plotdd(df, prefix)
plotdd(ds, prefix) # see ``plotdd()`` above
def dottednames2funcs(dottednames):
return [eval_dottedname(f if "." in f else "%s.%s" % (__name__, f))
for f in dottednames]
def main(args=None):
""" Based on:
http://www.rosettacode.org/wiki/Measure_relative_performance_of_sorting_algorithms_implementations#Figures:_log2.28_time_in_microseconds_.29_vs._log2.28_sequence_length_.29
"""
# process args
if args is None:
args = sys.argv[1:]
default_sort_functions = "merge_26 sort_builtin merge_funcs.merge_alabaster".split()
p = OptionParser(description=__doc__, option_list=[
make_option("--plot-only", action="store_true",
help="don't recompute *.xy files, just plot them"),
make_option("-p", "--prefix",
help="a prefix for *.xy files (default: '%default')"),
make_option("--npoints", type="int",
help="desirable number of sample point on the graph"),
make_option("--nsublists", type="int",
help="number of sublists (default: %default)"),
make_option("--maxn", type="int",
help="maximum size of sublists"),
make_option("-s", "--sort-function", action="append",
help=("a sort function to plot, to add more functions to comparison"
" specify the option multiple times"
" (default: %s)" % default_sort_functions)),
make_option("--sequence-creator", action="append",
help=("a sequence creator function accepts N return a sequence"
"(default: special case to test sorting multiple sorted lists)")),
])
p.set_defaults(plot_only=False, prefix='',
npoints=100, nsublists=10, maxn=2**10)
options, args = p.parse_args(args)
if args:
p.error("script does not accept arguments, but given %r" % args)
# measure performance and write acquired data to *.xy files
M = options.nsublists # number of sublists
if not options.sequence_creator:
sequence_creators = [make_sequence_creator(f, M)
for f in (sorted_random(2**31-1), ones, range)]
else:
sequence_creators = dottednames2funcs(options.sequence_creator)
if not options.sort_function:
options.sort_function = default_sort_functions
sort_functions = dottednames2funcs(options.sort_function)
if not options.plot_only:
write_timings(npoints=options.npoints, maxN=options.maxn,
sort_functions=sort_functions,
sequence_creators=sequence_creators,
prefix=options.prefix)
# plot *.xy files
plot_timings(options.prefix)
if __name__ == '__main__':
import doctest; doctest.testmod()
sys.exit(main(sys.argv[1:]))
#!/usr/bin/env python
"""
http://stackoverflow.com/questions/464960/code-golf-combining-multiple-sorted-lists-into-a-single-sorted-list#464967
"""
from heapq import heappop, heapreplace, heapify
import copy
import random
def merge_26(lists):
"""
See `heapq.merge()`
http://svn.python.org/view/python/trunk/Lib/heapq.py?view=markup
>>> merge_26([[1,3,5,7], [0,2,4,8], [5,10,15,20], [], [25]])
[0, 1, 2, 3, 4, 5, 5, 7, 8, 10, 15, 20, 25]
"""
_heappop, _heapreplace, _StopIteration = heappop, heapreplace, StopIteration
h = []
h_append = h.append
for itnum, it in enumerate(map(iter, lists)):
try:
next = it.next
h_append([next(), itnum, next])
except _StopIteration:
pass
heapify(h)
result = []
r_append = result.append
while 1:
try:
while 1:
v, itnum, next = s = h[0] # raises IndexError when h is empty
r_append(v)
s[0] = next() # raises StopIteration when exhausted
_heapreplace(h, s) # restore heap condition
except _StopIteration:
_heappop(h) # remove empty iterator
except IndexError:
return result
def sort_builtin(lists):
L = sum(lists, [])
L.sort()
return L
def m(c,l):
"""
http://stackoverflow.com/questions/464960/code-golf-combining-multiple-sorted-lists-into-a-single-sorted-list#464967
>>> m([], [[1,4], [2,6], [3,5]])
[1, 2, 3, 4, 5, 6]
>>> L = [[1]*10 for _ in range(100)]
>>> RL = 1007
>>> import sys
>>> rl = sys.getrecursionlimit()
>>> if rl < RL:
... len(m([], copy.deepcopy(L)))
Traceback (most recent call last):
...
RuntimeError: maximum recursion depth exceeded
>>> sys.setrecursionlimit(RL)
>>> len(m([], copy.deepcopy(L)))
1000
>>> sys.setrecursionlimit(rl)
"""
try:
c += [l[min((m[0], i) for i,m in enumerate(l) if m)[1]].pop(0)]
return m(c,l)
except ValueError:
return c
def merge_alabaster(lists):
return m([], lists)
def sort_deestan(i):
"""
http://stackoverflow.com/questions/464960/code-golf-combining-multiple-sorted-lists-into-a-single-sorted-list#465044
"""
y=[];x=sum(i,[])
while x:n=min(x);y+=[n];x.remove(n)
return y
merge_funcs = (merge_26, sort_builtin, merge_alabaster, sort_deestan)
def test_mergefuncs():
"""
>>> test_data = (
... ([[1,3,5,7], [0,2,4,8], [5,10,15,20], [], [25]],
... [0, 1, 2, 3, 4, 5, 5, 7, 8, 10, 15, 20, 25]),
... ([[1]*10 for _ in range(99)],
... [1]*990),
... )
>>> ok = True
>>> for f in merge_funcs: #doctest:+ELLIPSIS
... for input_lists, expected in test_data:
... print '-',
... r = f(copy.deepcopy(input_lists))
... if r != expected:
... print(f.__name__, input_lists, r, expected)
... ok = False
... for c in sequence_creators:
... print ':',
... L = c(20)
... r = f(copy.deepcopy(L))
... if r != merge_26(L):
... print(f.__name__, L, r, merge_26(L))
... ok = False
- - : : : - - : : : - - : : : - - : : :
>>> if ok:
... print("ok")
...
ok
"""
pass
################################################################################
def make_sequence_creator(func, M):
f = lambda n: [func(n) for m in xrange(M)]
f.__name__ = func.__name__
return f
def ones(n):
return [1]*n
def sorted_random(max_):
def f(n):
L = random.sample(xrange(max_), n)
L.sort()
return L
f.__name__ = "sorted_random_%d" % max_
return f
M = 10 # number of lists
sequence_creators = [make_sequence_creator(f, M) for f in (sorted_random(2**31-1), ones, range)]
################################################################################
if __name__ == '__main__':
import doctest; doctest.testmod()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment