Skip to content

Instantly share code, notes, and snippets.

@zed
Last active April 1, 2022 12:49
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save zed/235404 to your computer and use it in GitHub Desktop.
Save zed/235404 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python2
"""Generate performance graphs. See
http://stackoverflow.com/questions/464960/code-golf-combining-multiple-sorted-lists-into-a-single-sorted-list#464967
"""
from __future__ import with_statement
from optparse import OptionParser, make_option
import collections
import copy
import glob
import itertools
import math
import operator
import os
import pickle
import re
import sys
import tempfile
import timeit
import warnings
# numpy, matplotlib used only for plotting
try:
import numpy
import pylab
except ImportError:
warnings.warn("can't import numpy/pylab")
try:
from merge_funcs import (merge_26, sort_builtin, merge_alabaster,
sort_deestan)
except ImportError:
pass
__version__ = "1.0.0"
def make_sequence_creator(func, M):
def f(n): return [func(n) for m in xrange(M)]
f.__name__ = func.__name__
return f
def ones(n):
return [1] * n
def sorted_random(max_):
def f(n):
L = random.sample(xrange(max_), n)
# XXX L.sort()
return L
f.__name__ = "sorted_random_%d" % max_
return f
def eval_dottedname(dottedname):
"""
>>> eval_dottedname("os.path.join") #doctest: +ELLIPSIS
<function join at 0x...>
>>> eval_dottedname("sys.exit") #doctest: +ELLIPSIS
<built-in function exit>
>>> eval_dottedname("sys") #doctest: +ELLIPSIS
<module 'sys' (built-in)>
"""
return reduce(getattr, dottedname.split(".")[1:], __import__(dottedname.partition(".")[0]))
def usec(function, arguments, readonly_args=False):
"""
Given `function` and `arguments` return a time (in microseconds) it takes to make the call.
Note: There is an overhead in executing a function that does nothing.
http://www.rosettacode.org/wiki/Query_Performance#Python
>>> 5 > usec(nothing, [], True) > 0
True
>>> 5 > usec(identity, [1], True) > 0
True
"""
funcname = function.__name__
fd, pickle_fn = tempfile.mkstemp(
suffix='.pickle', prefix='usec', text=True)
pickle.dump((function, arguments), os.fdopen(fd, 'w'))
del function
del arguments # free memory
timer = timeit.Timer(stmt='%(funcname)s(*args)' % vars(),
setup='import pickle; %(funcname)s, args = pickle.load(open(%(pickle_fn)r))' % vars())
try:
try:
seconds, N = 0, 1
while seconds < 0.2:
seconds = min(timer.repeat(repeat=3, number=N))
N *= 10
if not readonly_args:
break
microseconds = round(1000000 * seconds / (N / 10), 1) # per loop
return microseconds
except RuntimeError, err:
# XXX for `merge_alabaster`
if "maximum recursion depth exceeded" in err.message:
print >> sys.stderr, err.message
return 0
else:
raise
except:
timer.print_exc(file=sys.stderr)
raise
def nothing(*args): pass
def identity(x): return x
def writedat(filename, x, y, xprecision=3, yprecision=5):
r"""
Write two equal-sized numerical arrays `x' and `y' to a two-column text file named `filename'.
The first column of the file contains values from an `x'-array
with a given `xprecision', the second -- values from `y'-array
with `yprecision'.
http://www.rosettacode.org/wiki/Write_float_arrays_to_a_text_file#Python
Example usage
>>> import math
>>> x = [1, 2, 3, 1e11]
>>> y = map(math.sqrt, x)
>>> y
[1.0, 1.4142135623730951, 1.7320508075688772, 316227.76601683791]
>>> writedat('sqrt.dat', x, y)
written 'sqrt.dat'
>>> # check
...
>>> open('sqrt.dat').read()
'1\t1\n2\t1.4142\n3\t1.7321\n1e+11\t3.1623e+05\n'
>>> import os
>>> os.remove('sqrt.dat')
"""
with open(filename, 'w') as f:
for a, b in itertools.izip(x, y):
print >> f, "%.*g\t%.*g" % (xprecision, a, yprecision, b)
print "written '%s'" % filename
def write_timings(npoints, maxN, sort_functions, sequence_creators, prefix=''):
"""
http://www.rosettacode.org/wiki/Measure_relative_performance_of_sorting_algorithms_implementations#Write_timings
"""
maxexp = math.log(maxN, 2)
step = (maxexp - 1) / npoints
Ns = [2**e for e in range(1, int(maxexp + 1.5), int(step + .5) or 1)]
for sort in sort_functions:
for make_seq in sequence_creators:
L = make_seq(10)
Lcopy = copy.deepcopy(L)
assert L == Lcopy
_ = sort(L)
_ = sort(L)
Ts = map(lambda n: usec(sort, (make_seq(n),),
readonly_args=(L == Lcopy)), Ns)
writedat('%s%s-%s-%d-%d.xy' % (prefix, sort.__name__, make_seq.__name__,
len(Ns), max(Ns)), Ns, Ts)
def plotdd(dictplotdict, prefix=''):
"""See ``plot_timings()`` below."""
symbols = list('o^v<>s+xDd1234hHp|_')
colors = list('bgrcmyk') # split string on distinct characters
for npoints, plotdict in dictplotdict.iteritems():
for ttle, lst in plotdict.iteritems():
pylab.hold(False)
for i, (label, polynom, x, y) in enumerate(sorted(lst, key=operator.itemgetter(0))):
# dots
pylab.plot(x, y, colors[i % len(colors)] + symbols[i % len(symbols)],
# legend
label='%.2g %s + %.2g %s' % (polynom[1], polynom.variable,
polynom[0], label))
pylab.hold(True) # plot on the same graph
y = numpy.polyval(polynom, x)
print polynom, label
# lines
pylab.plot(x, y, colors[i % len(colors)], label='_nolegend_')
pylab.legend(loc='upper left')
pylab.xlabel(polynom.variable)
pylab.ylabel('log2( time in microseconds )')
pylab.title(ttle, verticalalignment='bottom')
figname = '%(prefix)s_%(npoints)03d%(ttle)s' % vars()
pylab.savefig(figname + '.png')
pylab.savefig(figname + '.pdf')
print figname
def plot_timings(prefix=''):
"""
http://www.rosettacode.org/wiki/Measure_relative_performance_of_sorting_algorithms_implementations#Plot_timings
"""
def makedict(): return collections.defaultdict(
lambda: collections.defaultdict(list))
df = makedict()
ds = makedict()
# populate plot dictionaries
for filename in glob.glob('%s*.xy' % prefix):
m = re.match(r'%s([^-]+)-([^-]+)-(\d+)-(\d+)\.xy' % prefix, filename)
print filename
assert m, filename
funcname, seqname, npoints, maxN = m.groups()
npoints, maxN = int(npoints), int(maxN)
Ns, Ts = numpy.loadtxt(filename, dtype='f', unpack=True)
# Ns -- sequences lengths
# Ts -- corresponding times
assert len(Ns) == len(Ts) == npoints
# find approximating polynom in a form log(y) = a*log(x) + b
logsafe = numpy.logical_and(Ns > 0, Ts > 0)
Ts = numpy.log2(Ts[logsafe])
Ns = numpy.log2(Ns[logsafe])
coeffs = numpy.polyfit(Ns, Ts, deg=1)
poly = numpy.poly1d(coeffs, variable='log2(N)')
#
df[npoints][funcname].append((seqname, poly, Ns, Ts))
ds[npoints][seqname].append((funcname, poly, Ns, Ts))
# actual plotting
plotdd(df, prefix)
plotdd(ds, prefix) # see ``plotdd()`` above
def dottednames2funcs(dottednames):
return [eval_dottedname(f if "." in f else "%s.%s" % (__name__, f))
for f in dottednames]
def main(args=None):
""" Based on:
http://www.rosettacode.org/wiki/Measure_relative_performance_of_sorting_algorithms_implementations#Figures:_log2.28_time_in_microseconds_.29_vs._log2.28_sequence_length_.29
"""
# process args
if args is None:
args = sys.argv[1:]
default_sort_functions = "merge_26 sort_builtin merge_funcs.merge_alabaster".split()
p = OptionParser(description=__doc__, option_list=[
make_option("--plot-only", action="store_true",
help="don't recompute *.xy files, just plot them"),
make_option("-p", "--prefix",
help="a prefix for *.xy files (default: '%default')"),
make_option("--npoints", type="int",
help="desirable number of sample point on the graph"),
make_option("--nsublists", type="int",
help="number of sublists (default: %default)"),
make_option("--maxn", type="int",
help="maximum size of sublists"),
make_option("-s", "--sort-function", action="append",
help=("a sort function to plot, to add more functions to comparison"
" specify the option multiple times"
" (default: %s)" % default_sort_functions)),
make_option("--sequence-creator", action="append",
help=("a sequence creator function accepts N return a sequence"
"(default: special case to test sorting multiple sorted lists)")),
])
p.set_defaults(plot_only=False, prefix='',
npoints=100, nsublists=10, maxn=2**10)
options, args = p.parse_args(args)
if args:
p.error("script does not accept arguments, but given %r" % args)
if not options.plot_only:
# measure performance and write acquired data to *.xy files
M = options.nsublists # number of sublists
if not options.sequence_creator:
sequence_creators = [make_sequence_creator(f, M)
for f in (sorted_random(2**31 - 1), ones, range)]
else:
sequence_creators = dottednames2funcs(options.sequence_creator)
if not options.sort_function:
options.sort_function = default_sort_functions
sort_functions = dottednames2funcs(options.sort_function)
write_timings(npoints=options.npoints, maxN=options.maxn,
sort_functions=sort_functions,
sequence_creators=sequence_creators,
prefix=options.prefix)
# plot *.xy files
plot_timings(options.prefix)
if __name__ == '__main__':
if '--test' in sys.argv:
import doctest
doctest.testmod()
sys.argv.remove('--test')
sys.exit(main(sys.argv[1:]))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment