Last active
April 1, 2022 12:49
-
-
Save zed/235404 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python2 | |
"""Generate performance graphs. See | |
http://stackoverflow.com/questions/464960/code-golf-combining-multiple-sorted-lists-into-a-single-sorted-list#464967 | |
""" | |
from __future__ import with_statement | |
from optparse import OptionParser, make_option | |
import collections | |
import copy | |
import glob | |
import itertools | |
import math | |
import operator | |
import os | |
import pickle | |
import re | |
import sys | |
import tempfile | |
import timeit | |
import warnings | |
# numpy, matplotlib used only for plotting | |
try: | |
import numpy | |
import pylab | |
except ImportError: | |
warnings.warn("can't import numpy/pylab") | |
try: | |
from merge_funcs import (merge_26, sort_builtin, merge_alabaster, | |
sort_deestan) | |
except ImportError: | |
pass | |
__version__ = "1.0.0" | |
def make_sequence_creator(func, M): | |
def f(n): return [func(n) for m in xrange(M)] | |
f.__name__ = func.__name__ | |
return f | |
def ones(n): | |
return [1] * n | |
def sorted_random(max_): | |
def f(n): | |
L = random.sample(xrange(max_), n) | |
# XXX L.sort() | |
return L | |
f.__name__ = "sorted_random_%d" % max_ | |
return f | |
def eval_dottedname(dottedname): | |
""" | |
>>> eval_dottedname("os.path.join") #doctest: +ELLIPSIS | |
<function join at 0x...> | |
>>> eval_dottedname("sys.exit") #doctest: +ELLIPSIS | |
<built-in function exit> | |
>>> eval_dottedname("sys") #doctest: +ELLIPSIS | |
<module 'sys' (built-in)> | |
""" | |
return reduce(getattr, dottedname.split(".")[1:], __import__(dottedname.partition(".")[0])) | |
def usec(function, arguments, readonly_args=False): | |
""" | |
Given `function` and `arguments` return a time (in microseconds) it takes to make the call. | |
Note: There is an overhead in executing a function that does nothing. | |
http://www.rosettacode.org/wiki/Query_Performance#Python | |
>>> 5 > usec(nothing, [], True) > 0 | |
True | |
>>> 5 > usec(identity, [1], True) > 0 | |
True | |
""" | |
funcname = function.__name__ | |
fd, pickle_fn = tempfile.mkstemp( | |
suffix='.pickle', prefix='usec', text=True) | |
pickle.dump((function, arguments), os.fdopen(fd, 'w')) | |
del function | |
del arguments # free memory | |
timer = timeit.Timer(stmt='%(funcname)s(*args)' % vars(), | |
setup='import pickle; %(funcname)s, args = pickle.load(open(%(pickle_fn)r))' % vars()) | |
try: | |
try: | |
seconds, N = 0, 1 | |
while seconds < 0.2: | |
seconds = min(timer.repeat(repeat=3, number=N)) | |
N *= 10 | |
if not readonly_args: | |
break | |
microseconds = round(1000000 * seconds / (N / 10), 1) # per loop | |
return microseconds | |
except RuntimeError, err: | |
# XXX for `merge_alabaster` | |
if "maximum recursion depth exceeded" in err.message: | |
print >> sys.stderr, err.message | |
return 0 | |
else: | |
raise | |
except: | |
timer.print_exc(file=sys.stderr) | |
raise | |
def nothing(*args): pass | |
def identity(x): return x | |
def writedat(filename, x, y, xprecision=3, yprecision=5): | |
r""" | |
Write two equal-sized numerical arrays `x' and `y' to a two-column text file named `filename'. | |
The first column of the file contains values from an `x'-array | |
with a given `xprecision', the second -- values from `y'-array | |
with `yprecision'. | |
http://www.rosettacode.org/wiki/Write_float_arrays_to_a_text_file#Python | |
Example usage | |
>>> import math | |
>>> x = [1, 2, 3, 1e11] | |
>>> y = map(math.sqrt, x) | |
>>> y | |
[1.0, 1.4142135623730951, 1.7320508075688772, 316227.76601683791] | |
>>> writedat('sqrt.dat', x, y) | |
written 'sqrt.dat' | |
>>> # check | |
... | |
>>> open('sqrt.dat').read() | |
'1\t1\n2\t1.4142\n3\t1.7321\n1e+11\t3.1623e+05\n' | |
>>> import os | |
>>> os.remove('sqrt.dat') | |
""" | |
with open(filename, 'w') as f: | |
for a, b in itertools.izip(x, y): | |
print >> f, "%.*g\t%.*g" % (xprecision, a, yprecision, b) | |
print "written '%s'" % filename | |
def write_timings(npoints, maxN, sort_functions, sequence_creators, prefix=''): | |
""" | |
http://www.rosettacode.org/wiki/Measure_relative_performance_of_sorting_algorithms_implementations#Write_timings | |
""" | |
maxexp = math.log(maxN, 2) | |
step = (maxexp - 1) / npoints | |
Ns = [2**e for e in range(1, int(maxexp + 1.5), int(step + .5) or 1)] | |
for sort in sort_functions: | |
for make_seq in sequence_creators: | |
L = make_seq(10) | |
Lcopy = copy.deepcopy(L) | |
assert L == Lcopy | |
_ = sort(L) | |
_ = sort(L) | |
Ts = map(lambda n: usec(sort, (make_seq(n),), | |
readonly_args=(L == Lcopy)), Ns) | |
writedat('%s%s-%s-%d-%d.xy' % (prefix, sort.__name__, make_seq.__name__, | |
len(Ns), max(Ns)), Ns, Ts) | |
def plotdd(dictplotdict, prefix=''): | |
"""See ``plot_timings()`` below.""" | |
symbols = list('o^v<>s+xDd1234hHp|_') | |
colors = list('bgrcmyk') # split string on distinct characters | |
for npoints, plotdict in dictplotdict.iteritems(): | |
for ttle, lst in plotdict.iteritems(): | |
pylab.hold(False) | |
for i, (label, polynom, x, y) in enumerate(sorted(lst, key=operator.itemgetter(0))): | |
# dots | |
pylab.plot(x, y, colors[i % len(colors)] + symbols[i % len(symbols)], | |
# legend | |
label='%.2g %s + %.2g %s' % (polynom[1], polynom.variable, | |
polynom[0], label)) | |
pylab.hold(True) # plot on the same graph | |
y = numpy.polyval(polynom, x) | |
print polynom, label | |
# lines | |
pylab.plot(x, y, colors[i % len(colors)], label='_nolegend_') | |
pylab.legend(loc='upper left') | |
pylab.xlabel(polynom.variable) | |
pylab.ylabel('log2( time in microseconds )') | |
pylab.title(ttle, verticalalignment='bottom') | |
figname = '%(prefix)s_%(npoints)03d%(ttle)s' % vars() | |
pylab.savefig(figname + '.png') | |
pylab.savefig(figname + '.pdf') | |
print figname | |
def plot_timings(prefix=''): | |
""" | |
http://www.rosettacode.org/wiki/Measure_relative_performance_of_sorting_algorithms_implementations#Plot_timings | |
""" | |
def makedict(): return collections.defaultdict( | |
lambda: collections.defaultdict(list)) | |
df = makedict() | |
ds = makedict() | |
# populate plot dictionaries | |
for filename in glob.glob('%s*.xy' % prefix): | |
m = re.match(r'%s([^-]+)-([^-]+)-(\d+)-(\d+)\.xy' % prefix, filename) | |
print filename | |
assert m, filename | |
funcname, seqname, npoints, maxN = m.groups() | |
npoints, maxN = int(npoints), int(maxN) | |
Ns, Ts = numpy.loadtxt(filename, dtype='f', unpack=True) | |
# Ns -- sequences lengths | |
# Ts -- corresponding times | |
assert len(Ns) == len(Ts) == npoints | |
# find approximating polynom in a form log(y) = a*log(x) + b | |
logsafe = numpy.logical_and(Ns > 0, Ts > 0) | |
Ts = numpy.log2(Ts[logsafe]) | |
Ns = numpy.log2(Ns[logsafe]) | |
coeffs = numpy.polyfit(Ns, Ts, deg=1) | |
poly = numpy.poly1d(coeffs, variable='log2(N)') | |
# | |
df[npoints][funcname].append((seqname, poly, Ns, Ts)) | |
ds[npoints][seqname].append((funcname, poly, Ns, Ts)) | |
# actual plotting | |
plotdd(df, prefix) | |
plotdd(ds, prefix) # see ``plotdd()`` above | |
def dottednames2funcs(dottednames): | |
return [eval_dottedname(f if "." in f else "%s.%s" % (__name__, f)) | |
for f in dottednames] | |
def main(args=None): | |
""" Based on: | |
http://www.rosettacode.org/wiki/Measure_relative_performance_of_sorting_algorithms_implementations#Figures:_log2.28_time_in_microseconds_.29_vs._log2.28_sequence_length_.29 | |
""" | |
# process args | |
if args is None: | |
args = sys.argv[1:] | |
default_sort_functions = "merge_26 sort_builtin merge_funcs.merge_alabaster".split() | |
p = OptionParser(description=__doc__, option_list=[ | |
make_option("--plot-only", action="store_true", | |
help="don't recompute *.xy files, just plot them"), | |
make_option("-p", "--prefix", | |
help="a prefix for *.xy files (default: '%default')"), | |
make_option("--npoints", type="int", | |
help="desirable number of sample point on the graph"), | |
make_option("--nsublists", type="int", | |
help="number of sublists (default: %default)"), | |
make_option("--maxn", type="int", | |
help="maximum size of sublists"), | |
make_option("-s", "--sort-function", action="append", | |
help=("a sort function to plot, to add more functions to comparison" | |
" specify the option multiple times" | |
" (default: %s)" % default_sort_functions)), | |
make_option("--sequence-creator", action="append", | |
help=("a sequence creator function accepts N return a sequence" | |
"(default: special case to test sorting multiple sorted lists)")), | |
]) | |
p.set_defaults(plot_only=False, prefix='', | |
npoints=100, nsublists=10, maxn=2**10) | |
options, args = p.parse_args(args) | |
if args: | |
p.error("script does not accept arguments, but given %r" % args) | |
if not options.plot_only: | |
# measure performance and write acquired data to *.xy files | |
M = options.nsublists # number of sublists | |
if not options.sequence_creator: | |
sequence_creators = [make_sequence_creator(f, M) | |
for f in (sorted_random(2**31 - 1), ones, range)] | |
else: | |
sequence_creators = dottednames2funcs(options.sequence_creator) | |
if not options.sort_function: | |
options.sort_function = default_sort_functions | |
sort_functions = dottednames2funcs(options.sort_function) | |
write_timings(npoints=options.npoints, maxN=options.maxn, | |
sort_functions=sort_functions, | |
sequence_creators=sequence_creators, | |
prefix=options.prefix) | |
# plot *.xy files | |
plot_timings(options.prefix) | |
if __name__ == '__main__': | |
if '--test' in sys.argv: | |
import doctest | |
doctest.testmod() | |
sys.argv.remove('--test') | |
sys.exit(main(sys.argv[1:])) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment