Instantly share code, notes, and snippets.

# zed/.gitignore Created Jan 23, 2009

What would you like to do?
Generate performance plot
 *.db *.xy *.pdf *.png *.pyc *.pyo
 """ http://stackoverflow.com/questions/489999/python-convert-list-of-ints-to-one-number#490031 """ import random from itertools import imap FUNS = [] def test_fun(fun): FUNS.append(fun) return fun @test_fun def _map(nums): return int(''.join(map(str, nums))) @test_fun def _imap(nums): return int(''.join(imap(str, nums))) @test_fun def _interpolation(nums): return int(''.join('%d' % num for num in nums)) @test_fun def _genexp(nums): return int(''.join(str(num) for num in nums)) @test_fun def _sum(nums): return sum(digit * 10 ** (len(nums) - 1 - i) for i, digit in enumerate(nums)) @test_fun def _reduce(nums): return int(reduce(lambda x, y: x + str(y), nums, '')) @test_fun def _builtins(nums): return int(filter(str.isdigit, repr(nums))) @test_fun def _accumulator(nums): tot = 0 for num in nums: tot *= 10 tot += num return tot def randrange1_10(digit_count): return [random.randrange(1, 10) for i in xrange(digit_count)] if __name__ == '__main__': from subprocess import check_call import sys assert_results = [fun([1, 2, 3]) for fun in FUNS] assert all(result == 123 for result in assert_results) check_call(["python", "make-figures.py"] + ["--sort-function=cdleary." + f.__name__ for f in FUNS] + ["--sequence-creator=cdleary.randrange1_10"] + sys.argv[1:])
 #!/usr/bin/env python """Generate performance graphs. See http://stackoverflow.com/questions/464960/code-golf-combining-multiple-sorted-lists-into-a-single-sorted-list#464967 """ from __future__ import with_statement from optparse import OptionParser, make_option import collections import copy import glob import itertools import math import operator import os import pickle import re import sys import tempfile import timeit # numpy, matplotlib used only for plotting import numpy import pylab from merge_funcs import (merge_26, sort_builtin, merge_alabaster, sort_deestan, make_sequence_creator, sorted_random, ones) def eval_dottedname(dottedname): """ >>> eval_dottedname("os.path.join") #doctest: +ELLIPSIS >>> eval_dottedname("merge_funcs.sort_builtin") #doctest: +ELLIPSIS >>> eval_dottedname("merge_funcs") #doctest: +ELLIPSIS """ return reduce(getattr, dottedname.split(".")[1:], __import__(dottedname.partition(".")[0])) def usec(function, arguments, readonly_args=False): """ Given `function` and `arguments` return a time (in microseconds) it takes to make the call. Note: There is an overhead in executing a function that does nothing. http://www.rosettacode.org/wiki/Query_Performance#Python >>> 5 > usec(nothing, [], True) > 0 True >>> 5 > usec(identity, [1], True) > 0 True """ funcname = function.__name__ fd, pickle_fn = tempfile.mkstemp(suffix='.pickle', prefix='usec', text=True) pickle.dump((function, arguments), os.fdopen(fd, 'w')) del function del arguments # free memory timer = timeit.Timer(stmt='%(funcname)s(*args)' % vars(), setup='import pickle; %(funcname)s, args = pickle.load(open(%(pickle_fn)r))' % vars()) try: try: seconds, N = 0, 1 while seconds < 0.2: seconds = min(timer.repeat(repeat=3, number=N)) N *= 10 if not readonly_args: break microseconds = round(1000000 * seconds / (N / 10), 1) # per loop return microseconds except RuntimeError, err: #XXX for `merge_alabaster` if "maximum recursion depth exceeded" in err.message: print >> sys.stderr, err.message return 0 else: raise except: timer.print_exc(file=sys.stderr) raise def nothing(*args): pass def identity(x): return x def writedat(filename, x, y, xprecision=3, yprecision=5): """ Write two equal-sized numerical arrays `x' and `y' to a two-column text file named `filename'. The first column of the file contains values from an `x'-array with a given `xprecision', the second -- values from `y'-array with `yprecision'. http://www.rosettacode.org/wiki/Write_float_arrays_to_a_text_file#Python Example usage >>> import math >>> x = [1, 2, 3, 1e11] >>> y = map(math.sqrt, x) >>> y [1.0, 1.4142135623730951, 1.7320508075688772, 316227.76601683791] >>> writedat('sqrt.dat', x, y) written 'sqrt.dat' >>> # check ... >>> for line in open('sqrt.dat'): #doctest:+NORMALIZE_WHITESPACE ... print line ... 1 1 2 1.4142 3 1.7321 1e+011 3.1623e+005 >>> import os >>> os.remove('sqrt.dat') """ with open(filename,'w') as f: for a, b in itertools.izip(x, y): print >> f, "%.*g\t%.*g" % (xprecision, a, yprecision, b) print "written '%s'" % filename def write_timings(npoints, maxN, sort_functions, sequence_creators, prefix=''): """ http://www.rosettacode.org/wiki/Measure_relative_performance_of_sorting_algorithms_implementations#Write_timings """ maxexp = math.log(maxN, 2) step = (maxexp - 1) / npoints Ns = [2**e for e in range(1, int(maxexp + 1.5), int(step + .5) or 1)] for sort in sort_functions: for make_seq in sequence_creators: L = make_seq(10) Lcopy = copy.deepcopy(L) assert L == Lcopy _ = sort(L) _ = sort(L) Ts = map(lambda n: usec(sort, (make_seq(n),), readonly_args=(L == Lcopy)), Ns) writedat('%s%s-%s-%d-%d.xy' % (prefix, sort.__name__, make_seq.__name__, len(Ns), max(Ns)), Ns, Ts) def plotdd(dictplotdict,prefix=''): """See ``plot_timings()`` below.""" symbols = list('o^v<>s+xDd1234hHp|_') colors = list('bgrcmyk') # split string on distinct characters for npoints, plotdict in dictplotdict.iteritems(): for ttle, lst in plotdict.iteritems(): pylab.hold(False) for i, (label, polynom, x, y) in enumerate(sorted(lst,key=operator.itemgetter(0))): # dots pylab.plot(x, y, colors[i % len(colors)] + symbols[i % len(symbols)], # legend label='%.2g %s + %.2g %s' % (polynom[1], polynom.variable, polynom[0], label)) pylab.hold(True) # plot on the same graph y = numpy.polyval(polynom, x) print polynom, label # lines pylab.plot(x, y, colors[i % len(colors)], label= '_nolegend_') pylab.legend(loc='upper left') pylab.xlabel(polynom.variable) pylab.ylabel('log2( time in microseconds )') pylab.title(ttle, verticalalignment='bottom') figname = '%(prefix)s_%(npoints)03d%(ttle)s' % vars() pylab.savefig(figname+'.png') pylab.savefig(figname+'.pdf') print figname def plot_timings(prefix=''): """ http://www.rosettacode.org/wiki/Measure_relative_performance_of_sorting_algorithms_implementations#Plot_timings """ makedict = lambda: collections.defaultdict(lambda: collections.defaultdict(list)) df = makedict() ds = makedict() # populate plot dictionaries for filename in glob.glob('%s*.xy' % prefix): m = re.match(r'%s([^-]+)-([^-]+)-(\d+)-(\d+)\.xy' % prefix, filename) print filename assert m, filename funcname, seqname, npoints, maxN = m.groups() npoints, maxN = int(npoints), int(maxN) Ns, Ts = numpy.loadtxt(filename, dtype='f', unpack=True) # Ns -- sequences lengths # Ts -- corresponding times assert len(Ns) == len(Ts) == npoints # find approximating polynom in a form log(y) = a*log(x) + b logsafe = numpy.logical_and(Ns>0, Ts>0) Ts = numpy.log2(Ts[logsafe]) Ns = numpy.log2(Ns[logsafe]) coeffs = numpy.polyfit(Ns, Ts, deg=1) poly = numpy.poly1d(coeffs, variable='log2(N)') # df[npoints][funcname].append((seqname, poly, Ns, Ts)) ds[npoints][seqname].append((funcname, poly, Ns, Ts)) # actual plotting plotdd(df, prefix) plotdd(ds, prefix) # see ``plotdd()`` above def dottednames2funcs(dottednames): return [eval_dottedname(f if "." in f else "%s.%s" % (__name__, f)) for f in dottednames] def main(args=None): """ Based on: http://www.rosettacode.org/wiki/Measure_relative_performance_of_sorting_algorithms_implementations#Figures:_log2.28_time_in_microseconds_.29_vs._log2.28_sequence_length_.29 """ # process args if args is None: args = sys.argv[1:] default_sort_functions = "merge_26 sort_builtin merge_funcs.merge_alabaster".split() p = OptionParser(description=__doc__, option_list=[ make_option("--plot-only", action="store_true", help="don't recompute *.xy files, just plot them"), make_option("-p", "--prefix", help="a prefix for *.xy files (default: '%default')"), make_option("--npoints", type="int", help="desirable number of sample point on the graph"), make_option("--nsublists", type="int", help="number of sublists (default: %default)"), make_option("--maxn", type="int", help="maximum size of sublists"), make_option("-s", "--sort-function", action="append", help=("a sort function to plot, to add more functions to comparison" " specify the option multiple times" " (default: %s)" % default_sort_functions)), make_option("--sequence-creator", action="append", help=("a sequence creator function accepts N return a sequence" "(default: special case to test sorting multiple sorted lists)")), ]) p.set_defaults(plot_only=False, prefix='', npoints=100, nsublists=10, maxn=2**10) options, args = p.parse_args(args) if args: p.error("script does not accept arguments, but given %r" % args) # measure performance and write acquired data to *.xy files M = options.nsublists # number of sublists if not options.sequence_creator: sequence_creators = [make_sequence_creator(f, M) for f in (sorted_random(2**31-1), ones, range)] else: sequence_creators = dottednames2funcs(options.sequence_creator) if not options.sort_function: options.sort_function = default_sort_functions sort_functions = dottednames2funcs(options.sort_function) if not options.plot_only: write_timings(npoints=options.npoints, maxN=options.maxn, sort_functions=sort_functions, sequence_creators=sequence_creators, prefix=options.prefix) # plot *.xy files plot_timings(options.prefix) if __name__ == '__main__': import doctest; doctest.testmod() sys.exit(main(sys.argv[1:]))
 #!/usr/bin/env python """ http://stackoverflow.com/questions/464960/code-golf-combining-multiple-sorted-lists-into-a-single-sorted-list#464967 """ from heapq import heappop, heapreplace, heapify import copy import random def merge_26(lists): """ See `heapq.merge()` http://svn.python.org/view/python/trunk/Lib/heapq.py?view=markup >>> merge_26([[1,3,5,7], [0,2,4,8], [5,10,15,20], [], [25]]) [0, 1, 2, 3, 4, 5, 5, 7, 8, 10, 15, 20, 25] """ _heappop, _heapreplace, _StopIteration = heappop, heapreplace, StopIteration h = [] h_append = h.append for itnum, it in enumerate(map(iter, lists)): try: next = it.next h_append([next(), itnum, next]) except _StopIteration: pass heapify(h) result = [] r_append = result.append while 1: try: while 1: v, itnum, next = s = h[0] # raises IndexError when h is empty r_append(v) s[0] = next() # raises StopIteration when exhausted _heapreplace(h, s) # restore heap condition except _StopIteration: _heappop(h) # remove empty iterator except IndexError: return result def sort_builtin(lists): L = sum(lists, []) L.sort() return L def m(c,l): """ http://stackoverflow.com/questions/464960/code-golf-combining-multiple-sorted-lists-into-a-single-sorted-list#464967 >>> m([], [[1,4], [2,6], [3,5]]) [1, 2, 3, 4, 5, 6] >>> L = [[1]*10 for _ in range(100)] >>> RL = 1007 >>> import sys >>> rl = sys.getrecursionlimit() >>> if rl < RL: ... len(m([], copy.deepcopy(L))) Traceback (most recent call last): ... RuntimeError: maximum recursion depth exceeded >>> sys.setrecursionlimit(RL) >>> len(m([], copy.deepcopy(L))) 1000 >>> sys.setrecursionlimit(rl) """ try: c += [l[min((m[0], i) for i,m in enumerate(l) if m)[1]].pop(0)] return m(c,l) except ValueError: return c def merge_alabaster(lists): return m([], lists) def sort_deestan(i): """ http://stackoverflow.com/questions/464960/code-golf-combining-multiple-sorted-lists-into-a-single-sorted-list#465044 """ y=[];x=sum(i,[]) while x:n=min(x);y+=[n];x.remove(n) return y merge_funcs = (merge_26, sort_builtin, merge_alabaster, sort_deestan) def test_mergefuncs(): """ >>> test_data = ( ... ([[1,3,5,7], [0,2,4,8], [5,10,15,20], [], [25]], ... [0, 1, 2, 3, 4, 5, 5, 7, 8, 10, 15, 20, 25]), ... ([[1]*10 for _ in range(99)], ... [1]*990), ... ) >>> ok = True >>> for f in merge_funcs: #doctest:+ELLIPSIS ... for input_lists, expected in test_data: ... print '-', ... r = f(copy.deepcopy(input_lists)) ... if r != expected: ... print(f.__name__, input_lists, r, expected) ... ok = False ... for c in sequence_creators: ... print ':', ... L = c(20) ... r = f(copy.deepcopy(L)) ... if r != merge_26(L): ... print(f.__name__, L, r, merge_26(L)) ... ok = False - - : : : - - : : : - - : : : - - : : : >>> if ok: ... print("ok") ... ok """ pass ################################################################################ def make_sequence_creator(func, M): f = lambda n: [func(n) for m in xrange(M)] f.__name__ = func.__name__ return f def ones(n): return [1]*n def sorted_random(max_): def f(n): L = random.sample(xrange(max_), n) L.sort() return L f.__name__ = "sorted_random_%d" % max_ return f M = 10 # number of lists sequence_creators = [make_sequence_creator(f, M) for f in (sorted_random(2**31-1), ones, range)] ################################################################################ if __name__ == '__main__': import doctest; doctest.testmod()