Skip to content

Instantly share code, notes, and snippets.

@zed
Created December 6, 2012 07:42
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save zed/4babbce1179d77272b68 to your computer and use it in GitHub Desktop.
Save zed/4babbce1179d77272b68 to your computer and use it in GitHub Desktop.
measure performance of various solutions for the "Replace list of list with “condensed” list of list while maintaining order" problem
#!/usr/bin/env python
import sys
from collections import OrderedDict, defaultdict
from copy import deepcopy
from itertools import chain, combinations_with_replacement, repeat
from random import randrange
imap = map # Python 3
try:
from itertools import imap
except ImportError:
pass
from reporttime import accept_funcs, get_functions_with_prefix, measure
# input data for testing, performance measurements
_a = [1, 2, 3]   # building blocks for lst_OP below;
_b = [3, 4]      # several of them share values (3, 4, 8, 9, 21) so
_i = [21, 22]    # the sublists chain together into larger groups
_c = [88, 7, 8]
_e = [5, 4]
_d = [3, 50]
_f = [8, 9]
_g = [9, 10]
_h = [20, 21]
# "OP" dataset: the example sublists from the question, tiled 1000x
lst_OP = [_a, _b, _c, _i, _e, _d, _f, _g, _h, _a, _c, _i] * 1000
# "BK" dataset: two families of 3-element integer runs; the second
# family is shifted by 2 so runs overlap and chain together
lst_BK = ([list(range(4 * ii, 4 * (ii + 1) - 1))
           for ii in range(300)] +
          [list(range(4 * ii + 2, 4 * (ii + 1) + 1))
           for ii in range(300)])
# random dataset: 1000 sublists of up to 99 random values below 1000
lst_rnd = [[randrange(10 ** 3) for _ in repeat(None, randrange(100))]
           for _ in repeat(None, 1000)]
def condense_blckknght(master_list):
    """Merge sublists that (transitively) share elements, keeping order.

    http://stackoverflow.com/a/13716804
    """
    first_seen = {}   # value -> index of the first sublist containing it
    parent = {}       # sublist index -> earlier index (disjoint-set link)

    def root_of(idx):
        # follow parent links until we reach a set representative
        while idx in parent:
            idx = parent[idx]
        return idx

    # pass 1: link sublists that share a value; the smaller index wins
    for idx, sub in enumerate(master_list):
        for item in sub:
            if item not in first_seen:
                first_seen[item] = idx
            else:
                mine = root_of(idx)
                theirs = root_of(first_seen[item])
                if mine < theirs:
                    parent[theirs] = mine
                elif theirs < mine:
                    parent[mine] = theirs
                # equal roots: already in the same set, nothing to do

    # pass 2: emit one output list per representative, adding each
    # value only from the sublist where it first appeared
    condensed = []
    slot_of_root = {}
    for idx, sub in enumerate(master_list):
        if idx not in parent:            # idx is a set representative
            slot_of_root[idx] = len(condensed)
            condensed.append(sub[:])
        else:
            rep = root_of(idx)
            for item in sub:
                if first_seen[item] == idx:
                    condensed[slot_of_root[rep]].append(item)
    return condensed
def disjoint_set_find(djs, node):
    """Return the root of *node*'s set in `djs`, compressing the path.

    `djs` maps child -> parent; a node absent from `djs` is a root.
    Every node visited on the way up is re-pointed directly at the
    root, so later finds on the same chain are O(1).
    """
    root = node
    while root in djs:                  # walk up to the representative
        root = djs[root]
    while node != root:                 # second pass: path compression
        djs[node], node = root, djs[node]
    return root
def condense_blckknght2(master_list):
    """Condense overlapping sublists using an explicit disjoint set.

    Variant of ``condense_blckknght`` that delegates root lookup to
    ``disjoint_set_find`` (which also does path compression).
    http://stackoverflow.com/a/13716804
    """
    values = {}  # maps each value to the first sublist that contains it
    overlaps = {}  # maps sublist indexes to each other to form a disjoint set
    for i, sublist in enumerate(master_list):
        for v in sublist:
            if v not in values:  # no overlap, so just store value
                values[v] = i
            else:  # overlap detected, do a disjoint set union
                current_set = disjoint_set_find(overlaps, i)
                other_set = disjoint_set_find(overlaps, values[v])
                # smaller sublist index becomes the root of the union
                if current_set < other_set:
                    overlaps[other_set] = current_set
                elif other_set < current_set:
                    overlaps[current_set] = other_set
                # ignore case where sets are equal
    output = []  # results
    output_map = {}  # map from original indexes to output indexes
    for i, sublist in enumerate(master_list):
        if i not in overlaps:  # this is a root of a disjoint set
            output_map[i] = len(output)  # update index output mapping
            output.append(sublist[:])  # copy sublist into output * FIXME
        else:
            root = disjoint_set_find(overlaps, i)
            for v in sublist:  # add our unique elements into output list
                # a value is appended only by the sublist where it first
                # appeared, which removes duplicates  * FIXME
                if values[v] == i:
                    output[output_map[root]].append(v)
    return output
def condense_blckknght3(master_list):
    """Condense overlapping sublists; bound-method variant.

    Same union-find scheme as ``condense_blckknght2``, but the output
    phase caches ``list.append`` as a local and marks emitted values by
    overwriting ``values[v]`` with ``None``.
    http://stackoverflow.com/a/13716804
    """
    values = {}  # maps each value to the first sublist that contains it
    overlaps = {}  # maps sublist indexes to each other to form a disjoint set
    for i, sublist in enumerate(master_list):
        for v in sublist:
            if v not in values:  # no overlap, so just store value
                values[v] = i
            else:  # overlap detected, do a disjoint set union
                current_set = disjoint_set_find(overlaps, i)
                other_set = disjoint_set_find(overlaps, values[v])
                if current_set < other_set:
                    overlaps[other_set] = current_set
                elif other_set < current_set:
                    overlaps[current_set] = other_set
                # ignore case where sets are equal
    output = []  # results
    index = {}  # map from original indexes to output indexes
    for i, sublist in enumerate(master_list):
        if i not in overlaps:  # this is a root of a disjoint set
            index[i] = len(output)  # update index output mapping
            output.append([])
            append = output[index[i]].append
        else:
            root = disjoint_set_find(overlaps, i)
            append = output[index[root]].append
        # copy sublist into output removing duplicates
        for v in sublist:  # add our unique elements into output list
            if values[v] is not None:  # not yet emitted, add to output
                append(v)
                values[v] = None  # mark as already seen (destroys `values`)
    return output
def disjoint_set_union(djs, first, second):
    """Merge the sets containing *first* and *second* inside `djs`.

    The smaller of the two roots becomes the root of the combined set;
    if both already share a root, nothing changes.
    """
    root_a = disjoint_set_find(djs, first)
    root_b = disjoint_set_find(djs, second)
    if root_a == root_b:
        return  # already the same set
    if root_a < root_b:
        djs[root_b] = root_a
    else:
        djs[root_a] = root_b
def condense_BK(master_list):
    """Condense overlapping sublists, value-driven output phase.

    http://stackoverflow.com/a/13716804

    Values are remembered in an ``OrderedDict`` so the output phase can
    iterate over them in first-appearance order instead of walking the
    sublists again.
    """
    values = OrderedDict()  # maps values to the first sublist
    # containing them
    overlaps = {}  # maps sublist indexes to each other to form a disjoint set
    for i, sublist in enumerate(master_list):
        for v in sublist:
            if v not in values:  # no overlap, so just store value
                values[v] = i
            else:  # overlap detected, do a disjoint set union
                disjoint_set_union(overlaps, values[v], i)
    output = []  # results
    output_map = {}  # map from disjoint-set roots to output indexes
    for v, i in values.items():  # iterate over values in order
        root = disjoint_set_find(overlaps, i)
        if root not in output_map:
            # BUG FIX: key by `root`, not `i`.  The original wrote
            # ``output_map[i]`` and then read ``output_map[root]``,
            # which only worked because the first value of each group
            # happens to come from the root sublist (i == root there);
            # keying by `root` holds without that fragile invariant.
            output_map[root] = len(output)
            output.append([])  # create new output sublists as necessary
        output[output_map[root]].append(v)
    return output
def condense_jfs(lists):
    """Condense overlapping sublists via set merging, then reorder.

    http://stackoverflow.com/a/13715626
    """
    # remember original positions (first occurrence of each item in the
    # flattened input)
    positions = {}
    for pos, item in enumerate(chain.from_iterable(lists)):
        if item not in positions:
            positions[item] = pos
    # condense disregarding order
    sets = _condense_sets(set(L) for L in lists)
    # restore order; an empty input yields [[]] (the `or` branch)
    return [sorted(s, key=positions.get) for s in sets] or [[]]
def _condense_sets(sets):
result = []
for candidate in sets:
for current in result:
if candidate & current: # found overlap
current |= candidate # combine (merge sets)
# new items from candidate may create an overlap
# between current set and the remaining result sets
result = _condense_sets(result) # merge such sets
break
else: # no common elements found (or result is empty)
result.append(candidate)
return result
def _condense_sberry(lst):  # raises IndexError on lst_BK
    """Condense overlapping sublists by merging output groups in place.

    http://stackoverflow.com/a/13715377

    NOTE(review): excluded from the benchmark (leading underscore)
    because deleting merged groups from ``out`` shifts later indexes
    while ``lookup`` still holds the old ones — this raises IndexError
    on ``lst_BK``.  Indentation reconstructed from the flattened paste.
    """
    lookup = {}  # item -> index of its group in `out`
    out = []
    index = 0    # next free slot in `out`
    for grp in lst:
        # indexes of all groups that already contain items of grp
        keys = [lookup.get(num, None) for num in grp]
        keys = [key for key in keys if key is not None]
        if len(keys):
            if len(set(keys)) != 1:
                # grp bridges several groups: merge them all into the first
                for num in grp:
                    out[keys[0]].append(num)
                # dedupe keys, preserving order (set.add returns None,
                # so the `not seen.add(key)` clause is always true)
                seen = set()
                keys = [key for key in keys
                        if key not in seen and not seen.add(key)]
                for key in keys[1:]:
                    out[keys[0]].extend(out[key])
                    del out[key]  # NOTE: shifts later indexes in `out`
                seen = set()
                out[keys[0]] = [item for item in out[keys[0]]
                                if item not in seen and not seen.add(item)]
            else:
                # grp touches exactly one existing group: extend it
                for num in grp:
                    lookup[num] = keys[0]
                out[keys[0]].extend(grp)
                seen = set()
                out[keys[0]] = [item for item in out[keys[0]]
                                if item not in seen and not seen.add(item)]
        else:
            # no overlap: grp starts a new group (aliases the input list)
            out.append(grp)
            for num in grp:
                lookup[num] = index
            index += 1
    return out
def __condense_eyquem(passed_list):  # fail tests
    """Condense sublists by aliasing dict values to shared lists.

    http://stackoverflow.com/a/13716448

    NOTE(review): excluded from the benchmark (double underscore) — the
    gist marks it as failing the correctness tests.  Indentation was
    reconstructed from the flattened paste; treat the exact nesting of
    the tail of the loop as unverified.
    """
    L = deepcopy(passed_list)  # work on a copy; sublists are mutated below
    dic = OrderedDict()        # item -> the (shared) list it belongs to
    for subl in L:
        for x in subl:
            if x not in dic:
                dic[x] = subl
    for sublist in L:
        # `short` is sublist with duplicates removed, order kept
        short = []
        short.extend(el for el in sublist
                     if el not in short)
        # collect earlier lists that share an element with this one
        seen = []
        for k, val in dic.items():
            if val is sublist:
                break  # stop at this sublist's own entries
            if k in short:
                if val not in seen:
                    seen.append(val)
        # merge all overlapping earlier lists into one shared list
        sumseen = []
        for elseen in seen:
            for y in elseen:
                sumseen.append(y)
                dic[y] = sumseen
        if seen:
            for el in sublist:
                if el not in sumseen:
                    sumseen.append(el)
                    dic[el] = sumseen
            sublist[:] = short
    # emit each distinct shared list once, in dict order
    cumul = []
    cumul.extend(lu for lu in dic.values()
                 if lu not in cumul)
    return cumul
def connected_components(edges):
    """Yield one generator of nodes per connected component of `edges`.

    `edges` is an iterable of (a, b) pairs.  NOTE: the yielded inner
    generators all share the same ``seen`` set, so each one must be
    fully consumed before advancing the outer generator (the caller in
    this file does so via ``sorted``).
    """
    neighbors = defaultdict(set)  # adjacency: node -> set of neighbours
    for a, b in edges:
        neighbors[a].add(b)
        neighbors[b].add(a)
    seen = set()

    def component(node, neighbors=neighbors, seen=seen, see=seen.add):
        # flood-fill from `node` using a worklist set
        unseen = set([node])
        next_unseen = unseen.pop
        while unseen:
            node = next_unseen()
            see(node)
            unseen |= neighbors[node] - seen
            yield node

    for node in neighbors:
        if node not in seen:
            yield component(node)
def _condense_pillmuncher(lists):
    """Yield condensed groups via pairwise-overlap connected components.

    http://stackoverflow.com/a/13736192/

    Builds edges between every pair of (index, tuple) sublists that
    share an element, then walks the connected components; quadratic in
    the number of sublists.  Relies on the module-level ``imap`` shim.
    """
    tuples = combinations_with_replacement(enumerate(imap(tuple, lists)), 2)
    overlapping = ((a, b) for a, b in tuples
                   if not set(a[1]).isdisjoint(b[1]))
    seen = set()
    see = seen.add
    for component in connected_components(overlapping):
        # sort the component's sublists by original index, then emit
        # each item the first time it is encountered
        yield [item for each in sorted(component)
               for item in each[1]
               if not (item in seen or see(item))]
def _condense_pillmuncher2(lists):
"""
http://stackoverflow.com/a/13736192/
"""
neighbors = defaultdict(set)
pos = {}
i = 0
for each in lists:
for i, a in enumerate(each, i + 1):
neighbors[a].update(each)
if a not in pos:
pos[a] = i
seen = set()
def component(node, neighbors=neighbors, seen=seen, see=seen.add):
unseen = set([node])
next_unseen = unseen.pop
while unseen:
node = next_unseen()
see(node)
unseen |= neighbors[node] - seen
yield node
for node in neighbors:
if node not in seen:
yield sorted(component(node), key=pos.get)
def __condense_pillmuncher(lists):  # slow
    """Materialize the lazy `_condense_pillmuncher` generator."""
    return [group for group in _condense_pillmuncher(lists)]
def condense_pillmuncher2(lists):
    """Eager wrapper around the `_condense_pillmuncher2` generator."""
    return [group for group in _condense_pillmuncher2(lists)]
def condense_hynekcer(lists):
    """Condense overlapping sublists; merge-smaller-into-larger sets.

    http://stackoverflow.com/a/13896383

    Output group order follows ``groups`` dict insertion order (group
    creation order, minus groups consumed by merges).
    """
    groups = {}     # items divided into groups {id(the_set): the_set,...}
    members = {}    # mapping from item to group
    positions = {}  # mapping from item to sequential ordering
    iposition = 0   # counter for positions
    for sublist in lists:
        if not sublist or members.get(sublist[0], set()).issuperset(sublist):
            continue  # speed-up condition if all is from one group
        common = set([x for x in sublist if x in members])
        if common:  # any common group can be a destination for merge
            dst_group = members[common.pop()]
            common = common - dst_group  # are more groups common for sublist?
            while common:
                grp = members[common.pop()]
                if len(grp) > len(dst_group):  # merge shorter into longer grp
                    grp, dst_group = dst_group, grp
                dst_group.update(grp)
                for item in grp:
                    members[item] = dst_group  # re-point absorbed items
                del groups[id(grp)]            # absorbed group disappears
                common = common - dst_group
        else:  # or create a new group if nothing common
            dst_group = set()
            groups[id(dst_group)] = dst_group
        newitems = []
        for item in sublist:  # merge also new items
            if item not in positions:
                positions[item] = iposition
                iposition += 1
                newitems.append(item)
                members[item] = dst_group
        dst_group.update(newitems)
    return [sorted(x, key=positions.get) for x in groups.values()]
def _condense_hynekcer2(lists):
"""
http://stackoverflow.com/a/13896383
"""
neighbors = defaultdict(set) # mapping from items to sublists
positions = {} # mapping from items to sequential ordering
position = 0
for each in lists:
for item in each:
neighbors[item].update(each)
if item not in positions:
positions[item] = position
position += 1
seen = set()
see = seen.add
for node in neighbors:
if node not in seen:
unseen = set([node]) # this is a "todo" set
next_unseen = unseen.pop # method alias, not called now
group = [] # collects the output
while unseen:
node = next_unseen()
see(node)
unseen |= neighbors[node] - seen
group.append(node)
yield sorted(group, key=positions.get)
def condense_hynekcer2(lists):
    """Eager wrapper: collect `_condense_hynekcer2`'s groups in a list."""
    return [group for group in _condense_hynekcer2(lists)]
def __condense_qarma(input):  # fail tests
    """Flatten, dedupe, and partition into strings vs. non-strings.

    http://stackoverflow.com/a/13951145

    NOTE(review): excluded from the benchmark (double underscore) — it
    flattens rather than condenses, and under Python 3 ``filter``
    returns a lazy iterator, so ``len(tmp)`` raises TypeError.  The
    parameter name shadows the ``input`` builtin (kept as-is).
    """
    def unique(l):
        # lazily yield each element the first time it is encountered
        s = set()
        for i in l:
            if i not in s:
                s.add(i)
                yield(i)
    # flatten all sublists and drop duplicates, preserving order
    tmp = list(unique(sum(input, [])))
    # partition into [strings, non-strings], dropping empty partitions
    tmp = filter(None, [filter(lambda x: isinstance(x, str), tmp),
                        filter(lambda x: not isinstance(x, str), tmp)])
    # if only numbers or only strings are present, flatten the list
    if len(tmp) == 1:
        tmp = tmp[0]
    return tmp
@accept_funcs
def test_funcs(funcs, args, ignore_order=True):
    """Check that all functions produce the same result.

    Each function should accept a readonly sequence as an argument.
    The first function's (sorted) output is taken as the reference;
    every other function must match it, and none may mutate `args`.
    """
    orig_args = deepcopy(args)
    expected = sorted(funcs[0][1](*args))  # pylint: disable=W0142
    for name, f in funcs:
        r = f(*args)  # pylint: disable=W0142
        if ignore_order:
            r.sort()
        assert r == expected, (name, args, expected)
        # the input must not be mutated by the function under test
        assert orig_args == args, name
def main():
    """Sanity-check every condense_* function, then benchmark them."""
    mod = sys.modules[__name__]
    # only names starting with 'condense_' take part (leading-underscore
    # variants are deliberately excluded as broken or slow)
    funcs = get_functions_with_prefix('condense_', module=mod)
    # correctness first; each dataset is run twice to catch functions
    # that mutate their input
    for args in [lst_BK] * 2 + [lst_OP] * 2:
        test_funcs(funcs, [args])
    verbose = '--verbose' in sys.argv
    # then time every implementation against each dataset
    for name in "lst_OP lst_BK lst_rnd".split():
        lst = getattr(mod, name)
        measure(funcs, args=[lst], comment=name, verbose=verbose)
if __name__ == "__main__":
    main()  # run tests + benchmarks only when executed as a script
# -*- coding: utf-8 -*-
import inspect
import sys
import timeit
from functools import partial, wraps
from timeit import default_timer as timer
def measure_func(func, args):
    """Measure how long `func(*args)` takes in seconds.

    Doubles the repetition count `n` until a single measurement takes
    more than one second, then returns the best per-call time over
    that final run plus two more runs at the same `n`.
    """
    n = 1
    f = partial(func, *args)  # pylint: disable=W0142
    while True:
        start = timer()
        r = timeit.repeat(f, number=n, repeat=1)
        if timer() - start > 1:  # at least 1 second per measurement
            break
        n *= 2
    # min() of 3 total timings reduces scheduling noise
    return min(r + timeit.repeat(f, number=n, repeat=2)) / n
def accept_funcs(func):
    """Decorator: normalize the first argument to (name, func) pairs.

    The wrapped function may be called with either a sequence of bare
    functions or a sequence of (name, function) pairs; bare functions
    are paired with their ``__name__``.
    """
    @wraps(func)
    def normalized(funcs, *args, **kwargs):
        if hasattr(funcs[0], '__name__'):
            funcs = [(fn.__name__, fn) for fn in funcs]
        return func(funcs, *args, **kwargs)
    return normalized
def human_seconds(seconds, fmt="%.3g %s"):
    """Return human-readable string that represents given seconds."""
    value = seconds * 1e6  # work upward from microseconds
    for unit in ("usec", "msec"):
        if value < 1000:
            return fmt % (value, unit)
        value /= 1000
    # anything >= 1 second falls through to here
    return fmt % (value, " sec")
@accept_funcs
def measure(funcs, args, comment='', verbose=False):
    """Report how long `f(*args)` takes for each f in funcs.

    Prints a table of per-function times sorted fastest-first, with a
    ratio column relative to the fastest.  With ``verbose`` each raw
    measurement is printed as it is taken.
    """
    # measure performance
    results = []
    w = max(len(name) for name, _ in funcs)  # width of the name column
    for name, f in funcs:
        results.append((measure_func(f, args), name))
        if verbose:
            print("{:{}s} {:>9s} {}".format(
                name, w, human_seconds(results[-1][0]), comment))
    # print sorted results
    results.sort()
    mint = results[0][0]  # minimal time
    ratios = ["%5.2f" % (t / mint,) for t, _ in results]
    maxratio_width = max(len(r) for r in ratios)
    # header
    print("{:{}s} {:>9s} {:>{}s} {}".format(
        "name", w, "time", "ratio", maxratio_width, "comment"))
    ratios = [s.rjust(maxratio_width) for s in ratios]
    for (t, name), ratio in zip(results, ratios):
        print("{:{}s} {:>9s} {} {}".format(
            name, w, human_seconds(t), ratio, comment))
def get_functions_with_prefix(prefix, module=None):
    """Return `module`'s functions whose names start with *prefix*.

    Results are in ``inspect.getmembers`` order (sorted by name).  When
    `module` is omitted, the caller's own module is inspected.
    """
    if module is None:
        # default to the module of the code that called us
        caller_globals = inspect.currentframe().f_back.f_globals
        module = sys.modules[caller_globals['__name__']]
    members = inspect.getmembers(module, inspect.isfunction)
    return [func for name, func in members if name.startswith(prefix)]
name time ratio comment
condense_hynekcer 5.79 msec 1.00 lst_OP
condense_hynekcer2 7.4 msec 1.28 lst_OP
condense_pillmuncher2 11.5 msec 1.99 lst_OP
condense_blckknght 16.8 msec 2.91 lst_OP
condense_jfs 26 msec 4.49 lst_OP
condense_BK 30.5 msec 5.28 lst_OP
condense_blckknght2 30.9 msec 5.34 lst_OP
condense_blckknght3 32.5 msec 5.62 lst_OP
name time ratio comment
condense_blckknght 964 usec 1.00 lst_BK
condense_blckknght2 1.41 msec 1.47 lst_BK
condense_blckknght3 1.46 msec 1.51 lst_BK
condense_hynekcer2 1.95 msec 2.03 lst_BK
condense_pillmuncher2 2.1 msec 2.18 lst_BK
condense_hynekcer 2.12 msec 2.20 lst_BK
condense_BK 3.39 msec 3.52 lst_BK
condense_jfs 544 msec 563.66 lst_BK
name time ratio comment
condense_hynekcer 6.86 msec 1.00 lst_rnd
condense_jfs 16.8 msec 2.44 lst_rnd
condense_blckknght 28.6 msec 4.16 lst_rnd
condense_blckknght2 56.1 msec 8.18 lst_rnd
condense_blckknght3 56.3 msec 8.21 lst_rnd
condense_BK 70.2 msec 10.23 lst_rnd
condense_pillmuncher2 324 msec 47.22 lst_rnd
condense_hynekcer2 334 msec 48.61 lst_rnd
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment