Aluriak/bench_stream_div.py

## bench_stream_div.py
"""
This is a benchmark for a common operation on data: dividing a stream
of pairs into two distinct parts.

Benchmarks use two techniques (functions `one` and `two`), leading to the following results:

        10^3  10^5  10^7   (data size)
    one 0.01  2.58  286
    two 0.03  3.4   339

  ratio 0.33  0.75  0.84

Clearly, using zip(*) (method one) is more efficient for small datasets.
No tests beyond 10^7 have been done; maybe the two methods converge
to the same runtime.
More probably, the runtime of the first method exceeds that of the second
at some point.

"""

import timeit
import itertools
from functools import partial


def one(data):
    """Split an iterable of rows into per-column tuples using zip(*).

    The input is first materialized as a tuple, then transposed.
    For an iterable of (a, b) pairs the result is ((a1, a2, ...),
    (b1, b2, ...)). An empty input yields an empty tuple ``()``.
    """
    rows = tuple(data)
    columns = zip(*rows)
    return tuple(columns)

def two(data):
    """Split an iterable of pairs into two tuples with an explicit loop.

    Each item of *data* must unpack into exactly two values. The first
    values are gathered in one list and the second values in another,
    in a single pass, so one-shot iterators are supported. Returns
    (firsts, seconds) as a pair of tuples; an empty input yields
    ``((), ())``.
    """
    firsts = []
    seconds = []
    for first, second in data:
        firsts.append(first)
        seconds.append(second)
    return tuple(firsts), tuple(seconds)


# Time every splitting technique on inputs of increasing size.
# The sizes 10**5 and 10**7 are left commented out, as in the original run list.
for data_size in (10, 10**2, 10**3):  # , 10**5, 10**7):
    # Build a fixed dataset of identical (1, 2) pairs of the requested size.
    data = tuple((1, 2) for _ in range(data_size))
    # Sanity check: both techniques must produce the same split.
    assert one(data) == two(data)
    # Only the techniques actually defined in this file are benchmarked;
    # a `tee` entry used to be listed here but no such function exists,
    # which made the script fail with NameError before any timing ran.
    for method in (one, two):
        # Total time of 10000 calls, rounded to 7 decimal places.
        runtime = round(timeit.timeit(partial(method, data), number=10000), 7)
        print(method.__name__, 'on', data_size, ':', runtime)
	"""
	This is a benchmark for a common operation on data: dividing a stream
	of pairs into two distinct parts.

	Benchmarks use two techniques (functions `one` and `two`), leading to the following results:

	        10^3  10^5  10^7   (data size)
	    one 0.01  2.58  286
	    two 0.03  3.4   339

	  ratio 0.33  0.75  0.84

	Clearly, using zip(*) (method one) is more efficient for small datasets.
	No tests beyond 10^7 have been done; maybe the two methods converge
	to the same runtime.
	More probably, the runtime of the first method exceeds that of the second
	at some point.

	"""

	import timeit
	import itertools
	from functools import partial


	def one(data):
		"""Split an iterable of rows into per-column tuples using zip(*).

		The input is materialized as a tuple and then transposed; an
		empty input yields an empty tuple ``()``.
		"""
		return tuple(zip(*tuple(data)))

	def two(data):
		"""Split an iterable of pairs into two tuples with an explicit loop.

		Each item must unpack into exactly two values. Returns the first
		values and the second values as a pair of tuples, gathered in a
		single pass over *data*.
		"""
		notes, mss = [], []
		for ms, note in data:
			notes.append(note)
			mss.append(ms)
		return tuple(mss), tuple(notes)


	# Time every splitting technique on inputs of increasing size.
	# The exponent operators were lost in this copy ("10 2", "103");
	# they are restored here as 10**2 and 10**3.
	for data_size in (10, 10**2, 10**3):  # , 10**5, 10**7):
		# Build a fixed dataset of identical (1, 2) pairs of the requested size.
		data = tuple((1, 2) for _ in range(data_size))
		# Sanity check: both techniques must produce the same split.
		assert one(data) == two(data)
		# `tee` is not defined anywhere in the visible file, so only the
		# two existing techniques are benchmarked (avoids a NameError).
		for method in (one, two):
			# Total time of 10000 calls, rounded to 7 decimal places.
			runtime = round(timeit.timeit(partial(method, data), number=10000), 7)
			print(method.__name__, 'on', data_size, ':', runtime)