# /timeit_bytearray_vs_struct.py

## timeit_bytearray_vs_struct.py
"""
http://stackoverflow.com/q/13932893/190597

using_bytearray is slightly faster than using_struct

|------------------------------+--------------------|
|             using_loop_20480 | 8.61 msec per loop |<- chunksize = 1024*20
| using_loop_with_struct_20480 | 8.92 msec per loop |
|              using_loop_1024 | 9.12 msec per loop |<- chunksize = 1024
|  using_loop_with_struct_1024 |  9.6 msec per loop |
|         using_bytearray_1024 | 29.2 msec per loop |
|        using_bytearray_20480 | 29.3 msec per loop |
|           using_struct_20480 | 31.1 msec per loop |
|            using_struct_1024 | 31.3 msec per loop |
|                 using_loop_8 | 47.7 msec per loop |<- chunksize = 8 characters (one output byte)
|     using_loop_with_struct_8 | 54.7 msec per loop |
|            using_bytearray_8 | 99.3 msec per loop |
|               using_struct_8 |  115 msec per loop |
|------------------------------+--------------------|

"""

import os
import struct
import functools
import itertools as IT

def using_struct(output, chunksize):
    """Convert the '0'/'1' text in ``filename`` to bytes via ``struct.pack``.

    The module-level ``filename`` is read as text, ``chunksize`` characters
    at a time.  Each chunk is consumed through an iterator in groups of up
    to eight characters, every group is parsed as a base-2 integer, and the
    values of one chunk are packed as unsigned bytes and written to
    ``output`` (opened in binary mode).

    Groups never span two chunks, so ``chunksize`` should be a multiple of 8
    for the result to be independent of the chunk size.
    """
    with open(filename, 'r') as src, open(output, 'wb') as dst:
        read_next = functools.partial(src.read, chunksize)
        # Two-argument iter(): keep calling read() until it returns '' at EOF.
        for text in iter(read_next, ''):
            chars = iter(text)

            def next_group():
                # Up to eight characters; '' once the chunk is exhausted.
                return ''.join(IT.islice(chars, 8))

            values = [int(group, 2) for group in iter(next_group, '')]
            layout = '{n}B'.format(n = len(values))
            dst.write(struct.pack(layout, *values))

def using_bytearray(output, chunksize):
    """Convert the '0'/'1' text in ``filename`` to bytes via ``bytearray``.

    The module-level ``filename`` is read as text, ``chunksize`` characters
    at a time.  Each chunk is consumed through an iterator in groups of up
    to eight characters, every group is parsed as a base-2 integer, and the
    values of one chunk are written to ``output`` (opened in binary mode)
    as a ``bytearray``.

    Groups never span two chunks, so ``chunksize`` should be a multiple of 8
    for the result to be independent of the chunk size.
    """
    with open(filename, 'r') as src, open(output, 'wb') as dst:
        read_next = functools.partial(src.read, chunksize)
        # Two-argument iter(): keep calling read() until it returns '' at EOF.
        for text in iter(read_next, ''):
            chars = iter(text)

            def next_group():
                # Up to eight characters; '' once the chunk is exhausted.
                return ''.join(IT.islice(chars, 8))

            values = [int(group, 2) for group in iter(next_group, '')]
            dst.write(bytearray(values))

def using_loop(output, chunksize):
    """Convert the '0'/'1' text in ``filename`` to bytes using plain slicing.

    The module-level ``filename`` is read as text, ``chunksize`` characters
    at a time.  Each chunk is cut into slices of up to eight characters,
    every slice is parsed as a base-2 integer, and the values of one chunk
    are written to ``output`` (opened in binary mode) as a ``bytearray``.

    Slices never span two chunks, so ``chunksize`` should be a multiple of 8
    for the result to be independent of the chunk size.
    """
    with open(filename, 'r') as src, open(output, 'wb') as dst:
        text = src.read(chunksize)
        # read() returns '' at end of file.
        while text != '':
            values = [int(text[start:start + 8], 2)
                      for start in range(0, len(text), 8)]
            dst.write(bytearray(values))
            text = src.read(chunksize)

def using_loop_with_struct(output, chunksize):
    """Convert the '0'/'1' text in ``filename`` to bytes: slicing + ``struct``.

    The module-level ``filename`` is read as text, ``chunksize`` characters
    at a time.  Each chunk is cut into slices of up to eight characters,
    every slice is parsed as a base-2 integer, and the values of one chunk
    are packed as unsigned bytes with ``struct.pack`` and written to
    ``output`` (opened in binary mode).

    Slices never span two chunks, so ``chunksize`` should be a multiple of 8
    for the result to be independent of the chunk size.
    """
    with open(filename, 'r') as src, open(output, 'wb') as dst:
        text = src.read(chunksize)
        # read() returns '' at end of file.
        while text != '':
            values = [int(text[start:start + 8], 2)
                      for start in range(0, len(text), 8)]
            layout = '{n}B'.format(n = len(values))
            dst.write(struct.pack(layout, *values))
            text = src.read(chunksize)

# Scratch files shared by the benchmarks: `filename` is the text input that
# the converter functions read, `output` is the file the timed calls write to.
output = os.path.expanduser('~/tmp/hexdata')
filename = os.path.expanduser('~/tmp/data')
# open(..., 'w') does not create missing directories, so make sure ~/tmp
# exists before the input file is generated.
_data_dir = os.path.dirname(filename)
if not os.path.isdir(_data_dir):
    os.makedirs(_data_dir)
with open(filename, 'w') as f:
    # 19 characters repeated 10000 times = 190000 '0'/'1' characters.
    content = '0101111011100011001'
    f.write(content*10000)

# Bind every converter to every chunk size and publish each variant under a
# descriptive module-level name (e.g. ``using_loop_1024``) so that the timing
# statement template 'm.{f}(m.output)' can look it up on the imported module.
funcs = []
for chunksize in (8, 1024, 1024*20):
    for func in [using_struct, using_bytearray, using_loop,
                 using_loop_with_struct]:
        f = functools.partial(func, chunksize = chunksize)
        # __name__ is available on functions in Python 2 and 3 alike, whereas
        # reading func.func_name raises AttributeError on Python 3.  The
        # label is still stored as `func_name` on the partial object.
        f.func_name = '{n}_{c}'.format(n = func.__name__, c = chunksize)
        funcs.append(f)
        # globals() is the module namespace; it is the documented way to
        # add module-level names dynamically.
        globals()[f.func_name] = f

if __name__=='__main__':
    # Only needed when the file is run as a script.
    import timeit

    # Sanity check: all four implementations must produce byte-identical
    # output for the same input before their timings are compared.
    checks = [(using_struct, '~/tmp/hexdata1'),
              (using_bytearray, '~/tmp/hexdata2'),
              (using_loop, '~/tmp/hexdata3'),
              (using_loop_with_struct, '~/tmp/hexdata4')]
    results = []
    for func, path in checks:
        path = os.path.expanduser(path)
        func(path, chunksize = 1024)
        with open(path, 'rb') as f:
            results.append(f.read())
    for (func, path), result in zip(checks, results):
        assert result == results[0], (
            '{n} output differs from using_struct'.format(n = func.__name__))

    # Time every (function, chunksize) variant through a fresh import of this
    # module, exactly as the statement/setup templates below spell out.
    modname, ext = os.path.splitext(os.path.basename(__file__))
    setup = 'import {m} as m'.format(m = modname)
    number = 10
    timings = []
    for func in funcs:
        stmt = 'm.{f}(m.output)'.format(f = func.func_name)
        # Best of three runs, converted to milliseconds per call.
        best = min(timeit.repeat(stmt, setup, repeat = 3, number = number))
        timings.append((1000.0 * best / number, func.func_name))
    timings.sort()

    # Render the fastest-first table in the layout used by the module docstring.
    width = max(len(name) for msec, name in timings)
    rule = '|{a}+{b}|'.format(a = '-' * (width + 2), b = '-' * 20)
    rows = ['| {n:>{w}} | {t:>4.3g} msec per loop |'.format(
                n = name, w = width, t = msec)
            for msec, name in timings]
    report = '\n'.join([rule] + rows + [rule])
    print(report)
	"""
	http://stackoverflow.com/q/13932893/190597

	using_bytearray is slightly faster than using_struct

	|------------------------------+--------------------|
	|             using_loop_20480 | 8.61 msec per loop |<- chunksize = 1024*20
	| using_loop_with_struct_20480 | 8.92 msec per loop |
	|              using_loop_1024 | 9.12 msec per loop |<- chunksize = 1024
	|  using_loop_with_struct_1024 |  9.6 msec per loop |
	|         using_bytearray_1024 | 29.2 msec per loop |
	|        using_bytearray_20480 | 29.3 msec per loop |
	|           using_struct_20480 | 31.1 msec per loop |
	|            using_struct_1024 | 31.3 msec per loop |
	|                 using_loop_8 | 47.7 msec per loop |<- chunksize = 8 characters (one output byte)
	|     using_loop_with_struct_8 | 54.7 msec per loop |
	|            using_bytearray_8 | 99.3 msec per loop |
	|               using_struct_8 |  115 msec per loop |
	|------------------------------+--------------------|

	"""

	import os
	import struct
	import functools
	import itertools as IT

	def using_struct(output, chunksize):
		"""Convert the '0'/'1' text in ``filename`` to bytes via ``struct.pack``.

		The module-level ``filename`` is read as text, ``chunksize`` characters
		at a time.  Each chunk is consumed through an iterator in groups of up
		to eight characters, every group is parsed as a base-2 integer, and the
		values of one chunk are packed as unsigned bytes and written to
		``output`` (opened in binary mode).

		Groups never span two chunks, so ``chunksize`` should be a multiple of 8
		for the result to be independent of the chunk size.
		"""
		with open(filename, 'r') as src, open(output, 'wb') as dst:
			read_next = functools.partial(src.read, chunksize)
			# Two-argument iter(): keep calling read() until it returns '' at EOF.
			for text in iter(read_next, ''):
				chars = iter(text)
				values = [int(group, 2)
					for group in
					iter(lambda: ''.join(IT.islice(chars, 8)), '')]
				dst.write(struct.pack('{n}B'.format(n = len(values)), *values))

	def using_bytearray(output, chunksize):
		"""Convert the '0'/'1' text in ``filename`` to bytes via ``bytearray``.

		The module-level ``filename`` is read as text, ``chunksize`` characters
		at a time.  Each chunk is consumed through an iterator in groups of up
		to eight characters, every group is parsed as a base-2 integer, and the
		values of one chunk are written to ``output`` (opened in binary mode)
		as a ``bytearray``.

		Groups never span two chunks, so ``chunksize`` should be a multiple of 8
		for the result to be independent of the chunk size.
		"""
		with open(filename, 'r') as src, open(output, 'wb') as dst:
			read_next = functools.partial(src.read, chunksize)
			# Two-argument iter(): keep calling read() until it returns '' at EOF.
			for text in iter(read_next, ''):
				chars = iter(text)
				values = [int(group, 2)
					for group in
					iter(lambda: ''.join(IT.islice(chars, 8)), '')]
				dst.write(bytearray(values))

	def using_loop(output, chunksize):
		"""Convert the '0'/'1' text in ``filename`` to bytes using plain slicing.

		The module-level ``filename`` is read as text, ``chunksize`` characters
		at a time.  Each chunk is cut into slices of up to eight characters,
		every slice is parsed as a base-2 integer, and the values of one chunk
		are written to ``output`` (opened in binary mode) as a ``bytearray``.

		Slices never span two chunks, so ``chunksize`` should be a multiple of 8
		for the result to be independent of the chunk size.
		"""
		with open(filename, 'r') as src, open(output, 'wb') as dst:
			while True:
				text = src.read(chunksize)
				# read() returns '' at end of file.
				if text == '':
					break
				values = [int(text[start:start + 8], 2)
					for start in range(0, len(text), 8)]
				dst.write(bytearray(values))

	def using_loop_with_struct(output, chunksize):
		"""Convert the '0'/'1' text in ``filename`` to bytes: slicing + ``struct``.

		The module-level ``filename`` is read as text, ``chunksize`` characters
		at a time.  Each chunk is cut into slices of up to eight characters,
		every slice is parsed as a base-2 integer, and the values of one chunk
		are packed as unsigned bytes with ``struct.pack`` and written to
		``output`` (opened in binary mode).

		Slices never span two chunks, so ``chunksize`` should be a multiple of 8
		for the result to be independent of the chunk size.
		"""
		with open(filename, 'r') as src, open(output, 'wb') as dst:
			while True:
				text = src.read(chunksize)
				# read() returns '' at end of file.
				if text == '':
					break
				values = [int(text[start:start + 8], 2)
					for start in range(0, len(text), 8)]
				dst.write(struct.pack('{n}B'.format(n = len(values)), *values))

	# Scratch files shared by the benchmarks: `filename` is the text input that
	# the converter functions read, `output` is the file the timed calls write to.
	output = os.path.expanduser('~/tmp/hexdata')
	filename = os.path.expanduser('~/tmp/data')
	# open(..., 'w') does not create missing directories, so make sure ~/tmp
	# exists before the input file is generated.
	_data_dir = os.path.dirname(filename)
	if not os.path.isdir(_data_dir):
		os.makedirs(_data_dir)
	with open(filename, 'w') as f:
		# 19 characters repeated 10000 times = 190000 '0'/'1' characters.
		content = '0101111011100011001'
		f.write(content*10000)

	# Bind every converter to every chunk size and publish each variant under a
	# descriptive module-level name (e.g. ``using_loop_1024``) so that the timing
	# statement template 'm.{f}(m.output)' can look it up on the imported module.
	funcs = []
	for chunksize in (8, 1024, 1024*20):
		for func in [using_struct, using_bytearray, using_loop,
				using_loop_with_struct]:
			f = functools.partial(func, chunksize = chunksize)
			# __name__ is available on functions in Python 2 and 3 alike, whereas
			# reading func.func_name raises AttributeError on Python 3.  The
			# label is still stored as `func_name` on the partial object.
			f.func_name = '{n}_{c}'.format(n = func.__name__, c = chunksize)
			funcs.append(f)
			# globals() is the module namespace; it is the documented way to
			# add module-level names dynamically.
			globals()[f.func_name] = f

	if __name__=='__main__':
		# Only needed when the file is run as a script.
		import timeit

		# Sanity check: all four implementations must produce byte-identical
		# output for the same input before their timings are compared.
		checks = [(using_struct, '~/tmp/hexdata1'),
			(using_bytearray, '~/tmp/hexdata2'),
			(using_loop, '~/tmp/hexdata3'),
			(using_loop_with_struct, '~/tmp/hexdata4')]
		results = []
		for func, path in checks:
			path = os.path.expanduser(path)
			func(path, chunksize = 1024)
			with open(path, 'rb') as f:
				results.append(f.read())
		for (func, path), result in zip(checks, results):
			assert result == results[0], (
				'{n} output differs from using_struct'.format(n = func.__name__))

		# Time every (function, chunksize) variant through a fresh import of this
		# module, exactly as the statement/setup templates below spell out.
		modname, ext = os.path.splitext(os.path.basename(__file__))
		setup = 'import {m} as m'.format(m = modname)
		number = 10
		timings = []
		for func in funcs:
			stmt = 'm.{f}(m.output)'.format(f = func.func_name)
			# Best of three runs, converted to milliseconds per call.
			best = min(timeit.repeat(stmt, setup, repeat = 3, number = number))
			timings.append((1000.0 * best / number, func.func_name))
		timings.sort()

		# Render the fastest-first table in the layout used by the module docstring.
		width = max(len(name) for msec, name in timings)
		rule = '|{a}+{b}|'.format(a = '-' * (width + 2), b = '-' * 20)
		rows = ['| {n:>{w}} | {t:>4.3g} msec per loop |'.format(
				n = name, w = width, t = msec)
			for msec, name in timings]
		report = '\n'.join([rule] + rows + [rule])
		print(report)