Skip to content

Instantly share code, notes, and snippets.

Created December 18, 2012 19:09
Show Gist options
  • Save anonymous/4330943 to your computer and use it in GitHub Desktop.
Save anonymous/4330943 to your computer and use it in GitHub Desktop.
"""
http://stackoverflow.com/q/13932893/190597
using_bytearray is slightly faster than using_struct
|------------------------------+--------------------|
| using_loop_20480 | 8.61 msec per loop |<- chunksize = 1024*20
| using_loop_with_struct_20480 | 8.92 msec per loop |
| using_loop_1024 | 9.12 msec per loop |<- chunksize = 1024
| using_loop_with_struct_1024 | 9.6 msec per loop |
| using_bytearray_1024 | 29.2 msec per loop |
| using_bytearray_20480 | 29.3 msec per loop |
| using_struct_20480 | 31.1 msec per loop |
| using_struct_1024 | 31.3 msec per loop |
| using_loop_8 | 47.7 msec per loop |<- chunksize = 8 (8 characters in, 1 byte out)
| using_loop_with_struct_8 | 54.7 msec per loop |
| using_bytearray_8 | 99.3 msec per loop |
| using_struct_8 | 115 msec per loop |
|------------------------------+--------------------|
"""
import os
import struct
import functools
import itertools as IT
def using_struct(output, chunksize):
    """Convert the '0'/'1' text in ``filename`` to raw bytes using struct.pack.

    The input (module-level ``filename``) is read ``chunksize`` characters at
    a time. Within each chunk, runs of up to eight characters are pulled off
    an iterator with ``islice``, parsed as base-2 integers, and written to
    ``output`` as unsigned bytes.

    Grouping restarts on every chunk, so ``chunksize`` should be a multiple
    of 8; otherwise a group is cut short at each chunk boundary.
    """
    with open(filename, 'r') as src, open(output, 'wb') as dst:
        # iter(callable, sentinel) keeps calling src.read(chunksize) until it
        # returns '' (end of file).
        for text in iter(functools.partial(src.read, chunksize), ''):
            digits = iter(text)

            def next_group():
                # Up to eight characters joined; '' once `digits` is exhausted.
                return ''.join(IT.islice(digits, 8))

            values = [int(group, 2) for group in iter(next_group, '')]
            layout = '{n}B'.format(n=len(values))
            dst.write(struct.pack(layout, *values))
def using_bytearray(output, chunksize):
    """Convert the '0'/'1' text in ``filename`` to raw bytes using bytearray.

    The input (module-level ``filename``) is read ``chunksize`` characters at
    a time. Within each chunk, runs of up to eight characters are pulled off
    an iterator with ``islice``, parsed as base-2 integers, and written to
    ``output`` through a ``bytearray``.

    Grouping restarts on every chunk, so ``chunksize`` should be a multiple
    of 8; otherwise a group is cut short at each chunk boundary.
    """
    with open(filename, 'r') as src, open(output, 'wb') as dst:
        # iter(callable, sentinel) keeps calling src.read(chunksize) until it
        # returns '' (end of file).
        for text in iter(functools.partial(src.read, chunksize), ''):
            digits = iter(text)

            def next_group():
                # Up to eight characters joined; '' once `digits` is exhausted.
                return ''.join(IT.islice(digits, 8))

            values = [int(group, 2) for group in iter(next_group, '')]
            dst.write(bytearray(values))
def using_loop(output, chunksize):
    """Convert the '0'/'1' text in ``filename`` to raw bytes by slicing.

    The input (module-level ``filename``) is read ``chunksize`` characters at
    a time. Each chunk is cut into consecutive 8-character slices, every
    slice is parsed as a base-2 integer, and the resulting values are written
    to ``output`` through a ``bytearray``.

    Slicing restarts on every chunk, so ``chunksize`` should be a multiple
    of 8; otherwise the last slice of each chunk is shorter than 8.
    """
    with open(filename, 'r') as src, open(output, 'wb') as dst:
        text = src.read(chunksize)
        # An empty read marks end of file.
        while text:
            offsets = range(0, len(text), 8)
            values = [int(text[start:start + 8], 2) for start in offsets]
            dst.write(bytearray(values))
            text = src.read(chunksize)
def using_loop_with_struct(output, chunksize):
    """Convert the '0'/'1' text in ``filename`` to raw bytes by slicing + struct.

    The input (module-level ``filename``) is read ``chunksize`` characters at
    a time. Each chunk is cut into consecutive 8-character slices, every
    slice is parsed as a base-2 integer, and the values are packed as
    unsigned bytes with ``struct.pack`` before being written to ``output``.

    Slicing restarts on every chunk, so ``chunksize`` should be a multiple
    of 8; otherwise the last slice of each chunk is shorter than 8.
    """
    with open(filename, 'r') as src, open(output, 'wb') as dst:
        text = src.read(chunksize)
        # An empty read marks end of file.
        while text:
            offsets = range(0, len(text), 8)
            values = [int(text[start:start + 8], 2) for start in offsets]
            layout = '{n}B'.format(n=len(values))
            dst.write(struct.pack(layout, *values))
            text = src.read(chunksize)
# Benchmark fixtures. `filename` is the text input made of '0'/'1' characters
# that every converter reads; `output` is the default destination the timing
# statement passes to each converter.
output = os.path.expanduser('~/tmp/hexdata')
filename = os.path.expanduser('~/tmp/data')

# Make sure the data directory exists: open(..., 'w') does not create missing
# parent directories and would otherwise fail on a machine without ~/tmp.
# (isdir + makedirs rather than exist_ok=True keeps this Python 2 compatible.)
_data_dir = os.path.dirname(filename)
if not os.path.isdir(_data_dir):
    os.makedirs(_data_dir)

# (Re)generate the input on every import so each run starts from the same
# data: 19 characters * 10000 = 190000 characters, a multiple of 8.
with open(filename, 'w') as f:
    content = '0101111011100011001'
    f.write(content*10000)
# Build one benchmark variant per (converter, chunksize) pair. Each variant is
# a functools.partial with the chunk size bound, collected in `funcs` and also
# published as a module-level name such as `using_loop_1024`, so that a timing
# statement can call it as `m.<name>(m.output)`.
funcs = []
for chunksize in (8, 1024, 1024*20):
    for func in [using_struct, using_bytearray, using_loop,
                 using_loop_with_struct]:
        f = functools.partial(func, chunksize=chunksize)
        # `__name__` exists on functions in both Python 2 and 3, whereas
        # `func_name` is Python 2 only and raises AttributeError on Python 3.
        variant_name = '{n}_{c}'.format(n=func.__name__, c=chunksize)
        # partial objects carry no name of their own; set both spellings so
        # code that reads either attribute keeps working.
        f.__name__ = variant_name
        f.func_name = variant_name
        funcs.append(f)
        globals()[variant_name] = f
if __name__ == '__main__':
    # Imported here because only the script entry point needs it. This
    # replaces the `ut.report` helper, which was never imported or defined
    # in this file and therefore raised NameError.
    import timeit

    # Sanity check: every conversion strategy must produce byte-identical
    # output for the same input before their timings are worth comparing.
    converters = [using_struct, using_bytearray, using_loop,
                  using_loop_with_struct]
    results = []
    for index, convert in enumerate(converters, 1):
        path = os.path.expanduser('~/tmp/hexdata{i}'.format(i=index))
        convert(path, chunksize=1024)
        with open(path, 'rb') as fh:
            results.append(fh.read())
    for convert, result in zip(converters[1:], results[1:]):
        # Raised explicitly (not `assert`) so the check survives `python -O`.
        if result != results[0]:
            raise AssertionError(
                '{a} and {b} produced different output'.format(
                    a=converters[0].__name__, b=convert.__name__))

    # Time every registered variant. The setup statement imports this file as
    # a module by its base name, which relies on the script's directory being
    # importable (it is when the file is run directly as a script).
    modname, ext = os.path.splitext(os.path.basename(__file__))
    setup = 'import {m} as m'.format(m=modname)
    loops = 10
    timings = []
    for f in funcs:
        stmt = 'm.{f}(m.output)'.format(f=f.func_name)
        # Best of three repeats, as the timeit documentation recommends.
        best = min(timeit.repeat(stmt, setup=setup, repeat=3, number=loops))
        timings.append((best / loops, f.func_name))
    timings.sort()

    # One row per variant, fastest first, in "<name> | <t> msec per loop" form.
    width = max(len(name) for _, name in timings)
    report = '\n'.join(
        '| {n:<{w}} | {t:.3g} msec per loop |'.format(
            n=name, w=width, t=seconds * 1000)
        for seconds, name in timings)
    print(report)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment