Skip to content

Instantly share code, notes, and snippets.

@tzickel
Last active June 21, 2018 09:50
Show Gist options
  • Save tzickel/4a81503acdb843dab4f03cfe950e84f3 to your computer and use it in GitHub Desktop.
Save tzickel/4a81503acdb843dab4f03cfe950e84f3 to your computer and use it in GitHub Desktop.
This benchmark compares streaming and non-streaming decoding of a large multipart response, extracting the payload part in each case.
# Benchmark for https://github.com/requests/toolbelt/pull/222
from requests_toolbelt import MultipartStreamDecoder, MultipartDecoder
def generate_data(bonudary, length, chunk_size):
    """Yield the byte chunks of a synthetic two-part multipart body.

    The first part has ``Content-Disposition`` and ``Content-Type`` headers
    and carries ``length`` as decimal text.  The second part has no headers
    and consists of ``length`` bytes of ``b'd'``, emitted in pieces of at
    most ``chunk_size`` bytes.  The closing delimiter is yielded last.

    Args:
        bonudary: Multipart boundary as bytes.  (The misspelled parameter
            name is kept so that existing keyword callers keep working.)
        length: Total number of payload bytes in the second part.
        chunk_size: Maximum size of each yielded payload chunk.

    Raises:
        ValueError: If ``length`` is positive but ``chunk_size`` is not.
            Because this is a generator, the error surfaces on the first
            iteration rather than at call time.
    """
    # With a non-positive chunk size the payload loop below would never
    # make progress and would yield forever.
    if length > 0 and chunk_size <= 0:
        raise ValueError('chunk_size must be positive, got %r' % (chunk_size,))

    # First part: headers followed by the payload length as text.
    yield b'\r\n--%s\r\n' % bonudary
    yield b'Content-Disposition: form-data; name="metadata"; filename="metadata"\r\n'
    yield b'Content-Type: text/plain\r\n'
    yield b'\r\n'
    yield str(length).encode('utf-8')

    # Second part: no headers, so the delimiter is followed directly by the
    # blank line that separates headers from content.
    yield b'\r\n--%s\r\n\r\n' % bonudary
    remaining = length
    while remaining > 0:
        piece = min(remaining, chunk_size)
        yield b'd' * piece
        remaining -= piece

    # Closing delimiter.
    yield b'\r\n--%s--\r\n' % bonudary
def streaming(data):
    """Decode the multipart body incrementally and return the payload.

    ``data`` is an iterator of byte chunks; the decoder pulls from it on
    demand through a ``next(data)`` callback.  The first part's content is
    parsed as the payload length, and the second part is iterated chunk by
    chunk into a buffer pre-sized to that length.

    Relies on the module-level ``content_type`` global, which the
    ``__main__`` section assigns before calling this function.

    Returns:
        A ``bytearray`` holding the content of the second part.
    """
    with MultipartStreamDecoder(lambda: next(data), content_type) as decoder:
        part_iter = iter(decoder)
        header_content = next(part_iter).content
        # A real client might parse JSON here and derive the length from
        # it; the benchmark simply sends the length as plain text.
        expected_size = int(header_content)
        payload_part = next(part_iter)
        # Filling one pre-sized buffer is meant to put much less pressure
        # on memory than the non-streaming version.
        buffer = bytearray(expected_size)
        offset = 0
        for chunk in payload_part:
            end = offset + len(chunk)
            buffer[offset:end] = chunk
            offset = end
        return buffer
def nonstreaming(data):
    """Decode the whole multipart body in memory and return the payload.

    All chunks from ``data`` are joined into a single bytes object before
    decoding.  Only the second part (index 1) is returned; the first part
    holds the payload length, which is not needed when everything is
    already in memory.

    Relies on the module-level ``content_type`` global, which the
    ``__main__`` section assigns before calling this function.

    Returns:
        The ``content`` of the second decoded part.
    """
    body = b''.join(data)
    parts = MultipartDecoder(body, content_type).parts
    return parts[1].content
if __name__ == "__main__":
    # default_timer is the clock the stdlib itself uses for benchmarking.
    from timeit import default_timer

    # Toggle either half of the benchmark independently.
    run_streaming = True
    run_nonstreaming = True

    # The boundary is built from the same byte as the payload -- presumably
    # to stress the decoder's boundary scanning.
    boundary = b'd' * 70
    # streaming() and nonstreaming() read ``content_type`` as a module-level
    # global, so it must be assigned here at module scope.
    content_type = (b'Multipart/Related; boundary="%s"' % boundary).decode('utf-8')
    data_length = 1024 * 1024 * 1024  # alternative size: 200 * 1024 * 1024
    chunk_size = 20 * 1024 * 1024
    output1 = output2 = None

    if run_streaming:
        data = generate_data(boundary, data_length, chunk_size)
        s = default_timer()
        output1 = streaming(data)
        e = default_timer()
        print(e - s)

    if run_nonstreaming:
        data = generate_data(boundary, data_length, chunk_size)
        s = default_timer()
        output2 = nonstreaming(data)
        e = default_timer()
        print(e - s)

    # Compare against None explicitly: an empty payload is falsy and would
    # otherwise skip the equality check even though both runs completed.
    if output1 is not None and output2 is not None:
        print(output1 == output2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment