Last active
June 21, 2018 09:50
-
-
Save tzickel/4a81503acdb843dab4f03cfe950e84f3 to your computer and use it in GitHub Desktop.
This benchmark compares two ways of handling a large multipart result: decoding it as a stream, chunk by chunk, versus decoding the whole body at once.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Benchmark for https://github.com/requests/toolbelt/pull/222
from requests_toolbelt import MultipartStreamDecoder, MultipartDecoder | |
def generate_data(bonudary, length, chunk_size):
    """Yield the byte fragments of a two-part multipart body.

    The first part carries headers and a text body holding ``length`` as a
    decimal number.  The second part has no headers; its body is ``length``
    bytes of ``b'd'``, emitted in pieces of at most ``chunk_size`` bytes.
    The sequence ends with the closing ``--<boundary>--`` delimiter.

    Args:
        bonudary: Boundary token as ``bytes`` (the parameter name is
            misspelled but kept so existing callers keep working).
        length: Total number of payload bytes in the second part.
        chunk_size: Maximum size of each yielded payload fragment.

    Yields:
        ``bytes`` fragments that, concatenated, form the complete body.

    Raises:
        ValueError: If payload is requested (``length > 0``) with a
            non-positive ``chunk_size``.  Because this is a generator, the
            error surfaces on the first ``next()`` call, not at call time.
    """
    # Without this guard the payload loop below would never make progress
    # and the generator would yield forever.
    if length > 0 and chunk_size <= 0:
        raise ValueError(
            'chunk_size must be positive when length > 0, got %r' % (chunk_size,)
        )

    # First part: headers followed by the payload length as text.
    yield b'\r\n--%s\r\n' % bonudary
    yield b'Content-Disposition: form-data; name="metadata"; filename="metadata"\r\n'
    yield b'Content-Type: text/plain\r\n'
    yield b'\r\n'
    yield str(length).encode('utf-8')

    # Second part: no headers, just the blank line that ends the header block.
    yield b'\r\n--%s\r\n\r\n' % bonudary
    remaining = length
    while remaining > 0:
        data_length = min(remaining, chunk_size)
        yield b'd' * data_length
        remaining -= data_length

    # Closing delimiter.
    yield b'\r\n--%s--\r\n' % bonudary
def streaming(data):
    """Decode the multipart body incrementally and return the payload.

    ``data`` is an iterator of ``bytes`` fragments; the decoder is handed a
    callable that pulls the next fragment on demand.  The content type
    (including the boundary) is read from the module-level ``content_type``.

    Returns:
        A ``bytearray`` holding the body of the second part.
    """
    with MultipartStreamDecoder(lambda: next(data), content_type) as decoder:
        part_iter = iter(decoder)

        # The first part announces the payload size.  A real client might
        # parse JSON here and derive the size from it instead.
        length = int(next(part_iter).content)
        payload_part = next(part_iter)

        # Copy each chunk into a buffer allocated once up front; this is
        # meant to put far less pressure on memory than the non-streaming
        # version.
        buf = bytearray(length)
        offset = 0
        for chunk in payload_part:
            end = offset + len(chunk)
            buf[offset:end] = chunk
            offset = end
        return buf
def nonstreaming(data):
    """Join the whole body in memory, decode it, and return the payload.

    ``data`` is an iterable of ``bytes`` fragments.  The content type
    (including the boundary) is read from the module-level ``content_type``.

    Returns:
        The ``content`` of the second decoded part.
    """
    # The join stays inline so the concatenated body is only a temporary
    # and is not kept alive by a local name.
    parts = MultipartDecoder(b''.join(data), content_type).parts
    # The metadata part is read to mirror the streaming variant, even
    # though its value is not used here.
    _metadata = parts[0].content
    return parts[1].content
if __name__ == "__main__":
    import time

    # The boundary is made of the same byte that generate_data uses as
    # payload filler (presumably to stress boundary detection -- confirm
    # against the linked pull request).
    boundary = b'd' * 70
    # streaming() and nonstreaming() read content_type as a module global,
    # so it must be assigned at this level.
    content_type = (b'Multipart/Related; boundary="%s"' % boundary).decode('utf-8')
    data_length = 1024 * 1024 * 1024  # smaller alternative: 200 * 1024 * 1024
    chunk_size = 20 * 1024 * 1024
    output1 = output2 = None

    # Set either switch to False to skip that benchmark.
    run_streaming = True
    run_nonstreaming = True

    if run_streaming:
        data = generate_data(boundary, data_length, chunk_size)
        # perf_counter is monotonic and high resolution, so the measured
        # interval cannot be skewed by system clock adjustments.
        s = time.perf_counter()
        output1 = streaming(data)
        e = time.perf_counter()
        print(e - s)

    if run_nonstreaming:
        data = generate_data(boundary, data_length, chunk_size)
        s = time.perf_counter()
        output2 = nonstreaming(data)
        e = time.perf_counter()
        print(e - s)

    # Compare against None rather than truthiness so that two empty results
    # (data_length == 0) are still checked for equality.
    if output1 is not None and output2 is not None:
        print(output1 == output2)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment