tzickel/multipartStreamDecoderBenchmark.py

## multipartStreamDecoderBenchmark.py
# Benchmark for https://github.com/requests/toolbelt/pull/222
from requests_toolbelt import MultipartStreamDecoder, MultipartDecoder


def generate_data(bonudary, length, chunk_size):
    """Yield the byte chunks of a synthetic two-part multipart body.

    The stream starts with a delimiter line, followed by a first part that
    has ``Content-Disposition`` and ``Content-Type: text/plain`` headers and
    whose body is ``length`` written as decimal text.  A second part with no
    headers follows; its body is ``length`` bytes of ``b'd'`` yielded in
    pieces of at most ``chunk_size`` bytes.  The closing delimiter is
    yielded last.

    Args:
        bonudary: Multipart boundary as ``bytes`` (the spelling of the
            parameter name is kept so keyword callers keep working).
        length: Number of payload bytes in the second part.
        chunk_size: Upper bound on the size of each yielded payload piece.

    Yields:
        ``bytes`` fragments that, concatenated, form the complete body.

    Raises:
        ValueError: If ``length`` is positive but ``chunk_size`` is not.
            Because this is a generator, the error surfaces on the first
            iteration rather than at call time.
    """
    # A non-positive chunk size would never shrink the remaining byte count,
    # so the payload loop below would spin forever.
    if length > 0 and chunk_size <= 0:
        raise ValueError(
            'chunk_size must be positive, got %r' % (chunk_size,))

    # First part: headers plus the payload length as its body.
    yield b'\r\n--%s\r\n' % bonudary
    yield b'Content-Disposition: form-data; name="metadata"; filename="metadata"\r\n'
    yield b'Content-Type: text/plain\r\n'
    yield b'\r\n'
    yield str(length).encode('utf-8')

    # Second part: delimiter, empty header section, then the payload.
    yield b'\r\n--%s\r\n\r\n' % bonudary
    remaining = length
    while remaining > 0:
        piece_size = min(remaining, chunk_size)
        yield b'd' * piece_size
        remaining -= piece_size

    # Closing delimiter.
    yield b'\r\n--%s--\r\n' % bonudary


def streaming(data):
    """Decode the two-part body incrementally and return the payload.

    ``data`` is advanced with ``next()`` every time the decoder calls the
    reader it was given.  The first part's content is parsed as an integer
    giving the payload size; the pieces of the second part are then copied
    into a buffer allocated with that size.

    Reads the module-level ``content_type``.

    Returns:
        The ``bytearray`` holding the second part's bytes.
    """
    def read_next_chunk():
        return next(data)

    with MultipartStreamDecoder(read_next_chunk, content_type) as decoder:
        part_iter = iter(decoder)
        raw_length = next(part_iter).content
        # A real consumer might parse some JSON here and derive the length
        # from it; this benchmark keeps it to a plain integer.
        expected_size = int(raw_length)
        payload_part = next(part_iter)
        # Filling one pre-sized buffer puts much less pressure on memory
        # than the non-streaming version.
        buffer = bytearray(expected_size)
        offset = 0
        for piece in payload_part:
            end = offset + len(piece)
            buffer[offset:end] = piece
            offset = end
    return buffer


def nonstreaming(data):
    """Decode the whole body at once and return the second part's content.

    Every chunk from ``data`` is joined into a single bytes object, which is
    handed to ``MultipartDecoder`` together with the module-level
    ``content_type``.

    Returns:
        The ``content`` attribute of the second decoded part.
    """
    # The join is kept inline so no extra local keeps the full body alive.
    decoded_parts = MultipartDecoder(b''.join(data), content_type).parts
    # The first part's content is read but not otherwise used here.
    _metadata = decoded_parts[0].content
    return decoded_parts[1].content


if __name__ == "__main__":
    # Benchmark driver: times the streaming and non-streaming decoders on the
    # same generated body and, when both ran, prints whether their outputs
    # are equal.
    import time

    # Set either flag to False to benchmark only one decoder.
    run_streaming = True
    run_nonstreaming = True

    boundary = b'd' * 70
    # streaming() and nonstreaming() read ``content_type`` as a module-level
    # name, so it has to be assigned here before they are called.
    content_type = (b'Multipart/Related; boundary="%s"' % boundary).decode('utf-8')
    data_length = 1024 * 1024 * 1024  # alternative size: 200 * 1024 * 1024
    chunk_size = 20 * 1024 * 1024
    output1 = output2 = None

    if run_streaming:
        # The generator is lazy, so producing the body is part of the timing.
        data = generate_data(boundary, data_length, chunk_size)
        # perf_counter() is monotonic, unlike the wall clock of time.time().
        s = time.perf_counter()
        output1 = streaming(data)
        e = time.perf_counter()
        print(e - s)

    if run_nonstreaming:
        data = generate_data(boundary, data_length, chunk_size)
        s = time.perf_counter()
        output2 = nonstreaming(data)
        e = time.perf_counter()
        print(e - s)

    # Compare against None rather than truthiness: an empty result is still
    # a result and should be compared.
    if output1 is not None and output2 is not None:
        print(output1 == output2)
	# Benchmark for https://github.com/requests/toolbelt/pull/222
	from requests_toolbelt import MultipartStreamDecoder, MultipartDecoder


	def generate_data(bonudary, length, chunk_size):
		"""Yield the byte chunks of a synthetic two-part multipart body.

		The first part has ``Content-Disposition`` and ``Content-Type``
		headers and carries ``length`` as decimal text.  The second part has
		no headers and carries ``length`` bytes of ``b'd'``, yielded in
		pieces of at most ``chunk_size`` bytes.  The closing delimiter is
		yielded last.

		Args:
			bonudary: Multipart boundary as ``bytes`` (name spelling kept
				for compatibility with keyword callers).
			length: Number of payload bytes in the second part.
			chunk_size: Upper bound on the size of each payload piece.

		Raises:
			ValueError: If ``length`` is positive but ``chunk_size`` is
				not; raised on the first iteration of the generator.
		"""
		# A non-positive chunk size would make the payload loop endless.
		if length > 0 and chunk_size <= 0:
			raise ValueError(
				'chunk_size must be positive, got %r' % (chunk_size,))
		# First part: headers plus the payload length as its body.
		yield b'\r\n--%s\r\n' % bonudary
		yield b'Content-Disposition: form-data; name="metadata"; filename="metadata"\r\n'
		yield b'Content-Type: text/plain\r\n'
		yield b'\r\n'
		yield str(length).encode('utf-8')
		# Second part: delimiter, empty header section, then the payload.
		yield b'\r\n--%s\r\n\r\n' % bonudary
		remaining = length
		while remaining > 0:
			piece_size = min(remaining, chunk_size)
			yield b'd' * piece_size
			remaining -= piece_size
		# Closing delimiter.
		yield b'\r\n--%s--\r\n' % bonudary


	def streaming(data):
		"""Decode the two-part body incrementally and return the payload.

		The decoder is given a reader that calls ``next(data)``.  The first
		part's content is parsed as an integer payload size; the pieces of
		the second part are copied into a buffer allocated with that size.

		Reads the module-level ``content_type``.

		Returns:
			The ``bytearray`` holding the second part's bytes.
		"""
		with MultipartStreamDecoder(lambda: next(data), content_type) as mp:
			parts = iter(mp)
			metadata = next(parts).content
			# A real consumer might parse some JSON here and compute the
			# length from it instead of this simple conversion.
			length = int(metadata)
			datapart = next(parts)
			# Filling one pre-sized buffer puts much less pressure on
			# memory than the non-streaming version.
			arr = bytearray(length)
			i = 0
			for stream in datapart:
				stream_length = len(stream)
				arr[i:i + stream_length] = stream
				i += stream_length
		return arr


	def nonstreaming(data):
		"""Decode the whole body at once and return the second part's content.

		All chunks from ``data`` are joined into one bytes object and passed
		to ``MultipartDecoder`` with the module-level ``content_type``.

		Returns:
			The ``content`` attribute of the second decoded part.
		"""
		mp = MultipartDecoder(b''.join(data), content_type).parts
		# The first part's content is read but not otherwise used here.
		metadata = mp[0].content
		data = mp[1].content
		return data


	if __name__ == "__main__":
		# Benchmark driver: times both decoders on the same generated body
		# and, when both ran, prints whether their outputs are equal.
		import time
		# Set either flag to False to benchmark only one decoder.
		run_streaming = True
		run_nonstreaming = True
		boundary = b'd' * 70
		# streaming() and nonstreaming() read ``content_type`` as a
		# module-level name, so it must be assigned before they are called.
		content_type = (b'Multipart/Related; boundary="%s"' % boundary).decode('utf-8')
		data_length = 1024 * 1024 * 1024  # alternative size: 200 * 1024 * 1024
		chunk_size = 20 * 1024 * 1024
		output1 = output2 = None
		if run_streaming:
			# The generator is lazy, so producing the body is timed too.
			data = generate_data(boundary, data_length, chunk_size)
			# perf_counter() is monotonic, unlike time.time().
			s = time.perf_counter()
			output1 = streaming(data)
			e = time.perf_counter()
			print(e - s)
		if run_nonstreaming:
			data = generate_data(boundary, data_length, chunk_size)
			s = time.perf_counter()
			output2 = nonstreaming(data)
			e = time.perf_counter()
			print(e - s)
		# Compare against None rather than truthiness: an empty result is
		# still a result and should be compared.
		if output1 is not None and output2 is not None:
			print(output1 == output2)