@dongyuzheng
Last active October 6, 2022 15:35
Benchmark of serial `requests` vs. `aiohttp` vs. `ThreadPoolExecutor` vs. `ProcessPoolExecutor`
#!/usr/bin/env python3
import asyncio
import time
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
from contextlib import contextmanager

import aiohttp
import requests

URL = "https://arcana.io/"


@contextmanager
def timeit(*x):
    # Print a label, run the wrapped block, then report the elapsed wall-clock time.
    print("\nstarting", x, flush=True)
    start = time.time()
    try:
        yield
    finally:
        print(f"done {x} in {time.time() - start:.2f} seconds", flush=True)


def normal_requests(unused=None) -> int:
    # Blocking GET via requests; `unused` lets executor.map pass an index through.
    resp = requests.get(URL)
    resp.raise_for_status()
    return resp.status_code


def do_normal_requests(N):
    return [normal_requests() for _ in range(N)]


async def async_requests() -> int:
    # Still calls the blocking requests API, so the event loop cannot overlap requests.
    resp = requests.get(URL)
    resp.raise_for_status()
    return resp.status_code


async def do_async_requests(N):
    return await asyncio.gather(*(async_requests() for _ in range(N)))


async def async_aiohttp() -> int:
    # One aiohttp ClientSession per request.
    async with aiohttp.ClientSession() as sess:
        async with sess.get(URL) as resp:
            resp.raise_for_status()
            return resp.status


async def do_async_aiohttp(N):
    return await asyncio.gather(*(async_aiohttp() for _ in range(N)))


async def async_aiohttp_shared_session(sess) -> int:
    # Reuse a single ClientSession (and its connection pool) across requests.
    async with sess.get(URL) as resp:
        resp.raise_for_status()
        return resp.status


async def do_async_aiohttp_shared_session(N):
    async with aiohttp.ClientSession() as sess:
        return await asyncio.gather(
            *(async_aiohttp_shared_session(sess) for _ in range(N))
        )


def do_thread_pool_executor(N, W):
    with ThreadPoolExecutor(max_workers=W) as executor:
        return list(executor.map(normal_requests, range(N)))


def do_process_pool_executor(N, W):
    with ProcessPoolExecutor(max_workers=W) as executor:
        return list(executor.map(normal_requests, range(N)))


def main():
    N = 20
    LOOP = 3
    print(f"For each below, loop {LOOP} times, each loop {N} batch size.")

    with timeit("serial"):
        for _ in range(LOOP):
            print(">", end="", flush=True)
            do_normal_requests(N)

    with timeit("async but normal requests"):
        for _ in range(LOOP):
            print(">", end="", flush=True)
            asyncio.run(do_async_requests(N))

    with timeit("async aiohttp"):
        for _ in range(LOOP):
            print(">", end="", flush=True)
            asyncio.run(do_async_aiohttp(N))

    with timeit("async aiohttp with shared session"):
        for _ in range(LOOP):
            print(">", end="", flush=True)
            asyncio.run(do_async_aiohttp_shared_session(N))

    Ws = [1, 2, 3, 5, 10, 15, 20]
    for W in Ws:
        with timeit("thread pool executor", W):
            for _ in range(LOOP):
                print(">", end="", flush=True)
                do_thread_pool_executor(N, W)

    for W in Ws:
        with timeit("process pool executor", W):
            for _ in range(LOOP):
                print(">", end="", flush=True)
                do_process_pool_executor(N, W)


if __name__ == "__main__":
    main()
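
A possible extension, not part of the original gist: for batches much larger than 20, the shared-session approach is usually combined with a cap on the number of in-flight requests. Below is a minimal sketch under that assumption, reusing the same URL and adding a hypothetical `limit` parameter backed by `asyncio.Semaphore`:

#!/usr/bin/env python3
# Hypothetical variant, not benchmarked above: one shared aiohttp session
# plus a semaphore that bounds how many requests are in flight at once.
import asyncio

import aiohttp

URL = "https://arcana.io/"


async def fetch_limited(sess: aiohttp.ClientSession, sem: asyncio.Semaphore) -> int:
    async with sem:  # at most `limit` coroutines pass this point concurrently
        async with sess.get(URL) as resp:
            resp.raise_for_status()
            return resp.status


async def fetch_all(n: int, limit: int = 10) -> list:
    sem = asyncio.Semaphore(limit)
    async with aiohttp.ClientSession() as sess:
        return await asyncio.gather(*(fetch_limited(sess, sem) for _ in range(n)))


if __name__ == "__main__":
    print(asyncio.run(fetch_all(20)))

Note that aiohttp's default TCPConnector already limits a session to 100 concurrent connections, so an explicit semaphore only matters when a tighter bound is wanted.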
dongyuzheng commented Oct 6, 2022

MacBook Pro (16-inch, 2021)
CPU: Apple M1 Max
RAM: 64 GB

On gigabit internet

For each below, loop 3 times, each loop 20 batch size.

starting ('serial',)
>>>done ('serial',) in 5.47 seconds

starting ('async but normal requests',)
>>>done ('async but normal requests',) in 5.27 seconds

starting ('async aiohttp',)
>>>done ('async aiohttp',) in 0.78 seconds

starting ('async aiohttp with shared session',)
>>>done ('async aiohttp with shared session',) in 0.61 seconds

starting ('thread pool executor', 1)
>>>done ('thread pool executor', 1) in 5.20 seconds

starting ('thread pool executor', 2)
>>>done ('thread pool executor', 2) in 2.95 seconds

starting ('thread pool executor', 3)
>>>done ('thread pool executor', 3) in 2.09 seconds

starting ('thread pool executor', 5)
>>>done ('thread pool executor', 5) in 1.57 seconds

starting ('thread pool executor', 10)
>>>done ('thread pool executor', 10) in 1.03 seconds

starting ('thread pool executor', 15)
>>>done ('thread pool executor', 15) in 0.91 seconds

starting ('thread pool executor', 20)
>>>done ('thread pool executor', 20) in 0.90 seconds

starting ('process pool executor', 1)
>>>done ('process pool executor', 1) in 5.51 seconds

starting ('process pool executor', 2)
>>>done ('process pool executor', 2) in 3.14 seconds

starting ('process pool executor', 3)
>>>done ('process pool executor', 3) in 2.38 seconds

starting ('process pool executor', 5)
>>>done ('process pool executor', 5) in 1.82 seconds

starting ('process pool executor', 10)
>>>done ('process pool executor', 10) in 1.58 seconds

starting ('process pool executor', 15)
>>>done ('process pool executor', 15) in 1.86 seconds

starting ('process pool executor', 20)
>>>done ('process pool executor', 20) in 1.88 seconds
