orangle/asyncio_1000_benckmark.py

## asyncio_1000_benckmark.py
import asyncio
import aiohttp
import time

async def make_request(session, qty_req, req_n):
    """Send ``qty_req`` GET requests one after another through ``session``.

    Each request is awaited to completion before the next one is started.
    The response body is read only when the status is 200.

    Args:
        session: Object whose ``get(url)`` returns an async context manager
            yielding a response with a ``status`` attribute and an awaitable
            ``text()`` method (an ``aiohttp.ClientSession`` in this script).
        qty_req: Number of requests this worker issues.
        req_n: Worker index passed by the caller; not used by this function.
    """
    target = "http://10.86.36.223:8080"
    for _ in range(qty_req):
        async with session.get(target) as response:
            succeeded = response.status == 200
            if succeeded:
                # Drain the body so the whole response is consumed.
                await response.text()


async def main():
    """Run the asyncio benchmark and print timing statistics.

    Starts ``n_threads`` concurrent ``make_request`` coroutines that share a
    single ``aiohttp.ClientSession`` and together issue exactly
    ``n_requests`` requests, then prints the elapsed wall-clock time, the
    throughput and per-request averages (the last line is a markdown table
    row).
    """
    n_threads = 20  # number of concurrent coroutine workers
    n_requests = 1000  # total number of requests across all workers
    # Spread any remainder over the first workers so that exactly n_requests
    # are sent; the statistics below divide by n_requests, so silently
    # truncating the per-worker count would skew them.
    per_worker, extra = divmod(n_requests, n_threads)
    # perf_counter() is monotonic, so the measured interval cannot be
    # distorted by system clock adjustments the way time.time() can.
    start = time.perf_counter()
    async with aiohttp.ClientSession() as session:
        await asyncio.gather(
            *[
                make_request(session, per_worker + (1 if i < extra else 0), i)
                for i in range(n_threads)
            ]
        )
    total = time.perf_counter() - start
    print("took %02.02f seconds" % (total))
    print("%02.02f requests x second" % (n_requests*1.0/total))
    print("average requests time %02.04f s" % (total*1.0/n_requests))
    print("|python|async-workers| %02.02f s | %02.02f |  %02.04f s| %02.04f s| (units in seconds here)" % (
        total,
        n_requests*1.0/total,
        # Elapsed time scaled by the worker count, per request.
        n_threads*total*1.0/n_requests,
        total*1.0/n_requests
    ))

# Script entry: run the benchmark coroutine to completion.
# asyncio.run() creates a fresh event loop and closes it afterwards; calling
# asyncio.get_event_loop() without a running loop is deprecated in newer
# Python versions and left the loop unclosed.
asyncio.run(main())

## blog-en.md

      
    Raw
  

              blog-en.md
            
          
    Multithreading Consumption and Efficiency Issues

Server Benchmark (4C2G, Python version: 3.7)

ab -n 5000 -c 100 http://10.86.36.223:8080/

Nginx service, using Lua to return "hello"
Results

Requests per second:    3325.63 [#/sec] (mean)
Time per request:       30.069 [ms] (mean)
Time per request:       0.301 [ms] (mean, across all concurrent requests)

Round 1

thread_1000_benckmark.py
One Thread with 10,000 Requests

sar -P ALL 1

Review Results

09:22:07 PM     CPU     %user     %nice   %system   %iowait    %steal     %idle
09:22:08 PM       0     71.93      0.00      1.75      0.00     17.54      8.77
09:22:09 PM       0     67.27      0.00      1.82      0.00     21.82      9.09
09:22:10 PM       0     65.31      0.00      2.04      0.00     22.45     10.20
09:22:11 PM       0     71.43      0.00      1.79      0.00     12.50     14.29
09:22:15 PM       0     85.29      0.00      1.47      0.00      2.94     10.29
09:22:23 PM       0     85.25      0.00      1.64      0.00      4.92      8.20

Results

took 24.35 seconds
410.75 requests x second
average requests time 0.0024 s

It can be observed that only one core is being utilized, while the others remain idle at 0%. The user and steal percentages are relatively high, while the system usage is minimal. Almost all of the time is spent on the CPU; very little of it is spent waiting on the network.
1,000 Threads with 10,000 Requests

09:31:51 PM     CPU     %user     %nice   %system   %iowait    %steal     %idle
09:31:52 PM       0     28.26      0.00      4.35      0.00      4.35     63.04
09:31:53 PM       0     13.75      0.00      2.50      0.00      7.50     76.25
09:31:53 PM       1      7.14      0.00      1.19      0.00      7.14     84.52
09:31:53 PM       2      6.82      0.00      1.14      0.00      4.55     87.50

09:31:55 PM       0     11.54      0.00      0.00      0.00      8.97     79.49
09:31:55 PM       1      7.32      0.00      0.00      0.00      8.54     84.15
09:31:55 PM       2      4.55      0.00      0.00      0.00      6.82     88.64
09:31:55 PM       3      4.35      0.00      1.09      0.00      4.35     90.22

09:31:56 PM       0     11.11      0.00      2.47      0.00      8.64     77.78
09:31:56 PM       1     12.79      0.00      1.16      0.00      5.81     80.23
09:31:56 PM       2      6.59      0.00      2.20      0.00      5.49     85.71
09:31:56 PM       3      5.49      0.00      0.00      0.00      6.59     87.91

09:32:02 PM       0     12.79      0.00      1.16      1.16     19.77     65.12
09:32:02 PM       1      8.79      0.00      2.20      0.00     18.68     70.33
09:32:02 PM       2      3.06      0.00      0.00      0.00     13.27     83.67
09:32:02 PM       3     11.34      0.00      1.03      1.03     16.49     70.10

09:32:09 PM       0      7.41      0.00      1.23      0.00     16.05     75.31
09:32:09 PM       1      9.20      0.00      1.15      0.00     10.34     79.31
09:32:09 PM       2      3.37      0.00      0.00      0.00      5.62     91.01
09:32:09 PM       3      4.44      0.00      1.11      0.00      5.56     88.89

09:32:22 PM       0      8.54      0.00      3.66      0.00     12.20     75.61
09:32:22 PM       1      7.23      0.00      2.41      0.00      7.23     83.13
09:32:22 PM       2      4.21      0.00      3.16      0.00      4.21     88.42
09:32:22 PM       3      8.05      0.00      2.30      0.00      6.90     82.76

Results

took 30.50 seconds
327.91 requests x second
average requests time 0.0030 s

Overall performance has worsened, with other cores also showing CPU usage, but none reaching full capacity. System and iowait percentages are higher than before.
Round 2

asyncio_1000_benckmark.py
One Thread with 10,000 Requests

CPU Statistics

09:49:33 PM     CPU     %user     %nice   %system   %iowait    %steal     %idle
09:49:27 PM       0     77.05      0.00      3.28      0.00     11.48      8.20
09:49:28 PM       0     74.60      0.00      3.17      0.00     14.29      7.94
09:49:30 PM       0     81.48      0.00      5.56      0.00      3.70      9.26
09:49:33 PM       0     83.33      0.00      3.03      0.00      7.58      6.06
09:49:34 PM       0     78.95      0.00      3.51      0.00      7.02     10.53

Results

took 14.25 seconds
702.00 requests x second
average requests time 0.0014 s
|python|async-workers| 14.25 s | 702.00 |  0.0014 s| 0.0014 s| (units in seconds here)

This is significantly faster than multithreading and, like before, only a single CPU is being used, with other CPUs idle at 0%. The system usage is higher than with multithreading.
1,000 Threads with 10,000 Requests

CPU Statistics

09:55:58 PM     CPU     %user     %nice   %system   %iowait    %steal     %idle
09:55:59 PM       2     73.00      0.00      4.00      0.00      1.00     22.00
09:56:01 PM       2     97.94      0.00      2.06      0.00      0.00      0.00
09:56:03 PM       2     97.96      0.00      2.04      0.00      0.00      0.00
09:56:06 PM       3     97.98      0.00      2.02      0.00      0.00      0.00
09:56:07 PM       3     98.00      0.00      2.00      0.00      0.00      0.00
09:56:10 PM       3     97.98      0.00      2.02      0.00      0.00      0.00

Results

took 12.18 seconds
820.80 requests x second
average requests time 0.0012 s
|python|async-workers| 12.18 s | 820.80 |  1.2183 s| 0.0012 s| (units in seconds here)

This result is the fastest of all, with only a single CPU core being utilized at a high rate (100%), and there was even a switch between CPU cores during the process.
Acknowledgments

Thanks to the Hashnode article (https://hashnode.com/post/benchmarking-async-vs-thread-in-python-cjw1pjq4l000ue1s1eytt501l) for providing a comparative benchmark between asynchronous and threaded approaches in Python.

  
## blog.md

      
    Raw
  

              blog.md
            
          
    多线程情况下消耗和效率问题

服务端基准测试(4C2G, python version:3.7)
ab -n 5000 -c 100 http://10.86.36.223:8080/

nginx服务，使用lua返回hello
结果
Requests per second:    3325.63 [#/sec] (mean)
Time per request:       30.069 [ms] (mean)
Time per request:       0.301 [ms] (mean, across all concurrent requests)

round1

thread_1000_benckmark.py
一个线程10000个请求

sar -P ALL 1

查看结果
09:22:07 PM     CPU     %user     %nice   %system   %iowait    %steal     %idle
09:22:08 PM       0     71.93      0.00      1.75      0.00     17.54      8.77
09:22:09 PM       0     67.27      0.00      1.82      0.00     21.82      9.09
09:22:10 PM       0     65.31      0.00      2.04      0.00     22.45     10.20
09:22:11 PM       0     71.43      0.00      1.79      0.00     12.50     14.29
09:22:15 PM       0     85.29      0.00      1.47      0.00      2.94     10.29
09:22:23 PM       0     85.25      0.00      1.64      0.00      4.92      8.20

跑出的结果
took 24.35 seconds
410.75 requests x second
average requests time 0.0024 s

可以看到，只有单核心在使用，其他核心都是0；user、steal比较高，system比较少。时间几乎都消耗在cpu上，网络等待并不多。
1000个线程10000个请求

09:31:51 PM     CPU     %user     %nice   %system   %iowait    %steal     %idle
09:31:52 PM       0     28.26      0.00      4.35      0.00      4.35     63.04
09:31:53 PM       0     13.75      0.00      2.50      0.00      7.50     76.25
09:31:53 PM       1      7.14      0.00      1.19      0.00      7.14     84.52
09:31:53 PM       2      6.82      0.00      1.14      0.00      4.55     87.50

09:31:55 PM       0     11.54      0.00      0.00      0.00      8.97     79.49
09:31:55 PM       1      7.32      0.00      0.00      0.00      8.54     84.15
09:31:55 PM       2      4.55      0.00      0.00      0.00      6.82     88.64
09:31:55 PM       3      4.35      0.00      1.09      0.00      4.35     90.22

09:31:56 PM       0     11.11      0.00      2.47      0.00      8.64     77.78
09:31:56 PM       1     12.79      0.00      1.16      0.00      5.81     80.23
09:31:56 PM       2      6.59      0.00      2.20      0.00      5.49     85.71
09:31:56 PM       3      5.49      0.00      0.00      0.00      6.59     87.91

09:32:02 PM       0     12.79      0.00      1.16      1.16     19.77     65.12
09:32:02 PM       1      8.79      0.00      2.20      0.00     18.68     70.33
09:32:02 PM       2      3.06      0.00      0.00      0.00     13.27     83.67
09:32:02 PM       3     11.34      0.00      1.03      1.03     16.49     70.10

09:32:09 PM       0      7.41      0.00      1.23      0.00     16.05     75.31
09:32:09 PM       1      9.20      0.00      1.15      0.00     10.34     79.31
09:32:09 PM       2      3.37      0.00      0.00      0.00      5.62     91.01
09:32:09 PM       3      4.44      0.00      1.11      0.00      5.56     88.89

09:32:22 PM       0      8.54      0.00      3.66      0.00     12.20     75.61
09:32:22 PM       1      7.23      0.00      2.41      0.00      7.23     83.13
09:32:22 PM       2      4.21      0.00      3.16      0.00      4.21     88.42
09:32:22 PM       3      8.05      0.00      2.30      0.00      6.90     82.76

跑出的结果
took 30.50 seconds
327.91 requests x second
average requests time 0.0030 s

整体的性能更差了，可以看到其他核也有cpu使用，所有核都跑不满, system 和 iowait 也都比之前高
round2

asyncio_1000_benckmark.py
一个线程10000个请求

cpu统计
09:49:33 PM     CPU     %user     %nice   %system   %iowait    %steal     %idle
09:49:27 PM       0     77.05      0.00      3.28      0.00     11.48      8.20
09:49:28 PM       0     74.60      0.00      3.17      0.00     14.29      7.94
09:49:30 PM       0     81.48      0.00      5.56      0.00      3.70      9.26
09:49:33 PM       0     83.33      0.00      3.03      0.00      7.58      6.06
09:49:34 PM       0     78.95      0.00      3.51      0.00      7.02     10.53

跑出结果
took 14.25 seconds
702.00 requests x second
average requests time 0.0014 s
|python|async-workers| 14.25 s | 702.00 |  0.0014 s| 0.0014 s| (units in seconds here)

这比多线程快多了，也是只能使用单个cpu，其他cpu都是0，system比多线程要使用的多。
1000个线程10000个请求

cpu统计
09:55:58 PM     CPU     %user     %nice   %system   %iowait    %steal     %idle
09:55:59 PM       2     73.00      0.00      4.00      0.00      1.00     22.00
09:56:01 PM       2     97.94      0.00      2.06      0.00      0.00      0.00
09:56:03 PM       2     97.96      0.00      2.04      0.00      0.00      0.00
09:56:06 PM       3     97.98      0.00      2.02      0.00      0.00      0.00
09:56:07 PM       3     98.00      0.00      2.00      0.00      0.00      0.00
09:56:10 PM       3     97.98      0.00      2.02      0.00      0.00      0.00

跑出结果
took 12.18 seconds
820.80 requests x second
average requests time 0.0012 s
|python|async-workers| 12.18 s | 820.80 |  1.2183 s| 0.0012 s| (units in seconds here)

这是最快的一个结果，也是只有单个cpu有个使用率，要比之前的几种情况使用率都高（100%)，中间还切换了一次cpu核。
以上结果多次执行，基本一致。
感谢： https://hashnode.com/post/benchmarking-async-vs-thread-in-python-cjw1pjq4l000ue1s1eytt501l

  
## thread_1000_benckmark.py
# multiple_sync_request_threaded.py

import threading
import requests
import time

def make_requests(session, n, url):
    """Issue ``n`` sequential GET requests to ``url`` through ``session``.

    Args:
        session: Object exposing ``get(url)`` that returns a response with a
            ``status_code`` attribute (a ``requests.Session`` in this script).
        n: Number of requests to send.
        url: Address to request.
    """
    for _ in range(n):
        reply = session.get(url)
        # The status is checked but deliberately not acted upon: the
        # benchmark only measures how long the requests take.
        succeeded = reply.status_code == 200
        if succeeded:
            continue

def main():
    """Run the threaded benchmark and print timing statistics.

    Creates ``n_threads`` threads that share one ``requests.Session`` and
    together issue exactly ``n_requests`` requests via ``make_requests``,
    then prints the elapsed wall-clock time, the throughput and the average
    time per request. Thread creation is not included in the measurement.
    """
    n_threads = 1
    n_requests = 10000
    # Spread any remainder over the first threads so that exactly n_requests
    # are sent; the statistics below divide by n_requests, so truncating the
    # per-thread count would skew them.
    per_thread, extra = divmod(n_requests, n_threads)

    url = "http://10.86.36.223:8080"
    # The context manager closes the session (and its pooled connections)
    # once all threads have finished, instead of leaking it.
    # NOTE(review): one Session is shared by every thread; confirm that this
    # sharing is acceptable for the requests version in use.
    with requests.Session() as session:
        threads = [
            threading.Thread(
                target=make_requests,
                args=(session, per_thread + (1 if i < extra else 0), url)
            ) for i in range(n_threads)
        ]

        # perf_counter() is monotonic, so the measured interval cannot be
        # distorted by system clock adjustments the way time.time() can.
        start = time.perf_counter()
        for t in threads:
            t.start()
        for t in threads:
            t.join()
        total = time.perf_counter() - start
    print("took %02.02f seconds" % (total))
    print("%02.02f requests x second" % (n_requests*1.0/total))
    print("average requests time %02.04f s" % (total*1.0/n_requests))
# Script entry: runs the threaded benchmark as soon as this module is
# executed (or imported), since the call is unconditional.
main()
	import asyncio
	import aiohttp
	import time

	async def make_request(session, qty_req, req_n):
	url = "http://10.86.36.223:8080"
	for idx in range(qty_req):
	async with session.get(url) as resp:
	if resp.status == 200:
	await resp.text()


	async def main():
	n_threads = 20
	n_requests = 1000
	start = time.time()
	async with aiohttp.ClientSession() as session:
	await asyncio.gather(
	*[make_request(session, int(n_requests/n_threads), i) for i in range(n_threads)]
	)
	end = time.time()
	total = end-start
	print("took %02.02f seconds" % (total))
	print("%02.02f requests x second" % (n_requests*1.0/total))
	print("average requests time %02.04f s" % (total*1.0/n_requests))
	print("|python|async-workers| %02.02f s | %02.02f |  %02.04f s| %02.04f s| (units in seconds here)" % (
	total,
	n_requests*1.0/total,
	n_threads*total*1.0/n_requests,
	total*1.0/n_requests
	))

	loop = asyncio.get_event_loop()
	loop.run_until_complete(main())
	# multiple_sync_request_threaded.py

	import threading
	import requests
	import time

	def make_requests(session, n, url):
	for i in range(n):
	resp = session.get(url)
	if resp.status_code == 200:
	pass

	def main():
	n_threads = 1
	n_requests = 10000
	n_requests_per_thread = n_requests // n_threads

	url = "http://10.86.36.223:8080"
	session = requests.Session()

	threads = [
	threading.Thread(
	target=make_requests,
	args=(session, n_requests_per_thread, url)
	) for i in range(n_threads)
	]

	start = time.time()
	for t in threads:
	t.start()
	for t in threads:
	t.join()
	end = time.time()
	total = end-start
	print("took %02.02f seconds" % (total))
	print("%02.02f requests x second" % (n_requests*1.0/total))
	print("average requests time %02.04f s" % (total*1.0/n_requests))
	main()