Skip to content

Instantly share code, notes, and snippets.

@reata
Last active November 11, 2019 14:06
Show Gist options
  • Save reata/7894016938bc4110bb5516cdd87fe1a0 to your computer and use it in GitHub Desktop.
Save reata/7894016938bc4110bb5516cdd87fe1a0 to your computer and use it in GitHub Desktop.
Python中查询MySQL是CPU密集型还是IO密集型任务,能用多线程来加速吗?
import socket
import time
from multiprocessing import Pool as ProcessPool
from multiprocessing.dummy import Pool as ThreadPool
import pymysql
import psutil
class Timer:
    """Context manager that measures the wall-clock duration of its body.

    After the ``with`` block exits, ``self.duration`` holds the elapsed
    time in **milliseconds** (float).
    """

    def __enter__(self):
        # perf_counter() is a monotonic clock intended for interval
        # measurement; time.time() can jump (NTP sync, manual clock set)
        # and produce skewed or even negative durations.
        self.start = time.perf_counter()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Convert seconds -> milliseconds.
        self.duration = (time.perf_counter() - self.start) * 1000
def query_mysql(sql):
    """Execute *sql* against MySQL and return the elapsed time in milliseconds.

    A fresh connection is opened on every call, so each measurement
    includes connection-setup cost — mirroring a naive per-request
    web handler.
    """
    with Timer() as timer:
        conn = pymysql.connect(user="foo", password="bar")
        try:
            with conn.cursor() as cursor:
                cursor.execute(sql)
        finally:
            # The original leaked one connection per call; pymysql
            # connections are not auto-closed when garbage-collected
            # promptly enough under load.
            conn.close()
    return timer.duration
def io_intensive_task(host):
    """Open a TCP connection to *host* port 80; return the elapsed ms.

    Pure network I/O — used as the I/O-bound reference workload.
    """
    with Timer() as timer:
        # Socket as a context manager fixes the original's file-descriptor
        # leak (one socket left open per call).
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            s.connect((host, 80))
    return timer.duration
def cpu_intensive_task(cnt):
    """Burn CPU by summing 0..cnt-1 in a pure-Python loop; return elapsed ms.

    The explicit loop is intentional: it keeps the interpreter busy
    executing bytecode (the CPU-bound reference workload), so it must
    not be replaced by a C-speed builtin such as sum(range(cnt)).
    """
    with Timer() as timer:
        accumulator = 0
        for value in range(cnt):
            accumulator += value
    return timer.duration
loops = 20
# Both pools are sized to the number of *physical* cores.
# NOTE(review): the pools are created at import time with no
# `if __name__ == "__main__"` guard — on platforms using the "spawn"
# start method (Windows, macOS default) this re-executes the module in
# every child; confirm this script is only run on Linux/"fork".
thread_pool = ThreadPool(psutil.cpu_count(logical=False))
process_pool = ProcessPool(psutil.cpu_count(logical=False))


def parallel_execute(func, param, multi_process=True):
    """Run func(param) `loops` times on a pool and return the mean result.

    multi_process=True uses the process pool, False the thread pool.
    """
    chosen_pool = process_pool if multi_process else thread_pool
    durations = chosen_pool.map(func, [param] * loops)
    return sum(durations) / len(durations)
# For each workload: run it `loops` times serially, then on the thread
# pool, then on the process pool, printing the mean per-call duration.
for task, arg in [
    (io_intensive_task, "www.baidu.com"),
    (cpu_intensive_task, 1000000),
    (query_mysql, "select * from mysql.db"),
]:
    print("=" * 50)
    print(f"start executing function {task.__name__}({arg})")
    serial_durations = [task(arg) for _ in range(loops)]
    print(f"serial execution: {sum(serial_durations) / len(serial_durations)} ms")
    # parallel execution with multi thread
    print(f"threads parallel: {parallel_execute(task, arg, False)} ms")
    # parallel execution with multi process
    print(f"process parallel: {parallel_execute(task, arg, True)} ms")
```
==================================================
start executing function io_intensive_task(www.baidu.com)
serial execution: 17.513811588287354 ms
threads parallel: 16.531288623809814 ms
process parallel: 18.19998025894165 ms
==================================================
start executing function cpu_intensive_task(1000000)
serial execution: 31.85817003250122 ms
threads parallel: 169.74856853485107 ms
process parallel: 33.473050594329834 ms
==================================================
start executing function query_mysql(select * from mysql.db)
serial execution: 0.8367657661437988 ms
threads parallel: 3.7558913230895996 ms
process parallel: 0.8831620216369629 ms
```
@reata
Copy link
Author

reata commented Nov 10, 2019

很容易误认为数据库查询都是IO操作。而纯IO的任务,在Python中不受GIL的制约,可以真正地并行执行。但通过上面这个例子,可以发现事实并非如此。

当Web应用中view函数的逻辑都类似query_mysql,主要是去查询数据库时,如果WebServer并行模式只开了线程,而没有多进程,性能会受到很大限制:

  • 对于uwsgi,可以通过--processes指定子进程数,--threads指定每个进程的线程数。
  • 对于gunicorn,在默认的sync模式下,可以通过--workers指定子进程数,--threads指定每个进程的线程数。

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment