Last active
November 11, 2019 14:06
-
-
Save reata/7894016938bc4110bb5516cdd87fe1a0 to your computer and use it in GitHub Desktop.
Python中查询MySQL是CPU密集型还是IO密集型任务,能用多线程来加速吗?
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import socket | |
import time | |
from multiprocessing import Pool as ProcessPool | |
from multiprocessing.dummy import Pool as ThreadPool | |
import pymysql | |
import psutil | |
class Timer:
    """Context manager that measures the elapsed wall time of its body.

    On exit it stores the elapsed time in ``self.duration`` (milliseconds).
    Uses ``time.perf_counter()`` — a monotonic, high-resolution clock meant
    for interval timing — instead of ``time.time()``, which can jump
    backwards/forwards when the system clock is adjusted.
    """

    def __enter__(self):
        # perf_counter's absolute value is meaningless; only differences are.
        self.start = time.perf_counter()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Convert seconds -> milliseconds, matching the original contract.
        self.duration = (time.perf_counter() - self.start) * 1000
def query_mysql(sql):
    """Execute *sql* against a local MySQL server and return elapsed ms.

    Fix: the original leaked the connection (never closed), which — run
    ``loops`` times per pool worker — exhausts server connection slots.
    The cursor was already closed by its ``with`` block; the connection
    now is too.
    """
    with Timer() as timer:
        conn = pymysql.connect(user="foo", password="bar")
        try:
            with conn.cursor() as cursor:
                cursor.execute(sql)
        finally:
            # Always release the server-side connection, even if execute raises.
            conn.close()
    return timer.duration
def io_intensive_task(host):
    """Open (and close) a TCP connection to *host*:80; return elapsed ms.

    Fix: the original never closed the socket, leaking one file
    descriptor per call. ``socket.socket`` is a context manager, so a
    ``with`` block guarantees the fd is released even if connect fails.
    """
    with Timer() as timer:
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            s.connect((host, 80))
    return timer.duration
def cpu_intensive_task(cnt):
    """Burn CPU by summing 0..cnt-1 in a pure-Python loop; return elapsed ms.

    The explicit loop (rather than ``sum(range(cnt))``) is deliberate:
    it keeps the work in Python bytecode so the GIL is held throughout.
    """
    with Timer() as timer:
        accumulator = 0
        for value in range(cnt):
            accumulator += value
    return timer.duration
# Number of repetitions per measurement (both serial and pooled).
loops = 20

# Size both pools to the number of *physical* cores so thread vs. process
# comparisons use the same parallelism budget.
_physical_cores = psutil.cpu_count(logical=False)
thread_pool = ThreadPool(_physical_cores)
process_pool = ProcessPool(_physical_cores)
def parallel_execute(func, param, multi_process=True):
    """Run ``func(param)`` ``loops`` times on a worker pool; return mean ms.

    ``multi_process`` selects the process pool (True) or the thread pool
    (False). ``func`` must return a duration in milliseconds.
    """
    if multi_process:
        pool = process_pool
    else:
        pool = thread_pool
    durations = pool.map(func, [param] * loops)
    return sum(durations) / len(durations)
# Benchmark each workload serially, then with threads, then with processes.
benchmarks = [
    (io_intensive_task, "www.baidu.com"),
    (cpu_intensive_task, 1000000),
    (query_mysql, "select * from mysql.db"),
]
for func, arg in benchmarks:
    print("=" * 50)
    print(f"start executing function {func.__name__}({arg})")
    serial = [func(arg) for _ in range(loops)]
    print(f"serial execution: {sum(serial) / len(serial)} ms")
    # parallel execution with multi thread
    print(f"threads parallel: {parallel_execute(func, arg, False)} ms")
    # parallel execution with multi process
    print(f"process parallel: {parallel_execute(func, arg, True)} ms")
```
================================================== | |
start executing function io_intensive_task(www.baidu.com) | |
serial execution: 17.513811588287354 ms | |
threads parallel: 16.531288623809814 ms | |
process parallel: 18.19998025894165 ms | |
================================================== | |
start executing function cpu_intensive_task(1000000) | |
serial execution: 31.85817003250122 ms | |
threads parallel: 169.74856853485107 ms | |
process parallel: 33.473050594329834 ms | |
================================================== | |
start executing function query_mysql(select * from mysql.db) | |
serial execution: 0.8367657661437988 ms | |
threads parallel: 3.7558913230895996 ms | |
process parallel: 0.8831620216369629 ms | |
```
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
很容易误认为数据库查询都是纯IO操作。纯IO任务在Python中不受GIL制约，可以真正地并行执行；但通过上面这个例子可以发现，数据库查询并非如此——多线程并没有带来加速。
当Web应用中view函数的逻辑都类似query_mysql,主要是去查询数据库时,如果WebServer并行模式只开了线程,而没有多进程,性能会受到很大限制: