Created
April 24, 2018 07:35
-
-
Save lttzzlll/d095237a05c1eb8bfeb36e1d25837f9f to your computer and use it in GitHub Desktop.
Using multiprocessing to check whether files exist
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import subprocess | |
import asyncio | |
from itertools import chain | |
from functools import wraps | |
import concurrent | |
import time | |
import readxml | |
# Location of the CollectData command-line tool. The helpers in this file
# launch it with a sub-command ('Check' or 'Look') followed by a path.
EXE = (
    r"\\ccpsofsep\am_s1\users\v-taotli\code\CollectData"
    r"\CollectData\CollectData\bin\Debug\CollectData.exe"
)
def timeit(func):
    """Decorate *func* so that every call prints how long it took.

    The wrapped function receives the original positional and keyword
    arguments and its return value is passed through unchanged.  After the
    call returns, one line of the form ``func <name> cost: <seconds>`` is
    printed.  Nothing is printed when *func* raises; the exception simply
    propagates.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        # perf_counter() is monotonic and high resolution, which makes it
        # the right clock for measuring an interval (time.time() can jump
        # when the system clock is adjusted).
        started = time.perf_counter()
        res = func(*args, **kwargs)
        elapsed = time.perf_counter() - started
        print('func {} cost: {}'.format(func.__name__, elapsed))
        return res
    return wrapper
def check(path):
    """Run ``EXE Check <path>`` and report whether it printed anything.

    Args:
        path: Value passed to the tool as its last command-line argument.

    Returns:
        A ``(path, res)`` tuple.  ``res`` is True when the first line the
        tool wrote to stdout is non-empty and False when stdout was empty.

    Raises:
        UnicodeDecodeError: If the first stdout line is not valid UTF-8.
    """
    args = [EXE, 'Check', path]
    p = subprocess.Popen(args,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE)
    # communicate() drains and closes BOTH pipes and reaps the process.
    # Reading only stdout and then calling wait() leaves the stderr pipe
    # open and can deadlock once the child fills that pipe.  stderr is
    # captured only to keep it off the console and is discarded.
    out, _ = p.communicate()
    # Rebuild exactly what readline() would have returned: everything up to
    # and including the first b'\n'.
    head, newline, _ = out.partition(b'\n')
    # NOTE(review): any non-empty line is truthy, so output such as "False"
    # would also yield True -- confirm what the tool prints for a miss.
    res = bool((head + newline).decode('utf-8'))
    return (path, res)
def look(path):
    """Run ``EXE Look <path>`` and return the first line it printed.

    Args:
        path: Value passed to the tool as its last command-line argument.

    Returns:
        The first line of the tool's stdout decoded as UTF-8, including its
        trailing newline if there was one, or ``''`` when stdout was empty.

    Raises:
        UnicodeDecodeError: If the first stdout line is not valid UTF-8.
    """
    args = [EXE, 'Look', path]
    p = subprocess.Popen(args,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE)
    # communicate() drains and closes BOTH pipes and reaps the process.
    # Reading only stdout and then calling wait() leaves the stderr pipe
    # open and can deadlock once the child fills that pipe.  stderr is
    # captured only to keep it off the console and is discarded.
    out, _ = p.communicate()
    # Rebuild exactly what readline() would have returned: everything up to
    # and including the first b'\n'.
    head, newline, _ = out.partition(b'\n')
    return (head + newline).decode('utf-8')
@timeit
def mutliprocess(path_list):
    """Run ``check`` on every path in a pool of worker processes.

    Each ``(path, res)`` tuple returned by ``check`` is printed, in the same
    order as the paths appear in *path_list*.  The pool uses the default
    number of workers.

    Args:
        path_list: Iterable of paths; a one-shot iterator is fine.

    (The name keeps its original spelling so existing callers still work.)
    """
    # A bare ``import concurrent`` does not load the ``futures`` submodule;
    # it was only reachable because ``asyncio`` happens to import it.
    # Import it explicitly so this function does not rely on that side effect.
    from concurrent.futures import ProcessPoolExecutor

    with ProcessPoolExecutor() as executor:
        for res in executor.map(check, path_list):
            print(res)
def main(path):
    """Print whatever ``look`` returns for *path*."""
    looked_up = look(path)
    print(looked_up)
async def co_check(path, res_list):
    """Run ``EXE Check <path>`` without blocking the event loop.

    This is a native coroutine: the ``@asyncio.coroutine`` decorator used
    previously was removed in Python 3.11.  The blocking subprocess call is
    handed to the loop's default executor so that several checks scheduled
    together (e.g. via ``asyncio.gather``) actually overlap.

    Args:
        path: Value passed to the tool as its last command-line argument.
        res_list: List that receives one bool per call -- True when the
            first line the tool wrote to stdout is non-empty.  Results are
            appended as each check finishes, so their order need not match
            the order in which the coroutines were created.

    Raises:
        UnicodeDecodeError: If the first stdout line is not valid UTF-8.
    """
    args = [EXE, 'Check', path]

    def run_tool():
        p = subprocess.Popen(args,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
        # communicate() drains and closes both pipes and reaps the process;
        # wait() with an unread stderr pipe can deadlock.
        out, _ = p.communicate()
        return out

    # Inside a coroutine this returns the loop that is running it.
    loop = asyncio.get_event_loop()
    out = await loop.run_in_executor(None, run_tool)
    # Rebuild exactly what readline() would have returned: everything up to
    # and including the first b'\n'.
    head, newline, _ = out.partition(b'\n')
    # NOTE(review): any non-empty line is truthy, so output such as "False"
    # would also yield True -- confirm what the tool prints for a miss.
    res_list.append(bool((head + newline).decode('utf-8')))
@timeit
def event(path_list):
    """Run ``co_check`` for every path on an asyncio event loop.

    A fresh event loop is created for each call and closed afterwards, so
    the function can be called more than once and works when no event loop
    has been set for the current thread.

    Args:
        path_list: Iterable of paths; a one-shot iterator is fine.

    Returns:
        The list that the ``co_check`` coroutines appended their boolean
        results to (empty when *path_list* is empty).
    """
    res_list = []
    loop = asyncio.new_event_loop()
    # Make the new loop the current one so that asyncio.gather(), which is
    # called outside of a running loop here, binds its tasks to it.
    asyncio.set_event_loop(loop)
    try:
        check_res = [co_check(path, res_list) for path in path_list]
        loop.run_until_complete(asyncio.gather(
            *check_res
        ))
    finally:
        # Always release the loop's resources, even if a check failed.
        loop.close()
    return res_list
def test(path):
    """Check every entry of the corpus list stored at *path*.

    The list is loaded with ``readxml.read_xmllist(path, 'fr-FR')``.  For
    every entry, element 1 and element 2 are taken; all element-1 values
    followed by all element-2 values are handed to ``mutliprocess``.
    """
    corpus_list = readxml.read_xmllist(path, 'fr-FR')
    # Both generators are lazy: nothing is read from corpus_list until
    # mutliprocess starts iterating the chained sequence.
    fbl_values = (entry[1] for entry in corpus_list)
    hyp_values = (entry[2] for entry in corpus_list)
    # event(...) accepts the same kind of iterable and is the asyncio-based
    # alternative to the process pool used here.
    mutliprocess(chain(fbl_values, hyp_values))
if __name__ == '__main__':
    # Earlier manual runs through main(), kept for reference:
    # main(r'https://cosmos09.osdinfra.net/cosmos/SPIN.Compute/shares/ipe.cu/local/SpeechAMProd/Test/TestScripts/Threshold/ServerN/en-AU/BTEST/ULM/20180410/SpeechLiveTrancribedData_Desktop_Win10Cortana_2017-01-01_2017-01-31-en-au_Test.pcm.ss')
    # main(r'https://cosmos09.osdinfra.net/cosmos/SPIN.Compute/shares/ipe.cu/local/SpeechAM/E2E/Data/WinpServerCR/ThresholdKeySpot/fr-FR/Merino/20160113201941/CreateCorpus_TRAIN/Desktop_Merino_ThresholdKeySpot_Train_fr-fr_Live_03-03_05-05-2015/')
    config_list = r'\\ccpsofsep\am_s1\users\v-amniu\FRA\unified_model\dataCollect\SDTrainConfig.list'
    test(config_list)
而且多线程模式不是平滑的。多进程模式来一个请求处理一个请求。虽然慢,但是稳。
多进程也不一定是平滑的。这个和许多其他原因有关。
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
如果将多进程模式变成多线程模式,那么总体上变成了多线程=>多进程模式。原来的进程变得非常不稳定,很容易崩溃。但是多线程+多进程模式是效率最高的。