Skip to content

Instantly share code, notes, and snippets.

@lttzzlll
Created April 24, 2018 07:35
Show Gist options
  • Save lttzzlll/d095237a05c1eb8bfeb36e1d25837f9f to your computer and use it in GitHub Desktop.
Save lttzzlll/d095237a05c1eb8bfeb36e1d25837f9f to your computer and use it in GitHub Desktop.
Using multiprocessing to check whether files exist
import subprocess
import asyncio
from itertools import chain
from functools import wraps
import concurrent
import time
import readxml
# UNC path of the CollectData.exe build that this script launches with a
# 'Check' or 'Look' sub-command followed by the path to inspect.
EXE = r"\\ccpsofsep\am_s1\users\v-taotli\code\CollectData\CollectData\CollectData\bin\Debug\CollectData.exe"
def timeit(func):
    """Decorate *func* so that every call prints how long it took.

    The wrapped function's return value is passed through unchanged, and
    ``functools.wraps`` keeps its name and docstring. Nothing is printed
    when *func* raises.

    Args:
        func: The callable to time.

    Returns:
        A wrapper with the same call signature as *func*.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        # perf_counter() is a monotonic, high-resolution clock; time.time()
        # can jump when the system clock is adjusted.
        start = time.perf_counter()
        res = func(*args, **kwargs)
        elapsed = time.perf_counter() - start
        print('func {} cost: {}'.format(func.__name__, elapsed))
        return res
    return wrapper
def check(path):
    """Run ``EXE Check <path>`` and report whether it printed anything.

    Args:
        path: Value handed to the external tool as its last argument.

    Returns:
        A ``(path, res)`` tuple. ``res`` is ``True`` when the first line the
        tool wrote to stdout is non-empty and ``False`` when stdout was
        empty.

    Raises:
        OSError: If the executable cannot be started.
        UnicodeDecodeError: If the first stdout line is not valid UTF-8.
    """
    args = [EXE, 'Check', path]
    # communicate() drains stdout *and* stderr and closes both pipes.
    # Waiting on the process while a pipe is left unread can deadlock once
    # the child fills that pipe's buffer, and leaks the stderr handle.
    with subprocess.Popen(args,
                          stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE) as proc:
        out, _ = proc.communicate()
    # Keep only the first line (newline included), as readline() would.
    head, newline, _ = out.partition(b'\n')
    first_line = (head + newline).decode('utf-8')
    # NOTE(review): any non-empty line counts as True, including text such
    # as "False" -- confirm this matches what the tool prints.
    return (path, bool(first_line))
def look(path):
    """Run ``EXE Look <path>`` and return the first line of its stdout.

    Args:
        path: Value handed to the external tool as its last argument.

    Returns:
        The first stdout line decoded as UTF-8, including its trailing
        newline if one was written; an empty string when stdout was empty.

    Raises:
        OSError: If the executable cannot be started.
        UnicodeDecodeError: If the first stdout line is not valid UTF-8.
    """
    args = [EXE, 'Look', path]
    # communicate() drains stdout *and* stderr and closes both pipes.
    # Waiting on the process while a pipe is left unread can deadlock once
    # the child fills that pipe's buffer, and leaks the stderr handle.
    with subprocess.Popen(args,
                          stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE) as proc:
        out, _ = proc.communicate()
    # Keep only the first line (newline included), as readline() would.
    head, newline, _ = out.partition(b'\n')
    return (head + newline).decode('utf-8')
@timeit
def mutliprocess(path_list):
    """Run ``check`` on every path in a process pool and print each result.

    Args:
        path_list: Iterable of paths passed one by one to ``check``.
    """
    # Import the submodule explicitly: a bare ``import concurrent`` does not
    # load ``concurrent.futures``; the attribute is only there when another
    # module (such as asyncio) has already imported it.
    from concurrent.futures import ProcessPoolExecutor

    with ProcessPoolExecutor() as executor:
        # Executor.map yields results in the order of the inputs.
        for res in executor.map(check, path_list):
            print(res)
def main(path):
    """Print what ``look`` returns for *path*.

    Args:
        path: Value forwarded to ``look`` unchanged.
    """
    print(look(path))
async def co_check(path, res_list):
    """Run ``check`` for *path* off the event loop and record its result.

    The generator-based ``@asyncio.coroutine`` decorator was removed in
    Python 3.11, so this is a native coroutine. The blocking subprocess
    work is delegated to ``check`` in the loop's default executor; running
    it directly inside the coroutine would stall the loop and serialize
    every check.

    Args:
        path: Value forwarded to ``check``.
        res_list: List that receives the boolean result for *path*.
            Results are appended as each check finishes.
    """
    loop = asyncio.get_event_loop()
    # check() returns (path, res); only the boolean is recorded.
    _, res = await loop.run_in_executor(None, check, path)
    res_list.append(res)
@timeit
def event(path_list):
    """Run ``co_check`` for every path on a private asyncio event loop.

    Args:
        path_list: Iterable of paths; one ``co_check`` task is created per
            item.

    Returns:
        The list that the ``co_check`` tasks appended their results to.
        It is empty when *path_list* is empty.
    """
    res_list = []
    # Use a dedicated loop instead of closing the shared default loop,
    # which would make any later get_event_loop() user fail with
    # "Event loop is closed" (including a second call to this function).
    loop = asyncio.new_event_loop()
    try:
        tasks = [loop.create_task(co_check(path, res_list))
                 for path in path_list]
        if tasks:
            # The tasks are already bound to ``loop``, so gather() uses it.
            loop.run_until_complete(asyncio.gather(*tasks))
    finally:
        loop.close()
    return res_list
def test(path):
    """Check the paths found at index 1 and index 2 of every corpus entry.

    Entries come from ``readxml.read_xmllist(path, 'fr-FR')``. All index-1
    items are queued first, then all index-2 items, and the combined lazy
    stream is handed to ``mutliprocess``.

    Args:
        path: Passed to ``readxml.read_xmllist`` unchanged.
    """
    corpus_list = readxml.read_xmllist(path, 'fr-FR')
    # NOTE(review): corpus_list is iterated twice (once per generator), so
    # this assumes read_xmllist returns a re-iterable sequence -- confirm.
    fbls = (fbl[1] for fbl in corpus_list)
    hyps = (hyp[2] for hyp in corpus_list)
    res = chain(fbls, hyps)
    # event(res) is the asyncio-based alternative to the call below.
    mutliprocess(res)
if __name__ == '__main__':
    # Earlier manual runs of the 'Look' command, kept for reference:
    # main(r'https://cosmos09.osdinfra.net/cosmos/SPIN.Compute/shares/ipe.cu/local/SpeechAMProd/Test/TestScripts/Threshold/ServerN/en-AU/BTEST/ULM/20180410/SpeechLiveTrancribedData_Desktop_Win10Cortana_2017-01-01_2017-01-31-en-au_Test.pcm.ss')
    # main(r'https://cosmos09.osdinfra.net/cosmos/SPIN.Compute/shares/ipe.cu/local/SpeechAM/E2E/Data/WinpServerCR/ThresholdKeySpot/fr-FR/Merino/20160113201941/CreateCorpus_TRAIN/Desktop_Merino_ThresholdKeySpot_Train_fr-fr_Live_03-03_05-05-2015/')
    test(r'\\ccpsofsep\am_s1\users\v-amniu\FRA\unified_model\dataCollect\SDTrainConfig.list')
@lttzzlll
Copy link
Author

如果把进程池换成线程池，由于每个线程仍然会启动一个子进程，整体上就变成了“多线程 => 多进程”模式。这样一来，原来的主进程会变得非常不稳定，很容易崩溃；不过就效率而言，“多线程 + 多进程”模式是最高的。

import subprocess
import asyncio
from itertools import chain
from functools import wraps
import concurrent

import time
import readxml

# UNC path of the CollectData.exe build that this script launches with a
# 'Check' or 'Look' sub-command followed by the path to inspect.
EXE = r"\\ccpsofsep\am_s1\users\v-taotli\code\CollectData\CollectData\CollectData\bin\Debug\CollectData.exe"

def timeit(func):
    """Decorate *func* so that every call prints how long it took.

    The wrapped function's return value is passed through unchanged, and
    ``functools.wraps`` keeps its name and docstring. Nothing is printed
    when *func* raises.

    Args:
        func: The callable to time.

    Returns:
        A wrapper with the same call signature as *func*.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        # perf_counter() is a monotonic, high-resolution clock; time.time()
        # can jump when the system clock is adjusted.
        start = time.perf_counter()
        res = func(*args, **kwargs)
        elapsed = time.perf_counter() - start
        print('func {} cost: {}'.format(func.__name__, elapsed))
        return res
    return wrapper

def check(path):
    """Run ``EXE Check <path>`` and report whether it printed anything.

    Args:
        path: Value handed to the external tool as its last argument.

    Returns:
        A ``(path, res)`` tuple. ``res`` is ``True`` when the first line the
        tool wrote to stdout is non-empty and ``False`` when stdout was
        empty.

    Raises:
        OSError: If the executable cannot be started.
        UnicodeDecodeError: If the first stdout line is not valid UTF-8.
    """
    args = [EXE, 'Check', path]
    # communicate() drains stdout *and* stderr and closes both pipes.
    # Waiting on the process while a pipe is left unread can deadlock once
    # the child fills that pipe's buffer, and leaks the stderr handle.
    with subprocess.Popen(args,
                          stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE) as proc:
        out, _ = proc.communicate()
    # Keep only the first line (newline included), as readline() would.
    head, newline, _ = out.partition(b'\n')
    first_line = (head + newline).decode('utf-8')
    # NOTE(review): any non-empty line counts as True, including text such
    # as "False" -- confirm this matches what the tool prints.
    return (path, bool(first_line))


def look(path):
    """Run ``EXE Look <path>`` and return the first line of its stdout.

    Args:
        path: Value handed to the external tool as its last argument.

    Returns:
        The first stdout line decoded as UTF-8, including its trailing
        newline if one was written; an empty string when stdout was empty.

    Raises:
        OSError: If the executable cannot be started.
        UnicodeDecodeError: If the first stdout line is not valid UTF-8.
    """
    args = [EXE, 'Look', path]
    # communicate() drains stdout *and* stderr and closes both pipes.
    # Waiting on the process while a pipe is left unread can deadlock once
    # the child fills that pipe's buffer, and leaks the stderr handle.
    with subprocess.Popen(args,
                          stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE) as proc:
        out, _ = proc.communicate()
    # Keep only the first line (newline included), as readline() would.
    head, newline, _ = out.partition(b'\n')
    return (head + newline).decode('utf-8')


@timeit
def mutliprocess(path_list):
    """Run ``check`` on every path in a thread pool and print each result.

    Despite the name, this variant uses threads. Each result is printed
    together with a 1-based running count.

    Args:
        path_list: Iterable of paths passed one by one to ``check``.
    """
    # Import the submodule explicitly: a bare ``import concurrent`` does not
    # load ``concurrent.futures``; the attribute is only there when another
    # module (such as asyncio) has already imported it.
    from concurrent.futures import ThreadPoolExecutor

    with ThreadPoolExecutor() as executor:
        # Executor.map yields results in the order of the inputs.
        for cnt, res in enumerate(executor.map(check, path_list), 1):
            print(res, cnt)


def main(path):
    """Print what ``look`` returns for *path*.

    Args:
        path: Value forwarded to ``look`` unchanged.
    """
    first_line = look(path)
    print(first_line)


async def co_check(path, res_list):
    """Run ``check`` for *path* off the event loop and record its result.

    The generator-based ``@asyncio.coroutine`` decorator was removed in
    Python 3.11, so this is a native coroutine. The blocking subprocess
    work is delegated to ``check`` in the loop's default executor; running
    it directly inside the coroutine would stall the loop and serialize
    every check.

    Args:
        path: Value forwarded to ``check``.
        res_list: List that receives the boolean result for *path*.
            Results are appended as each check finishes.
    """
    loop = asyncio.get_event_loop()
    # check() returns (path, res); only the boolean is recorded.
    _, res = await loop.run_in_executor(None, check, path)
    res_list.append(res)





@timeit
def event(path_list):
    """Run ``co_check`` for every path on a private asyncio event loop.

    Args:
        path_list: Iterable of paths; one ``co_check`` task is created per
            item.

    Returns:
        The list that the ``co_check`` tasks appended their results to.
        It is empty when *path_list* is empty.
    """
    res_list = []
    # Use a dedicated loop instead of closing the shared default loop,
    # which would make any later get_event_loop() user fail with
    # "Event loop is closed" (including a second call to this function).
    loop = asyncio.new_event_loop()
    try:
        tasks = [loop.create_task(co_check(path, res_list))
                 for path in path_list]
        if tasks:
            # The tasks are already bound to ``loop``, so gather() uses it.
            loop.run_until_complete(asyncio.gather(*tasks))
    finally:
        loop.close()
    return res_list


def test(path):
    """Check the paths found at index 1 and index 2 of every corpus entry.

    Entries come from ``readxml.read_xmllist(path, 'fr-FR')``. All index-1
    items are queued first, then all index-2 items, and the combined lazy
    stream is handed to ``mutliprocess``.

    Args:
        path: Passed to ``readxml.read_xmllist`` unchanged.
    """
    entries = readxml.read_xmllist(path, 'fr-FR')
    # NOTE(review): entries is walked twice (once per generator), so this
    # assumes read_xmllist returns a re-iterable sequence -- confirm.
    index_one_items = (entry[1] for entry in entries)
    index_two_items = (entry[2] for entry in entries)

    # event(...) is the asyncio-based alternative to the call below.
    mutliprocess(chain(index_one_items, index_two_items))

if __name__ == "__main__":
    # Earlier manual runs of the 'Look' command, kept for reference:
    # main(r'https://cosmos09.osdinfra.net/cosmos/SPIN.Compute/shares/ipe.cu/local/SpeechAMProd/Test/TestScripts/Threshold/ServerN/en-AU/BTEST/ULM/20180410/SpeechLiveTrancribedData_Desktop_Win10Cortana_2017-01-01_2017-01-31-en-au_Test.pcm.ss')
    # main(r'https://cosmos09.osdinfra.net/cosmos/SPIN.Compute/shares/ipe.cu/local/SpeechAM/E2E/Data/WinpServerCR/ThresholdKeySpot/fr-FR/Merino/20160113201941/CreateCorpus_TRAIN/Desktop_Merino_ThresholdKeySpot_Train_fr-fr_Live_03-03_05-05-2015/')

    # Script entry point: check every path listed in the training config.
    test(r'\\ccpsofsep\am_s1\users\v-amniu\FRA\unified_model\dataCollect\SDTrainConfig.list')

@lttzzlll
Copy link
Author

而且多线程模式不是平滑的。多进程模式来一个请求处理一个请求。虽然慢,但是稳。

@lttzzlll
Copy link
Author

多进程也不一定是平滑的。这个和许多其他原因有关。

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment