# Direct concurrent loading (the original title says "multi-process", but the code below uses threads).
class KnowledgeEngine:
    """In-memory lookup tables loaded from two ``' ||| '``-delimited text files.

    The mention-to-id file holds lines of the form
    ``<mention> ||| <id>\\t<id>...`` and the triple file holds lines of the
    form ``<a> ||| <b> ||| <c>`` (presumably subject / predicate / object --
    confirm against the data). Lines with any other number of fields are
    skipped silently.

    NOTE(review): both files are opened without an explicit ``encoding``, so
    the platform default applies.
    """

    def __init__(self, name_2_id_path, triple_path, verbose=True):
        """Load both files on separate threads and wait for them to finish.

        Args:
            name_2_id_path: Path of the mention-to-id file.
            triple_path: Path of the triple file.
            verbose: When true (the default, which matches the previous
                unconditional behaviour) pretty-print both tables once
                loading is complete. Pass ``False`` to skip the dump.
        """
        self.name_2_id_path = name_2_id_path
        self.triple_path = triple_path
        # mention -> set of ids; a missing mention yields an empty set.
        self.entity_2_id_set = defaultdict(set)
        # first field -> {second field: third field}
        self.triple_set = defaultdict(dict)

        # Each loader fills a different attribute, so the two threads do not
        # write to the same table.
        loaders = [
            Thread(target=self.load_name_2_id),
            Thread(target=self.load_triple),
        ]
        for loader in loaders:
            loader.start()
        for loader in loaders:
            loader.join()

        if verbose:
            from pprint import pprint
            pprint(self.entity_2_id_set)
            pprint(self.triple_set)

    @clock
    def load_name_2_id(self):
        """Fill ``entity_2_id_set`` from the file at ``name_2_id_path``.

        A mention that occurs on several lines keeps the ids of its last
        occurrence.
        """
        print("Loading mention-to-id file...")
        with open(self.name_2_id_path) as fin:
            for line in fin:
                fields = line.rstrip().split(' ||| ')
                # Blank or malformed lines do not split into exactly two fields.
                if len(fields) != 2:
                    continue
                mention, id_field = fields
                self.entity_2_id_set[mention] = set(id_field.split('\t'))
        print("Done: Loading mention-to-id file")

    @clock
    def load_triple(self):
        """Fill ``triple_set`` from the file at ``triple_path``.

        Every triple sharing the same first field is kept, keyed by its
        second field; a repeated (first, second) pair keeps the last third
        field seen.
        """
        print("Loading triple file...")
        with open(self.triple_path) as fin:
            for line in fin:
                fields = line.rstrip().split(' ||| ')
                # Blank or malformed lines do not split into exactly three fields.
                if len(fields) != 3:
                    continue
                subject, predicate, obj = fields
                # Insert into the per-subject dict instead of replacing it, so
                # earlier triples for the same subject are not discarded.
                self.triple_set[subject][predicate] = obj
        print("Done: Loading triple file")
from time import sleep, strftime
from concurrent import futures
def display(*args):
    """Print the arguments on one line, prefixed by the local time as [HH:MM:SS].

    Arguments are converted with ``str`` and separated by single spaces; the
    timestamp is always followed by one space, even when no arguments are
    given.
    """
    stamp = strftime('[%H:%M:%S]')
    rendered = ' '.join(str(arg) for arg in args)
    print(stamp, rendered)
def loiter(n):
    """Sleep for ``n`` seconds, reporting before and after, and return ``n * 10``.

    Each message is indented with ``n`` tab characters so that output from
    calls with different ``n`` lands in different columns.
    """
    indent = '\t' * n
    start_note = '{}loiter({}): doing nothing for {}s...'.format(indent, n, n)
    display(start_note)
    sleep(n)
    end_note = '{}loiter({}): done.'.format(indent, n)
    display(end_note)
    return n * 10
def main():
    """Run ``loiter`` for 5, 4, ..., 0 on a three-worker thread pool and report.

    The pool is used as a context manager so it is shut down when the
    demonstration finishes, instead of being left open.
    """
    display('Script starting.')
    with futures.ThreadPoolExecutor(max_workers=3) as executor:
        # map() schedules every call right away and returns an iterator.
        results = executor.map(loiter, range(5, -1, -1))
        # This shows the iterator object itself, not the computed values.
        display('results:', results)
        display('Waiting for individual results:')
        # Results are yielded in submission order, so each step blocks until
        # that particular call has finished.
        for i, result in enumerate(results):
            display('result {}: {}'.format(i, result))
# Run the demonstration only when this file is executed as a script, so that
# importing the module does not start the thread pool and its sleeps.
if __name__ == '__main__':
    main()