# Source: GitHub gist bshishov/2040a72304897260f3a88915eb255721
# Author: @bshishov
# Created: January 14, 2021 15:42
import time
import subprocess
import json
from contextlib import contextmanager
# Executable that is launched as the analyzer child process.
MYSTEM_BINARY = 'mystem.exe'

# Command-line flags for that executable. They are stored as ONE
# space-separated string because the code that launches the process splits
# this value on spaces to build the argument list.
MYSTEM_ARGS = ' '.join((
    '--input-format', 'json',
    '--fixlist', 'fixlist.txt',
    '--format', 'json',
    '-gi',
    '-d',
    '-c',
))
@contextmanager
def benchmark(name: str, times: int = 1):
    """Time the enclosed ``with`` body and print the per-operation average.

    The printed line has the form ``<name>: <N>ms (<R> op/s)``, where the
    elapsed wall time is divided by ``times`` before being reported.

    Args:
        name: Label printed in front of the measurement.
        times: How many operations the body performs.

    Raises:
        ValueError: If ``times`` is not positive. This is checked before the
            body runs, so a bad argument does not waste a whole measurement.

    Nothing is printed when the body raises; the exception propagates.
    """
    if times <= 0:
        raise ValueError(f'times must be positive, got {times!r}')

    # perf_counter() is monotonic and high-resolution. time.time() can tick
    # so coarsely that a fast body is measured as exactly 0 seconds.
    started = time.perf_counter()
    yield
    dt = (time.perf_counter() - started) / times

    # A zero interval would make 1 / dt raise ZeroDivisionError; report an
    # infinite rate instead of crashing after the work has already been done.
    rate = 1 / dt if dt > 0 else float('inf')
    print(f'{name}: {dt * 1000:.0f}ms ({rate:.1f} op/s)')
class MyStem:
    """Client for a long-lived ``mystem`` child process driven over pipes.

    The process is started once in ``__init__`` and reused by every
    ``analyze`` call. Call ``close()`` when finished, or use the instance as
    a context manager, so the child process and its pipes are released.
    """

    def __init__(self):
        """Start the child process using MYSTEM_BINARY and MYSTEM_ARGS."""
        self.proc = subprocess.Popen(
            # split() with no argument drops empty strings, so a doubled or
            # trailing space in MYSTEM_ARGS cannot become an empty argument.
            (MYSTEM_BINARY, *MYSTEM_ARGS.split()),
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            # Nothing in this class reads stderr. A pipe that is never
            # drained can fill up and block the child, so discard it instead.
            stderr=subprocess.DEVNULL,
            encoding='utf-8',
            universal_newlines=True,
        )

    def __enter__(self):
        """Return the instance itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Shut the child process down when the ``with`` block ends."""
        self.close()

    def close(self) -> None:
        """Stop the child process and close its pipes.

        Calling this more than once is harmless.
        """
        proc = self.proc
        try:
            # EOF on stdin is the polite way to ask the child to finish.
            proc.stdin.close()
        except OSError:
            # The child is already gone and the final flush hit a dead pipe.
            pass
        try:
            proc.wait(timeout=5)
        except subprocess.TimeoutExpired:
            # The child did not exit on EOF; do not leave it running.
            proc.kill()
            proc.wait()
        proc.stdout.close()

    def analyze(self, text: str) -> str:
        """Send ``text`` to the child process and return one raw output line.

        ``text`` is split on whitespace. Each piece becomes a
        ``{'text': piece, 'analysis': []}`` object, and the resulting list is
        written to the child's stdin as a single JSON line.

        NOTE(review): this assumes the child answers every input line with
        exactly one output line - confirm for the configured flags.

        Args:
            text: Whitespace-separated text to analyze.

        Returns:
            The next line read from the child's stdout, unparsed and
            including its trailing newline.

        Raises:
            RuntimeError: If the child's stdout is at end-of-file, i.e. no
                answer can arrive any more.
        """
        tokens = [{'text': token, 'analysis': []} for token in text.split()]
        request = json.dumps(tokens, ensure_ascii=False)

        stdin = self.proc.stdin
        stdin.write(request + '\n')
        # Flush so the request actually reaches the child before we block
        # waiting for its answer.
        stdin.flush()

        reply = self.proc.stdout.readline()
        if not reply:
            # readline() returns '' only at EOF; returning that silently
            # would hide the fact that the child is no longer answering.
            raise RuntimeError(
                'mystem closed its stdout without answering; '
                'the process has probably exited'
            )
        return reply
def main():
    """Benchmark starting MyStem and running repeated ``analyze`` calls.

    Prints, in order: the start-up time, the input text, the average time
    per ``analyze`` call over ``n`` runs, and the raw result of the last call.
    """
    with benchmark("init"):
        mystem = MyStem()

    n = 100
    input_text = 'привет ебучий майстем'
    print(f'Input: {input_text}')

    with benchmark("analyze", times=n):
        for _ in range(n):
            # Analyze the very text that was printed as "Input", rather than
            # a second copy of the literal that could drift out of sync.
            res = mystem.analyze(input_text)
    print(f'Result: {res}')
# Run the benchmark only when this file is executed as a script; importing
# the module must not start it.
if __name__ == '__main__':
    main()
# Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment