Skip to content

Instantly share code, notes, and snippets.

@dchaplinsky
Last active May 7, 2023 19:06
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dchaplinsky/d1684ca37db218f30d5248cf74e62a29 to your computer and use it in GitHub Desktop.
Save dchaplinsky/d1684ca37db218f30d5248cf74e62a29 to your computer and use it in GitHub Desktop.
A script that uses pymorphy3 with pymorphy3-dicts-uk to generate inflections of n-grams
# pip install pymorphy3
# pip install pymorphy3-dicts-uk
from collections import defaultdict
from itertools import product
from typing import List, Set

import pymorphy3
# Module-level Ukrainian morphological analyzer, created once at import time
# and used by inflect_ngram to parse each word.
morph = pymorphy3.MorphAnalyzer(lang="uk")
def inflect_ngram(ngram: str) -> Set[str]:
    """Generate inflected variants of a whitespace-separated ngram.

    Every word of the ngram is parsed with the module-level ``morph``
    analyzer. For each combination of grammatical case and gender/number,
    every parse of every word is inflected to that combination. A
    combination contributes to the output only when each word has at least
    one inflected form; in that case all per-word forms are combined (as a
    Cartesian product) and joined with single spaces.

    Args:
        ngram: Words separated by whitespace. Runs of whitespace, as well as
            leading/trailing whitespace, are ignored.

    Returns:
        The set of distinct inflected ngram strings. Empty when the input
        has no words or when no combination can be applied to every word.
    """
    # NOTE(review): "gen2"/"acc2"/"loc2" come from the OpenCorpora tag set;
    # whether the Ukrainian dictionaries ever produce them should be
    # confirmed. They are kept so the set of requested forms is unchanged.
    cases = (
        "nomn",
        "gent",
        "datv",
        "accs",
        "ablt",
        "loct",
        "voct",
        "gen2",
        "acc2",
        "loc2",
    )
    # "plur" is a number grammeme rather than a gender; it is listed here so
    # plural forms are requested alongside the three singular genders.
    genders = ("masc", "femn", "neut", "plur")

    # split() with no argument drops empty tokens, so doubled or surrounding
    # spaces no longer inject an empty "word" into the ngram.
    parses = [morph.parse(token) for token in ngram.split()]
    if not parses:
        # Without this guard product() of zero sets would yield one empty
        # tuple and an empty string would be reported as an inflection.
        return set()

    flattened: Set[str] = set()
    for case, gender in product(cases, genders):
        grammemes = {case, gender}
        per_word_forms = []
        for parsed in parses:
            forms = set()
            for candidate in parsed:
                inflected = candidate.inflect(grammemes)
                if inflected is not None:
                    forms.add(inflected.word)
            if not forms:
                # One word cannot take this form, so the whole combination
                # is unusable; skip the remaining words.
                break
            per_word_forms.append(forms)
        else:
            # Reached only when every word produced at least one form.
            flattened.update(
                " ".join(chunks) for chunks in product(*per_word_forms)
            )
    return flattened
# Demo: print the inflected variants of a few sample Ukrainian ngrams.
for _sample in ("спортивна ходьба", "капуста білоголова", "великий рудий кіт"):
    print(inflect_ngram(_sample))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment