Created February 20, 2023 18:48
-
-
Save anezih/bcf883445c36a1c72202176396c7e020 to your computer and use it in GitHub Desktop.
zemberek-python ile sözcük üretme (Word Generation)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pprint | |
from zemberek.morphology import TurkishMorphology | |
from zemberek.morphology.lexicon import RootLexicon | |
from zemberek.morphology.morphotactics.turkish_morphotactics import get_morpheme_map | |
# Shared morphology instance, built once at import time and passed to the
# generator functions by the demo calls at the bottom of the script.
MORPHOLOGY = TurkishMorphology.create_with_defaults()
def generate_noun(word, morphology):
    """Generate inflected surface forms for each nominal lexicon entry of ``word``.

    The default root lexicon is probed with ids of the form
    ``"<word>_<suffix>"`` for every suffix in the list below. For each entry
    found, the word generator is called once per (number, possessive, case)
    morpheme combination, and then once per (number, possessive, case, Rel)
    combination (Rel as in masa-m-da-`ki`, dün-`kü`).

    Args:
        word: Lemma used to build the lexicon ids.
        morphology: Object exposing ``word_generator.generate(item, morphemes)``;
            the demo at the bottom of the script passes ``MORPHOLOGY``.

    Returns:
        A dict mapping each lexicon id that was found to the list of
        ``surface`` values of the generated results (plain combinations
        first, Rel combinations after). Empty when no id matches.

    See:
        https://docs.google.com/document/d/1bUW6i5KrjkeID8GD7rs8tbGWapZooLmt2gmdAUvPHW8
        https://github.com/ahmetaa/zemberek-nlp/tree/master/morphology#word-generation
        https://github.com/ahmetaa/zemberek-nlp/blob/master/examples/src/main/java/zemberek/examples/morphology/GenerateWords.java
    """
    # Local import so this function does not depend on the file's import block.
    from itertools import product

    morpheme_map = get_morpheme_map()

    def morphemes(*morpheme_ids):
        # Resolve morpheme ids to the objects stored in the morpheme map.
        return [morpheme_map.get(morpheme_id) for morpheme_id in morpheme_ids]

    numbers = morphemes("A1sg", "A2sg", "A3sg", "A1pl", "A2pl", "A3pl")
    possessives = morphemes("P1sg", "P2sg", "P3sg", "P1pl", "P2pl", "P3pl", "Pnon")
    cases = morphemes("Dat", "Loc", "Abl", "Gen", "Acc", "Ins", "Nom")
    derivational = morphemes("Rel")  # masa-m-da-`ki`, dün-`kü`

    # See resources/lexicon.csv for the id suffixes used by the lexicon.
    noun_lexicon_suffixes = (
        "Noun", "Noun_1", "Noun_2", "Noun_Time", "Noun_Time_1",
        "Adj_Prop", "Adj", "Adv_Prop", "Adv_Time", "Pron", "Pron_Demons",
        "Pron_Demons_1", "Pron_Pers", "Pron_Quant", "Pron_Ques", "Pron_Reflex",
    )

    lex = RootLexicon.get_default()

    # Collect every lexicon entry that exists for this word before generating.
    items_by_id = {}
    for suffix in noun_lexicon_suffixes:
        item_id = f"{word}_{suffix}"
        if item := lex.get_item_by_id(item_id):
            items_by_id[item_id] = item

    # product() varies the rightmost list fastest, i.e. the same order as the
    # equivalent nested loops: plain inflections first, then those plus Rel.
    morpheme_sequences = [
        *product(numbers, possessives, cases),
        *product(numbers, possessives, cases, derivational),
    ]

    noun_res = {}
    for item_id, item in items_by_id.items():
        results = []
        for sequence in morpheme_sequences:
            results.extend(morphology.word_generator.generate(item, sequence))
        noun_res[item_id] = [result.surface for result in results]
    return noun_res
def generate_verb(verb, morphology):
    """Generate conjugated surface forms for the verb lexicon entry of ``verb``.

    Looks up the id ``"<verb>_Verb"`` in the default root lexicon and calls
    the word generator once per (polarity, tense/mood, person) morpheme
    combination.

    Args:
        verb: Verb lemma used to build the lexicon id (the demo passes
            ``"savaşmak"``).
        morphology: Object exposing ``word_generator.generate(item, morphemes)``.

    Returns:
        The list of ``surface`` values of all generated results, or ``[]``
        when the lexicon has no entry for the id.
    """
    morpheme_map = get_morpheme_map()

    def lookup(*keys):
        # Resolve morpheme ids to the objects stored in the morpheme map.
        return [morpheme_map.get(key) for key in keys]

    # "Verb" is used in place of an explicit positive marker.
    polarities = lookup("Verb", "Neg")
    # Original author's note: Cond sometimes does not work.
    tenses = lookup(
        "Imp", "Aor", "Past", "Prog1", "Prog2",
        "Narr", "Fut", "Neces", "Opt", "Cond",
    )
    agreements = lookup("A1sg", "A2sg", "A3sg", "A1pl", "A2pl", "A3pl")

    suffix = "Verb"
    lex = RootLexicon.get_default()
    entry = lex.get_item_by_id(f"{verb}_{suffix}")
    if not entry:
        return []

    # Enumerate combinations with the person marker varying fastest.
    combinations = [
        (polarity, tense, agreement)
        for polarity in polarities
        for tense in tenses
        for agreement in agreements
    ]

    forms = []
    for combination in combinations:
        forms.extend(morphology.word_generator.generate(entry, combination))
    return [form.surface for form in forms]
def noun_verb_other(word, morphology):
    """Return ``(ending, stem)`` pairs for every analysis of ``word``.

    Args:
        word: The word handed to ``morphology.analyze``.
        morphology: Object whose ``analyze(word)`` yields items providing
            ``get_ending()`` and ``get_stem()``.

    Returns:
        A list of ``(get_ending(), get_stem())`` tuples, one per analysis,
        in the order the analyses are yielded.
    """
    pairs = []
    for single in morphology.analyze(word):
        pairs.append((single.get_ending(), single.get_stem()))
    return pairs
# Demo: pretty-print the generated forms for a sample noun and a sample verb.
pp = pprint.PrettyPrinter(indent=2)

# Noun forms, keyed by the lexicon id that matched.
res = generate_noun(word="bilinç", morphology=MORPHOLOGY)
pp.pprint(res)

# Verb forms as a flat list of surface strings.
res_verb = generate_verb(verb="savaşmak", morphology=MORPHOLOGY)
pp.pprint(res_verb)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment