Skip to content

Instantly share code, notes, and snippets.

@Septillioner
Last active July 24, 2019 15:19
Show Gist options
  • Save Septillioner/60f119821c3b4ec0925dfbd2a704712d to your computer and use it in GitHub Desktop.
Save Septillioner/60f119821c3b4ec0925dfbd2a704712d to your computer and use it in GitHub Desktop.
Kuş dili fena çözülüyor
#!/usr/bin/python3
#-*- coding:utf-8 -*-
import string
import re
#Code begin
#Spellword Author : https://github.com/brolin59/PYTHON-TURKCE-DOGAL-DIL-ISLEME-TURKISH-NLP
# Lowercase vowels recognised by the syllable splitter (circumflexed forms included).
lower_vowel = 'aâeêıîioôöuûü'
# Lowercase consonants recognised by the syllable splitter, followed by q, w and x.
lower_quiet = 'bcçdfgğhjklmnprsştvyzqwx'
# Matches any single character that is not one of the vowels in ``lower_vowel``.
clean_quiet = re.compile('[^' + lower_vowel + ']')
def to_lower(word):
    """Return *word* lowercased with the Turkish dotted/dotless-i mapping.

    'İ' is replaced by 'i' and 'I' by 'ı' before ``str.lower()`` is applied,
    because ``str.lower()`` on its own turns 'I' into 'i' and 'İ' into an
    'i' followed by a combining dot.
    """
    tolower_text = word.replace('İ', 'i').replace('I', 'ı')
    return tolower_text.lower()
def wordtoten(word):
    """Encode *word* as a vowel/consonant pattern string.

    The word is lowercased with ``to_lower``; every character listed in
    ``lower_vowel`` becomes '1', every character listed in ``lower_quiet``
    becomes '0', and any other character is left unchanged.
    """
    # str.maketrans is used because Python 3 has no string.maketrans.
    vowel_table = str.maketrans(lower_vowel, '1' * len(lower_vowel))
    consonant_table = str.maketrans(lower_quiet, '0' * len(lower_quiet))
    return to_lower(word).translate(vowel_table).translate(consonant_table)
def spellword(word):
    """Split *word* into syllables using its vowel/consonant pattern.

    The pattern comes from ``wordtoten`` ('1' = vowel, '0' = consonant).
    Each time a vowel is reached, the pattern that follows it decides how
    many consonants stay with the current syllable.

    Returns the list of syllables, cut from *word* itself so the original
    casing is kept, or ``False`` when the word cannot be split: the pattern
    starts or ends with three consonants, the syllables do not add up to the
    whole word, or the number of syllables differs from the number of vowels.
    """
    pattern = wordtoten(word)
    # Syllables are cut out of the original word by position, so the pattern
    # has to line up with it character for character.
    if len(pattern) != len(word):
        return False
    if pattern.startswith('000') or pattern.endswith('000'):
        return False
    # The dots mark the end of the word for the look-ahead checks below.
    pattern += '.....'
    len_vowel = pattern.count('1')

    syllable_list = []  # syllables found so far
    syllable = ''       # pattern characters collected for the current syllable
    rest = word         # part of the word not yet assigned to a syllable
    ahead = pattern     # pattern belonging to ``rest``
    skip = 0            # consonants already handed to the previous syllable

    for char in pattern:
        if skip > 0:
            skip -= 1
            continue
        if char == '.':
            # End of word: keep what is pending if it holds exactly one vowel.
            if syllable.count('1') == 1:
                syllable_list.append(rest[:len(syllable)])
            break
        if char == '0':
            syllable += char
            if syllable == '000':
                break
            continue
        if char != '1':
            # Neither vowel, consonant nor end marker: not added to the pattern.
            continue

        syllable += char
        x = len(syllable)
        if ahead.startswith(('01', '10', '1.'), x):
            cut = x          # syllable ends right after this vowel
        elif ahead.startswith('001', x):
            cut = x + 1      # one following consonant stays with this syllable
        elif ahead.startswith('00.', x):
            # Two consonants close the word; they belong to this last syllable.
            syllable_list.append(rest[:x + 2])
            break
        elif ahead.startswith(('0001', '00001'), x):
            cut = x + 2      # two following consonants stay with this syllable
        else:
            continue

        syllable_list.append(rest[:cut])
        rest = rest[cut:]
        ahead = ahead[cut:]
        syllable = ''
        skip = cut - x

    if ''.join(syllable_list) == word and len_vowel == len(syllable_list):
        return syllable_list
    return False
#Code end
class HeceAyirici(object):
    """Syllable splitter.

    ``hecele`` delegates to the module-level ``spellword``; ``hecele_`` is a
    self-contained splitter that walks the word from its end to its start.
    """

    # Vowels, lowercase then uppercase.
    sesli = ['a', 'e', 'ı', 'i', 'o', 'ö', 'u', 'ü',
             'A', 'E', 'I', 'İ', 'O', 'Ö', 'U', 'Ü']
    # Consonants, lowercase then uppercase.
    sessiz = ['b', 'c', 'ç', 'd', 'f', 'g', 'ğ', 'h', 'j', 'k', 'l', 'm', 'n', 'p', 'r', 's',
              'ş', 't', 'v', 'y', 'z', 'B', 'C', 'Ç', 'D', 'F', 'G', 'Ğ', 'H', 'J', 'K', 'L',
              'M', 'N', 'P', 'R', 'S', 'Ş', 'T', 'V', 'Y', 'Z']

    def __init__(self):
        super(HeceAyirici, self).__init__()

    # Code Begin
    # Author of index_call, index_send, hecele https://gist.github.com/semihozkoroglu
    # edited:septillioner
    def index_call(self, ses, i, kelime):
        """Return the position of ``kelime[i]`` inside *ses*, or -1 if absent.

        Raises IndexError when *i* is outside *kelime*.
        """
        try:
            return ses.index(kelime[i])
        except ValueError:
            return -1

    def index_send(self, ses, kelime):
        """Return the index of the first character of *kelime* found in *ses*.

        Returns None when no character of *kelime* is in *ses*.
        """
        for i, harf in enumerate(kelime):
            if harf in ses:
                return i
        return None

    def hecele_(self, kelime):
        """Split *kelime* into syllables, working from the end of the word.

        Each syllable takes the last remaining vowel, everything after it,
        and the character before it when that character is not a vowel.
        Characters left over at the start of the word with no vowel among
        them are attached to the first syllable.

        Returns the syllables in reading order; an empty word gives [].
        Raises IndexError when the word contains no vowel from ``sesli``.
        """
        heceler = []
        kalan = kelime[::-1]  # reversed, so the search runs from the word's end
        while kalan:
            unlu_yeri = self.index_send(self.sesli, kalan)
            if unlu_yeri is None:
                if not heceler:
                    raise IndexError("no vowel found in %r" % (kelime,))
                # Only non-vowels remain: they belong to the syllable cut last,
                # which is the first syllable of the word.
                heceler[-1] += kalan
                break
            kesim = unlu_yeri + 1
            # Pull in the character before the vowel unless it is a vowel too
            # or the word has already been used up.
            if kesim < len(kalan) and kalan[kesim] not in self.sesli:
                kesim += 1
            heceler.append(kalan[:kesim])
            kalan = kalan[kesim:]
        return [hece[::-1] for hece in reversed(heceler)]
    # Code End

    def hecele(self, kelime):
        """Return ``spellword(kelime)``: a list of syllables, or False."""
        return spellword(kelime)
class KusDili(object):
    """Encoder and decoder for the "kuş dili" word game.

    Encoding inserts a filler (a consonant plus the syllable's own vowel)
    after every syllable; decoding keeps every other syllable and reports
    the consonant of the filler.
    """

    # Vowel groups; none of the methods in this class read them.
    u_kalin = ["a", "ı", "o", "u"]      # "kalin": back vowels
    u_ince = ["e", "i", "ö", "ü"]       # "ince": front vowels
    u_genis_duz = ["a", "e"]            # "genis duz": wide, unrounded
    u_dar_duz = ["ı", "i"]              # "dar duz": narrow, unrounded
    u_genis_yuvarlak = ["o", "ö"]       # "genis yuvarlak": wide, rounded
    u_dar_yuvarlak = ["u", "ü"]         # "dar yuvarlak": narrow, rounded

    def __init__(self):
        """Create the syllable splitter and load ``kelime-listesi.txt``.

        The file is read from the current working directory; ``open`` raises
        if it is missing. The lines are stored in ``kelimelistesi``.
        """
        super(KusDili, self).__init__()
        self.heceleyici = HeceAyirici()
        with open("kelime-listesi.txt") as dosya:
            self.kelimelistesi = dosya.readlines()

    def unluUyumu(self, hece):
        """Placeholder; not implemented."""
        pass

    def findUnlu(self, hece):
        """Return the first vowel of *hece*, or None when it has none."""
        for harf in hece:
            if harf in self.heceleyici.sesli:
                return harf
        return None

    def deleteUnluler(self, cumle):
        """Return *cumle* with every vowel removed."""
        sesli = self.heceleyici.sesli
        return "".join(harf for harf in cumle if harf not in sesli)

    def _hecele(self, kelime):
        """Return the syllables of *kelime*, or None when it cannot be split.

        ``hecele`` is tried first; when it yields nothing, ``hecele_`` is used
        as a fallback, and its IndexError is reported as None.
        """
        heceler = self.heceleyici.hecele(kelime)
        if heceler:
            return heceler
        try:
            return self.heceleyici.hecele_(kelime)
        except IndexError:
            return None

    def encode(self, cumle, type=0, passn="g"):
        """Print every word of *cumle* next to its encoded form.

        With ``type == 0`` each syllable is followed by *passn* plus the
        syllable's first vowel; with any other value the syllables are joined
        unchanged. Words that cannot be split are skipped. Output goes to
        stdout on one line; nothing is returned.
        """
        for kelime in cumle.strip().split(" "):
            heceler = self._hecele(kelime)
            if heceler is None:
                continue
            parcalar = []
            for hece in heceler:
                parcalar.append(hece)
                if type == 0:
                    unlu = self.findUnlu(hece)
                    # A syllable without a listed vowel gets no filler.
                    if unlu is not None:
                        parcalar.append(passn + unlu)
            kelime_sifreli = "".join(parcalar)
            print("-".join(heceler) + " => " + kelime_sifreli + " ", end=" ")

    def decode(self, str_):
        """Print the decoded form of *str_* and the filler consonant(s).

        Every word is split into syllables and the syllables at even
        positions are kept. The filler is taken from the second syllable of
        the last word that has more than one syllable, with its vowels
        removed. Words that cannot be split are skipped. Nothing is returned.
        """
        pass_ = ""
        _kelime = []
        for kelime in str_.split(" "):
            heceler = self._hecele(kelime)
            if heceler is None:
                continue
            if len(heceler) > 1:
                pass_ = self.deleteUnluler(heceler[1])
            _kelime.append("".join(heceler[::2]))
        print("Cumle : %s" % (" ".join(_kelime)))
        print("Sifre : %s+[uyumlu unlu]" % (pass_))
def test(mesaj):
    """Print *mesaj*, then decode it with a freshly created ``KusDili``."""
    cozucu = KusDili()
    print("mesaj : %s" % (mesaj))
    cozucu.decode(mesaj)
def main():
    """Run ``test`` on a fixed set of sample encoded messages."""
    ornekler = (
        "sagaatga kaçga?",
        "herge zagamanga",
        "apgatalga",
        "vayga bega",
        "yogaugatugabega işgagalge edgecekge",
    )
    for mesaj in ornekler:
        test(mesaj)
# Run the sample decodes. NOTE(review): there is no __main__ guard, so this
# also runs whenever the module is imported.
main()
@Septillioner
Copy link
Author

Aceleye getirdim, kod dağınık. Yaparsınız siz bir şeyler.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment