Skip to content

Instantly share code, notes, and snippets.

@LizardLeliel
Last active October 17, 2015 17:25
Show Gist options
  • Save LizardLeliel/32e9c29db5cc51a97a66 to your computer and use it in GitHub Desktop.
Save LizardLeliel/32e9c29db5cc51a97a66 to your computer and use it in GitHub Desktop.
A function for translating romaji into either hiragana or katana (but no way to do it reverse so far)
prefixTable = {
"" : 0 , "k" : 5 , "s" : 10, "t" : 15, "n" : 20,
"h" : 25, "m" : 30, "y" : 35, "r" : 40, "w" : 45,
"g" : 50, "z" : 55, "d" : 60, "p" : 65, "b" : 70,
"j" : 75, "ky": 80, "sh": 85, "ny": 90, "hy": 95,
"py": 100, "by": 105, "my": 110, "ch": 115, "ry": 120,
"gy": 125, "f" : 130, "ts": 135, "v" : 140,
"elongations" : 145,
"nn" : 150, ".": 151, "," : 152, "smalltsu" : 153, " " : 154,
"oh" : 155
}
# prefixTable.setdefault()
# Mostly for readability. I.E. if x in vowels:
vowels = {"a": 0, "i": 1, "u": 2, "e": 3, "o": 4}
doubleConsonants = ["k", "p", "s", "t"]
beginningLetters = ["k", "s", "t", "n", "h", "m", "y", "r", "w",
"g", "z", "d", "p", "b", "j", "c", "f", "v"]
secondLetters = ["y", "h", "s"] # No, there isn't a lot
pronouncination = [".", ",", " "]
transliterable = " abcdefghijklmnopqrstuvwxyz,."
# ゃ ゅ ょ
hiraganaTable = [
"あ", "い", "う", "え", "お", # No consonants
"か", "き", "く", "け", "こ", # K
"さ", "?", "す", "せ", "そ", # S
"た", "?", "?", "て", "と", # T
"な", "に", "ぬ", "ね", "の", # N
"は", "ひ", "?", "へ", "ほ", # H
"ま", "み", "む", "め", "も", # M
"や", "?", "ゆ", "?", "よ", # Y
"ら", "り", "る", "れ", "ろ", # R
"わ", "?", "?", "?", "?", # W
"が", "ぎ", "ぐ", "げ", "ご", # G
"ざ", "?", "ず", "ぜ", "ぞ", # Z
"だ", "?", "?", "で", "ど", # D
"ぱ", "ぴ", "ぷ", "ぺ", "ぽ", # P
"ば", "び", "ぶ", "べ", "ぼ", # B
"じゃ", "じ", "じゅ", "?", "じょ", # J
"きゃ", "?", "きゅ", "?", "きょ", # Ky
"しゃ", "し", "しゅ", "?", "しょ", # Sh
"にゃ", "?", "にゅ", "?", "にょ", # Ny
"ひゃ", "?", "ひゅ", "?", "ひょ", # Hy
"ぴゃ", "?", "ぴゅ", "?", "ぴょ", # Py
"びゃ", "?", "びゅ", "?", "びょ", # By
"みゃ", "?", "みゅ", "?", "みょ", # My
"ちゃ", "ち", "ちゅ", "?", "ちょ", # Ch
"りゃ", "?", "りゅ", "?", "りょ", # Ry
"ぎゃ", "?", "ぎゅ", "?", "ぎょ", # Gy
"?", "?", "ふ", "?", "?", # F
"?", "?", "つ", "?", "?", # Ts
"?", "?", "?", "?", "?", # V, which doesn't exist
# in Hiragana.
"あ", "い", "う", "い", "う", # Elongations
"ん", "。", "、", "っ", " ",
"を", "?" # hiraganaTable[-1]
]
katakanaTable = [
"ア", "イ", "ウ", "エ", "オ", # No consonants
"カ", "キ", "ク", "ケ", "コ", # K
"サ", "?", "ス", "セ", "ソ", # S
"タ", "ティ","トゥ","テ","ト", # T
"ナ", "ニ", "ヌ", "ネ", "ノ", # N
"ハ", "ヒ", "?", "ヘ", "ホ", # H
"マ", "ミ", "ム", "メ", "モ", # M
"ヤ", "?", "ユ", "?", "ヨ", # Y
"ラ", "リ", "ル", "レ", "ロ", # R
"ワ", "ウィ","?", "ウェ","ウォ", # W
"ガ", "ギ", "グ", "ゲ", "ゴ", # G
"ザ", "?", "ズ", "ゼ", "ゾ", # Z
"ダ", "ディ","デュ","デ","ド", # D
"パ", "ピ", "プ", "ペ", "ポ", # P
"バ", "ビ", "ブ", "ベ", "ボ", # B
"ジャ", "ジ", "ジュ", "ジェ","ジョ", # J
"キャ", "?", "きゅ", "?", "キョ", # Ky
"シャ", "シ", "シュ", "シェ","ショ", # Sh
"ニャ", "?", "ニュ", "?", "ニョ", # Ny
"ヒャ", "?", "ヒュ", "?", "ヒョ", # Hy
"ピャ", "?", "ピュ", "?", "ピョ", # Py
"ビャ", "?", "ビュ", "?", "ビョ", # By
"ミャ", "?", "ミュ", "?", "ミョ", # My
"チャ", "チ", "チュ", "チェ", "チョ", # Ch
"リャ", "?", "リュ", "?", "リョ", # Ry
"ギャ", "?", "ギュ", "?", "ギョ", # Gy
"ファ", "フィ", "フ", "フェ", "フォ", # F
"ツァ", "ツィ", "ツ", "ツェ", "ツォ", # Ts
"ヴァ", "ヴィ", "?", "ヴェ", "ヴォ", # V
"ー", "ー", "ー", "ー", "ー", # Elongations
"ン", "。", "、", "ッ", " ", # etc.
"ヲ", "?" # katakanaTable[-1]
]
hir = hiraganaTable
kat = katakanaTable
def parseJ(romaji, writing):
japanese = "\0"
romaji = romaji.lower()
c = 0
while c < len(romaji):
vowel = ''
posbe = ''
basiclen = 0
# If its a space, ,/., or a non-letter character,
# do that first
if romaji[c] in pronouncination:
japanese += writing[prefixTable[romaji[c]]]
c += 1
continue
if romaji[c] not in transliterable:
japanese += romaji[c]
c += 1
continue
# Special Particles:
# These particles must be surrounded by a space, a comma,
# a period, or something that is not a letter (whitespace
# for example)
# Wa (spelled as ha)
if (
c + 2 < len(romaji) and
(japanese[-1] in pronouncination
or japanese[-1] not in transliterable) and
(romaji[c+2] in pronouncination
or romaji[c+2] not in transliterable) and
(romaji[c] + romaji[c+1]) == "wa"
):
japanese += writing[25]
c += 2
continue
# (w)o
elif (
c + 1 < len(romaji) and
(japanese[-1] in pronouncination
or japanese[-1] not in transliterable) and
(romaji[c+1] in pronouncination
or romaji[c+1] not in transliterable) and
romaji[c] == "o"
):
japanese += writing[155]
c += 1
continue
# Vowel only
if romaji[c] in vowels:
basiclen = 1
japanese += writing[vowels[romaji[c]]]
# one or two consonsant then a vowel
elif romaji[c] in beginningLetters:
posbe = romaji[c]
basiclen = 1
# Test to see if another consonsant follows it
if c + 1 < len(romaji) and romaji[c+1] in secondLetters:
basiclen = 2
posbe += romaji[c+1]
# Then look for a vowel
if (c + basiclen < len(romaji) and
romaji[c + basiclen] in vowels
):
append = writing[prefixTable[posbe]
+ vowels[romaji [c + basiclen]]]
japanese += append
#c += 1 + basiclen
basiclen += 1
else:
japanese += "?"
c += 1
continue
else:
japanese += "?"
c += 1
continue
# elongation (only one allowed - maybe allow more for onomotopoiea?)
if (c + basiclen < len(romaji) and
romaji[c + basiclen] in vowels and
romaji[c + basiclen - 1] == romaji[c + basiclen]
):
index = (prefixTable["elongations"]
+ vowels[romaji[c + basiclen]])
append = writing[index]
japanese += append
basiclen += 1
# Is there an n after it and there's no vowel?
if (c + basiclen + 1 < len(romaji) and
romaji[c + basiclen] == "n" and
romaji[c + basiclen + 1] not in vowels
):
n = writing[150]
japanese += n
basiclen += 1
# Is there an n /and/ its the last thing there is?
elif (c + basiclen == len(romaji) - 1 and
romaji[c + basiclen] == "n"
):
n = writing[150]
japanese += n
basiclen += 1
# Is it a double consonant?
elif (c + basiclen + 1 < len(romaji) and
romaji[c + basiclen] == romaji[c + basiclen + 1] and
romaji[c + basiclen] in doubleConsonants
):
japanese += writing[153]
basiclen += 1
c += basiclen
# while loop end is here
return japanese
def tests():
print (hiraganaTable[prefixTable["ky"] + vowels["o"]])
print (hiraganaTable[prefixTable["m"] + vowels["u"]])
print (parseJ(" wa o aieuo", hiraganaTable))
print (parseJ(" wa o aieuo", katakanaTable))
print (parseJ("wa o aieuo ha sa hasa hya tsu tsi", hiraganaTable))
print (parseJ("wa o aieuo ha sa hasa hya tsu tsi", katakanaTable))
print (parseJ("myaa kyoo", hiraganaTable))
print (parseJ("myaa kyoo", katakanaTable))
print (parseJ("kyuushu", hir))
print (parseJ("kyuushu", kat))
print (parseJ("konohana sakuya ninja nippon", hir))
print (parseJ("konohana sakuya ninja nippon", kat))
print (parseJ(" o\n", hir))
# My name is Mike Solomon
print (
parseJ("Watashi no onamae wa ", hir) +
parseJ("Maiku soromoun ", kat) +
parseJ("desu.", hir)
)
# My hobby is ninjutsu
print (parseJ("\nWatashi no shumi wa ninjutsu desu.", hir))
# One plus 2 is...\n ...3!
print (parseJ("1 + 2 wa...\n ...san desu!", hir))
if __name__ == "__main__":
tests()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment