Skip to content

Instantly share code, notes, and snippets.

@yokolet
Last active May 20, 2019 04:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save yokolet/cafd4e87115e9c9720a053063853c3d5 to your computer and use it in GitHub Desktop.
Save yokolet/cafd4e87115e9c9720a053063853c3d5 to your computer and use it in GitHub Desktop.
katakana transcription
# https://en.wikibooks.org/wiki/Japanese/Transcribing_English_to_Japanese
import os
import sys
sys.path.append(os.path.abspath('./English-to-IPA'))
from pykakasi import kakasi
import romkan
import eng_to_ipa as ipa
import re
# Letters treated as vowels when scanning the English spelling.
vowels = 'aeiou'
# Characters treated as the start of a vowel sound in the phonetic string:
# the IPA vowel symbols, followed by the plain letters that open the
# multi-character keys of vowel_map ('eɪ', 'aɪ', 'ju', 'oʊ', ...).
vowsym = (
    'ɑiɪɛɜæʌɒɔʊuə'
    'eajo'
)
# Consonant symbols that have a dedicated rule in consonant_map.
conssym = 'θðlŋjhwvʃʧtdkpgm'
def a_short_rule(word: str, w_idx: int) -> str:
    """Return the romaji for IPA 'ɑ'.

    Args:
        word: English spelling of the word being transcribed.
        w_idx: Index in ``word`` of the vowel letter matched to this sound.
            It may equal ``len(word)`` when the pronunciation has more vowel
            sounds than the spelling has vowel groups.

    Returns:
        'o' when the sound is spelt with an "o", otherwise 'a'.
    """
    # ɑ - a
    # The bounds check avoids an IndexError when w_idx has run off the word.
    if w_idx < len(word) and word[w_idx] == 'o':
        return 'o'
    return 'a'
def a_long_rule(word: str, w_idx: int) -> str:
    """Return the romaji for a long 'a' sound (used for 'ɑː', 'ɑr' and 'ər').

    ``word`` and ``w_idx`` are unused; they are accepted so that every vowel
    rule shares the signature that transVowel calls.
    """
    # ɑː or ar - aa, a
    return 'aa'
def i_long_rule(word: str, w_idx: int) -> str:
    """Return the romaji for IPA 'iː' (also used for 'i').

    ``word`` and ``w_idx`` are unused; they are accepted so that every vowel
    rule shares the signature that transVowel calls.
    """
    # iː or i
    return 'ii'
def i_short_rule(word: str, w_idx: int) -> str:
    """Return the romaji for IPA 'ɪ'.

    ``word`` and ``w_idx`` are unused; they are accepted so that every vowel
    rule shares the signature that transVowel calls.
    """
    # ɪ
    return 'i'
def e_short_rule(word: str, w_idx: int) -> str:
    """Return the romaji for IPA 'ɛ'.

    ``word`` and ``w_idx`` are unused; they are accepted so that every vowel
    rule shares the signature that transVowel calls.
    """
    # ɛ
    return 'e'
def e_long_rule(word: str, w_idx: int) -> str:
    """Return the romaji for IPA 'ɜː'.

    ``word`` and ``w_idx`` are unused; they are accepted so that every vowel
    rule shares the signature that transVowel calls.
    """
    # ɜː - aa, a
    return 'aa'
def ae_rule(word: str, w_idx: int) -> str:
    """Return the romaji for IPA 'æ'.

    Args:
        word: English spelling of the word being transcribed.
        w_idx: Index in ``word`` of the vowel letter matched to this sound.

    Returns:
        'ya' when the preceding letter is "c" or "g" (so the consonant pass
        yields kya / gya), otherwise 'a'.
    """
    # æ
    if w_idx >= 1 and word[w_idx - 1] in ('c', 'g'):
        return 'ya'
    return 'a'
def hat_rule(word: str, w_idx: int) -> str:
    """Return the romaji for IPA 'ʌ'.

    Args:
        word: English spelling of the word being transcribed.
        w_idx: Index in ``word`` of the vowel letter matched to this sound.
            It may equal ``len(word)`` when the pronunciation has more vowel
            sounds than the spelling has vowel groups.

    Returns:
        'o' when the sound is spelt with an "o" (e.g. "monkey", "front"),
        otherwise 'a'.
    """
    # ʌ
    # The bounds check avoids an IndexError when w_idx has run off the word.
    if w_idx < len(word) and word[w_idx] == 'o':
        return 'o'
    return 'a'
def o_short_rule(word: str, w_idx: int) -> str:
    """Return the romaji for IPA 'ɒ'.

    ``word`` and ``w_idx`` are unused; they are accepted so that every vowel
    rule shares the signature that transVowel calls.
    """
    # ɒ
    return 'o'
def o_long_rule(word: str, w_idx: int) -> str:
    """Return the romaji for IPA 'ɔː' (also used for 'ɔ' and 'ɔr').

    ``word`` and ``w_idx`` are unused; they are accepted so that every vowel
    rule shares the signature that transVowel calls.
    """
    # ɔː
    return 'oo'
def u_short_rule(word: str, w_idx: int) -> str:
    """Return the romaji for IPA 'ʊ'.

    ``word`` and ``w_idx`` are unused; they are accepted so that every vowel
    rule shares the signature that transVowel calls.
    """
    # ʊ
    return 'u'
def u_long_rule(word: str, w_idx: int) -> str:
    """Return the romaji for IPA 'uː' (also used for 'u').

    ``word`` and ``w_idx`` are unused; they are accepted so that every vowel
    rule shares the signature that transVowel calls.
    """
    # uː
    return 'uu'
def e2_rule(word: str, w_idx: int) -> str:
    """Return the romaji for the schwa 'ə', chosen from the spelling.

    Args:
        word: English spelling of the word being transcribed.
        w_idx: Index in ``word`` of the vowel letter matched to this sound.
            It may equal ``len(word)`` when the pronunciation has more vowel
            sounds than the spelling has vowel groups.

    Returns:
        'u' when the two preceding letters are "bl" or "gl"; 'o' when the
        sound is spelt with a non-final "o" that is not followed by "u";
        'a' in every other case.
    """
    # ə
    if w_idx >= 2 and word[w_idx - 2:w_idx] in ('bl', 'gl'):
        return 'u'
    # Last letter of the word, or already past the end: nothing to inspect,
    # and indexing below would raise IndexError.
    if w_idx >= len(word) - 1:
        return 'a'
    # w_idx + 1 is a valid index here because of the guard above.
    if word[w_idx] == 'o' and word[w_idx + 1] != 'u':
        return 'o'
    return 'a'
def ei_rule(word: str, w_idx: int) -> str:
    """Return the romaji for IPA 'eɪ' (also used for 'e').

    Args:
        word: English spelling of the word being transcribed.
        w_idx: Index in ``word`` of the vowel letter matched to this sound.

    Returns:
        'ei' when the vowel letter is followed by "y" in the spelling,
        otherwise 'e'.
    """
    # eɪ or e - ei, ee, e
    if w_idx < len(word) - 1 and word[w_idx + 1] == 'y':
        return 'ei'
    return 'e'
def ai_rule(word: str, w_idx: int) -> str:
    """Return the romaji for IPA 'aɪ'.

    ``word`` and ``w_idx`` are unused; they are accepted so that every vowel
    rule shares the signature that transVowel calls.
    """
    # aɪ
    return 'ai'
def oi_rule(word: str, w_idx: int) -> str:
    """Return the romaji for IPA 'ɔɪ'.

    ``word`` and ``w_idx`` are unused; they are accepted so that every vowel
    rule shares the signature that transVowel calls.
    """
    # ɔɪ - ooi, oi
    return 'ooi'
def eu_rule(word: str, w_idx: int) -> str:
    """Return the romaji for IPA 'əʊ' (also used for 'oʊ').

    ``word`` and ``w_idx`` are unused; they are accepted so that every vowel
    rule shares the signature that transVowel calls.
    """
    # əʊ - o, oo
    return 'oo'
def au_rule(word: str, w_idx: int) -> str:
    """Return the romaji for IPA 'aʊ'.

    ``word`` and ``w_idx`` are unused; they are accepted so that every vowel
    rule shares the signature that transVowel calls.
    """
    # aʊ
    return 'au'
def ie_rule(word: str, w_idx: int) -> str:
    """Return the romaji for IPA 'ɪə' (also used for 'ɪr').

    ``word`` and ``w_idx`` are unused; they are accepted so that every vowel
    rule shares the signature that transVowel calls.
    """
    # ɪə - ia, iaa
    return 'iaa'
def ee_rule(word: str, w_idx: int) -> str:
    """Return the romaji for IPA 'ɛə' (also used for 'ɛr').

    ``word`` and ``w_idx`` are unused; they are accepted so that every vowel
    rule shares the signature that transVowel calls.
    """
    # ɛə - ea, eaa
    return 'eaa'
def ue_rule(word: str, w_idx: int) -> str:
    """Return the romaji for IPA 'ʊə' (also used for 'ʊr').

    ``word`` and ``w_idx`` are unused; they are accepted so that every vowel
    rule shares the signature that transVowel calls.
    """
    # ʊə
    return 'uaa'
def ju_long_rule(word: str, w_idx: int) -> str:
    """Return the romaji for IPA 'juː' (also used for 'ju').

    ``word`` and ``w_idx`` are unused; they are accepted so that every vowel
    rule shares the signature that transVowel calls.
    """
    # juː
    return 'yuu'
def jvow_rule(word: str, w_idx: int) -> str:
    """Return a bare 'j' unchanged.

    This rule handles a 'j' that is not part of 'ju' / 'juː'. The symbol is
    kept as 'j' in the output of the vowel pass.

    ``word`` and ``w_idx`` are unused; they are accepted so that every vowel
    rule shares the signature that transVowel calls.
    """
    # j on its own (not juː)
    return 'j'
# Dispatch table for the vowel pass: phonetic vowel symbol (one to three
# characters) -> rule function taking (word, w_idx) and returning romaji.
vowel_map = {
    # a-like sounds
    'ɑ': a_short_rule,
    'ɑː': a_long_rule,
    'ɑr': a_long_rule,
    # i-like sounds
    'iː': i_long_rule,
    'i': i_long_rule,
    'ɪ': i_short_rule,
    # e-like sounds
    'ɛ': e_short_rule,
    'ɜː': e_long_rule,
    'æ': ae_rule,
    'ʌ': hat_rule,
    # o-like sounds
    'ɒ': o_short_rule,
    'ɔː': o_long_rule,
    'ɔ': o_long_rule,
    'ɔr': o_long_rule,
    # u-like sounds
    'ʊ': u_short_rule,
    'uː': u_long_rule,
    'u': u_long_rule,
    # schwa
    'ə': e2_rule,
    'ər': a_long_rule,
    # diphthongs and r-coloured vowels
    'eɪ': ei_rule,
    'e': ei_rule,
    'aɪ': ai_rule,
    'ɔɪ': oi_rule,
    'əʊ': eu_rule,
    'oʊ': eu_rule,
    'aʊ': au_rule,
    'ɪə': ie_rule,
    'ɪr': ie_rule,
    'ɛə': ee_rule,
    'ɛr': ee_rule,
    'ʊə': ue_rule,
    'ʊr': ue_rule,
    # j + vowel
    'juː': ju_long_rule,
    'ju': ju_long_rule,
    'j': jvow_rule,
}
# æ after k => kya
# æ after g => gya
# ʌ spelt with an "o" => o, ex: monkey, front
# non-final ə => ?, ex: about, pilot, london
# final position ə spelt as "-r" => aa
# final position ə spelt with an "a" => a
def transVowel(word: str, ph: str) -> str:
    """Replace the vowel symbols of a phonetic string with romaji.

    The spelling and the phonetic string are walked in parallel: each vowel
    sound in ``ph`` is paired with the next group of vowel letters in
    ``word`` so that the rule functions can inspect the spelling.

    Args:
        word: English spelling of the word.
        ph: Phonetic (IPA) string for ``word``.

    Returns:
        ``ph`` with every vowel symbol known to ``vowel_map`` replaced by the
        output of its rule. Characters not in ``vowsym`` are copied
        unchanged, and so are vowel symbols with no entry in ``vowel_map``.
    """
    pieces = []
    w_idx = 0
    p_idx = 0
    word_len = len(word)
    ph_len = len(ph)
    while w_idx < word_len or p_idx < ph_len:
        # spelling: skip consonant letters up to the next vowel letter
        while w_idx < word_len and word[w_idx] not in vowels:
            w_idx += 1
        # consonant phonetics: copied as is, converted in a later pass
        while p_idx < ph_len and ph[p_idx] not in vowsym:
            pieces.append(ph[p_idx])
            p_idx += 1
        # vowel phonetics: try the longest symbol first (3, then 2, then 1)
        if p_idx < ph_len:
            for size in (3, 2, 1):
                symbol = ph[p_idx:p_idx + size]
                if symbol in vowel_map:
                    pieces.append(vowel_map[symbol](word, w_idx))
                    # near the end of ph the slice may be shorter than size
                    p_idx += len(symbol)
                    break
            else:
                # vowsym contains symbols (e.g. a lone 'a', 'o' or 'ɜ') that
                # have no single-character rule; pass them through instead
                # of raising KeyError.
                pieces.append(ph[p_idx])
                p_idx += 1
        # vowel chars may be more than one in word, skips those
        while w_idx < word_len and word[w_idx] in vowels:
            w_idx += 1
    return ''.join(pieces)
def th_clear_rule(word: str, ph: str, p_idx: int) -> str:
    """Return the romaji for the voiceless 'θ'.

    ``word``, ``ph`` and ``p_idx`` are unused; they are accepted so that
    every consonant rule shares the signature that transConsonants calls.
    """
    # θ
    return 's'
def th_hakuon_rule(word: str, ph: str, p_idx: int) -> str:
    """Return the romaji for the voiced 'ð'.

    ``word``, ``ph`` and ``p_idx`` are unused; they are accepted so that
    every consonant rule shares the signature that transConsonants calls.
    """
    # ð
    return 'z'
def l_rule(word: str, ph: str, p_idx: int) -> str:
    """Return the romaji for 'l', which is written as 'r'.

    ``word``, ``ph`` and ``p_idx`` are unused; they are accepted so that
    every consonant rule shares the signature that transConsonants calls.
    """
    # l
    return 'r'
def n_rule(word: str, ph: str, p_idx: int) -> str:
    """Return the romaji for 'ŋ'.

    Args:
        word: English spelling of the word being transcribed.
        ph: Phonetic string being converted (unused).
        p_idx: Index of the symbol in ``ph`` (unused).

    Returns:
        'Ng' when the spelling contains "ng" anywhere, otherwise 'N'.
    """
    # ŋ - Ng, N
    return 'Ng' if 'ng' in word else 'N'
def jcon_rule(word: str, ph: str, p_idx: int) -> str:
    """Return the romaji for a consonantal 'j'.

    Args:
        word: English spelling of the word being transcribed (unused).
        ph: Phonetic string being converted.
        p_idx: Index of the 'j' in ``ph``.

    Returns:
        'i' when the next symbol is 'i', 'ɪ' or 'e'; otherwise 'j'
        (including when 'j' is the last symbol).
    """
    # j (before the sounds i, ɪ, or e)
    if p_idx < len(ph) - 1 and ph[p_idx + 1] in ('i', 'ɪ', 'e'):
        return 'i'
    return 'j'
def h_rule(word: str, ph: str, p_idx: int) -> str:
    """Return the romaji for 'h'.

    Args:
        word: English spelling of the word being transcribed (unused).
        ph: Phonetic string being converted.
        p_idx: Index of the 'h' in ``ph``.

    Returns:
        'f' when the next symbol is 'u' or 'ʊ'; otherwise 'h' (including
        when 'h' is the last symbol).
    """
    # h (before the sounds u or ʊ)
    if p_idx < len(ph) - 1 and ph[p_idx + 1] in ('u', 'ʊ'):
        return 'f'
    return 'h'
def w_rule(word: str, ph: str, p_idx: int) -> str:
    """Return the romaji for 'w'.

    Args:
        word: English spelling of the word being transcribed.
        ph: Phonetic string being converted (unused).
        p_idx: Index of the 'w' in ``ph``.

    Returns:
        'how' when the 'w' is the first symbol and the spelling starts with
        "wh"; otherwise 'u'.
    """
    if p_idx == 0 and word.startswith('wh'):
        return 'how'
    return 'u'
def v_rule(word: str, ph: str, p_idx: int) -> str:
    """Return the romaji for 'v', which is written as 'b'.

    ``word``, ``ph`` and ``p_idx`` are unused; they are accepted so that
    every consonant rule shares the signature that transConsonants calls.
    """
    return 'b'
def s_rule(word: str, ph: str, p_idx: int) -> str:
    """Return the romaji for 'ʃ'.

    Args:
        word: English spelling of the word being transcribed (unused).
        ph: Phonetic string being converted (vowels already in romaji).
        p_idx: Index of the 'ʃ' in ``ph``.

    Returns:
        'sshu' when this 'ʃ' is the last symbol and follows a vowel;
        otherwise 'sh'.
    """
    # ʃ
    # Test the symbol at p_idx, not merely the end of the string. Otherwise
    # an earlier 'ʃ' would be doubled whenever the word ends in vowel + 'ʃ'.
    is_final = p_idx == len(ph) - 1
    if is_final and p_idx >= 1 and ph[p_idx] == 'ʃ' and ph[p_idx - 1] in vowsym:
        return 'sshu'
    return 'sh'
def ts_rule(word: str, ph: str, p_idx: int) -> str:
    """Return the romaji for 'ʧ'.

    Args:
        word: English spelling of the word being transcribed (unused).
        ph: Phonetic string being converted (vowels already in romaji).
        p_idx: Index of the 'ʧ' in ``ph``.

    Returns:
        'cchi' when the symbol follows a single vowel character (the
        character before that vowel is absent or not a vowel); otherwise
        'ch'.
    """
    # ʧ
    after_vowel = p_idx >= 1 and ph[p_idx] == 'ʧ' and ph[p_idx - 1] in vowsym
    # p_idx < 2 means there is nothing two places back; without this guard
    # ph[p_idx - 2] would wrap around to the end of the string.
    if after_vowel and (p_idx < 2 or ph[p_idx - 2] not in vowsym):
        return 'cchi'
    return 'ch'
def t_rule(word: str, ph: str, p_idx: int) -> str:
    """Return the romaji for 't'.

    Args:
        word: English spelling of the word being transcribed (unused).
        ph: Phonetic string being converted (vowels already in romaji).
        p_idx: Index of the 't' in ``ph``.

    Returns:
        'tto' when the symbol follows a single vowel character (the
        character before that vowel is absent or not a vowel); otherwise
        't'.
    """
    # t - tto
    after_vowel = p_idx >= 1 and ph[p_idx] == 't' and ph[p_idx - 1] in vowsym
    # p_idx < 2 means there is nothing two places back; without this guard
    # ph[p_idx - 2] would wrap around to the end of the string.
    if after_vowel and (p_idx < 2 or ph[p_idx - 2] not in vowsym):
        return 'tto'
    return 't'
def d_rule(word: str, ph: str, p_idx: int) -> str:
    """Return the romaji for 'd'.

    Args:
        word: English spelling of the word being transcribed (unused).
        ph: Phonetic string being converted (vowels already in romaji).
        p_idx: Index of the 'd' in ``ph``.

    Returns:
        'z' when the next symbol is 'z'; 'ddo' when the symbol follows a
        single vowel character (the character before that vowel is absent
        or not a vowel); otherwise 'd'.
    """
    # d in the end - ddo
    if p_idx < len(ph) - 1 and ph[p_idx + 1] == 'z':
        return 'z'
    after_vowel = p_idx >= 1 and ph[p_idx] == 'd' and ph[p_idx - 1] in vowsym
    # p_idx < 2 means there is nothing two places back; without this guard
    # ph[p_idx - 2] would wrap around to the end of the string.
    if after_vowel and (p_idx < 2 or ph[p_idx - 2] not in vowsym):
        return 'ddo'
    return 'd'
def kpg_rule(word: str, ph: str, p_idx: int) -> str:
    """Return the romaji for 'k', 'p' or 'g'.

    Args:
        word: English spelling of the word being transcribed (unused).
        ph: Phonetic string being converted (vowels already in romaji).
        p_idx: Index of the consonant in ``ph``.

    Returns:
        The consonant doubled plus 'u' (e.g. 'kku', 'ppu') when it follows a
        single vowel character (the character before that vowel is absent
        or not a vowel); otherwise the consonant unchanged.
    """
    # k, p -- kku, ppu
    symbol = ph[p_idx]
    after_vowel = p_idx >= 1 and symbol in ('k', 'p', 'g') and ph[p_idx - 1] in vowsym
    # p_idx < 2 means there is nothing two places back; without this guard
    # ph[p_idx - 2] would wrap around to the end of the string.
    if after_vowel and (p_idx < 2 or ph[p_idx - 2] not in vowsym):
        return symbol + symbol + 'u'
    return symbol
def m_rule(word: str, ph: str, p_idx: int) -> str:
    """Return the romaji for 'm'.

    Args:
        word: English spelling of the word being transcribed (unused).
        ph: Phonetic string being converted (vowels already in romaji).
        p_idx: Index of the 'm' in ``ph``.

    Returns:
        'n' when another symbol follows and it is not a vowel; otherwise
        'm' (including when 'm' is the last symbol).
    """
    # m not followed by vowel - n
    if p_idx < len(ph) - 1 and ph[p_idx + 1] not in vowsym:
        return 'n'
    return 'm'
# Dispatch table for the consonant pass: consonant symbol -> rule function
# taking (word, ph, p_idx) and returning romaji.
consonant_map = {
    'θ': th_clear_rule,
    'ð': th_hakuon_rule,
    'l': l_rule,
    'ŋ': n_rule,
    'j': jcon_rule,
    'h': h_rule,
    'w': w_rule,
    'v': v_rule,
    'ʃ': s_rule,
    'ʧ': ts_rule,
    't': t_rule,
    'd': d_rule,
    # k, p and g share one rule
    'k': kpg_rule,
    'p': kpg_rule,
    'g': kpg_rule,
    'm': m_rule,
}
def transConsonants(word: str, step2: str) -> str:
    """Replace the consonant symbols of a vowel-converted string with romaji.

    Args:
        word: English spelling of the word.
        step2: Output of the vowel pass (vowels already in romaji).

    Returns:
        ``step2`` with every character found in ``conssym`` replaced by the
        output of its ``consonant_map`` rule; all other characters are
        copied unchanged.
    """
    pieces = []
    for idx, symbol in enumerate(step2):
        if symbol in conssym:
            # rules receive the whole string so they can look at neighbours
            pieces.append(consonant_map[symbol](word, step2, idx))
        else:
            pieces.append(symbol)
    return ''.join(pieces)
def transcribe(word: str) -> str:
    """Transcribe an English word towards romaji and print each stage.

    Args:
        word: English spelling of the word.

    Returns:
        The string produced by the consonant pass (step 3). Epenthetic
        vowels (step 4) are not added yet.
    """
    # step 1: make phonetic
    ph = ipa.convert(word)
    # step 2: convert vowels
    step2 = transVowel(word, ph)
    # step 3: convert consonants
    step3 = transConsonants(word, step2)
    print(word, ph, step2, step3)
    # step 4: add epenthetic vowels (not implemented yet)
    return step3
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment