Skip to content

Instantly share code, notes, and snippets.

@Mekajiki
Last active October 8, 2023 22:48
Show Gist options
  • Save Mekajiki/8572429 to your computer and use it in GitHub Desktop.
Save Mekajiki/8572429 to your computer and use it in GitHub Desktop.
ひらがなを音声認識アプリケーションJuliusで使われている音素表現(.htkdic)に変換する
package net.mekajiki;
import com.ibm.icu.text.Transliterator;
import java.util.ArrayList;
import java.util.List;
/**
 * Converts Japanese hiragana text into the space-separated phoneme notation
 * used by dictionaries (.htkdic) of the Julius speech recognition engine.
 *
 * <p>The conversion is a two-step pipeline: ICU transliterates hiragana into
 * romaji, and an ordered list of regular-expression rewrites then turns that
 * romaji into phoneme tokens.
 */
public class Hiragana2Phoneme {

    /**
     * Ordered {@code {regex, replacement}} pairs applied by
     * {@link #romaji2Phoneme(String)}. The rules never change, so they are
     * built once instead of on every call. Order matters: later rules rely on
     * the output of earlier ones.
     */
    private static final List<String[]> REPLACE_RULES = buildReplaceRules();

    /**
     * Converts hiragana straight to phonemes by chaining
     * {@link #hiragana2Romaji(String)} and {@link #romaji2Phoneme(String)}.
     *
     * @param text hiragana text
     * @return the phoneme string produced by {@link #romaji2Phoneme(String)}
     */
    public static String hiragana2Phoneme(String text) {
        return romaji2Phoneme(hiragana2Romaji(text));
    }

    /**
     * Transliterates hiragana into Latin script using ICU's
     * {@code "Hiragana-Latin"} transform.
     *
     * @param text hiragana text
     * @return the transliterated text
     */
    public static String hiragana2Romaji(String text) {
        Transliterator transliterator = Transliterator.getInstance("Hiragana-Latin");
        return transliterator.transliterate(text);
    }

    /**
     * Transliterates hiragana into katakana using ICU's
     * {@code "Hiragana-Katakana"} transform.
     *
     * @param text hiragana text
     * @return the transliterated text
     */
    public static String hiragana2katakana(String text) {
        Transliterator transliterator = Transliterator.getInstance("Hiragana-Katakana");
        return transliterator.transliterate(text);
    }

    /**
     * Rewrites romaji into space-separated phoneme tokens.
     *
     * <p>Every token is emitted followed by a separator, so a non-empty result
     * normally ends with a single trailing space. For example {@code "kitte"}
     * becomes {@code "k i q t e "} and {@code "hon"} becomes {@code "h o N "}.
     * An empty string is returned unchanged.
     *
     * @param text romaji, presumably in the form produced by
     *             {@link #hiragana2Romaji(String)}
     * @return the phoneme tokens separated by single spaces
     * @throws NullPointerException if {@code text} is {@code null}
     */
    public static String romaji2Phoneme(String text) {
        String result = text;
        for (String[] rule : REPLACE_RULES) {
            result = result.replaceAll(rule[0], rule[1]);
        }
        return result;
    }

    /** Builds the ordered rewrite table used by {@link #romaji2Phoneme(String)}. */
    private static List<String[]> buildReplaceRules() {
        List<String[]> rules = new ArrayList<String[]>();

        // Moraic nasal: an explicitly marked "n'" is always N.
        rules.add(new String[]{"n'", "N"});
        // Moraic nasal: any other n that does not start a syllable. The lookahead
        // (rather than a consuming group) also catches a word-final n, and it
        // leaves "ny" (palatalized n) and n before a macron vowel untouched.
        rules.add(new String[]{"n(?![aiueoyāīūēō])", "N"});

        // "ou" is pronounced as a long o.
        rules.add(new String[]{"ou", "o:"});

        // Macron vowels are long vowels.
        rules.add(new String[]{"ā", "a:"});
        rules.add(new String[]{"ī", "i:"});
        rules.add(new String[]{"ū", "u:"});
        rules.add(new String[]{"ē", "e:"});
        rules.add(new String[]{"ō", "o:"});

        // A run of the same vowel is a long vowel.
        String[] vowels = {"a", "i", "u", "e", "o"};
        for (String vowel : vowels) {
            rules.add(new String[]{vowel + "{2,}", vowel + ":"});
        }

        // Tokenize: put a space after every vowel (with its length mark) or N ...
        rules.add(new String[]{"[aiueoN]:?", "$0 "});
        // ... and after every consonant cluster. Length marks and whitespace are
        // excluded so a cluster is never glued to the separator before it.
        rules.add(new String[]{"[^aiueoN:\\s]+", "$0 "});
        rules.add(new String[]{"\\s+", " "});

        // "tch" is a geminate (sokuon) followed by "ch".
        rules.add(new String[]{"tch", "q ch"});
        // "dz" is not a valid phoneme; use "z".
        rules.add(new String[]{"dz", "z"});
        // A doubled consonant at the start of a token is a geminate. "\\1" is the
        // pattern backreference; "$1" is only valid in the replacement string.
        rules.add(new String[]{" ([a-z&&[^aiueo]])\\1", " q $1"});

        return rules;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment