英単語から \textipa の入力に自動変換するツール
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Foundation | |
/// 設定のデフォルト値 | |
/// How pronunciation symbols enclosed in parentheses, such as (ə), are treated.
enum ParenTreatment: Int {
    /// Leave the parentheses and the enclosed symbol untouched.
    case none = 1
    /// Drop the parentheses but keep the enclosed symbol.
    case flatten = 2
    /// Delete the enclosed symbol together with its parentheses.
    case remove = 3
}
// By default, parenthesized symbols such as (ə) are left as they are.
var parenTreatment = ParenTreatment.none
// Whether to put a stress mark on monosyllabic words.
var stressMonosyllabicWord: Bool = false
// Whether to remove secondary stress.
var removeSecondStress: Bool = false
// Placeholder emitted when no pronunciation symbols can be found.
let NOTFOUND: String = "☃"
extension String {
    /// The string split into its characters (grapheme clusters), each wrapped in its own `String`.
    var stringArray: [String] {
        return map { String($0) }
    }

    /// Replaces every match of the regular expression `pattern` with the string returned by `replace`.
    ///
    /// - Parameters:
    ///   - pattern: An `NSRegularExpression` pattern. If it cannot be compiled, a warning is
    ///     written to standard error and the string is left unchanged.
    ///   - replace: Called once per match (last match first) with the matched text and the
    ///     contents of capture groups 1...n; returns the replacement text. A capture group
    ///     that did not participate in the match is passed as an empty string.
    mutating func replaceAllOccurrences(of pattern: String, replace: (String, [String]) -> String) {
        let regex: NSRegularExpression
        do {
            regex = try NSRegularExpression(pattern: pattern, options: [])
        } catch {
            let warning = "[WARNING] invalid regular expression \"\(pattern)\": \(error.localizedDescription)\n"
            FileHandle.standardError.write(Data(warning.utf8))
            return
        }

        let result = NSMutableString(string: self)
        let wholeRange = NSRange(location: 0, length: utf16.count)
        // Work from the last match to the first so that the ranges of the matches
        // still to be processed stay valid while `result` is being edited.
        for match in regex.matches(in: self, options: [], range: wholeRange).reversed() {
            var groups: [String] = []
            for index in 1..<match.numberOfRanges {
                let groupRange = match.range(at: index)
                // An optional group that did not participate reports NSNotFound;
                // asking for that substring would raise a range exception.
                groups.append(groupRange.location == NSNotFound ? "" : result.substring(with: groupRange))
            }
            let matchedText = result.substring(with: match.range)
            result.replaceCharacters(in: match.range, with: replace(matchedText, groups))
        }
        self = result as String
    }

    /// Replaces the first match of the regular expression `pattern` with the string returned by `replace`.
    ///
    /// - Parameters:
    ///   - pattern: A regular expression pattern.
    ///   - replace: Receives the matched text and returns the replacement text.
    ///     The string is left unchanged when nothing matches.
    mutating func replaceFirstOccurrence(of pattern: String, replace: (String) -> String) {
        guard let range = self.range(of: pattern, options: .regularExpression) else { return }
        replaceSubrange(range, with: replace(String(self[range])))
    }
}
/// Looks words up in one specific dictionary of the macOS Dictionary Services by
/// temporarily overriding the list of active dictionaries stored in the global
/// preferences domain.
class DictionaryServiceManager {
    private let AppleGlobalDomainName = "Apple Global Domain"
    private let DictionaryServicesKey = "com.apple.DictionaryServices"
    private let ActiveDictionariesKey = "DCSActiveDictionaries"

    /// The persistent global preferences domain, if it can be read.
    private var globalDomain: [String: Any]? {
        UserDefaults.standard.persistentDomain(forName: AppleGlobalDomainName)
    }

    /// The Dictionary Services preferences stored in the global domain.
    /// Conditional cast: an unexpectedly shaped value yields `nil` instead of a crash.
    private var dictionaryPreferences: [String: AnyObject]? {
        globalDomain?[DictionaryServicesKey] as? [String: AnyObject]
    }

    /// Stores `activeDictionaries` under the active-dictionaries key, or removes the key
    /// when `nil` is passed. Does nothing when the preferences cannot be read.
    private func setUserDictPreferences(_ activeDictionaries: AnyObject?) {
        guard var preferences = dictionaryPreferences, var domain = globalDomain else { return }
        preferences[ActiveDictionariesKey] = activeDictionaries
        domain[DictionaryServicesKey] = preferences
        UserDefaults.standard.setPersistentDomain(domain, forName: AppleGlobalDomainName)
    }

    /// Returns the definition text of `word` with only the given dictionary active.
    ///
    /// - Parameters:
    ///   - word: The word to look up.
    ///   - dictionaryPath: The dictionary to activate for the duration of the lookup.
    /// - Returns: The text returned by `DCSCopyTextDefinition`, or `nil` if there is none.
    func lookUp(_ word: String, inDictionary dictionaryPath: String) -> String? {
        // Snapshot the raw previous value so it can be put back exactly as it was,
        // including the case where the key did not exist at all.
        let previousValue = dictionaryPreferences?[ActiveDictionariesKey]
        setUserDictPreferences([dictionaryPath] as AnyObject)
        defer { setUserDictPreferences(previousValue) }

        let range = CFRangeMake(0, word.utf16.count)
        return DCSCopyTextDefinition(nil, word as CFString, range)?.takeRetainedValue() as String?
    }
}
/// Looks up the pronunciation of English words in the WISDOM dictionary and converts
/// the pronunciation symbols into input for TIPA's `\textipa` command.
class TipaConverter {
    /// Maps one (canonically decomposed) pronunciation symbol to its TIPA input.
    /// Symbols that are not listed are copied to the output unchanged.
    private static let tipaTable: [String: String] = {
        let pairs: [(String, String)] = [
            // Marks
            ("ː", ":"), ("ˈ", "\""), ("ˌ", "\"\""),
            // Consonants
            ("ʃ", "S"), ("ʒ", "Z"), ("θ", "T"), ("ð", "D"), ("ŋ", "N"),
            ("ɡ", "g"), ("ʔ", "P"), ("ɾ", "\\textfishhookr "),
            // ə
            ("ə́", "\\'@"), ("ə̀", "\\`@"), ("ə", "@"),
            // e
            ("é", "\\'e"), ("è", "\\`e"),
            // u
            ("ú", "\\'u"), ("ù", "\\`u"),
            // ʌ
            ("ʌ́", "\\'2"), ("ʌ̀", "\\`2"), ("ʌ", "2"),
            // ʊ
            ("ʊ́", "\\'U"), ("ʊ̀", "\\`U"), ("ʊ", "U"),
            // o
            ("ó", "\\'o"), ("ò", "\\`o"),
            // ɔ
            ("ɔ́", "\\'O"), ("ɔ̀", "\\`O"), ("ɔ", "O"),
            // ɪ
            ("ɪ́", "\\'I"), ("ɪ̀", "\\`I"), ("ɪ", "I"),
            // i
            ("í", "\\'\\i "), ("ì", "\\`\\i "),
            // æ
            ("ǽ", "\\'\\ae "), ("æ̀", "\\`\\ae "), ("æ", "\\ae "),
            // ɑ
            ("ɑ́", "\\'A"), ("ɑ̀", "\\`A"), ("ɑ", "A"),
            // ɒ
            ("ɒ́", "\\'6"), ("ɒ̀", "\\`6"), ("ɒ", "6"),
            // a
            ("á", "\\'a"), ("à", "\\`a"),
            // ɜ
            ("ɜ́", "\\'3"), ("ɜ̀", "\\`3"), ("ɜ", "3"),
            // ɛ
            ("ɛ́", "\\'E"), ("ɛ̀", "\\`E"), ("ɛ", "E"),
        ]
        // Keep the first entry for a key, like the first matching `case` of a switch would.
        return Dictionary(pairs, uniquingKeysWith: { first, _ in first })
    }()

    /// Looks `word` up in the WISDOM dictionary and returns its pronunciation symbols.
    ///
    /// The text between the first `|` and the following `|`, `/` or `,` of the dictionary
    /// entry is taken, trimmed, adjusted according to the global `parenTreatment`,
    /// `stressMonosyllabicWord` and `removeSecondStress` settings, and returned.
    ///
    /// - Parameter word: The word to look up.
    /// - Returns: The pronunciation symbols, or `nil` when the entry or the pronunciation
    ///   section cannot be found.
    func lookUpPronunciationSymbolsInWisdom(_ word: String) -> String? {
        let dicName = "Sanseido The WISDOM English-Japanese Japanese-English Dictionary.dictionary"
        let wisdomRegex = "\\|.*?[\\|/,]"

        guard let content = DictionaryServiceManager().lookUp(word, inDictionary: dicName),
              let match = content.range(of: wisdomRegex, options: .regularExpression) else { return nil }

        // Drop the opening `|` and the terminating delimiter of the match.
        let inner = content.index(after: match.lowerBound)..<content.index(before: match.upperBound)
        var pron = String(content[inner])
        pron.replaceFirstOccurrence(of: "^\\s*", replace: { _ in "" })
        pron.replaceFirstOccurrence(of: "\\s*$", replace: { _ in "" })

        switch parenTreatment {
        case .none:
            break
        case .flatten:
            pron.replaceAllOccurrences(of: "\\(|\\)", replace: { (_, _) in "" })
        case .remove:
            pron.replaceAllOccurrences(of: "\\(.*?\\)", replace: { (_, _) in "" })
        }

        return arrangePronunciation(pron)
    }

    /// Converts pronunciation symbols into the body of a `\textipa{...}` command.
    ///
    /// - Parameter pronunciation: Pronunciation symbols in any normalization form.
    /// - Returns: The TIPA input; symbols without a table entry are copied unchanged.
    func convertToTipa(_ pronunciation: String) -> String {
        var result = ""
        for cluster in pronunciation.decomposedStringWithCanonicalMapping {
            var symbol = String(cluster)
            let units = Array(symbol.utf16)
            // A dentalization mark, subscript bridge (U+032A), right after the base symbol:
            // emit \textsubbridge and continue with the base symbol alone.
            if units.count > 1 && units[1] == 0x032A {
                symbol = String(decoding: units.prefix(1), as: UTF16.self)
                result += "\\textsubbridge "
            }
            result += TipaConverter.tipaTable[symbol] ?? symbol
        }
        // The space that terminates a control word is unnecessary in front of these characters.
        result.replaceAllOccurrences(of: " ([\\\\:\\(\\)\\]/])", replace: { (_, groups) in groups[0] })
        return result
    }

    /// Whether `pronunciation` contains a vowel carrying a primary stress mark (acute accent).
    private func stressIncluded(_ pronunciation: String) -> Bool {
        let stressedVowels = "ə́|é|ú|ʌ́|ʊ́|ó|ɔ́|ɪ́|í|ǽ|ɑ́|ɒ́|á|ɜ́|ɛ́".precomposedStringWithCanonicalMapping
        let normalized = pronunciation.precomposedStringWithCanonicalMapping
        return normalized.range(of: stressedVowels, options: .regularExpression) != nil
    }

    /// Applies the `stressMonosyllabicWord` and `removeSecondStress` settings to `pronunciation`.
    private func arrangePronunciation(_ pronunciation: String) -> String {
        var pron = pronunciation

        // A pronunciation without any primary stress mark (intended for monosyllabic
        // words) gets one on its first vowel.
        if stressMonosyllabicWord, !stressIncluded(pron) {
            pron.replaceFirstOccurrence(of: "[eɑɒʌaoɔɪiæuʊəɜɛ]", replace: { vowel in vowel == "i" ? "í" : vowel + "\u{0301}" })
        }

        // Remove secondary stress: strip every combining grave accent, not just the
        // first one, because a word may carry more than one secondary stress.
        if removeSecondStress {
            let graveAccent: Unicode.Scalar = "\u{0300}"
            let kept = pron.decomposedStringWithCanonicalMapping.unicodeScalars.filter { $0 != graveAccent }
            pron = String(String.UnicodeScalarView(kept))
        }

        return pron
    }

    /// Returns the `\textipa{...}` command for the pronunciation of `word`.
    ///
    /// - Parameters:
    ///   - word: The word to convert.
    ///   - enclose: Currently unused; kept so that existing call sites keep compiling.
    /// - Returns: The `\textipa` command, or `NOTFOUND` when no pronunciation is found.
    func wordToTipa(_ word: String, enclose: Bool = false) -> String {
        guard let pronunciation = lookUpPronunciationSymbolsInWisdom(word) else { return NOTFOUND }
        let result = convertToTipa(pronunciation)
        return "\\textipa{\(result)}"
    }
}
/// Prints the usage line and the list of supported options to standard output.
func showHelpMessage() {
    let helpLines: [String] = [
        "[Usage]",
        " word2tipa [options] word(s)...",
        "[Options]",
        " --flattenParen : flatten an enclosed symbol (e.g., (ə) → ə)",
        " --removeParen : remove an enclosed symbol (e.g., (ə))",
        " --stressMonosyllabicWord : e.g., /bɪɡ/ → /bɪ́ɡ/",
        " --removeSecondStress : e.g., /ɪ̀ntərfɪ́ər/ → /ɪntərfɪ́ər/",
    ]
    // One line per entry, each terminated by a newline.
    print(helpLines.joined(separator: "\n"))
}
// MARK: - Main

// Consume the leading command-line options and override the default settings with them.
var arguments = CommandLine.arguments.dropFirst()
while let option = arguments.first, option.first == "-" {
    arguments = arguments.dropFirst()
    // Option names are matched case-insensitively.
    switch option.lowercased() {
    case "--flattenParen".lowercased():
        parenTreatment = .flatten
    case "--removeParen".lowercased():
        parenTreatment = .remove
    case "--stressMonosyllabicWord".lowercased():
        stressMonosyllabicWord = true
    case "--removeSecondStress".lowercased():
        removeSecondStress = true
    case "--help".lowercased():
        showHelpMessage()
        exit(0)
    default:
        print("[ERROR] unrecognized argument: \(option)\n")
        showHelpMessage()
        exit(1)
    }
}

// At least one word must remain once the options have been consumed.
if arguments.isEmpty {
    print("[ERROR] no arguments\n")
    showHelpMessage()
    exit(1)
}

// Convert every remaining word and print the results separated by single spaces,
// without a trailing newline.
print(arguments.map { word in TipaConverter().wordToTipa(word) }.joined(separator: " "), terminator: "")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment