Skip to content

Instantly share code, notes, and snippets.

@doraTeX
Created December 13, 2022 14:00
Embed
What would you like to do?
英単語から \textipa の入力に自動変換するツール
import Foundation
// MARK: - Default settings

/// How a pronunciation symbol enclosed in parentheses, such as `(ə)`, is treated.
enum ParenTreatment: Int {
    /// Leave the parentheses and the enclosed symbol as they are.
    case none = 1
    /// Drop the parentheses but keep the enclosed symbol.
    case flatten = 2
    /// Delete the parentheses together with the enclosed symbol.
    case remove = 3
}
/// Treatment of parenthesised symbols such as `(ə)`; by default they are left as they are.
var parenTreatment = ParenTreatment.none

/// Whether to put a stress mark on monosyllabic words.
var stressMonosyllabicWord: Bool = false

/// Whether to remove the secondary stress.
var removeSecondStress: Bool = false

/// Substitute symbol emitted when no pronunciation is found.
let NOTFOUND: String = "☃"
extension String {
    /// The string split into its characters, each returned as a one-character `String`.
    var stringArray: [String] {
        return map { String($0) }
    }

    /// Replaces every match of the regular expression `pattern` in place.
    ///
    /// - Parameters:
    ///   - pattern: An `NSRegularExpression` pattern. If it cannot be compiled, the string is
    ///     left unchanged (best effort; no error is reported).
    ///   - replace: Called once per match with the matched text and the texts of capture
    ///     groups 1, 2, …; its return value replaces the match. A capture group that did not
    ///     take part in the match is passed as `""`.
    mutating func replaceAllOccurrences(of pattern: String, replace: (String, [String]) -> String) {
        guard let regex = try? NSRegularExpression(pattern: pattern, options: []) else {
            return
        }
        let source = self as NSString
        let result = NSMutableString(string: self)
        let matches = regex.matches(in: self, options: [], range: NSRange(location: 0, length: source.length))
        // Walk from the last match to the first so the ranges of the remaining matches stay valid
        // while `result` is being edited.
        for match in matches.reversed() {
            let groups: [String] = (1..<match.numberOfRanges).map { index in
                let groupRange = match.range(at: index)
                // A group that did not participate reports location NSNotFound; taking a
                // substring with that range would raise a range exception.
                return groupRange.location == NSNotFound ? "" : source.substring(with: groupRange)
            }
            let matchedText = source.substring(with: match.range)
            result.replaceCharacters(in: match.range, with: replace(matchedText, groups))
        }
        self = result as String
    }

    /// Replaces the first match of the regular expression `pattern` in place.
    ///
    /// - Parameters:
    ///   - pattern: A regular expression searched with `.regularExpression`.
    ///   - replace: Receives the matched text and returns its replacement.
    ///
    /// The string is left unchanged when nothing matches.
    mutating func replaceFirstOccurrence(of pattern: String, replace: (String) -> String) {
        guard let matchRange = self.range(of: pattern, options: .regularExpression) else {
            return
        }
        self.replaceSubrange(matchRange, with: replace(String(self[matchRange])))
    }
}
/// Looks words up through Dictionary Services while temporarily making one dictionary the
/// only active one in the user's global preferences.
class DictionaryServiceManager {
    private let AppleGlobalDomainName = "Apple Global Domain"
    private let DictionaryServicesKey = "com.apple.DictionaryServices"
    private let ActiveDictionariesKey = "DCSActiveDictionaries"

    /// The persistent "Apple Global Domain" defaults, if present.
    private var globalDomain: [String: Any]? {
        return UserDefaults.standard.persistentDomain(forName: AppleGlobalDomainName)
    }

    /// The Dictionary Services preferences stored in the global domain.
    /// `nil` when the entry is missing or is not a dictionary (a conditional cast is used so an
    /// unexpected value cannot crash the tool).
    private var dictionaryPreferences: [String: AnyObject]? {
        return globalDomain?[DictionaryServicesKey] as? [String: AnyObject]
    }

    /// Writes `value` as the active-dictionaries entry; `nil` removes the entry.
    /// Does nothing when the Dictionary Services preferences or the global domain are unavailable.
    private func writeActiveDictionaries(_ value: AnyObject?) {
        guard var preferences = dictionaryPreferences, var domain = globalDomain else {
            return
        }
        preferences[ActiveDictionariesKey] = value
        domain[DictionaryServicesKey] = preferences
        UserDefaults.standard.setPersistentDomain(domain, forName: AppleGlobalDomainName)
    }

    /// Makes `activeDictionaries` the list of active dictionaries.
    private func setUserDictPreferences(_ activeDictionaries: [String]) {
        writeActiveDictionaries(activeDictionaries as AnyObject)
    }

    /// Looks `word` up with `dictionaryPath` as the only active dictionary.
    ///
    /// - Parameters:
    ///   - word: The word to look up; the whole string is used as the search range.
    ///   - dictionaryPath: The entry written to the active-dictionaries preference for the
    ///     duration of the lookup.
    /// - Returns: The definition text, or `nil` when the lookup yields nothing.
    ///
    /// The previous active-dictionaries entry is restored afterwards. If there was no entry,
    /// the temporary one is removed again instead of being left in the user's preferences.
    func lookUp(_ word: String, inDictionary dictionaryPath: String) -> String? {
        let previousValue = dictionaryPreferences?[ActiveDictionariesKey]
        setUserDictPreferences([dictionaryPath])
        defer { writeActiveDictionaries(previousValue) }

        let range = CFRangeMake(0, word.utf16.count)
        return DCSCopyTextDefinition(nil, word as CFString, range)?.takeRetainedValue() as String?
    }
}
/// Converts English words into `\textipa{...}` input, using the pronunciation symbols found
/// in the WISDOM English-Japanese dictionary.
class TipaConverter {
    /// `\textipa` input for every pronunciation symbol that needs translating. Symbols that are
    /// not listed are copied to the output unchanged.
    ///
    /// Accented vowels are written as base letter + combining mark (U+0301 acute, U+0300 grave).
    /// `String` keys compare by canonical equivalence, so precomposed forms match as well.
    private static let tipaTable: [String: String] = [
        // Marks
        "ː": ":",
        "ˈ": "\"",
        "ˌ": "\"\"",
        // Consonants
        "ʃ": "S",
        "ʒ": "Z",
        "θ": "T",
        "ð": "D",
        "ŋ": "N",
        "ɡ": "g",
        "ʔ": "P",
        "ɾ": "\\textfishhookr ",
        // ə
        "ə\u{0301}": "\\'@",
        "ə\u{0300}": "\\`@",
        "ə": "@",
        // e
        "e\u{0301}": "\\'e",
        "e\u{0300}": "\\`e",
        // u
        "u\u{0301}": "\\'u",
        "u\u{0300}": "\\`u",
        // ʌ
        "ʌ\u{0301}": "\\'2",
        "ʌ\u{0300}": "\\`2",
        "ʌ": "2",
        // ʊ
        "ʊ\u{0301}": "\\'U",
        "ʊ\u{0300}": "\\`U",
        "ʊ": "U",
        // o
        "o\u{0301}": "\\'o",
        "o\u{0300}": "\\`o",
        // ɔ
        "ɔ\u{0301}": "\\'O",
        "ɔ\u{0300}": "\\`O",
        "ɔ": "O",
        // ɪ
        "ɪ\u{0301}": "\\'I",
        "ɪ\u{0300}": "\\`I",
        "ɪ": "I",
        // i (the accent goes on \i)
        "i\u{0301}": "\\'\\i ",
        "i\u{0300}": "\\`\\i ",
        // æ
        "æ\u{0301}": "\\'\\ae ",
        "æ\u{0300}": "\\`\\ae ",
        "æ": "\\ae ",
        // ɑ
        "ɑ\u{0301}": "\\'A",
        "ɑ\u{0300}": "\\`A",
        "ɑ": "A",
        // ɒ
        "ɒ\u{0301}": "\\'6",
        "ɒ\u{0300}": "\\`6",
        "ɒ": "6",
        // a
        "a\u{0301}": "\\'a",
        "a\u{0300}": "\\`a",
        // ɜ
        "ɜ\u{0301}": "\\'3",
        "ɜ\u{0300}": "\\`3",
        "ɜ": "3",
        // ɛ
        "ɛ\u{0301}": "\\'E",
        "ɛ\u{0300}": "\\`E",
        "ɛ": "E",
    ]

    /// Looks `word` up in the WISDOM dictionary and extracts its pronunciation symbols.
    ///
    /// - Parameter word: The word to look up.
    /// - Returns: The pronunciation with the global settings (`parenTreatment`,
    ///   `stressMonosyllabicWord`, `removeSecondStress`) applied, or `nil` when the lookup fails
    ///   or the definition contains no pronunciation section.
    func lookUpPronunciationSymbolsInWisdom(_ word: String) -> String? {
        let dicName = "Sanseido The WISDOM English-Japanese Japanese-English Dictionary.dictionary"
        // The pronunciation runs from the first "|" up to the next "|", "/" or ",".
        let wisdomRegex = "\\|.*?[\\|/,]"
        guard let content = DictionaryServiceManager().lookUp(word, inDictionary: dicName),
              let match = content.range(of: wisdomRegex, options: .regularExpression) else {
            return nil
        }

        // Strip the delimiter at each end of the match.
        let startIndex = content.index(after: match.lowerBound)
        let endIndex = content.index(before: match.upperBound)
        var pron = String(content[startIndex..<endIndex])

        // Trim leading and trailing whitespace.
        pron.replaceFirstOccurrence(of: "^\\s*", replace: { _ in "" })
        pron.replaceFirstOccurrence(of: "\\s*$", replace: { _ in "" })

        switch parenTreatment {
        case .none:
            break
        case .flatten:
            pron.replaceAllOccurrences(of: "\\(|\\)", replace: { (_, _) in "" })
        case .remove:
            pron.replaceAllOccurrences(of: "\\(.*?\\)", replace: { (_, _) in "" })
        }

        return arrangePronunciation(pron)
    }

    /// Translates pronunciation symbols into the body of a `\textipa{...}` argument.
    ///
    /// - Parameter pronunciation: Pronunciation symbols; the text is canonically decomposed
    ///   and then processed one character at a time.
    /// - Returns: The translated text. Characters without a table entry are copied unchanged.
    func convertToTipa(_ pronunciation: String) -> String {
        var result = ""
        for cluster in pronunciation.decomposedStringWithCanonicalMapping.stringArray {
            var symbol = cluster
            let units = cluster as NSString
            // A dental mark (subscript bridge, U+032A) right after the base letter: emit
            // \textsubbridge and continue with the base letter alone.
            if units.length > 1 && units.character(at: 1) == 0x032A {
                symbol = NSString(format: "%C", units.character(at: 0)) as String
                result += "\\textsubbridge "
            }
            result += TipaConverter.tipaTable[symbol] ?? symbol
        }
        // Drop the space left after a macro when the next character is one of \ : ( ) ] /
        result.replaceAllOccurrences(of: " ([\\\\:\\(\\)\\]/])", replace: { (_, groups) in groups[0] })
        return result
    }

    /// Whether `pronunciation` contains a vowel carrying an acute accent (primary stress).
    private func stressIncluded(_ pronunciation: String) -> Bool {
        let stressedVowels = "ə́|é|ú|ʌ́|ʊ́|ó|ɔ́|ɪ́|í|ǽ|ɑ́|ɒ́|á|ɜ́|ɛ́".precomposedStringWithCanonicalMapping
        return pronunciation.precomposedStringWithCanonicalMapping
            .range(of: stressedVowels, options: .regularExpression) != nil
    }

    /// Applies the `stressMonosyllabicWord` and `removeSecondStress` settings.
    private func arrangePronunciation(_ pronunciation: String) -> String {
        var pron = pronunciation
        // Stress for monosyllabic words: when the pronunciation has no acute accent at all,
        // put one on its first vowel. (Presumably only monosyllabic entries lack a stress
        // mark in the dictionary; confirm against the data.)
        if stressMonosyllabicWord, !stressIncluded(pron) {
            pron.replaceFirstOccurrence(of: "[eɑɒʌaoɔɪiæuʊəɜɛ]", replace: { vowel in
                vowel == "i" ? "í" : vowel + "\u{0301}"
            })
        }
        // Remove the secondary stress: after canonical decomposition every grave accent is a
        // separate U+0300 scalar. All of them are removed, not just the first, so words with
        // more than one secondary stress are handled too.
        if removeSecondStress {
            var scalars = pron.decomposedStringWithCanonicalMapping.unicodeScalars
            scalars.removeAll { $0 == "\u{0300}" }
            pron = String(scalars)
        }
        return pron
    }

    /// Converts `word` into a complete `\textipa{...}` expression.
    ///
    /// - Parameters:
    ///   - word: The word to convert.
    ///   - enclose: Currently unused; the result is always wrapped in `\textipa{...}`.
    ///     Kept so existing call sites continue to compile.
    /// - Returns: The `\textipa{...}` expression, or `NOTFOUND` when no pronunciation is found.
    func wordToTipa(_ word: String, enclose: Bool = false) -> String {
        guard let pronunciation = lookUpPronunciationSymbolsInWisdom(word) else {
            return NOTFOUND
        }
        let result = convertToTipa(pronunciation)
        return "\\textipa{\(result)}"
    }
}
/// Prints the usage line and the list of supported options to standard output.
func showHelpMessage() {
    let helpLines = [
        "[Usage]",
        " word2tipa [options] word(s)...",
        "[Options]",
        " --flattenParen : flatten an enclosed symbol (e.g., (ə) → ə)",
        " --removeParen : remove an enclosed symbol (e.g., (ə))",
        " --stressMonosyllabicWord : e.g., /bɪɡ/ → /bɪ́ɡ/",
        " --removeSecondStress : e.g., /ɪ̀ntərfɪ́ər/ → /ɪntərfɪ́ər/",
    ]
    for line in helpLines {
        print(line)
    }
}
// MARK: - Main

// Consume the leading command-line options and override the default settings accordingly.
var arguments = CommandLine.arguments.dropFirst()
while let option = arguments.first, option.first == "-" {
    arguments = arguments.dropFirst()
    // Option names are matched case-insensitively.
    switch option.lowercased() {
    case "--flattenParen".lowercased():
        parenTreatment = .flatten
    case "--removeParen".lowercased():
        parenTreatment = .remove
    case "--stressMonosyllabicWord".lowercased():
        stressMonosyllabicWord = true
    case "--removeSecondStress".lowercased():
        removeSecondStress = true
    case "--help".lowercased():
        showHelpMessage()
        exit(0)
    default:
        print("[ERROR] unrecognized argument: \(option)\n")
        showHelpMessage()
        exit(1)
    }
}

// At least one word must remain once the options have been consumed.
if arguments.isEmpty {
    print("[ERROR] no arguments\n")
    showHelpMessage()
    exit(1)
}

// Convert every remaining word and print the results on one line, without a trailing newline.
let converter = TipaConverter()
let convertedWords = arguments.map { converter.wordToTipa($0) }
print(convertedWords.joined(separator: " "), terminator: "")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment