Skip to content

Instantly share code, notes, and snippets.

@uhooi
Last active March 17, 2021 12:56
Show Gist options
  • Save uhooi/ee461131f0820ca591c0bc29af8e0103 to your computer and use it in GitHub Desktop.
Save uhooi/ee461131f0820ca591c0bc29af8e0103 to your computer and use it in GitHub Desktop.
Morphological Analysis with Swift
// ref: https://developer.apple.com/documentation/foundation/nslinguistictagger
// ref: https://developer.apple.com/documentation/foundation/nslinguistictagger/1410036-enumeratetags
// ref: https://dev.classmethod.jp/articles/ios10-morphological-analysis-from-speechrecognizer/
import Foundation
/// Tags every token of `text` with `scheme` and prints one `<token>: <tag>` line
/// for each token that received a tag.
///
/// The tagger is created with the tag schemes that
/// `NSLinguisticTagger.availableTagSchemes(forLanguage:)` reports for `"ja"`.
///
/// - Parameters:
///   - text: The string to analyze.
///   - scheme: The tag scheme to enumerate (for example `.tokenType`).
private func analyzeText(_ text: String, scheme: NSLinguisticTagScheme) {
    // NSLinguisticTagger works with NSRange, i.e. UTF-16 offsets. `text.count`
    // counts Characters, which is smaller than the UTF-16 length whenever the
    // text contains emoji or other multi-unit characters, so using it would
    // silently cut the analysis short. Take the length from the NSString view.
    let nsText = text as NSString
    let tagger = NSLinguisticTagger(
        tagSchemes: NSLinguisticTagger.availableTagSchemes(forLanguage: "ja"),
        options: 0
    )
    tagger.string = text
    tagger.enumerateTags(
        in: NSRange(location: 0, length: nsText.length),
        scheme: scheme,
        options: [.omitWhitespace]
    ) { tag, tokenRange, _, _ in
        // Tokens without a tag for this scheme are skipped.
        guard let tag = tag else { return }
        print("\(nsText.substring(with: tokenRange)): \(tag.rawValue)")
    }
}
// Run the sample sentence through both schemes, in this order:
// - .tokenType: morphological analysis
// - .lexicalClass: part-of-speech breakdown // !!!: not supported for Japanese
for scheme in [NSLinguisticTagScheme.tokenType, .lexicalClass] {
    analyzeText("私はウホーイです。あなたは誰ですか?", scheme: scheme)
}
私: Word
は: Word
ウ: Word
ホーイ: Word
です: Word
。: Punctuation
あなた: Word
は: Word
誰: Word
です: Word
か: Word
?: Punctuation
import NaturalLanguage
/// Splits `text` into word-level tokens and returns them as strings.
///
/// Punctuation and whitespace tokens are omitted. The tagger is created with
/// the word-unit tag schemes that `NLTagger.availableTagSchemes(for:language:)`
/// reports for Japanese.
///
/// - Parameters:
///   - text: The string to tokenize.
///   - scheme: The tag scheme used to drive the enumeration.
/// - Returns: The token substrings, in enumeration order.
private func analyzeText(_ text: String, scheme: NLTagScheme) -> [String] {
    let japaneseWordSchemes = NLTagger.availableTagSchemes(for: .word, language: .japanese)
    let tagger = NLTagger(tagSchemes: japaneseWordSchemes)
    tagger.string = text

    var tokens: [String] = []
    tagger.enumerateTags(
        in: text.startIndex..<text.endIndex,
        unit: .word,
        scheme: scheme,
        options: [.omitPunctuation, .omitWhitespace]
    ) { _, range in
        // Only the token text is collected; the tag itself is ignored.
        tokens.append(String(text[range]))
        return true // keep enumerating
    }
    return tokens
}
// Tokenize the sample sentence and print the resulting array of token strings.
let sampleTokens = analyzeText("私はウホーイです。あなたは誰ですか?", scheme: .tokenType)
print(sampleTokens)
["私", "は", "ウ", "ホーイ", "です", "あなた", "は", "誰", "です", "か"]
// ref: https://developer.apple.com/documentation/naturallanguage
// ref: https://developer.apple.com/documentation/naturallanguage/identifying_parts_of_speech
// ref: https://developer.apple.com/forums/thread/669890
// ref: https://dev.classmethod.jp/articles/ios12-natural-language-tokenizing/
// ref: https://dev.classmethod.jp/articles/ios12-natural-language-identifying-parts-of-speech/
import NaturalLanguage
/// Tags every word-level token of `text` with `scheme` and prints one
/// `<token>: <tag>` line for each token that received a tag.
///
/// Whitespace tokens are omitted; punctuation is kept (`.omitPunctuation` is
/// intentionally not set).
///
/// - Parameters:
///   - text: The string to analyze.
///   - scheme: The tag scheme to enumerate (for example `.tokenType`).
private func analyzeText(_ text: String, scheme: NLTagScheme) {
    let japaneseWordSchemes = NLTagger.availableTagSchemes(for: .word, language: .japanese)
    let tagger = NLTagger(tagSchemes: japaneseWordSchemes)
    tagger.string = text

    tagger.enumerateTags(
        in: text.startIndex..<text.endIndex,
        unit: .word,
        scheme: scheme,
        options: [.omitWhitespace]
    ) { tag, range in
        // Untagged tokens are skipped, but enumeration continues.
        guard let tag = tag else { return true }
        print("\(text[range]): \(tag.rawValue)")
        return true
    }
}
// Run the sample sentence through both schemes, in this order:
// - .tokenType: morphological analysis
// - .lexicalClass: part-of-speech breakdown // !!!: not supported for Japanese
for scheme in [NLTagScheme.tokenType, .lexicalClass] {
    analyzeText("私はウホーイです。あなたは誰ですか?", scheme: scheme)
}
私: Word
は: Word
ウ: Word
ホーイ: Word
です: Word
。: Punctuation
あなた: Word
は: Word
誰: Word
です: Word
か: Word
?: Punctuation
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment