Skip to content

Instantly share code, notes, and snippets.

@gavi
Created June 8, 2017 13:09
Show Gist options
  • Save gavi/0b5e130f203fb64cd81207394d0b36ee to your computer and use it in GitHub Desktop.
Save gavi/0b5e130f203fb64cd81207394d0b36ee to your computer and use it in GitHub Desktop.
NSLinguisticTagger updates in Foundation for Swift 4
import Foundation
// Sample input that mixes English, Japanese and Cyrillic text so that the
// tagger has more than one language/script to work with.
let str = """
This is some text that needs to be processed. I do not know how fast this runs?
日本,
Лорем ипсум долор сит амет, перпетуа урбанитас ин про, проприае цонсететур ид сит
"""

// A single tagger configured with every scheme that is enumerated further down.
let tagger = NSLinguisticTagger(tagSchemes: [.lemma, .language, .lexicalClass], options: 0)
tagger.string = str

// NSRange counts UTF-16 code units, so the length comes from the UTF-16 view
// rather than from the Character count.
let range = NSRange(location: 0, length: str.utf16.count)

// `dominantLanguage` is optional; print the BCP-47 "undetermined" tag instead
// of crashing on a force-unwrap when no language is reported.
print(tagger.dominantLanguage ?? "und")
/// Prints every word of the shared sample text together with the tag the
/// shared tagger assigns to it under `scheme`.
///
/// For each word a `word:<lowercased token>` line is printed; when the tagger
/// supplies a tag, a `<scheme raw value>:<lowercased tag>` line follows.
/// Punctuation and whitespace tokens are skipped.
///
/// - Parameter scheme: The tag scheme to evaluate for each word.
func enumerate(scheme:NSLinguisticTagScheme){
    let skipped: NSLinguisticTagger.Options = [.omitPunctuation, .omitWhitespace]
    // Bridge once so token ranges (UTF-16 based) can be sliced directly.
    let text = str as NSString

    tagger.enumerateTags(in: range, unit: .word, scheme: scheme, options: skipped) { wordTag, wordRange, _ in
        let word = text.substring(with: wordRange)
        print("word:\(word.lowercased())")

        // Not every token carries a tag for every scheme.
        guard let label = wordTag?.rawValue else { return }
        print("\(scheme.rawValue):\(label.lowercased())")
    }
}
// Run one full pass over the text per scheme: lexical class first, then
// lemma, then language.
for scheme: NSLinguisticTagScheme in [.lexicalClass, .lemma, .language] {
    enumerate(scheme: scheme)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment