Skip to content

Instantly share code, notes, and snippets.

@benbahrenburg
Last active July 6, 2017 14:15
Embed
What would you like to do?
Lemmatize strings
/// A word found in a piece of text, paired with the stem (lemma) reported for it.
///
/// The lowerCamelCase type name is kept as-is so existing callers keep compiling.
public struct wordToken {
    /// The word exactly as it was passed to the initializer.
    public let word: String

    /// The stem (lemma) associated with `word`, or `nil` when none was supplied.
    public let wordStem: String?

    /// Creates a token from a word and its optional stem.
    ///
    /// The initializer is `public` so the public type can actually be
    /// constructed from outside the defining module.
    ///
    /// - Parameters:
    ///   - word: The word as it appears in the text.
    ///   - wordStem: The stem (lemma) for `word`, or `nil` if unknown.
    public init(word: String, wordStem: String?) {
        self.word = word
        self.wordStem = wordStem
    }
}
/// Splits `text` into tokens and pairs each one with the lemma tag reported
/// by `NSLinguisticTagger`.
///
/// The input is lowercased before tagging, so each returned `word` is a
/// substring of the lowercased text. Whitespace, punctuation and "other"
/// tokens are omitted from the enumeration.
///
/// - Parameter text: The text to analyze. The tagger is configured with the
///   tag schemes available for English (`"en"`).
/// - Returns: One `wordToken` per enumerated token, in enumeration order.
func lemmatize(_ text: String) -> [wordToken] {
    let text = text.lowercased()
    // Bridge once: the tagger reports ranges against the NSString view of the text.
    let nsText = text as NSString
    let options: NSLinguisticTagger.Options = [.omitWhitespace, .omitPunctuation, .omitOther]
    let tagger = NSLinguisticTagger(tagSchemes: NSLinguisticTagger.availableTagSchemes(forLanguage: "en"),
                                    options: Int(options.rawValue))
    tagger.string = text
    var tokens: [wordToken] = []
    // NSRange is measured in UTF-16 code units. Using the Character count here
    // would produce a range that is too short whenever the text contains emoji
    // or other characters that occupy more than one UTF-16 unit, silently
    // dropping the tail of the text.
    let fullRange = NSRange(location: 0, length: nsText.length)
    tagger.enumerateTags(in: fullRange, scheme: NSLinguisticTagSchemeLemma, options: options) { tag, tokenRange, _, _ in
        let word = nsText.substring(with: tokenRange)
        tokens.append(wordToken(word: word, wordStem: tag))
    }
    return tokens
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment