Skip to content

Instantly share code, notes, and snippets.

@luish
Last active May 24, 2017 11:58
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save luish/c90168d14d0c886e06790e36bf255ab8 to your computer and use it in GitHub Desktop.
Save luish/c90168d14d0c886e06790e36bf255ab8 to your computer and use it in GitHub Desktop.
import Foundation
/// Namespace for part-of-speech and named-entity tagging built on `NSLinguisticTagger`.
struct Linguistic {

    /// A token taken from the analysed text, paired with the tag assigned to it.
    typealias LinguisticTag = (token: String, tag: String)

    /// Tags the tokens of `text` using the name-type-or-lexical-class scheme.
    ///
    /// Whitespace and punctuation tokens are omitted, and names are joined
    /// (the tagger is configured with `.OmitWhitespace`, `.OmitPunctuation` and `.JoinNames`).
    ///
    /// - parameter text: The string to analyse.
    /// - parameter range: The part of `text` to analyse, expressed in UTF-16 code units
    ///   (the unit `NSRange` uses). Defaults to the whole string.
    /// - parameter language: Language identifier used to look up the available tag schemes.
    ///   Defaults to `"en"`.
    /// - returns: The `(token, tag)` pairs, in the order the tagger enumerates them.
    static func tags(text: String, range: NSRange? = nil, language: String = "en") -> [LinguisticTag] {
        let options: NSLinguisticTaggerOptions = [.OmitWhitespace, .OmitPunctuation, .JoinNames]
        let schemes = NSLinguisticTagger.availableTagSchemesForLanguage(language)
        let tagger = NSLinguisticTagger(tagSchemes: schemes, options: Int(options.rawValue))
        tagger.string = text

        // NSRange is measured in UTF-16 code units, not in Characters. Counting Characters
        // under-measures any text containing characters outside the BMP (e.g. emoji), which
        // would cut the default range short and drop the trailing tokens.
        let wholeText = NSRange(location: 0, length: text.utf16.count)
        let searchRange = range ?? wholeText

        let scheme = NSLinguisticTagSchemeNameTypeOrLexicalClass
        var tags = [LinguisticTag]()
        tagger.enumerateTagsInRange(searchRange, scheme: scheme, options: options) { tag, tokenRange, _, _ in
            let token = text.substringWithRange(tokenRange)
            tags.append(LinguisticTag(token: token, tag: tag))
        }
        return tags
    }
}
/// File-private helpers that forward `NSRange`-based calls to the `NSString` form of a `String`.
private extension String {

    /// The receiver cast to `NSString`.
    private var string: NSString {
        return self as NSString
    }

    /// Returns the substring covered by `range`, as computed by `NSString.substringWithRange`.
    ///
    /// - parameter range: The range to extract.
    /// - returns: The extracted substring.
    func substringWithRange(range: NSRange) -> String {
        let bridged = string
        return bridged.substringWithRange(range)
    }

    /// Returns the linguistic tags found in `range` for the given scheme and options.
    ///
    /// The call is forwarded to `NSString` with no orthography and without collecting token ranges.
    ///
    /// - parameter range: The range to analyse.
    /// - parameter tagScheme: The tag scheme to apply.
    /// - parameter opts: Tagger options controlling which tokens are reported.
    /// - returns: The tags reported for `range`.
    func linguisticTagsInRange(range: NSRange, scheme tagScheme: String, options opts: NSLinguisticTaggerOptions) -> [String] {
        let bridged = string
        let foundTags = bridged.linguisticTagsInRange(
            range,
            scheme: tagScheme,
            options: opts,
            orthography: nil,
            tokenRanges: nil
        )
        return foundTags
    }
}
// MARK: - Usage examples

// Sentence shared by the first example and the place-name example below.
let busQuestion = "Mary Jane, please, could you tell me if that bus is going to Los Angeles or to San Francisco?"

// Tag a whole sentence.
Linguistic.tags(busQuestion)
// [("Mary Jane", "PersonalName"), ("please", "Interjection"),
//  ("could", "Verb"), ("you", "Pronoun"),
//  ("tell", "Verb"), ("me", "Pronoun"),
//  ("if", "Preposition"), ("that", "Determiner"),
//  ("bus", "Noun"), ("is", "Verb"), ("going", "Verb"),
//  ("to", "Preposition"), ("Los Angeles", "PlaceName"),
//  ("or", "Conjunction"), ("to", "Preposition"), ("San Francisco", "PlaceName")]

// A second sentence.
Linguistic.tags("What is the weather like today in San Francisco?")
// [("What", "Pronoun"), ("is", "Verb"), ("the", "Determiner"), ("weather", "Noun"),
//  ("like", "Preposition"), ("today", "Noun"), ("in", "Preposition"), ("San Francisco", "PlaceName")]

// Restrict tagging to a sub-range: location 5, length 2 covers the word "is".
Linguistic.tags("What is the weather in San Francisco?", range: NSRange(location: 5, length: 2))
// [("is", "Verb")]

// Place names: keep only the tokens tagged "PlaceName".
let tags = Linguistic.tags(busQuestion)
let places = tags
    .filter { entry in entry.tag == "PlaceName" }
    .map { entry in entry.token }
// ["Los Angeles", "San Francisco"]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment