Last active
May 24, 2017 11:58
-
-
Save luish/c90168d14d0c886e06790e36bf255ab8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Foundation | |
/// Namespace for tagging the words of a string with `NSLinguisticTagger`.
struct Linguistic {

    /// A token taken from the analysed text, paired with the tag assigned to it.
    typealias LinguisticTag = (token: String, tag: String)

    /// Tags the tokens of `text` using the
    /// `NSLinguisticTagSchemeNameTypeOrLexicalClass` scheme.
    ///
    /// The tagger is configured to omit whitespace and punctuation and to join
    /// multi-word names into a single token.
    ///
    /// - Parameters:
    ///   - text: The string to analyse.
    ///   - range: The part of `text` to tag, expressed in UTF-16 code units
    ///     (the indexing used by `NSString`/`NSRange`). When `nil`, the whole
    ///     string is tagged.
    ///   - language: The language whose available tag schemes are used to
    ///     create the tagger. Defaults to `"en"`.
    /// - Returns: One `(token, tag)` pair per enumerated token, in enumeration order.
    static func tags(text: String, range: NSRange? = nil, language: String = "en") -> [LinguisticTag] {
        let options: NSLinguisticTaggerOptions = [.OmitWhitespace, .OmitPunctuation, .JoinNames]
        let schemes = NSLinguisticTagger.availableTagSchemesForLanguage(language)
        let tagger = NSLinguisticTagger(tagSchemes: schemes, options: Int(options.rawValue))
        tagger.string = text

        // NSRange is measured in UTF-16 code units. Counting Characters
        // (grapheme clusters) would produce a range that is too short for text
        // containing emoji or combining sequences, cutting off trailing tokens.
        let range = range ?? NSRange(location: 0, length: text.utf16.count)
        let scheme = NSLinguisticTagSchemeNameTypeOrLexicalClass

        var tags = [LinguisticTag]()
        tagger.enumerateTagsInRange(range, scheme: scheme, options: options) { tag, tokenRange, _, _ in
            // tokenRange is an NSRange, so the NSRange-based substring helper is used.
            let token = text.substringWithRange(tokenRange)
            tags.append(LinguisticTag(token: token, tag: tag))
        }
        return tags
    }
}
/// File-private conveniences that forward `NSRange`-based calls to `NSString`.
private extension String {

    /// Returns the substring covered by `nsRange`, as computed by `NSString`.
    ///
    /// - Parameter nsRange: The range to extract, in `NSString` indexing.
    /// - Returns: The characters of the receiver that lie within `nsRange`.
    func substringWithRange(nsRange: NSRange) -> String {
        let bridged = string
        return bridged.substringWithRange(nsRange)
    }

    /// Returns the linguistic tags `NSString` reports for the given range.
    ///
    /// No orthography is supplied and token ranges are not collected.
    ///
    /// - Parameters:
    ///   - nsRange: The range to analyse, in `NSString` indexing.
    ///   - scheme: The name of the tag scheme to apply.
    ///   - options: The tagger options to use.
    /// - Returns: The tags produced for the tokens in `nsRange`.
    func linguisticTagsInRange(nsRange: NSRange, scheme schemeName: String, options taggerOptions: NSLinguisticTaggerOptions) -> [String] {
        let bridged = string
        return bridged.linguisticTagsInRange(nsRange,
                                             scheme: schemeName,
                                             options: taggerOptions,
                                             orthography: nil,
                                             tokenRanges: nil)
    }

    /// The receiver bridged to `NSString`.
    private var string: NSString {
        get {
            return self as NSString
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Example 1: tag a whole sentence. Multi-word names come back as single tokens.
Linguistic.tags("Mary Jane, please, could you tell me if that bus is going to Los Angeles or to San Francisco?")
// [("Mary Jane", "PersonalName"), ("please", "Interjection"),
//  ("could", "Verb"), ("you", "Pronoun"),
//  ("tell", "Verb"), ("me", "Pronoun"),
//  ("if", "Preposition"), ("that", "Determiner"),
//  ("bus", "Noun"), ("is", "Verb"), ("going", "Verb"),
//  ("to", "Preposition"), ("Los Angeles", "PlaceName"),
//  ("or", "Conjunction"), ("to", "Preposition"), ("San Francisco", "PlaceName")]

// Example 2: a shorter question.
Linguistic.tags("What is the weather like today in San Francisco?")
// [("What", "Pronoun"), ("is", "Verb"), ("the", "Determiner"), ("weather", "Noun"),
//  ("like", "Preposition"), ("today", "Noun"), ("in", "Preposition"), ("San Francisco", "PlaceName")]

// Example 3: restrict tagging to a sub-range (location 5, length 2 covers "is").
Linguistic.tags("What is the weather in San Francisco?", range: NSRange(location: 5, length: 2))
// [("is", "Verb")]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Place names: keep only the tokens that were tagged "PlaceName".
let tags = Linguistic.tags("Mary Jane, please, could you tell me if that bus is going to Los Angeles or to San Francisco?")
let places = tags
    .filter { tagged in tagged.tag == "PlaceName" }
    .map { tagged in tagged.token }
// ["Los Angeles", "San Francisco"]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment