mrdavey/NSLinguisticTagger-iOS11.playground

## NSLinguisticTagger-iOS11.playground
//: Natural Language Processing in iOS 11, with the help of this post: https://medium.com/swiftworld/swift-world-whats-new-in-ios-11-natural-language-processing-2a16b7422334
// Requires Xcode 9 or later (the iOS 11 SDK).

import UIKit

// Enumeration options shared by every demo below: punctuation and whitespace tokens are omitted.
let options: NSLinguisticTagger.Options = [.omitWhitespace, .omitPunctuation]

// Sample English text that the taggers analyse.
let text = "Silicon Valley is a nickname for the southern portion of the San Francisco Bay Area, in the northern part of the U.S. state of California. In 2014, tech companies Google, Yahoo!, Facebook, Apple, and others, released corporate transparency reports that offered detailed employee breakdowns. Let's go running or walking to the shops."

// NSRange spanning the whole of `text`, measured in UTF-16 code units as the NSString-based APIs expect.
let range = NSRange(text.startIndex..<text.endIndex, in: text)

//
// Language Identification
//
// Identify the dominant language in a text
//

//let textZh = "WWDC 2017 已经结束了。"
//let taggerLanguage = NSLinguisticTagger(tagSchemes: [.language], options: 0)
//taggerLanguage.string = textZh
//
//if let language = taggerLanguage.dominantLanguage {
//    print(language)
//} else {
//    print("can't get dominant language")
//}


//
// Tokenization
//
// Tokenization is the process of demarcating and possibly classifying sections of a string of input characters.
// The resulting tokens are then passed on to some other form of processing. The process can be considered a
// sub-task of parsing input.
//

//let taggerToken = NSLinguisticTagger(tagSchemes: [.tokenType], options: 0)
//taggerToken.string = text
//taggerToken.enumerateTags(in: range, unit: .word, scheme: .tokenType, options: options) { tag, tokenRange, stop in
//    let token = (text as NSString).substring(with: tokenRange)
//    print("\(tag!.rawValue): \(token)")
//}


//
// Lemmatization
//
// Lemmatisation (or lemmatization) in linguistics is the process of grouping together the inflected forms of a word
// so they can be analysed as a single item, identified by the word's lemma, or dictionary form.
// E.g. 'walk', 'walked', 'walks', 'walking' all have a lemma of 'walk'
//

//let taggerLemma = NSLinguisticTagger(tagSchemes: [.lemma], options: 0)
//taggerLemma.string = text
//
//taggerLemma.enumerateTags(in: range, unit: .word, scheme: .lemma, options: options) { tag, tokenRange, stop in
//    if let lemma = tag?.rawValue { print(lemma) }
//}


//
// Name Type
//
// Named-entity recognition (NER) (also known as entity identification, entity chunking and entity extraction) is a subtask
// of information extraction that seeks to locate and classify named entities in text into pre-defined categories such as
// the names of persons, organizations, locations, expressions of times, quantities, monetary values, percentages, etc.
//

//let taggerNameType = NSLinguisticTagger(tagSchemes: [.nameType], options: 0)
//taggerNameType.string = text
//let tags: [NSLinguisticTag] = [.personalName, .placeName, .organizationName]
//
//taggerNameType.enumerateTags(in: range, unit: .word, scheme: .nameType, options: options) { tag, tokenRange, stop in
//    if let tag = tag, tags.contains(tag) {
//        let name = (text as NSString).substring(with: tokenRange)
//        print(name)
//    }
//}


//
// Lexical Class
//
// In grammar, a lexical category (also word class, lexical class, or in traditional grammar part of speech) is a linguistic
// category of words (or more precisely lexical items), which is generally defined by the syntactic or morphological behaviour
// of the lexical item in question. Common linguistic categories include noun and verb, among others.
//

// Tagger configured for the lexical-class scheme (noun, verb, ...), fed with the sample text.
let taggerLexical = NSLinguisticTagger(tagSchemes: [.lexicalClass], options: 0)
taggerLexical.string = text

// Walk the text word by word and print "<lexical class>: <word>" for each tagged token.
taggerLexical.enumerateTags(in: range, unit: .word, scheme: .lexicalClass, options: options) { tag, tokenRange, _ in
    // `tag` is optional; skip tokens that come back without one instead of
    // force-unwrapping, which would crash the playground.
    guard let tag = tag else { return }
    // `tokenRange` is an NSRange, so slice through NSString.
    let word = (text as NSString).substring(with: tokenRange)
    print("\(tag.rawValue): \(word)")
}
	//: Natural Language Processing in iOS 11, with the help of this post: https://medium.com/swiftworld/swift-world-whats-new-in-ios-11-natural-language-processing-2a16b7422334
	// Requires Xcode 9 or later (the iOS 11 SDK).

	import UIKit

	// Enumeration options shared by every demo below: punctuation and whitespace tokens are omitted.
	let options: NSLinguisticTagger.Options = [.omitWhitespace, .omitPunctuation]

	// Sample English text that the taggers analyse.
	let text = "Silicon Valley is a nickname for the southern portion of the San Francisco Bay Area, in the northern part of the U.S. state of California. In 2014, tech companies Google, Yahoo!, Facebook, Apple, and others, released corporate transparency reports that offered detailed employee breakdowns. Let's go running or walking to the shops."

	// NSRange spanning the whole of `text`, measured in UTF-16 code units as the NSString-based APIs expect.
	let range = NSRange(text.startIndex..<text.endIndex, in: text)

	//
	// Language Identification
	//
	// Identify the dominant language in a text
	//

	//let textZh = "WWDC 2017 已经结束了。"
	//let taggerLanguage = NSLinguisticTagger(tagSchemes: [.language], options: 0)
	//taggerLanguage.string = textZh
	//
	//if let language = taggerLanguage.dominantLanguage {
	// print(language)
	//} else {
	// print("can't get dominant language")
	//}



	//
	// Tokenization
	//
	// Tokenization is the process of demarcating and possibly classifying sections of a string of input characters.
	// The resulting tokens are then passed on to some other form of processing. The process can be considered a
	// sub-task of parsing input.
	//

	//let taggerToken = NSLinguisticTagger(tagSchemes: [.tokenType], options: 0)
	//taggerToken.string = text
	//taggerToken.enumerateTags(in: range, unit: .word, scheme: .tokenType, options: options) { tag, tokenRange, stop in
	// let token = (text as NSString).substring(with: tokenRange)
	// print("\(tag!.rawValue): \(token)")
	//}



	//
	// Lemmatization
	//
	// Lemmatisation (or lemmatization) in linguistics is the process of grouping together the inflected forms of a word
	// so they can be analysed as a single item, identified by the word's lemma, or dictionary form.
	// E.g. 'walk', 'walked', 'walks', 'walking' all have a lemma of 'walk'
	//

	//let taggerLemma = NSLinguisticTagger(tagSchemes: [.lemma], options: 0)
	//taggerLemma.string = text
	//
	//taggerLemma.enumerateTags(in: range, unit: .word, scheme: .lemma, options: options) { tag, tokenRange, stop in
	// if let lemma = tag?.rawValue { print(lemma) }
	//}



	//
	// Name Type
	//
	// Named-entity recognition (NER) (also known as entity identification, entity chunking and entity extraction) is a subtask
	// of information extraction that seeks to locate and classify named entities in text into pre-defined categories such as
	// the names of persons, organizations, locations, expressions of times, quantities, monetary values, percentages, etc.
	//

	//let taggerNameType = NSLinguisticTagger(tagSchemes: [.nameType], options: 0)
	//taggerNameType.string = text
	//let tags: [NSLinguisticTag] = [.personalName, .placeName, .organizationName]
	//
	//taggerNameType.enumerateTags(in: range, unit: .word, scheme: .nameType, options: options) { tag, tokenRange, stop in
	// if let tag = tag, tags.contains(tag) {
	// let name = (text as NSString).substring(with: tokenRange)
	// print(name)
	// }
	//}



	//
	// Lexical Class
	//
	// In grammar, a lexical category (also word class, lexical class, or in traditional grammar part of speech) is a linguistic
	// category of words (or more precisely lexical items), which is generally defined by the syntactic or morphological behaviour
	// of the lexical item in question. Common linguistic categories include noun and verb, among others.
	//

	// Tagger configured for the lexical-class scheme (noun, verb, ...), fed with the sample text.
	let taggerLexical = NSLinguisticTagger(tagSchemes: [.lexicalClass], options: 0)
	taggerLexical.string = text

	// Walk the text word by word and print "<lexical class>: <word>" for each tagged token.
	taggerLexical.enumerateTags(in: range, unit: .word, scheme: .lexicalClass, options: options) { tag, tokenRange, _ in
	    // `tag` is optional; skip tokens that come back without one instead of
	    // force-unwrapping, which would crash the playground.
	    guard let tag = tag else { return }
	    // `tokenRange` is an NSRange, so slice through NSString.
	    let word = (text as NSString).substring(with: tokenRange)
	    print("\(tag.rawValue): \(word)")
	}