Last active November 29, 2018 16:32
Create your own Siri in Swift
import UIKit
import Speech
/// View controller that listens to the microphone, transcribes speech with the
/// Speech framework and displays the transcription as colored, tagged text.
class SiriViewController: UIViewController {
/// Locale shared by the speech recognizer and the speech-synthesis voice.
private static let locale = Locale(identifier: "es-ES")
/// Recognizer for `locale`. The initializer is failable and is force-unwrapped here,
/// so an unsupported locale would crash when the controller is created.
private let speechRecognizer = SFSpeechRecognizer(locale: SiriViewController.locale)!
/// Request for the current recognition session; created in `startRecording()`
/// and reset to `nil` when the session ends.
private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
/// Task for the current recognition session; `nil` when no session is active.
private var recognitionTask: SFSpeechRecognitionTask?
/// Audio engine whose input node is tapped while recording.
private let audioEngine = AVAudioEngine()
/// Label showing the prompt text and the tagged transcription.
@IBOutlet var label : UILabel!
/// Button that triggers `recordButtonTapped()`; its title is updated as recording starts and ends.
@IBOutlet var recordButton : UIButton!
/// Requests speech-recognition permission whenever the view is about to appear
/// and enables the record button only if the user has granted it.
///
/// - Parameter animated: Forwarded unchanged to `super.viewWillAppear(_:)`.
override public func viewWillAppear(_ animated: Bool) {
    // UIKit expects overrides of this method to call through to super.
    super.viewWillAppear(animated)

    // Weak capture: the permission prompt can outlive this controller.
    SFSpeechRecognizer.requestAuthorization { [weak self] status in
        let isAuthorized = (status == .authorized)
        // The callback may arrive off the main thread, so hop to the main
        // queue before touching UIKit.
        OperationQueue.main.addOperation {
            self?.recordButton.isEnabled = isAuthorized
        }
    }
}
/// Starts a live dictation session.
///
/// Cancels any task still in flight, switches the audio session to recording,
/// creates a buffer-based recognition request with partial results enabled,
/// feeds microphone buffers into it through a tap on the engine's input node,
/// and shows each transcription in `label` via `tag(_:)`.
///
/// - Throws: Any error raised while configuring the audio session or while
///   starting the audio engine. If the engine fails to start, the tap and the
///   recognition task created here are torn down before the error is rethrown.
private func startRecording() throws {
    // Cancel a still-running task instead of merely dropping the reference,
    // otherwise it keeps running in the background.
    if let runningTask = recognitionTask {
        runningTask.cancel()
        recognitionTask = nil
    }

    try setAudioSessionCategory(AVAudioSessionCategoryRecord)

    // A local constant avoids unwrapping the optional property right after assigning it.
    let request = SFSpeechAudioBufferRecognitionRequest()
    request.shouldReportPartialResults = true
    request.taskHint = .dictation
    recognitionRequest = request

    let inputNode = audioEngine.inputNode
    recognitionTask = speechRecognizer.recognitionTask(with: request) { [weak self] result, error in
        guard let strongSelf = self else { return }

        var isFinal = false
        if let result = result {
            isFinal = result.isFinal
            let transcription = result.bestTranscription.formattedString
            strongSelf.label.attributedText = strongSelf.tag(transcription)
        }

        if error != nil || isFinal {
            // Stop capturing so `audioEngine.isRunning` reflects that the session is over.
            strongSelf.audioEngine.stop()
            inputNode.removeTap(onBus: 0)
            strongSelf.recognitionRequest = nil
            strongSelf.recognitionTask = nil
            strongSelf.recordButton.setTitle("¡Oye Siri!", for: [])
        }
    }

    let recordingFormat = inputNode.outputFormat(forBus: 0)
    inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { buffer, _ in
        // Without this the recognizer never receives any audio.
        request.append(buffer)
    }

    audioEngine.prepare()
    do {
        try audioEngine.start()
    } catch {
        // Leave no tap or task behind: a later call would otherwise try to
        // install a second tap on the same bus.
        inputNode.removeTap(onBus: 0)
        recognitionTask?.cancel()
        recognitionTask = nil
        recognitionRequest = nil
        throw error
    }
    label.text = "Cuéntame..."
}
/// Puts the shared audio session into `category` with the measurement mode
/// and activates it.
///
/// - Parameter category: An `AVAudioSession` category identifier; the callers
///   in this file pass `AVAudioSessionCategoryRecord` or `AVAudioSessionCategoryPlayback`.
/// - Throws: Whatever error the audio session reports for any of the three calls.
func setAudioSessionCategory(_ category: String) throws {
    let session = AVAudioSession.sharedInstance()

    // Category first, then mode, then activation — same order as before.
    try session.setCategory(category)
    try session.setMode(AVAudioSessionModeMeasurement)
    try session.setActive(true, with: .notifyOthersOnDeactivation)
}
/// Toggles dictation when the record button is tapped.
///
/// While the audio engine is running, the tap stops capture and marks the end
/// of audio on the current request. Otherwise it starts a new recording and
/// updates the button title only if the start succeeded.
@IBAction func recordButtonTapped() {
    if audioEngine.isRunning {
        // Stop capturing and tell the recognizer no more audio is coming so
        // it can finish the current request.
        audioEngine.stop()
        recognitionRequest?.endAudio()
        return
    }

    do {
        try startRecording()
        recordButton.setTitle("¿Que he dicho?", for: [])
    } catch {
        // Surface the failure instead of silently showing a "recording" title.
        label.text = error.localizedDescription
    }
}
/// Builds a styled version of `text` in which each token is replaced by its
/// lemma (when one is available), colored by its lexical class or name type,
/// and sized by how many times it occurs.
///
/// - Parameter text: The sentence to analyse.
/// - Returns: The attributed string produced by `compose(_:bagOfWords:lemmas:colors:)`.
func tag(_ text: String) -> NSAttributedString {
    let tagger = NSLinguisticTagger(tagSchemes: [.lemma, .nameTypeOrLexicalClass], options: 0)
    tagger.string = text

    // The tagger reports NSRange values, so work on the NSString view of the text.
    let source = text as NSString
    let range = NSRange(location: 0, length: source.length)

    var words = [String]()
    var bagOfWords = [String: CGFloat]()
    var lemmas = [String: String]()
    tagger.enumerateTags(in: range, unit: .word, scheme: .lemma, options: []) { tag, tokenRange, _ in
        let word = source.substring(with: tokenRange)
        // Record every token in order; without this `compose` has nothing to render.
        // NOTE(review): with no omit options, whitespace and punctuation tokens are
        // presumably enumerated as well, which keeps the original spacing — confirm.
        words.append(word)
        lemmas[word] = tag?.rawValue
        bagOfWords[word, default: 0] += 1
    }

    var colors = [String: UIColor]()
    tagger.enumerateTags(in: range, unit: .word, scheme: .nameTypeOrLexicalClass, options: []) { tag, tokenRange, _ in
        let word = source.substring(with: tokenRange)
        colors[word] = tag?.colorValue
    }

    return compose(words, bagOfWords: bagOfWords, lemmas: lemmas, colors: colors)
}
/// Concatenates `words` into one attributed string.
///
/// - Parameters:
///   - words: Tokens in display order.
///   - bagOfWords: Occurrence count per token; added to a base font size of 15.
///   - lemmas: Replacement text per token; the token itself is used when absent.
///   - colors: Foreground color per token; black when absent.
/// - Returns: The styled tokens joined in order, with no extra separators inserted.
private func compose(_ words: [String], bagOfWords: [String: CGFloat], lemmas: [String: String], colors: [String: UIColor]) -> NSAttributedString {
    let result = NSMutableAttributedString()
    for word in words {
        let fontSize = 15 + (bagOfWords[word] ?? 0)
        let attributedWord = NSAttributedString(string: lemmas[word] ?? word,
                                                attributes: [.foregroundColor: colors[word] ?? UIColor.black,
                                                             .font: UIFont.systemFont(ofSize: fontSize)])
        // Append the styled token; otherwise the result is always empty.
        result.append(attributedWord)
    }
    return result
}
/// Synthesizer owned by the controller rather than created per call, so it is
/// not released when `read(_:)` returns while speech may still be in progress.
private let speechSynthesizer = AVSpeechSynthesizer()

/// Speaks `text` aloud using a voice for the controller's locale.
///
/// - Parameter text: The text to speak.
/// - Throws: Any error raised while switching the audio session to playback.
func read(_ text: String) throws {
    try setAudioSessionCategory(AVAudioSessionCategoryPlayback)

    let speechUtterance = AVSpeechUtterance(string: text)
    speechUtterance.voice = AVSpeechSynthesisVoice(language: SiriViewController.locale.identifier)

    // Hand the utterance to the synthesizer; without this nothing is spoken.
    speechSynthesizer.speak(speechUtterance)
}
extension NSLinguisticTag {
    /// Colors assigned to the lexical classes and name types that get a
    /// dedicated color; every other tag falls back to black.
    private static let tagColors: [NSLinguisticTag: UIColor] = [
        .noun: .red,
        .verb: .green,
        .adjective: .blue,
        .adverb: .cyan,
        .pronoun: .yellow,
        .determiner: .magenta,
        .particle: .gray,
        .preposition: .darkGray,
        .number: .lightGray,
        .personalName: .orange,
        .placeName: .purple,
        .organizationName: .brown
    ]

    /// Display color for this tag: a fixed color for the listed lexical
    /// classes and name types, black for anything else.
    public var colorValue: UIColor {
        if let color = NSLinguisticTag.tagColors[self] {
            return color
        }
        return .black
    }
}
