Create your own Siri in Swift | Lil ‘Bits | https://www.youtube.com/watch?v=Sigl3dihEB8
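Note: to run this on a device, the host app's Info.plist is assumed to declare NSSpeechRecognitionUsageDescription and NSMicrophoneUsageDescription; without those entries the authorization request and the microphone tap below will terminate the app.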
import UIKit
import Speech
import AVFoundation

class SiriViewController: UIViewController {

    // Spanish (Spain) is used for both recognition and synthesis.
    private static let locale = Locale(identifier: "es-ES")

    private let speechRecognizer = SFSpeechRecognizer(locale: SiriViewController.locale)!
    private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
    private var recognitionTask: SFSpeechRecognitionTask?
    private let audioEngine = AVAudioEngine()
    // Held as a property: a synthesizer created locally in `read(_:)` could be
    // deallocated before it finishes speaking.
    private let speechSynthesizer = AVSpeechSynthesizer()

    @IBOutlet var label: UILabel!
    @IBOutlet var recordButton: UIButton!

    override public func viewWillAppear(_ animated: Bool) {
        super.viewWillAppear(animated)
        // Ask for speech-recognition permission and start listening right away.
        SFSpeechRecognizer.requestAuthorization { status in
            if status == .authorized {
                // The callback may arrive off the main queue; hop back before
                // touching UI or the audio engine.
                OperationQueue.main.addOperation {
                    self.recordButtonTapped()
                }
            }
        }
    }
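    // Only .authorized is handled above; a minimal sketch for the remaining
    // statuses (assuming we simply want to grey out the button) might be:
    //
    //     switch status {
    //     case .authorized: self.recordButtonTapped()
    //     default: self.recordButton.isEnabled = false
    //     }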
    private func startRecording() throws {
        // Cancel any in-flight recognition before starting a new one.
        if let recognitionTask = recognitionTask {
            recognitionTask.cancel()
            self.recognitionTask = nil
        }

        try setAudioSessionCategory(AVAudioSessionCategoryRecord)

        recognitionRequest = SFSpeechAudioBufferRecognitionRequest()
        guard let recognitionRequest = recognitionRequest else {
            return
        }
        recognitionRequest.shouldReportPartialResults = true
        recognitionRequest.taskHint = .dictation

        let inputNode = audioEngine.inputNode

        recognitionTask = speechRecognizer.recognitionTask(with: recognitionRequest) { result, error in
            var isFinal = false
            var transcription = ""
            if let result = result {
                isFinal = result.isFinal
                transcription = result.bestTranscription.formattedString
                // Re-render each (partial) transcription with lemmas and colors.
                self.label.attributedText = self.tag(transcription)
            }
            if error != nil || isFinal {
                // Tear everything down and read the final transcription aloud.
                self.audioEngine.stop()
                inputNode.removeTap(onBus: 0)
                self.recognitionRequest = nil
                self.recognitionTask = nil
                try? self.read(transcription)
                self.recordButton.setTitle("¡Oye Siri!", for: []) // "Hey Siri!"
            }
        }

        // Feed microphone buffers into the recognition request.
        let recordingFormat = inputNode.outputFormat(forBus: 0)
        inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { (buffer: AVAudioPCMBuffer, when: AVAudioTime) in
            self.recognitionRequest?.append(buffer)
        }

        audioEngine.prepare()
        try audioEngine.start()
        label.text = "Cuéntame..." // "Tell me..."
    }
    /// Configures the shared audio session for recording or playback.
    func setAudioSessionCategory(_ category: String) throws {
        let audioSession = AVAudioSession.sharedInstance()
        try audioSession.setCategory(category)
        try audioSession.setMode(AVAudioSessionModeMeasurement)
        try audioSession.setActive(true, with: .notifyOthersOnDeactivation)
    }
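    // The string constants above are from the Swift 4 / iOS 11 SDK; on
    // Swift 4.2+ they were renamed, and the equivalent setup (assuming a
    // current SDK) would be:
    //
    //     try audioSession.setCategory(.record, mode: .measurement, options: [])
    //     try audioSession.setActive(true, options: .notifyOthersOnDeactivation)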
    @IBAction func recordButtonTapped() {
        if audioEngine.isRunning {
            // Stopping the engine ends the audio stream, which makes the
            // recognizer deliver its final result.
            audioEngine.stop()
            recognitionRequest?.endAudio()
        } else {
            try? startRecording()
            recordButton.setTitle("¿Qué he dicho?", for: []) // "What did I say?"
        }
    }
    /// Tags every token in `text` with its lemma, frequency, and lexical
    /// class, and composes the result into a colored attributed string.
    func tag(_ text: String) -> NSAttributedString {
        let tagger = NSLinguisticTagger(tagSchemes: [.lemma, .nameTypeOrLexicalClass], options: 0)
        tagger.string = text
        let range = NSRange(location: 0, length: text.utf16.count)

        var words = [String]()
        var bagOfWords = [String: CGFloat]()
        var lemmas = [String: String]()
        // No enumeration options on purpose: whitespace and punctuation tokens
        // are kept so that `compose` preserves the original spacing.
        tagger.enumerateTags(in: range, unit: .word, scheme: .lemma, options: []) { tag, tokenRange, _ in
            let word = (text as NSString).substring(with: tokenRange)
            words.append(word)
            lemmas[word] = tag?.rawValue
            bagOfWords[word] = (bagOfWords[word] ?? 0) + 1
        }

        var colors = [String: UIColor]()
        tagger.enumerateTags(in: range, unit: .word, scheme: .nameTypeOrLexicalClass, options: []) { tag, tokenRange, _ in
            let word = (text as NSString).substring(with: tokenRange)
            colors[word] = tag?.colorValue
        }

        return compose(words, bagOfWords: bagOfWords, lemmas: lemmas, colors: colors)
    }
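    // Rough sketch of the intermediate data (not verified tagger output): for
    // "yo corro mucho", `lemmas` might map "corro" to "correr", `bagOfWords`
    // holds a count per token, and `colors` maps the verb to .green via the
    // `colorValue` extension below.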
    /// Rebuilds the sentence token by token: each word is swapped for its
    /// lemma (when one exists), colored by lexical class, and sized by frequency.
    private func compose(_ words: [String], bagOfWords: [String: CGFloat], lemmas: [String: String], colors: [String: UIColor]) -> NSAttributedString {
        let result = NSMutableAttributedString()
        words.forEach { word in
            let fontSize = 15 + (bagOfWords[word] ?? 0)
            let attributedWord = NSAttributedString(string: lemmas[word] ?? word,
                                                    attributes: [.foregroundColor: colors[word] ?? .black,
                                                                 .font: UIFont.systemFont(ofSize: fontSize)])
            result.append(attributedWord)
        }
        return result
    }
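    // Worked example of the sizing rule: a token that occurs three times gets
    // fontSize 15 + 3 = 18, so repeated words literally grow on screen.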
    /// Speaks the transcription back in the same locale it was recognized in.
    func read(_ text: String) throws {
        try setAudioSessionCategory(AVAudioSessionCategoryPlayback)
        let speechUtterance = AVSpeechUtterance(string: text)
        speechUtterance.voice = AVSpeechSynthesisVoice(language: SiriViewController.locale.identifier)
        // Uses the shared `speechSynthesizer` property; a local instance could
        // be deallocated before the utterance finishes.
        speechSynthesizer.speak(speechUtterance)
    }
}
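// Note: `label` and `recordButton` are assumed to be wired to a storyboard
// scene, with the button's action connected to `recordButtonTapped()`; the
// gist itself does not include the storyboard.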
extension NSLinguisticTag {
    public var colorValue: UIColor {
        switch self {
        case .noun: return .red
        case .verb: return .green
        case .adjective: return .blue
        case .adverb: return .cyan
        case .pronoun: return .yellow
        case .determiner: return .magenta
        case .particle: return .gray
        case .preposition: return .darkGray
        case .number: return .lightGray
        case .personalName: return .orange
        case .placeName: return .purple
        case .organizationName: return .brown
        default: return .black
        }
    }
}