// Created December 16, 2017 05:20
// This gist converts speech to text.
import Foundation
import Speech
/// Callbacks through which a speech-to-text conversion reports its text.
///
/// Both requirements receive the converted text as a plain `String`.
protocol SpeechToTextDelegate {
    /// Reports text while a conversion is still in progress.
    /// - Parameter convertedString: The text converted so far.
    func conversionGoingOn(_ convertedString: String)

    /// Reports the text once a conversion has finished.
    /// - Parameter convertedString: The final converted text.
    func conversionFinished(_ convertedString: String)
}
class SpeechToText: NSObject, SFSpeechRecognizerDelegate {
// MARK: - Singleton

/// The single shared instance of `SpeechToText`.
/// (The spelling `sharedIntance` is kept as-is so existing callers keep compiling.)
static let sharedIntance = SpeechToText()

/// Private so that `sharedIntance` is the only way to obtain an instance.
private override init() {
    // `self` may only be used once the superclass has been initialised.
    super.init()
    speechRecognizer?.delegate = self
}
/// Receiver of the conversion callbacks. No call to it appears in the visible part of this file —
/// presumably it is notified from `timerStopped()`; confirm.
var delegateSpeechToText: SpeechToTextDelegate?
/// Speech recognizer created for the "en-US" locale. It is optional, hence the `?.` at every use.
let speechRecognizer = SFSpeechRecognizer(locale: Locale.init(identifier: "en-US"))
/// Audio-buffer recognition request; created in `startRecognition()` and cleared in `stopRecognition()`.
var regRequest: SFSpeechAudioBufferRecognitionRequest?
/// Recognition task obtained from `speechRecognizer` in `startRecognition()`; cleared in `stopRecognition()`.
var regTask: SFSpeechRecognitionTask?
/// Audio engine whose input node is tapped in `startRecognition()`.
let avEngine = AVAudioEngine()
/// Best transcription from the most recent recognition result; reset to "" by `stopRecognition()`.
var speechText = String()
/// Non-repeating 1-second timer re-created on every recognition result; its selector is `timerStopped()`.
var timer: Timer?
// MARK: - Stopping

/// Stops the current recognition session and resets all per-session state.
///
/// Audio capture is halted, the recognition request is told that no more audio
/// will arrive, the running task is cancelled, and any pending timer is
/// invalidated before the stored references and `speechText` are cleared.
func stopRecognition() {
    // Halt capture first so no further buffers are produced.
    avEngine.stop()
    avEngine.inputNode?.removeTap(onBus: 0)

    // Dropping the references alone leaves the request open and the task running.
    regRequest?.endAudio()
    regTask?.cancel()

    // A timer that is merely set to nil stays scheduled on its run loop and would still fire.
    timer?.invalidate()
    timer = nil

    speechText = ""
    regRequest = nil
    regTask = nil
}
// MARK: - Starting

/// Starts a new recognition session on the audio engine's input node.
///
/// Steps, in order:
/// 1. Cancels a task left over from a previous session.
/// 2. Configures and activates the shared audio session for recording.
/// 3. Creates a request that reports partial results and starts a recognition task.
/// 4. Installs a tap on the input node that feeds captured buffers to the request.
/// 5. Prepares and starts the audio engine.
///
/// Each recognition result stores the best transcription in `speechText` and
/// restarts a 1-second timer whose selector is `timerStopped()`.
///
/// Failures to activate the audio session or to start the engine are only
/// logged with `print`. A missing input node terminates the process via `fatalError`.
func startRecognition() {
    // Cancel the task if one is already running; clearing the reference alone does not stop it.
    if regTask != nil {
        regTask?.cancel()
        regTask = nil
    }

    // Create an AVAudioSession for audio recording.
    let avAudioSession = AVAudioSession.sharedInstance()
    do {
        try avAudioSession.setCategory(AVAudioSessionCategoryRecord)
        try avAudioSession.setMode(AVAudioSessionModeMeasurement)
        try avAudioSession.setActive(true, with: .notifyOthersOnDeactivation)
    } catch {
        print("Audio Session is not active")
    }

    // Check the audio input.
    guard let inputEngineNode = avEngine.inputNode else {
        fatalError("Some Error")
    }

    // Binding the fresh request to a local avoids unwrapping the optional property again.
    let recognitionRequest = SFSpeechAudioBufferRecognitionRequest()
    regRequest = recognitionRequest
    recognitionRequest.shouldReportPartialResults = true

    // Start the speech recognition task.
    regTask = speechRecognizer?.recognitionTask(with: recognitionRequest, resultHandler: { (result, _) in
        guard let result = result else { return }
        let transcript = result.bestTranscription.formattedString
        DispatchQueue.main.async {
            // Invalidate the previous timer; setting it to nil would leave it scheduled,
            // so `timerStopped()` would fire once per partial result.
            self.timer?.invalidate()
            self.timer = Timer.scheduledTimer(timeInterval: 1, target: self, selector: #selector(SpeechToText.timerStopped), userInfo: nil, repeats: false)
            self.speechText = transcript
        }
    })

    // Set the format of the audio input and route captured buffers to the request.
    inputEngineNode.removeTap(onBus: 0)
    let recordingFormat = inputEngineNode.outputFormat(forBus: 0)
    inputEngineNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { (buffer, _) in
        // Without this the recognizer never receives any audio.
        self.regRequest?.append(buffer)
    }

    avEngine.prepare()
    do {
        try avEngine.start()
    } catch {
        print("some error")
    }
}
// When conversation stops
func timerStopped() {
