Skip to content

Instantly share code, notes, and snippets.

Last active May 31, 2018 22:17
Show Gist options
  • Save acotilla91/1a0022becec5a5296ef874fc9aad95de to your computer and use it in GitHub Desktop.
Save acotilla91/1a0022becec5a5296ef874fc9aad95de to your computer and use it in GitHub Desktop.
Utility class to communicate with Google Cloud Text-to-Speech API in Swift.
import UIKit
import AVFoundation
/// Voices that can be requested from the Text-to-Speech API.
///
/// The raw value of every case except `undefined` is sent as the `name`
/// field of the request's `voice` object.
enum VoiceType: String {
    /// No specific voice; the request carries only the language code.
    case undefined
    case waveNetFemale = "en-US-Wavenet-F"
    case waveNetMale = "en-US-Wavenet-D"
    case standardFemale = "en-US-Standard-E"
    case standardMale = "en-US-Standard-D"
}
/// Endpoint of the Google Cloud Text-to-Speech `text:synthesize` REST method.
/// It must be a non-empty, well-formed URL: the request code builds a `URL` from it.
let ttsAPIUrl = "https://texttospeech.googleapis.com/v1/text:synthesize"
/// API key sent in the `X-Goog-Api-Key` header. Replace the placeholder with
/// your own key and keep real keys out of version control.
let APIKey = "<YOUR_API_KEY>"
/// Singleton helper that converts text to speech through the Google Cloud
/// Text-to-Speech REST API and plays the returned audio with `AVAudioPlayer`.
///
/// One utterance is handled at a time: `busy` becomes `true` when `speak`
/// accepts a request and returns to `false` once playback finishes or the
/// request fails.
class SpeechService: NSObject, AVAudioPlayerDelegate {

    /// Shared instance.
    static let shared = SpeechService()

    /// `true` while a request is in flight or audio is playing.
    private(set) var busy: Bool = false

    /// Player for the current utterance; retained here so it outlives `speak`.
    private var player: AVAudioPlayer?
    /// Caller's completion for the current utterance, invoked when playback ends.
    private var completionHandler: (() -> Void)?

    /// Synthesizes `text` with the given voice and plays the result.
    ///
    /// - Parameters:
    ///   - text: The text to speak.
    ///   - voiceType: Voice to request; `.undefined` lets the API choose one for `en-US`.
    ///   - completion: Called on the main queue when playback finishes, or when the
    ///     request, decoding, or playback fails. It is NOT called when the service
    ///     is already busy and the request is rejected.
    func speak(text: String, voiceType: VoiceType = .waveNetFemale, completion: @escaping () -> Void) {
        guard !self.busy else {
            print("Speech Service busy!")
            return
        }

        self.busy = true

        // The request below is synchronous, so it must run off the calling thread.
        DispatchQueue.global(qos: .background).async {
            let postData = self.buildPostData(text: text, voiceType: voiceType)
            let headers = ["X-Goog-Api-Key": APIKey, "Content-Type": "application/json; charset=utf-8"]
            let response = self.makePOSTRequest(url: ttsAPIUrl, postData: postData, headers: headers)

            // Get the `audioContent` (as a base64 encoded string) from the response.
            guard let audioContent = response["audioContent"] as? String else {
                print("Invalid response: \(response)")
                self.abort(completion)
                return
            }

            // Decode the base64 string into a Data object.
            guard let audioData = Data(base64Encoded: audioContent) else {
                self.abort(completion)
                return
            }

            DispatchQueue.main.async {
                do {
                    let newPlayer = try AVAudioPlayer(data: audioData)
                    newPlayer.delegate = self
                    self.player = newPlayer
                    self.completionHandler = completion

                    // `play()` reports failure through its return value; without this
                    // check a failed start would leave the service busy forever.
                    if !newPlayer.play() {
                        print("Unable to start audio playback")
                        self.finishPlayback()
                    }
                } catch {
                    print("Unable to create audio player: \(error)")
                    self.busy = false
                    completion()
                }
            }
        }
    }

    /// Marks the service idle and notifies the caller on the main queue after a failure.
    private func abort(_ completion: @escaping () -> Void) {
        DispatchQueue.main.async {
            self.busy = false
            completion()
        }
    }

    /// Releases the player, marks the service idle and fires the stored completion
    /// handler at most once (safe to call repeatedly).
    private func finishPlayback() {
        self.player?.delegate = nil
        self.player = nil
        self.busy = false

        // Clear the stored handler before calling it so it can never run twice.
        let handler = self.completionHandler
        self.completionHandler = nil
        handler?()
    }

    /// Builds the JSON body of the synthesize request.
    ///
    /// - Returns: The encoded body, or empty `Data` if encoding fails.
    private func buildPostData(text: String, voiceType: VoiceType) -> Data {
        // The available voices are listed in the Text-to-Speech documentation.
        var voiceParams: [String: Any] = [
            "languageCode": "en-US"
        ]

        if voiceType != .undefined {
            voiceParams["name"] = voiceType.rawValue
        }

        let params: [String: Any] = [
            "input": [
                "text": text
            ],
            "voice": voiceParams,
            "audioConfig": [
                // The available formats are listed under `AudioEncoding` in the API reference.
                "audioEncoding": "LINEAR16"
            ]
        ]

        // Convert the Dictionary to Data.
        do {
            return try JSONSerialization.data(withJSONObject: params)
        } catch {
            print("Unable to encode request body: \(error)")
            return Data()
        }
    }

    /// Performs a blocking POST request and returns the response parsed as a JSON object.
    ///
    /// Blocks the calling thread until the request completes, so it must not be
    /// called on the main thread.
    ///
    /// - Returns: The top-level JSON dictionary, or an empty dictionary when the URL
    ///   is invalid, the request fails, or the body is not a JSON object.
    private func makePOSTRequest(url: String, postData: Data, headers: [String: String] = [:]) -> [String: AnyObject] {
        guard let endpoint = URL(string: url) else {
            print("Invalid URL: \(url)")
            return [:]
        }

        var request = URLRequest(url: endpoint)
        request.httpMethod = "POST"
        request.httpBody = postData

        for (field, value) in headers {
            request.addValue(value, forHTTPHeaderField: field)
        }

        var dict: [String: AnyObject] = [:]

        // Using a semaphore to make the request synchronous.
        let semaphore = DispatchSemaphore(value: 0)

        let task = URLSession.shared.dataTask(with: request) { data, _, error in
            // Always wake the waiting thread, whatever the outcome.
            defer { semaphore.signal() }

            if let error = error {
                print("Request failed: \(error)")
            }

            guard let data = data,
                  let json = (try? JSONSerialization.jsonObject(with: data, options: [])) as? [String: AnyObject] else {
                return
            }
            dict = json
        }

        task.resume()
        semaphore.wait()

        return dict
    }

    /// `AVAudioPlayerDelegate` "did finish" callback: cleans up and notifies the caller.
    func audioPlayerDidFinishPlaying(_ player: AVAudioPlayer, successfully flag: Bool) {
        self.finishPlayback()
    }

    /// `AVAudioPlayerDelegate` decode-error callback: treated like the end of playback
    /// so the service does not stay busy.
    func audioPlayerDecodeErrorDidOccur(_ player: AVAudioPlayer, error: Error?) {
        if let error = error {
            print("Audio decode error: \(error)")
        }
        self.finishPlayback()
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment