Skip to content

Instantly share code, notes, and snippets.

Created October 5, 2021 18:24
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mattetti/a8522cf365de509b7e10e82714636214 to your computer and use it in GitHub Desktop.
Save mattetti/a8522cf365de509b7e10e82714636214 to your computer and use it in GitHub Desktop.
Unity script to generate and load an audio clip using Azure Cognitive Services
using System.Collections;
using UnityEngine;
using UnityEngine.UI;
using Microsoft.CognitiveServices.Speech;
using Microsoft.CognitiveServices.Speech.Audio;
using System.IO;
using UnityEngine.Networking;
using System.Threading.Tasks;
/// <summary>
/// Unity component that synthesizes a fixed line of speech to a WAV file with the
/// Azure Cognitive Services Speech SDK, loads that file back as an
/// <see cref="AudioClip"/> and plays it through <see cref="audioSource"/>.
/// </summary>
public class audioGenButton : MonoBehaviour
{
    /// <summary>Button that triggers synthesis; disabled while a request is in flight.</summary>
    public Button SynthesizeButton;

    /// <summary>Subscription key passed to <c>SpeechConfig.FromSubscription</c>.</summary>
    public string SpeechServiceSubscriptionKey = "";

    /// <summary>Service region passed to <c>SpeechConfig.FromSubscription</c>.</summary>
    public string SpeechServiceRegion = "";

    /// <summary>True while <see cref="OnButtonPressed"/> is waiting on synthesis or clip loading.</summary>
    public bool IsWaitingForSynthesis = false;

    /// <summary>File name (combined with <c>Application.dataPath</c>) the WAV output is written to.</summary>
    public string GeneratedFileName = "azureSpeechSynth.wav";

    /// <summary>Audio source used to play the generated clip.</summary>
    public AudioSource audioSource;

    private SpeechConfig config;
    private AudioConfig audioConfig;
    private string outputPath;
    private string buttonText = "";
    private AudioClip _audioClip;

    /// <summary>
    /// Click handler: synthesizes the speech, shows the result reason on the button,
    /// and plays the generated clip when synthesis completed.
    /// </summary>
    /// <remarks>
    /// Declared <c>async void</c> because it is a UI event handler; all failures are
    /// caught and logged here so the button is always re-enabled.
    /// </remarks>
    public async void OnButtonPressed()
    {
        Debug.Log("Button clicked");
        if (SynthesizeButton != null)
        {
            SynthesizeButton.interactable = false;
        }

        IsWaitingForSynthesis = true;
        try
        {
            if (config is null)
            {
                // NOTE(review): Application.dataPath may not be writable in built players;
                // Application.persistentDataPath might be the safer target - confirm per platform.
                outputPath = Path.Combine(Application.dataPath, GeneratedFileName);
                config = SpeechConfig.FromSubscription(SpeechServiceSubscriptionKey, SpeechServiceRegion);
                config.SpeechSynthesisLanguage = "en-US";
                config.SpeechSynthesisVoiceName = "en-ZA-LukeNeural"; //"en-US-JennyMultilingualNeural";
                audioConfig = AudioConfig.FromWavFileOutput(outputPath);
            }

            Debug.Log("will save the file to: " + outputPath);
            using var result = await GenerateTextAsync("We're all puppets April! I'm just a puppet who can see the strings.");

            switch (result.Reason)
            {
                case ResultReason.NoMatch:
                    buttonText = "No Match";
                    break;
                case ResultReason.Canceled:
                    buttonText = "Canceled";
                    break;
                case ResultReason.SynthesizingAudio:
                    buttonText = "Synthesizing Audio";
                    break;
                case ResultReason.SynthesizingAudioCompleted:
                    buttonText = "Synthesizing Audio Completed";
                    break;
                case ResultReason.SynthesizingAudioStarted:
                    buttonText = "Synthesizing Audio Started";
                    break;
                default:
                    buttonText = result.Reason.ToString();
                    break;
            }

            // Surface the outcome to the user; previously the text was computed but never displayed.
            setButtonText(buttonText);

            if (result.Reason != ResultReason.SynthesizingAudioCompleted)
            {
                if (result.Reason == ResultReason.Canceled)
                {
                    var cancellation = SpeechSynthesisCancellationDetails.FromResult(result);
                    Debug.LogError($"Speech synthesis canceled: {cancellation.Reason} ({cancellation.ErrorCode}) {cancellation.ErrorDetails}");
                }

                return;
            }

            _audioClip = await GetAudioClip(outputPath, AudioType.WAV);
            if (_audioClip == null)
            {
                Debug.LogError("Generated audio clip could not be loaded from: " + outputPath);
                return;
            }

            if (audioSource == null)
            {
                Debug.LogError("No AudioSource assigned, can't play the generated clip");
                return;
            }

            audioSource.PlayOneShot(_audioClip, 1.0f);
        }
        catch (System.Exception ex)
        {
            // Last-chance handler for an async void entry point: log instead of losing the error.
            Debug.LogException(ex);
        }
        finally
        {
            IsWaitingForSynthesis = false;

            // The component or button may have been destroyed while awaiting; Unity's
            // overloaded == reports destroyed objects as null.
            if (this != null && SynthesizeButton != null)
            {
                SynthesizeButton.interactable = true;
                if (isActiveAndEnabled)
                {
                    StartCoroutine(ResetButtonTextAfter(4.0f, "Regenerate Audio", SynthesizeButton));
                }
            }
        }
    }

    /// <summary>Sets the label of <see cref="SynthesizeButton"/>, logging when it cannot be found.</summary>
    /// <param name="text">Text to display on the button.</param>
    private void setButtonText(string text)
    {
        // Use == (not "is null") so Unity's destroyed/missing-object check applies.
        if (SynthesizeButton == null)
        {
            Debug.Log("Button not found, can't set its text");
            return;
        }

        var textField = SynthesizeButton.GetComponentInChildren<Text>();
        if (textField == null)
        {
            Debug.Log("Text not found");
            return;
        }

        textField.text = text;
    }

    /// <summary>Speaks <paramref name="text"/> into the configured WAV file output.</summary>
    /// <param name="text">Plain text to synthesize.</param>
    /// <returns>The synthesis result; the caller is responsible for disposing it.</returns>
    private async Task<SpeechSynthesisResult> GenerateTextAsync(string text)
    {
        using var synthesizer = new SpeechSynthesizer(config, audioConfig);
        return await synthesizer.SpeakTextAsync(text);
    }

    /// <summary>Loads an audio clip through <c>UnityWebRequestMultimedia</c>.</summary>
    /// <param name="filePath">Path or URI of the audio file to load.</param>
    /// <param name="fileType">Audio format of the file.</param>
    /// <returns>The loaded clip, or <c>null</c> when the request did not succeed.</returns>
    public async Task<AudioClip> GetAudioClip(string filePath, AudioType fileType)
    {
        using (UnityWebRequest www = UnityWebRequestMultimedia.GetAudioClip(filePath, fileType))
        {
            var operation = www.SendWebRequest();
            while (!operation.isDone)
            {
                await Task.Delay(100);
            }

            // Treat every non-success outcome as a failure, not only connection errors;
            // reading the content of a failed request would throw.
            if (www.result != UnityWebRequest.Result.Success)
            {
                Debug.LogError($"Failed to load audio clip from '{filePath}': {www.error}");
                return null;
            }

            return DownloadHandlerAudioClip.GetContent(www);
        }
    }

    /// <summary>Coroutine that replaces a button's label after a delay.</summary>
    /// <param name="delayTime">Seconds to wait before updating the label.</param>
    /// <param name="text">New label text.</param>
    /// <param name="button">Button whose child <c>Text</c> is updated.</param>
    IEnumerator ResetButtonTextAfter(float delayTime, string text, Button button)
    {
        //Wait for the specified delay time before continuing.
        yield return new WaitForSeconds(delayTime);

        // The button can be destroyed during the wait.
        if (button == null)
        {
            Debug.Log("Button not found, can't set its text");
            yield break;
        }

        var textField = button.GetComponentInChildren<Text>();
        if (textField == null)
        {
            Debug.Log("Text not found");
            yield break;
        }

        textField.text = text;
        Debug.Log("Button text updated");
    }

    // Start is called before the first frame update
    void Start()
    {
        // register the click handler
        // NOTE(review): no registration happens here; presumably OnButtonPressed is wired
        // to the button's onClick in the Inspector - confirm.
    }

    // Update is called once per frame
    void Update()
    {
    }

    // Releases the SDK audio output owned by this component.
    void OnDestroy()
    {
        audioConfig?.Dispose();
        audioConfig = null;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment