Skip to content

Instantly share code, notes, and snippets.

@tonyhallett
Created February 6, 2023 13:59
Show Gist options
  • Save tonyhallett/5b068572a5638975122b003b575812b8 to your computer and use it in GitHub Desktop.
Save tonyhallett/5b068572a5638975122b003b575812b8 to your computer and use it in GitHub Desktop.
Instructions and code for programmatic Alexa Voice and Tone with saving of mp3.

Intro

The Alexa developer console has a test section with Voice & Tone where you can provide Alexa SSML, select a skill locale and hear the audio response.

There is no API available but using Chrome Developer Tools it is possible to create a javascript snippet to generate multiple SSML MP3s.

The only requirement is to add a debug statement (a breakpoint) in the page source and, while paused there, make one function global.

To find the JavaScript file:

  1. Add some SSML and press Play.
  2. Check the Network tab and select Fetch/XHR with name getTTS.
  3. Use the Initiator tab to find e.getTextToSpeech and click the link to open the js.
  4. Pretty print.
  5. Find the line t.getAasInstance = p("/alexa/console/ask/displays") and add a debug statement (breakpoint) on it.
  6. Reload the page
  7. When execution pauses on that line, run this in the console: window.getDefaultAssClient = t.getDefaultInstance

SSML to MP3 snippet

Use ssmlToAudioFile from the code below adding your skillId.

Don't debug or the file picker will not work.

If performing multiple requests with the snippet, be sure to wait between requests.

{
    // Skill id passed to getTextToSpeech with every request (see getSSMLBlob).
    // Replace the placeholder with your own skill's id before running the snippet.
    const skillId = "amzn1.ask.skill.YOUR_SKILL_ID";
    
    /**
     * Returns a promise that settles once a timer of `time` milliseconds fires.
     * A pause like this is needed between requests when several are sent in a row.
     * @param {number} time - Timeout in milliseconds, handed straight to setTimeout.
     * @returns {Promise<void>} Fulfilled with no value when the timer fires.
     */
    function delay(time) {
        return new Promise((done) => {
            setTimeout(done, time);
        });
    }

    /**
     * Pauses for one second by delegating to delay().
     * @returns {Promise<void>} Fulfilled once the one-second delay has elapsed.
     */
    function wait(){
      const pauseMs = 1000;
      return delay(pauseMs);
    }
          
    /**
     * Requests the audio for a piece of SSML through the client object that was
     * manually exposed as `window.getDefaultAssClient`, and wraps the decoded
     * bytes in a Blob.
     *
     * Depends on two names from outside this function: the `skillId` constant
     * and the global `window.getDefaultAssClient`.
     *
     * @param {string} ssml - SSML markup to turn into audio.
     * @param {string} locale - Skill locale (e.g. "en-GB"); sent as `{ value: locale }`.
     * @returns {Promise<Blob>} Blob of type "audio/mp3" holding exactly the decoded bytes.
     * @throws {Error} If the first element of the response is not a base64 string.
     */
    async function getSSMLBlob(ssml,locale){
        // Decodes standard base64 text into bytes.
        // atob sizes its output from the real payload, so trailing "=" padding
        // never turns into extra zero bytes at the end of the audio data.
        function base64ToBytes(base64) {
            // Drop anything outside the base64 alphabet (line breaks etc.) before decoding.
            const cleaned = base64.replace(/[^A-Za-z0-9+/=]/g, "");
            const binary = atob(cleaned);
            return Uint8Array.from(binary, (ch) => ch.charCodeAt(0));
        }

        // Build the wrapper in a fresh binding rather than overwriting the parameter.
        const localeOption = { value: locale };

        const res = await window.getDefaultAssClient.getTextToSpeech(ssml, "SSML", skillId, localeOption);
        // The base64-encoded audio is read from the first element of the response.
        const audioBase64str = res ? res[0] : undefined;
        if (typeof audioBase64str !== "string") {
            throw new Error("getTextToSpeech did not return base64 audio data");
        }

        return new Blob([base64ToBytes(audioBase64str)], {
            type: "audio/mp3"
        });
    }
      
    /**
     * Writes `contents` through a writable obtained from `fileHandle`, then closes it.
     * @param {object} fileHandle - Object exposing an async createWritable() method.
     * @param {*} contents - Data handed unchanged to the writable's write().
     * @returns {Promise<void>} Fulfilled after the writable has been closed.
     */
    async function writeFile(fileHandle, contents) {
        const stream = await fileHandle.createWritable();

        await stream.write(contents);
        await stream.close();
    }
  
    /**
     * Opens the save-file picker offering a single ".mp3" file type.
     * @param {string} suggestedName - File name proposed to the picker.
     * @returns {Promise<*>} Whatever window.showSaveFilePicker resolves with.
     */
    async function getMp3FileHandle(suggestedName) {
        // The only file type offered in the picker.
        const mp3Type = {
            description: 'SSML mp3',
            accept: { 'audio/mpeg': ['.mp3'] },
        };
        const pickerOptions = { suggestedName, types: [mp3Type] };

        const chosen = await window.showSaveFilePicker(pickerOptions);
        return chosen;
    }
  
    /**
     * Asks where to save the file via getMp3FileHandle, then writes `data` there.
     * @param {*} data - Contents passed on to writeFile.
     * @param {string} suggestedName - File name proposed in the save picker.
     * @returns {Promise<void>} Fulfilled once writeFile has finished.
     */
    async function writeMp3(data,suggestedName){
        const target = await getMp3FileHandle(suggestedName);

        await writeFile(target, data);
    }
  
    // *** Entry point of the snippet - leave a pause between calls when sending several requests.
    /**
     * Fetches the audio for `ssml` and saves it through the save-file picker.
     * @param {string} ssml - SSML markup to convert.
     * @param {string} locale - Matches the locale drop-down, e.g. "en-GB".
     * @param {string} suggestedName - File name proposed in the save picker.
     * @returns {Promise<void>} Fulfilled after the file has been written.
     */
    async function ssmlToAudioFile(ssml,locale, suggestedName){
        await writeMp3(await getSSMLBlob(ssml,locale), suggestedName);
    }
  
    // helper ssml functions
  
  
    /**
     * Wraps `contents` in a <speak> element.
     * @param {string} contents - Inner text or markup, inserted as-is.
     * @returns {string} The wrapped markup.
     */
    function speak(contents){
        const openTag = "<speak>";
        const closeTag = "</speak>";
        return `${openTag}${contents}${closeTag}`;
    }
  
    /**
     * Wraps `contents` in a <lang> element with the given xml:lang attribute.
     * @param {string} lang - Value for xml:lang, inserted as-is.
     * @param {string} contents - Inner text or markup, inserted as-is.
     * @returns {string} The wrapped markup.
     */
    function language(lang,contents){
        const openTag = `<lang xml:lang="${lang}">`;
        return `${openTag}${contents}</lang>`;
    }

    /**
     * Wraps `contents` in a <voice> element with the given name attribute.
     * @param {string} voiceName - Value for the name attribute, inserted as-is.
     * @param {string} contents - Inner text or markup, inserted as-is.
     * @returns {string} The wrapped markup.
     */
    function voice(voiceName, contents){
        const openTag = `<voice name="${voiceName}">`;
        return `${openTag}${contents}</voice>`;
    }
  
    /**
     * Wraps `contents` in a <lang> element nested inside a <voice> element.
     * @param {string} voiceName - Passed to voice().
     * @param {string} lang - Passed to language().
     * @param {string} contents - Inner text or markup.
     * @returns {string} The nested markup.
     */
    function voiceInLanguage(voiceName,lang,contents){
        const localized = language(lang,contents);
        return voice(voiceName, localized);
    }
  
    /**
     * Wraps `contents` in an <amazon:domain> element with the given name attribute.
     * @param {string} domain - Value for the name attribute, inserted as-is.
     * @param {string} contents - Inner text or markup, inserted as-is.
     * @returns {string} The wrapped markup.
     */
    function amazonDomain(domain,contents){
        const openTag = `<amazon:domain name="${domain}">`;
        return `${openTag}${contents}</amazon:domain>`;
    }

    /**
     * Wraps `contents` in an <amazon:domain name="conversational"> element.
     * @param {string} contents - Inner text or markup.
     * @returns {string} The wrapped markup.
     */
    function conversational(contents){
        const domain = "conversational";
        return amazonDomain(domain,contents);
    }

    /**
     * Wraps `contents` in an <amazon:domain name="news"> element.
     * @param {string} contents - Inner text or markup.
     * @returns {string} The wrapped markup.
     */
    function news(contents){
        const domain = "news";
        return amazonDomain(domain,contents);
    }
  
  
    // Example run: saves two clips one after the other, pausing between requests.
    // The trailing catch keeps a cancelled save dialog or a failed request from
    // surfacing as an unhandled promise rejection.
    (async () => {
        const requests = [
            {
                ssml: speak("In the default voice of the skill locale."),
                locale: "en-US",
                fileName: "filename1",
            },
            {
                ssml: speak(voice("Amy","In Amy's voice")),
                locale: "en-GB",
                fileName: "filename2",
            },
        ];

        for (const [index, request] of requests.entries()) {
            // Pause between requests only, not before the first one.
            if (index > 0) {
                await wait();
            }
            await ssmlToAudioFile(request.ssml, request.locale, request.fileName);
        }
    })().catch((error) => {
        console.error("SSML to MP3 export failed:", error);
    });
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment