Instantly share code, notes, and snippets.

Embed
What would you like to do?
Project Oxford Speech APIs Node.js Sample
var fs = require('fs');
var util = require('util');
var request = require('request');
// ==== Demo: text -> speech -> text round trip ====
var clientId = 'test-app'; // Can be anything
// NOTE(review): this API key is committed in plain text; consider rotating it
// and loading it from configuration rather than from source control.
var clientSecret = 'f6f0bfec08274b8790520a9079b808af'; // API key from Azure marketplace
var str = 'This is a cool demo to call Microsoft text to speach service in Node.js.';
var wavFile = 'test.wav'; // synthesized audio is written here, then read back for recognition

console.log('Converting from text -> speech -> text.');
console.log('Input text: "' + str + '"');

// Each step starts only after the previous one succeeded; the first error is
// logged and ends the chain.
getAccessToken(clientId, clientSecret, function(err, accessToken) {
  if (err) return console.log(err);
  console.log('Got access token: ' + accessToken);
  textToSpeech(str, wavFile, accessToken, function(err) {
    if (err) return console.log(err);
    console.log('Wrote out: ' + wavFile);
    speechToText(wavFile, accessToken, function(err, res) {
      if (err) return console.log(err);
      // speechToText hands back whatever JSON the service returned, so a
      // `results` array is not guaranteed; guard before indexing into it.
      var best = res && res.results && res.results[0];
      if (!best) {
        return console.log('No recognition results in response: ' + JSON.stringify(res));
      }
      console.log('Confidence ' + best.confidence + ' for: "' + best.lexical + '"');
    });
  });
});
// ==== Helpers ====
/**
 * Requests an access token from the token endpoint using the
 * client-credentials grant.
 *
 * @param {string} clientId - Client identifier sent as `client_id`.
 * @param {string} clientSecret - API key sent as `client_secret`.
 * @param {function(*, string=)} callback - Called exactly once. On success it
 *   receives `(null, accessToken)`. On failure the first argument is the
 *   transport error, the JSON parse error, or the raw response body when the
 *   parsed body has no `access_token` field.
 */
function getAccessToken(clientId, clientSecret, callback) {
  request.post({
    url: 'https://oxford-speech.cloudapp.net/token/issueToken',
    // `form` is serialized by request as application/x-www-form-urlencoded,
    // which already percent-encodes each value. Pass the raw values so that
    // keys containing characters such as '+', '/' or '=' are not encoded twice.
    form: {
      'grant_type': 'client_credentials',
      'client_id': clientId,
      'client_secret': clientSecret,
      'scope': 'https://speech.platform.bing.com'
    }
  }, function(err, resp, body) {
    if (err) return callback(err);

    // Only the parsing lives inside the try block. Invoking the callback in
    // there would catch exceptions thrown by the callback itself and then
    // invoke it a second time with that exception.
    var accessToken;
    try {
      accessToken = JSON.parse(body).access_token;
    } catch (e) {
      return callback(e);
    }

    if (!accessToken) {
      // No token in the response: surface the raw body so the caller can see
      // what the service answered.
      return callback(body);
    }
    callback(null, accessToken);
  });
}
/**
 * Synthesizes `text` to speech and writes the returned audio to `filename`.
 *
 * @param {string} text - Plain text to speak. It is XML-escaped before being
 *   embedded in the SSML request body.
 * @param {string} filename - Path of the file the response body is written to.
 * @param {string} accessToken - Bearer token sent in the Authorization header.
 * @param {function(*)} callback - Called with `null` once the file has been
 *   written, or with an error if the request fails, the service answers with
 *   a non-2xx status, or the file cannot be written.
 */
function textToSpeech(text, filename, accessToken, callback) {
  var ssmlTemplate = "<speak version='1.0' xml:lang='en-us'><voice xml:lang='%s' xml:gender='%s' name='%s'>%s</voice></speak>";
  request.post({
    // https: the request carries a bearer token and must not travel in clear text.
    url: 'https://speech.platform.bing.com/synthesize',
    body: util.format(ssmlTemplate, 'en-US', 'Female', 'Microsoft Server Speech Text to Speech Voice (en-US, ZiraRUS)', escapeXmlText(text)),
    // encoding: null makes request deliver the response body as a Buffer
    // instead of decoding the audio bytes as a string.
    encoding: null,
    headers: {
      'Authorization': 'Bearer ' + accessToken,
      'Content-Type' : 'application/ssml+xml',
      'X-Microsoft-OutputFormat' : 'riff-16khz-16bit-mono-pcm',
      'X-Search-AppId': '07D3234E49CE426DAA29772419F436CA',
      'X-Search-ClientID': '1ECFAE91408841A480F00935DC390960',
      // NOTE(review): users report that the service rejects requests without
      // a User-Agent header; any non-empty value is said to work.
      'User-Agent': 'oxford-speech-node-sample'
    }
  }, function(err, resp, body) {
    if (err) return callback(err);

    // Without this check an error page returned by the service would be
    // written to disk as if it were audio.
    var status = resp && resp.statusCode;
    if (status < 200 || status >= 300) {
      return callback(new Error('Text-to-speech request failed with HTTP status ' + status));
    }

    // `body` is a Buffer here, so no encoding argument is needed.
    fs.writeFile(filename, body, function (err) {
      if (err) return callback(err);
      callback(null);
    });
  });
}

// Escapes the characters that are markup-significant in XML text content.
function escapeXmlText(value) {
  return String(value)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;');
}
/**
 * Reads a WAV file and posts it to the speech recognition endpoint.
 *
 * @param {string} filename - Path of the audio file to read and upload.
 * @param {string} accessToken - Bearer token sent in the Authorization header.
 * @param {function(*, Object=)} callback - Called exactly once, with
 *   `(null, parsedJson)` on success or with the file-read error, the
 *   transport error, or the JSON parse error on failure.
 */
function speechToText(filename, accessToken, callback) {
  fs.readFile(filename, function(err, waveData) {
    if (err) return callback(err);
    request.post({
      url: 'https://speech.platform.bing.com/recognize/query',
      qs: {
        'scenarios': 'ulm',
        'appid': 'D4D52672-91D7-4C74-8AD8-42B1D98141A5', // This magic value is required
        'locale': 'en-US',
        'device.os': 'wp7',
        'version': '3.0',
        'format': 'json',
        'requestid': '1d4b6030-9099-11e0-91e4-0800200c9a66', // can be anything
        'instanceid': '1d4b6030-9099-11e0-91e4-0800200c9a66' // can be anything
      },
      body: waveData,
      headers: {
        'Authorization': 'Bearer ' + accessToken,
        'Content-Type': 'audio/wav; samplerate=16000',
        'Content-Length' : waveData.length
      }
    }, function(err, resp, body) {
      if (err) return callback(err);

      // Parse inside the try, call back outside of it. Otherwise an exception
      // thrown by the callback would be caught here and the callback would be
      // invoked a second time with that exception.
      var parsed;
      try {
        parsed = JSON.parse(body);
      } catch (e) {
        return callback(e);
      }
      callback(null, parsed);
    });
  });
}
@rtomasa

This comment has been minimized.

rtomasa commented Nov 2, 2015

Hi Luke,

I'm playing around with this script and testing text to speech, but I always get a test.wav file of 1KB size. I tried changing url: 'http://speech.platform.bing.com/synthesize' to its https version (not sure if this was a typo in your script or done on purpose), but now it generates a test.wav file of 0KB.
It is not raising errors, so any idea what could be the problem here?

Kind regards

@Jaykah

This comment has been minimized.

Jaykah commented Dec 17, 2015

@rtomasa were you able to figure it out?

@gcrev93

This comment has been minimized.

gcrev93 commented Jan 4, 2016

Am I the only one having issues with the wav file?

@JpEncausse

This comment has been minimized.

JpEncausse commented Feb 24, 2016

Hi! Where do you find the client secret?
It seems that since the Oxford API update, the site provides a Primary and a Secondary key.

@qanuj

This comment has been minimized.

qanuj commented Apr 8, 2016

Throws error now.

Our services aren't available right now

We're working to restore all services as soon as possible. Please check back soon.

Ref A: F2FFBD09FD1B45B2A1689D228D805100 Ref B: 8D1DAD3D62430DA3AE2D6DAF1F47F3E6 Ref C: Fri Apr 08 04:52:25 2016 PST
@lv-alex

This comment has been minimized.

lv-alex commented Apr 15, 2016

Bing speech API - same error

@ovrmrw

This comment has been minimized.

ovrmrw commented Apr 16, 2016

the body of created test.wav is "

Our services aren't available right now

We're working to restore all services as soon as possible. Please check back soon.

Ref A: 63E4B5D206F145EB87505A4D9945CC6A Ref B: 8699883F3C81321F54B2BE5A6EF4FC9D Ref C: Sat Apr 16 07:30:09 2016 PST".

It seems that everyone has the same error...

@RalphRe

This comment has been minimized.

RalphRe commented Apr 20, 2016

The problem with the text-to-speech code above is that it is missing the User-Agent header.

It works for me as shown below with the User-Agent header set to some string (only required for the textToSpeech function).

```js
headers: {
'X-Microsoft-OutputFormat' : 'riff-16khz-16bit-mono-pcm',
'Authorization': 'Bearer ' + accessToken,
'Content-Type' : 'application/ssml+xml',
'User-Agent': 'TTSWithNode' // or whatever else

  //'X-Search-AppId': '07D3234E49CE426DAA29772419F436CA',
  //'X-Search-ClientID': '1ECFAE91408841A480F00935DC390960',
}
```

@ghost

This comment has been minimized.

ghost commented May 2, 2016

This STT API still working??

@ericbolo

This comment has been minimized.

ericbolo commented May 14, 2016

Also getting "Services not available right now". Any idea why?

@myadhdoutlet

This comment has been minimized.

myadhdoutlet commented May 15, 2016

Hi Luke. Three things:

(1) It's not a good idea to post code with your clientSecret. You might want to remove it.
(2) Have you tried Microsoft's sample JS code on github? https://github.com/Microsoft/ProjectOxford-ClientSDK/tree/master/Speech/TextToSpeech/Samples-Http/NodeJS — all that is missing is a little code to write the data to disk (which you already have)
(3) The Microsoft sample code works for me, except that it seems to produce a wav file with a lot of static in the playback. I would love to hear from any of you who have the same issue or have found a workable solution. After messing with ffmpeg and sox, trying to figure it out, I am baffled.

@philmccarty

This comment has been minimized.

philmccarty commented Jul 4, 2016

I receive the "Services Not Working" error if I use the http: endpoint instead of https:, I'm still not getting successful responses, but I'm not getting That particular error.

@Ctvidyak

This comment has been minimized.

Ctvidyak commented Jul 5, 2016

I am also getting "service not available". Has anyone found a resolution? I get the same error with Microsoft's sample code as well.

@o3dwade

This comment has been minimized.

o3dwade commented Oct 5, 2016

I don't get the problem when I test it in Postman, but I get the problem everywhere else lol

@palmerabollo

This comment has been minimized.

palmerabollo commented Oct 9, 2016

@myadhdoutlet did you fix the issue with the static? I had the same issue and I fixed it adding encoding: null to the request. This way you get the response as a Buffer. See https://github.com/palmerabollo/bingspeech-api-client/blob/master/src/client.ts#L102

@miparnisari

This comment has been minimized.

miparnisari commented Apr 6, 2017

@lukehoban From where did you get 'X-Search-AppId': '07D3234E49CE426DAA29772419F436CA' and 'X-Search-ClientID': '1ECFAE91408841A480F00935DC390960'?

@JBX028

This comment has been minimized.

JBX028 commented May 12, 2017

Hi,
The voice recognition process works fine, but only if my sentence contains at least 5 or 6 syllables. Is there any limitation that prevents shorter sentences from being recognized?
Thanks

@beilusen

This comment has been minimized.

beilusen commented Jun 5, 2017

From where did you get 'X-Search-AppId': '' and 'X-Search-ClientID': ''?

@beilusen

This comment has been minimized.

beilusen commented Jun 5, 2017

@miparnisari hello,From where did you get 'X-Search-AppId': '' and 'X-Search-ClientID': ''?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment