Library that we created: speech-to-text.min.js / speech-to-text.js.
It's not open source. It's a wrapper that unifies the APIs from different speech recognition services into one API.
- If we are going to use Azure or Google, we need to load some dependencies first:
<!-- For Azure -->
<script src="https://speech-poc.awsext.berlitz.net/lib/microsoft.cognitiveservices.speech.sdk.bundle.js"></script>
<!-- For Google -->
<script src="https://speech-poc.awsext.berlitz.net/lib/socket.js"></script>
- Load our library and create a container:
<script src="https://speech-poc.awsext.berlitz.net/lib/speech-to-text.min.js"></script>
var speechToText = new SpeechToText.Container;
The container is not functional yet; we need to add the speech services that we are going to use (Azure, Google, and the browser API are supported for now). The wrappers for these services are implemented by the library.
- Add speech service (one or all of them):
// Browser API service (chrome only):
speechToText.addService('browser', new SpeechToText.BrowserApiService({
lang: 'en-US',
continuous: true,
interimResults: true
}));
// Azure service:
speechToText.addService('azure', new SpeechToText.AzureService({
key: '45c5dfed064c4014860778115720b932', // or token: <token> (the token needs to be generated by a backend service)
region: 'westus',
language: 'en-US'
}));
// Google service
speechToText.addService('google', new SpeechToText.GoogleService({
socketUrl: 'https://speech-poc.awsext.berlitz.net:3556' // the backend still needs to be implemented
}));
For example, if we use only Azure, then only the Azure service needs to be added.
- Set the service to use, or set the services priority (the current service becomes the first available service, in priority order):
speechToText.setServicePriority(['azure', 'browser', 'google']);
//or
speechToText.setCurrentService('azure');
- Now container is functional and can be used for speech recognition:
try {
speechToText.init(); // checks the microphone; throws an exception if none of the services works
} catch (e) {
console.log(e);
}
// speech recognition results callback
speechToText.onResults((text, isCorrect, isCorrectByAlternative, config, diffElement) => {
// do something with results
});
speechToText.onStopRecognition((config) => {
// callback that is always fired, even if nothing is said into the microphone
});
// Start recognition, this method can be attached to button onclick event
speechToText.startRecognition({answer: 'the man is holding a briefcase', alternatives: ['hello'], maxListenTime: 10});
- The keys 'azure', 'google', 'browser' are custom strings, so we can add multiple instances of one service with different configs, for example:
speechToText.addService('azure-1', new SpeechToText.AzureService({
key: '45c5dfed064c4014860778115720b932',
region: 'westus',
language: 'en-US'
}));
speechToText.addService('azure-2', new SpeechToText.AzureService({
key: 'some-other-key',
region: 'westus',
language: 'pt'
}));