A best practice for streaming audio from a browser microphone to Dialogflow or Google Cloud Speech-to-Text (STT) over websockets.
// Client-side: decode an ArrayBuffer of audio (e.g. the Text-to-Speech
// response sent back by the server) and play it through the Web Audio API.
function playOutput(arrayBuffer) {
    let audioContext = new AudioContext();
    let outputSource;
    try {
        if (arrayBuffer.byteLength > 0) {
            console.log(arrayBuffer.byteLength);
            audioContext.decodeAudioData(arrayBuffer,
                function(buffer) {
                    audioContext.resume();
                    outputSource = audioContext.createBufferSource();
                    outputSource.connect(audioContext.destination);
                    outputSource.buffer = buffer;
                    outputSource.start(0);
                },
                function() {
                    console.log(arguments);
                });
        }
    } catch (e) {
        console.log(e);
    }
}
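A minimal usage sketch: wire playOutput to a socket event on the client. The 'audio' event name and the ArrayBuffer payload are assumptions, matching the server-side Text-to-Speech sketch further down.

// Play synthesized audio pushed by the server.
// The 'audio' event name is an assumption.
socket.on('audio', function(arrayBuffer) {
    playOutput(arrayBuffer);
});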
// Server-side: set up a Dialogflow session and the base request object.
// Assumes: const df = require('dialogflow'); const uuid = require('uuid');
this.sessionId = uuid.v4();
this.sessionClient = new df.SessionsClient();
this.sessionPath = this.sessionClient.sessionPath(this.projectId, this.sessionId);
this.request = {
    session: this.sessionPath,
    queryInput: {
        text: {
            languageCode: this.languageCode
        }
    }
};
/*
 * Detect intent based on a text string
 * @param text The text string to run intent detection on
 * @return Promise resolving to the handled intent results
 */
async detectIntent(text: string) {
    this.request.queryInput.text.text = text;
    const responses = await this.sessionClient.detectIntent(this.request);
    return this.getHandleResponses(responses);
}
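A sketch of calling this from the socket server for plain text messages; the dialogflow instance name and the 'message' event are assumptions:

// Run intent detection on a text message from the client.
// `dialogflow` is assumed to be an instance of the class that
// owns detectIntent; the 'message' event name is an assumption.
client.on('message', async function(text) {
    const results = await dialogflow.detectIntent(text);
    client.emit('results', results);
});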
// Server-side listener: fires once a client connects to the server socket.
// Assumes `io` (socket.io), `ss` (socket.io-stream), `path` and `fs`
// are already required.
io.on('connect', (client) => {
    console.log(`Client connected [id=${client.id}]`);
    client.emit('server_setup', `Server connected [id=${client.id}]`);

    // When the client sends 'stream' events,
    // i.e. when using audio streaming
    ss(client).on('stream', function(stream, data) {
        // Get the name of the stream
        const filename = path.basename(data.name);
        // Write the incoming audio stream to a local file
        stream.pipe(fs.createWriteStream(filename));
        // Make a detectIntentStream call (sketched below)
        detectIntentStream(stream, function(results) {
            console.log(results);
            client.emit('results', results);
        });
    });
})
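The handler above calls detectIntentStream, which the gist does not show. Below is a minimal sketch built on the Dialogflow client's streamingDetectIntent() duplex stream; it assumes sessionClient, sessionPath and languageCode from the session setup above are in scope, and the function signature is an assumption chosen to match the call site:

const { Transform } = require('stream');

function detectIntentStream(audioStream, cb) {
    // The first request carries the session and audio config, no audio yet
    const initialRequest = {
        session: sessionPath,
        queryInput: {
            audioConfig: {
                audioEncoding: 'AUDIO_ENCODING_LINEAR_16',
                sampleRateHertz: 16000,
                languageCode: languageCode
            }
        }
    };

    const detectStream = sessionClient.streamingDetectIntent()
        .on('error', console.error)
        .on('data', function(data) {
            // Intermediate recognition results stream in first;
            // the final message carries the matched intent
            if (data.queryResult) {
                cb(data.queryResult);
            }
        });

    detectStream.write(initialRequest);

    // Every following request carries only a chunk of raw audio
    audioStream
        .pipe(new Transform({
            objectMode: true,
            transform: (chunk, _, next) => next(null, { inputAudio: chunk })
        }))
        .pipe(detectStream);
}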
// Client-side: capture the microphone with RecordRTC and stream it
// to the server. Assumes RecordRTC, StereoAudioRecorder, socket.io
// and socket.io-stream (`ss`) are loaded on the page.
navigator.getUserMedia({
    audio: true
}, function(stream) {
    recordAudio = RecordRTC(stream, {
        type: 'audio',
        mimeType: 'audio/webm',
        sampleRate: 44100,
        desiredSampRate: 16000,
        recorderType: StereoAudioRecorder,
        numberOfAudioChannels: 1,
        // 1) Get interval-based blobs (value in milliseconds),
        // as you might not want to make detect calls every second
        timeSlice: 4000,
        // 2) As soon as a blob of audio is available
        ondataavailable: function(blob) {
            // 3) Making use of socket.io-stream for bi-directional
            // streaming: create a stream
            var stream = ss.createStream();
            // Stream directly to the server, where it will be
            // temporarily stored locally
            ss(socket).emit('stream', stream, {
                name: 'stream.wav',
                size: blob.size
            });
            // Pipe the audio blob into the read stream
            ss.createBlobReadStream(blob).pipe(stream);
        }
    });
}, function(error) {
    console.error(JSON.stringify(error));
});
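Note that RecordRTC does not capture anything until it is started; a minimal sketch of driving the recorder (startRecording and stopRecording are RecordRTC API methods):

// Start streaming: RecordRTC emits a blob every timeSlice milliseconds.
function startRecording() {
    recordAudio.startRecording();
}

// Stop streaming.
function stopRecording() {
    recordAudio.stopRecording();
}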
// Server-side: Google Cloud Speech-to-Text setup.
// Assumes: const speech = require('@google-cloud/speech');
this.speechClient = new speech.SpeechClient();

// Create the initial request object.
// When streaming, this is the first call you will make:
// a request without the audio stream, which prepares the API
// to receive audio with a certain sampleRateHertz, encoding
// and languageCode. This needs to be in line with the audio
// settings that are set in the client.
this.request = {
    config: {
        sampleRateHertz: 16000,
        encoding: 'LINEAR16',
        languageCode: languageCode
    },
    interimResults: true
}
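The comment above describes the streaming flow, while the function below uses the non-streaming recognize call. For completeness, a sketch of the streaming variant reusing the same request object; the function name and callback shape are assumptions:

// Streaming variant: pipe audio into the recognize stream and
// surface transcripts through a callback. Name is hypothetical.
function transcribeAudioStream(audioStream, cb) {
    const recognizeStream = this.speechClient.streamingRecognize(this.request)
        .on('error', console.error)
        .on('data', function(data) {
            if (data.results[0] && data.results[0].alternatives[0]) {
                cb(data.results[0].alternatives[0].transcript);
            }
        });
    audioStream.pipe(recognizeStream);
}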
// Non-streaming variant: recognize a complete audio buffer in one call
async function transcribeAudio(audio) {
    this.request.audio = {
        content: audio
    };
    const responses = await this.speechClient.recognize(this.request);
    return responses;
}
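A usage sketch pulling the transcript out of the response; the array shape follows the Node.js Speech client, and audioBuffer is a placeholder:

// Transcribe a buffer and log the combined transcript.
transcribeAudio(audioBuffer).then(function(responses) {
    const transcript = responses[0].results
        .map(result => result.alternatives[0].transcript)
        .join('\n');
    console.log(transcript);
});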
// Server-side: Google Cloud Text-to-Speech setup.
// Assumes: const textToSpeech = require('@google-cloud/text-to-speech');
this.ttsClient = new textToSpeech.TextToSpeechClient();

// Construct the request
this.request = {
    // Select the language and SSML voice gender (optional)
    voice: {
        languageCode: 'en-US',
        ssmlGender: 'NEUTRAL'
    },
    audioConfig: {
        // One of: LINEAR16 | MP3 | OGG_OPUS | AUDIO_ENCODING_UNSPECIFIED
        audioEncoding: 'LINEAR16'
    }
};

async function textToAudioBuffer(text) {
    this.request.input = { text: text }; // text or SSML
    // Performs the Text-to-Speech request; resolves to an array
    // whose first element holds the audio content
    const responses = await this.ttsClient.synthesizeSpeech(this.request);
    return responses[0].audioContent;
}
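To complete the round trip, a sketch of pushing the synthesized audio back over the socket so the browser can play it with playOutput; the 'audio' event name is an assumption matching the client sketch earlier:

// Synthesize a reply and push it to the connected client.
// The 'audio' event name is an assumption.
textToAudioBuffer('Hello, how can I help you?').then(function(audioContent) {
    client.emit('audio', audioContent);
});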