Last active
February 17, 2020 13:46
-
-
Save savelee/95b730d5674783aae9969ca6c00faec9 to your computer and use it in GitHub Desktop.
A best practice for streaming audio from a browser microphone to Dialogflow or Google Cloud STT by using websockets.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Lazily-created, shared AudioContext. Creating a fresh context on every
// call (as the original did) leaks contexts: browsers cap the number of
// concurrent AudioContexts (Chrome allows 6), after which construction throws.
let outputAudioContext = null;

/**
 * Decode an audio ArrayBuffer and play it through the speakers.
 * Empty buffers are ignored; decode errors are logged, not thrown.
 * @param {ArrayBuffer} arrayBuffer - encoded audio (e.g. LINEAR16 from TTS)
 */
function playOutput(arrayBuffer){
    try {
        if (!arrayBuffer || arrayBuffer.byteLength === 0) {
            return; // nothing to play
        }
        if (outputAudioContext === null) {
            outputAudioContext = new AudioContext();
        }
        console.log(arrayBuffer.byteLength);
        outputAudioContext.decodeAudioData(arrayBuffer,
            function(buffer){
                // resume() is needed because browsers start contexts
                // 'suspended' until a user gesture has occurred
                outputAudioContext.resume();
                const outputSource = outputAudioContext.createBufferSource();
                outputSource.connect(outputAudioContext.destination);
                outputSource.buffer = buffer;
                outputSource.start(0);
            },
            function(){
                // decode failure — log the error arguments for debugging
                console.log(arguments);
            });
    } catch(e) {
        console.log(e);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Create a unique Dialogflow session per connection.
this.sessionId = uuid.v4();
this.sessionClient = new df.SessionsClient();
this.sessionPath = this.sessionClient.sessionPath(this.projectId, this.sessionId);
// Base request template; detectIntent() fills in queryInput.text.text
// on each call. languageCode must match the client's locale.
this.request = {
  session: this.sessionPath,
  queryInput: {
    text: {
      languageCode: this.languageCode
    }
  }
}
/*
 * Detect an intent from a text query (non-streaming detectIntent call).
 * @param text - the user utterance to send to Dialogflow
 * @returns the processed detectIntent responses via getHandleResponses()
 */
async detectIntent(text: string){
  this.request.queryInput.text.text = text;
  const responses = await this.sessionClient.detectIntent(this.request);
  return this.getHandleResponses(responses);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Fires whenever a browser client opens a socket connection.
io.on('connect', (client) => {
  console.log(`Client connected [id=${client.id}]`);
  client.emit('server_setup', `Server connected [id=${client.id}]`);

  // Audio streaming path: the client emits 'stream' events carrying a
  // binary stream plus metadata ({ name, size }).
  ss(client).on('stream', (stream, data) => {
    // basename() strips any directory components from the
    // client-supplied name before using it as a local path
    const filename = path.basename(data.name);
    // persist the incoming audio locally while it streams in
    stream.pipe(fs.createWriteStream(filename));
    // forward the same stream for intent detection and
    // relay each result back to this client
    detectIntentStream(stream, (results) => {
      console.log(results);
      client.emit('results', results);
    });
  });
})
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Capture microphone audio and stream it to the server in 4-second chunks.
// navigator.getUserMedia is deprecated (removed from the standard); use the
// promise-based navigator.mediaDevices.getUserMedia instead.
navigator.mediaDevices.getUserMedia({
    audio: true
}).then(function(stream) {
    recordAudio = RecordRTC(stream, {
        type: 'audio',
        mimeType: 'audio/webm',
        sampleRate: 44100,          // microphone capture rate
        // downsample to 16 kHz to match the server-side
        // STT/Dialogflow config (sampleRateHertz: 16000)
        desiredSampRate: 16000,
        recorderType: StereoAudioRecorder,
        numberOfAudioChannels: 1,   // mono, as LINEAR16 config expects
        //1)
        // get intervals based blobs
        // value in milliseconds
        // as you might not want to make detect calls every seconds
        timeSlice: 4000,
        //2)
        // as soon as the stream is available
        ondataavailable: function(blob) {
            // 3
            // making use of socket.io-stream for bi-directional
            // streaming, create a stream
            var stream = ss.createStream();
            // stream directly to server
            // it will be temp. stored locally
            ss(socket).emit('stream', stream, {
                name: 'stream.wav',
                size: blob.size
            });
            // pipe the audio blob to the read stream
            ss.createBlobReadStream(blob).pipe(stream);
        }
    });
}).catch(function(error) {
    console.error(JSON.stringify(error));
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Cloud Speech-to-Text client (note: SpeechClient, not Dialogflow).
this.speechClient = new speech.SpeechClient();
// Base request template. The config must match the audio the client
// records and sends: 16 kHz sample rate, LINEAR16 encoding, and the
// session's languageCode.
this.request = {
  config: {
    sampleRateHertz: 16000,
    encoding: 'LINEAR16',
    languageCode: languageCode
  },
  // NOTE(review): interimResults only applies to streamingRecognize;
  // the non-streaming recognize() call ignores it — confirm intent.
  interimResults: true
}
/**
 * Transcribe one audio buffer via the non-streaming recognize() API.
 * @param audio - audio content matching this.request.config
 * @returns the raw recognize() responses
 */
async function transcribeAudio(audio){
  // attach the audio payload to the prepared request template
  this.request.audio = { content: audio };
  return await this.speechClient.recognize(this.request);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
this.ttsClient = new textToSpeech.TextToSpeechClient();
// Base Text-to-Speech request; textToAudioBuffer() adds the input text.
this.request = {
  // Select the language and SSML Voice Gender (optional)
  voice: {
    languageCode: 'en-US',
    ssmlGender: 'NEUTRAL'
  },
  audioConfig: {
    // LINEAR16 so the browser can play it back with decodeAudioData
    audioEncoding: 'LINEAR16', //'LINEAR16|MP3|AUDIO_ENCODING_UNSPECIFIED/OGG_OPUS'
  }
};
/**
 * Synthesize speech for the given text and return the raw audio bytes.
 * @param text - plain text (or SSML) to synthesize
 * @returns the audioContent buffer, encoded per this.request.audioConfig
 */
async function textToAudioBuffer(text) {
  this.request.input = { text: text }; // text or SSML
  // synthesizeSpeech resolves to an array; element 0 is the response
  const [response] = await this.ttsClient.synthesizeSpeech(this.request);
  return response.audioContent;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.