Skip to content

Instantly share code, notes, and snippets.

@lmcarreiro
Created July 29, 2021 21:25
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Save lmcarreiro/db2dbbb5d06c6170ae31434f3bf873e3 to your computer and use it in GitHub Desktop.
STT+VAD article - useSpeechToText.diff
+ const BUFFER_SECONDS = 2;
export default function useSpeechToText(
speechToTextEnabled: boolean,
muted: boolean,
newMessage: (message: { text: string; isFinal: boolean }) => void,
) {
+ const bufferBlocks = React.useRef<{ duration: number; bytes: ArrayBufferLike }[]>([]);
// ...
- // Control the streaming flag, based on the voice activity detection (that uses hark) and the mute/unmute flag
- React.useEffect(() => {
- if (shouldStream) {
- console.log("Voice activity detected, starting streaming current buffer + live streaming...");
- streamingFlagRef.current = true;
- } else {
- console.log("No voice activity detected, stopped streaming.");
- const timeout = setTimeout(() => {
- streamingFlagRef.current = false;
- }, 2_000);
-
- return () => clearInterval(timeout);
- }
- }, [shouldStream]);
+ // Control the streaming flag, based on the voice activity detection (that uses hark) and the mute/unmute flag
+ React.useEffect(() => {
+ if (shouldStream) {
+ if (!streamingFlagRef.current) {
+ console.log("Voice activity detected, starting streaming current buffer + live streaming...");
+ streamingFlagRef.current = true;
+ }
+ } else {
+ if (!streamingFlagRef.current) return;
+
+ console.log("Stop detecting voice activity, will stop streaming in 2 seconds...");
+
+ const stopStreamingTimer = setTimeout(() => {
+ console.log("Stopped streaming after 2 seconds without voice activity.");
+ streamingFlagRef.current = false;
+ }, 2_000);
+
+ return () => {
+ if (streamingFlagRef.current) {
+ console.log("Voice activity detected, continue streaming...");
+ }
+
+ clearTimeout(stopStreamingTimer);
+ };
+ }
+ }, [shouldStream]);
// ...
const onAudioProcess = (ev: AudioProcessingEvent) => {
const block = {
duration: ev.inputBuffer.duration,
bytes: convertFloat32ToInt16(ev.inputBuffer.getChannelData(0)),
};
- if (streamingFlagRef.current) {
- pushStream.write(block.bytes);
- }
+ // If not streaming, keep the current audio in the buffer to be sent when streaming starts
+ if (!streamingFlagRef.current) {
+ const totalDuration = bufferBlocks.current.reduce((sum, curr) => sum + curr.duration, 0);
+
+ if (totalDuration >= BUFFER_SECONDS) {
+ bufferBlocks.current.shift();
+ }
+
+ bufferBlocks.current.push(block);
+ }
+ // If streaming, first flush any buffered audio, then send the current live audio block
+ else {
+ while (bufferBlocks.current.length) {
+ pushStream.write(bufferBlocks.current.shift()!.bytes);
+ }
+
+ pushStream.write(block.bytes);
+ }
};
// ...
return () => {
console.log("############## stop()");
recognizer.stopContinuousRecognitionAsync();
running.current = false;
+ bufferBlocks.current = [];
processor.removeEventListener("audioprocess", onAudioProcess);
processor.disconnect(output);
input.disconnect(processor);
context.close();
};
// ...
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment