@hughrawlinson · Created October 23, 2023 16:33
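// index.ts — entry point. The file boundaries marked below are inferred
// from the gist's import paths.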
export * from "./Recorder";
export * from "./RecordingState";
export * from "./VoiceRecorderEventTarget";
export class MediaRecorderWithStorage {
  #mediaRecorder: MediaRecorder;
  #blobs: Blob[] = [];
  #blobOptions?: BlobPropertyBag;

  constructor(
    stream: MediaStream,
    mediaRecorderOptions: MediaRecorderOptions,
    blobOptions?: BlobPropertyBag
  ) {
    this.#mediaRecorder = new MediaRecorder(stream, mediaRecorderOptions);
    this.#blobOptions = blobOptions;
    this.#mediaRecorder.addEventListener("dataavailable", e => {
      this.#blobs.push(e.data);
    });
    // this.#mediaRecorder.addEventListener("error", event => {
    //   console.log(event);
    // });
    // Request a chunk every 1000 ms so data accumulates as we record.
    this.#mediaRecorder.start(1000);
  }
  stop(recordingHandler?: (recording: Blob) => void) {
    if (this.#mediaRecorder.state === "inactive") {
      // Already stopped: deliver the stored chunks straight away. Calling
      // stop() again would throw an InvalidStateError in some
      // implementations, and the "stop" event would never fire.
      recordingHandler?.(new Blob(this.#blobs, this.#blobOptions));
      return;
    }
    if (recordingHandler) {
      this.#mediaRecorder.addEventListener(
        "stop",
        () => recordingHandler(new Blob(this.#blobs, this.#blobOptions)),
        { once: true }
      );
    }
    this.#mediaRecorder.stop();
  }
}
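
// PreRecorder.ts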
import { MediaRecorderWithStorage } from "./MediaRecorderWithStorage";
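/**
 * Maintains a rolling pre-roll buffer by keeping two overlapping
 * MediaRecorderWithStorage instances on the same stream and "flipping" them
 * every `flipTime` ms: the older recorder is stopped and a fresh one is
 * started. hold() pauses flipping, so by the time release() is called the
 * oldest recorder still contains up to ~2 × flipTime of audio captured
 * before hold(), plus everything since.
 */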
export class PreRecorder {
  #holdRecording = false;
  #stream: MediaStream;
  #mediaRecorders: MediaRecorderWithStorage[];
  #mediaRecorderInitOptions: MediaRecorderOptions;
  #blobOptions: BlobPropertyBag | undefined;
  #recordingHandler: (blob: Blob) => void;
  #flipInterval: ReturnType<typeof setInterval>;

  constructor(
    flipTime: number,
    stream: MediaStream,
    onRecordingAvailable: (blob: Blob) => void,
    mediaRecorderOptions: MediaRecorderOptions,
    blobOptions?: BlobPropertyBag
  ) {
    this.#stream = stream;
    this.#mediaRecorderInitOptions = mediaRecorderOptions;
    this.#blobOptions = blobOptions;
    this.#mediaRecorders = [
      new MediaRecorderWithStorage(
        stream,
        this.#mediaRecorderInitOptions,
        this.#blobOptions
      ),
      new MediaRecorderWithStorage(
        stream,
        this.#mediaRecorderInitOptions,
        this.#blobOptions
      ),
    ];
    this.#recordingHandler = onRecordingAvailable;
    // Keep a handle to the flip loop so destroy() can stop it.
    this.#flipInterval = setInterval(() => {
      this.#flip();
    }, flipTime);
  }
  hold = () => {
    this.#holdRecording = true;
  };

  release = () => {
    this.#mediaRecorders[0].stop(recording => {
      this.#holdRecording = false;
      this.#recordingHandler(recording);
    });
  };

  destroy = () => {
    clearInterval(this.#flipInterval);
    this.#mediaRecorders.forEach(recorder => recorder.stop());
  };

  #flip = () => {
    if (this.#holdRecording) {
      return;
    }
    const oldRecorder = this.#mediaRecorders.shift();
    oldRecorder?.stop();
    this.#mediaRecorders.push(
      new MediaRecorderWithStorage(
        this.#stream,
        this.#mediaRecorderInitOptions,
        this.#blobOptions
      )
    );
  };
}
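
// Recorder.ts (name inferred from the "./Recorder" export in index.ts)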
import { PreRecorder } from "./PreRecorder";
import { RecordingState } from "./RecordingState";
import {
  typedVoiceRecorderEventTarget,
  ErrorEvent,
  RecordingStartedEvent,
  RecordingCompleteEvent,
} from "./VoiceRecorderEventTarget";
import vad from "./voice-activity-detector";

const OGG = "audio/ogg";
const WEBM = "audio/webm; codecs=opus";
const PCM = "audio/webm; codecs=pcm";
const UNSUPPORTED = "UNSUPPORTED";

// Pick the first container/codec combination this browser can record.
const AUDIO_CODEC = MediaRecorder.isTypeSupported(OGG)
  ? OGG
  : MediaRecorder.isTypeSupported(WEBM)
  ? WEBM
  : MediaRecorder.isTypeSupported(PCM)
  ? PCM
  : UNSUPPORTED;

// const CHUNK_SIZE = 500;
const FLIP_TIME = 500;
export class VoiceRecorder extends typedVoiceRecorderEventTarget {
  recording: RecordingState = RecordingState.new();
  #debounceTime = 1;
  #audioContext: AudioContext;
  #vadControl: ReturnType<typeof vad> | undefined;
  #mediaStream: MediaStream | undefined;
  #preRecorder: PreRecorder | undefined;
  #starts: number[] = [];
  #stops: number[] = [];
  // TODO: Allow device selection
  #devices: MediaDeviceInfo[];

  /**
   * Prepare a voice-activated audio recorder.
   *
   * In environments that require user permission for microphone access, you
   * must call `requestMicrophonePermission()` first.
   *
   * You must call `start()` to begin receiving events.
   *
   * @param debounceTime margin between the end of voice activity and the end of the recording, in seconds
   * @param audioContext an optional audio context. If omitted, one is created, but then you can't provide your own source
   */
  constructor(debounceTime?: number, audioContext?: AudioContext) {
    super();
    if (AUDIO_CODEC === UNSUPPORTED) {
      throw new Error(
        "MediaRecorder doesn't support any of the accepted audio codecs in this environment"
      );
    }
    this.#debounceTime = debounceTime ?? this.#debounceTime;
    this.#audioContext = audioContext ?? new AudioContext();
    this.#devices = [];
  }
  /**
   * Requests user permission to access the microphone.
   */
  requestMicrophonePermission = async () => {
    try {
      this.#mediaStream = await navigator.mediaDevices.getUserMedia({
        audio: true,
      });
      this.#devices = await navigator.mediaDevices.enumerateDevices();
      this.#preRecorder = new PreRecorder(
        FLIP_TIME,
        this.#mediaStream,
        recording => {
          if (RecordingState.isRecording(this.recording)) {
            this.dispatchEvent(
              new RecordingCompleteEvent(this.recording.startTime, recording)
            );
            this.recording = RecordingState.stopRecording(this.recording);
          }
        },
        { mimeType: AUDIO_CODEC },
        { type: AUDIO_CODEC }
      );
      this.#audioContext.resume();
      return;
    } catch (error) {
      this.dispatchEvent(new ErrorEvent(error));
    }
  };
  /**
   * In environments that restrict audio contexts from running without user
   * input, this function should be called in a user input handler like onClick.
   *
   * Otherwise, it can be called in non-handler code.
   */
  start = async (source?: MediaStream) => {
    const stream = source ?? this.#mediaStream;
    if (!stream) {
      throw new Error(
        "No media stream available: call requestMicrophonePermission() or pass a source"
      );
    }
    this.#vadControl = vad(this.#audioContext, stream, {
      onVoiceStart: this.#onVoiceStart,
      onVoiceStop: this.#onVoiceStop,
    });
    this.#vadControl.enable();
  };

  /**
   * Clean up all associated data
   */
  destroy = () => {
    this.#vadControl?.destroy();
    this.#preRecorder?.destroy();
    this.#audioContext.close();
  };
  #onVoiceStart = () => {
    this.#starts.push(this.#audioContext.currentTime);
    if (!this.#vadControl) {
      // We haven't got the prerequisites set up and can't start a recording
      return;
    }
    if (RecordingState.isRecording(this.recording)) {
      // A recording is already running, we can't start a new one
      return;
    }
    this.#startRecording();
  };

  #onVoiceStop = () => {
    const stopTime = this.#audioContext.currentTime;
    this.#stops.push(stopTime);
    // Wait out the debounce window; only stop if no new voice onset arrived.
    setTimeout(() => {
      const latestStart = this.#starts.at(-1);
      if (!latestStart) {
        // We've stopped before we've started
        return;
      }
      if (RecordingState.isStopped(this.recording)) {
        // We're not currently recording so we can't complete a recording
        return;
      }
      if (!this.#vadControl) {
        // We haven't set up our voice activity detector yet
        return;
      }
      if (latestStart >= stopTime) {
        // We've had a voice onset since the stop was triggered, so we
        // shouldn't stop the recording
        return;
      }
      this.#stopRecording();
    }, this.#debounceTime * 1000);
  };
  #startRecording = () => {
    if (!this.#mediaStream) {
      throw new Error("Media stream is gone, can no longer record");
    }
    if (RecordingState.isStopped(this.recording)) {
      this.#preRecorder?.hold();
      const recordingStartEvent = new RecordingStartedEvent();
      this.recording = RecordingState.startRecording(
        this.recording,
        recordingStartEvent.startTime
      );
      this.dispatchEvent(recordingStartEvent);
    }
  };

  #stopRecording = () => {
    if (!this.#preRecorder) {
      throw new Error("There's no running recorder, can't stop a recording");
    }
    if (RecordingState.isRecording(this.recording)) {
      this.#preRecorder.release();
    }
  };
}
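
// RecordingState.ts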
interface Recording {
  state: "recording";
  startTime: number;
}

interface NotRecording {
  state: "not-recording";
}

export type RecordingState = Recording | NotRecording;

export const RecordingState = {
  new: (): NotRecording => {
    return { state: "not-recording" };
  },
  startRecording: (
    recordingState: NotRecording,
    startTime?: number
  ): Recording => {
    if (!RecordingState.isStopped(recordingState)) {
      throw new Error("Could not start recording, recording already running");
    }
    return {
      state: "recording",
      startTime: startTime ?? Date.now(),
    };
  },
  stopRecording: (recordingState: Recording): NotRecording => {
    if (!RecordingState.isRecording(recordingState)) {
      throw new Error("Could not stop recording, recording not running");
    }
    return RecordingState.new();
  },
  isRecording: (
    recordingState: RecordingState
  ): recordingState is Recording => {
    return recordingState.state === "recording";
  },
  isStopped: (
    recordingState: RecordingState
  ): recordingState is NotRecording => {
    return recordingState.state === "not-recording";
  },
};
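
// voice-activity-detector.ts — vendored third-party detector. It calibrates
// a noise floor for `noiseCaptureDuration` ms, then on every animation frame
// compares the average spectral energy in the 85–1000 Hz voice band against
// that floor, using a hysteresis counter to debounce state changes.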
// @ts-nocheck
// https://github.com/wjw12/voice-activity-detector/blob/main/src/voice-activity-detector.js
'use strict';

function clamp(value, min, max) {
  return min < max
    ? (value < min ? min : value > max ? max : value)
    : (value < max ? max : value > min ? min : value)
}

// Map a frequency in Hz to the nearest FFT bin index.
function frequencyToIndex(frequency, sampleRate, frequencyBinCount) {
  var nyquist = sampleRate / 2
  var index = Math.round(frequency / nyquist * frequencyBinCount)
  return clamp(index, 0, frequencyBinCount)
}

// Mean normalized magnitude across the bins covering [minHz, maxHz].
function fourierAverage(frequencies, minHz, maxHz, sampleRate, binCount) {
  var start = frequencyToIndex(minHz, sampleRate, binCount)
  var end = frequencyToIndex(maxHz, sampleRate, binCount)
  var count = end - start
  var sum = 0
  for (; start < end; start++) {
    sum += frequencies[start] / 255.0
  }
  return count === 0 ? 0 : (sum / count)
}
const vad = function(audioContext, stream, opts) {
  opts = opts || {};

  var defaults = {
    fftSize: 512,
    bufferLen: 512,
    smoothingTimeConstant: 0.8,
    minCaptureFreq: 85, // in Hz
    maxCaptureFreq: 1000, // in Hz
    noiseCaptureDuration: 1000, // in ms
    minNoiseLevel: 0.3, // from 0 to 1
    maxNoiseLevel: 0.7, // from 0 to 1
    avgNoiseMultiplier: 1.1,
    onVoiceStart: function() {},
    onVoiceStop: function() {},
    onUpdate: function(val) {}
  };

  var options = {};
  for (var key in defaults) {
    options[key] = opts.hasOwnProperty(key) ? opts[key] : defaults[key];
  }

  var baseLevel = 0;
  var voiceScale = 1;
  var activityCounter = 0;
  var activityCounterMin = 0;
  var activityCounterMax = 30;
  var activityCounterThresh = 5;

  var envFreqRange = [];
  var isNoiseCapturing = true;
  var prevVadState = undefined;
  var vadState = false;
  var captureTimeout = null;

  var source = audioContext.createMediaStreamSource(stream);
  var analyser = audioContext.createAnalyser();
  analyser.smoothingTimeConstant = options.smoothingTimeConstant;
  analyser.fftSize = options.fftSize;

  var frequencies = new Uint8Array(analyser.frequencyBinCount);
  connect();

  var raf = null;
  function processVAD() {
    analyser.getByteFrequencyData(frequencies);
    var average = fourierAverage(frequencies, options.minCaptureFreq, options.maxCaptureFreq, analyser.context.sampleRate, analyser.frequencyBinCount);

    if (isNoiseCapturing) {
      envFreqRange.push(average);
      raf = requestAnimationFrame(processVAD);
      return;
    }

    // Hysteresis: the counter rises while the signal is above the noise
    // floor and falls while below it, so brief spikes or dips don't toggle
    // the voice state.
    if (average >= baseLevel && activityCounter < activityCounterMax) {
      activityCounter++;
    } else if (average < baseLevel && activityCounter > activityCounterMin) {
      activityCounter--;
    }
    vadState = activityCounter > activityCounterThresh;

    if (prevVadState !== vadState) {
      vadState ? onVoiceStart() : onVoiceStop();
      prevVadState = vadState;
    }

    options.onUpdate(Math.max(0, average - baseLevel) / voiceScale);

    raf = requestAnimationFrame(processVAD);
  }

  if (isNoiseCapturing) {
    console.log('VAD: start noise capturing');
    captureTimeout = setTimeout(init, options.noiseCaptureDuration);
  }
  function init() {
    console.log('VAD: stop noise capturing');
    isNoiseCapturing = false;
    captureTimeout = null;

    envFreqRange = envFreqRange.filter(function(val) {
      return val;
    }).sort();
    // Despite the name, this takes the *minimum* captured level; the
    // multiplier and the min/max clamps below turn it into the noise floor.
    var averageEnvFreq = envFreqRange.length ? envFreqRange.reduce(function (p, c) { return Math.min(p, c) }, 1) : (options.minNoiseLevel || 0.1);

    baseLevel = averageEnvFreq * options.avgNoiseMultiplier;
    if (options.minNoiseLevel && baseLevel < options.minNoiseLevel) baseLevel = options.minNoiseLevel;
    if (options.maxNoiseLevel && baseLevel > options.maxNoiseLevel) baseLevel = options.maxNoiseLevel;

    voiceScale = 1 - baseLevel;

    console.log('VAD: base level:', baseLevel);
  }
  function connect() {
    source.connect(analyser);
  }

  function disconnect() {
    analyser.disconnect();
    source.disconnect();
  }

  function destroy() {
    // Also stop the animation-frame loop; the original only cleared the
    // capture timeout, leaving processVAD running after teardown.
    disable();
    disconnect();
  }

  function enable() {
    !raf && processVAD();
  }

  function disable() {
    raf && cancelAnimationFrame(raf);
    // Reset the handle so a later enable() can restart the loop.
    raf = null;
    captureTimeout && clearTimeout(captureTimeout);
    envFreqRange = [];
  }

  function onVoiceStart() {
    options.onVoiceStart();
  }

  function onVoiceStop() {
    options.onVoiceStop();
  }

  return {enable: enable, disable: disable, destroy: destroy};
};

export default vad;
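
// VoiceRecorderEventTarget.ts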
export class ErrorEvent extends Event {
  error;
  constructor(error: unknown) {
    super("error");
    this.error = error;
  }
}

export class RecordingStartedEvent extends Event {
  startTime: number;
  constructor() {
    super("recordingStarted");
    this.startTime = Date.now();
  }
}

export class RecordingCompleteEvent extends Event {
  recording: Blob;
  startTime: number;
  endTime: number;
  constructor(startTime: number, file: Blob) {
    super("recordingComplete");
    this.startTime = startTime;
    this.endTime = Date.now();
    this.recording = file;
  }
}

interface VoiceRecorderEvent {
  recordingStarted: RecordingStartedEvent;
  recordingComplete: RecordingCompleteEvent;
  // VoiceRecorder also dispatches ErrorEvent, so include it in the typed map.
  error: ErrorEvent;
}

type VoiceRecorderEventHandler<K extends keyof VoiceRecorderEvent> = (
  event: VoiceRecorderEvent[K]
) => void;

interface VoiceRecorderEventTarget extends EventTarget {
  addEventListener<K extends keyof VoiceRecorderEvent>(
    type: K,
    callback: VoiceRecorderEventHandler<K> | null,
    options?: boolean | AddEventListenerOptions | undefined
  ): void;
  addEventListener(
    type: string,
    callback: EventListenerOrEventListenerObject | null,
    options?: EventListenerOptions | boolean
  ): void;
}

// https://dev.to/43081j/strongly-typed-event-emitters-using-eventtarget-in-typescript-3658
export const typedVoiceRecorderEventTarget = EventTarget as {
  new (): VoiceRecorderEventTarget;
};
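
// example-usage.ts — illustrative sketch, not part of the original gist.
// Shows one way to wire VoiceRecorder into a page. The "#record" button id
// and the import path are assumptions; the event names and payload fields
// come from the classes above.
import { VoiceRecorder } from "./Recorder";

const recorder = new VoiceRecorder(1.5); // stop 1.5 s after speech ends

recorder.addEventListener("recordingStarted", event => {
  console.log("voice detected at", event.startTime);
});

recorder.addEventListener("recordingComplete", event => {
  // event.recording is a Blob in whichever codec the browser negotiated
  const url = URL.createObjectURL(event.recording);
  console.log(`clip ${event.startTime}–${event.endTime}:`, url);
});

recorder.addEventListener("error", event => {
  console.error("recorder error:", event.error);
});

// Browsers gate microphone access and AudioContext resumption behind a
// user gesture, so do the setup in a click handler.
document.querySelector("#record")?.addEventListener("click", async () => {
  await recorder.requestMicrophonePermission();
  await recorder.start();
});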