Last active
June 1, 2024 15:36
-
-
Save Overemployed/6c9e9a7ac0efc0324adda1d047e925dc to your computer and use it in GitHub Desktop.
Listen for a live recording and transcribe the file. Send a notification when YOUR_NAME is called
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// --- Dependencies -----------------------------------------------------------
// Node built-ins
const pathParser = require("path");
const { Readable } = require("stream");
// Third-party packages
const chokidar = require('chokidar');
const vosk = require('vosk');
const wav = require("wav");
const { notify } = require('node-notifier');
const TailingReadableStream = require('tailing-stream');
// const Speaker = require('speaker');

// --- Configuration ----------------------------------------------------------
// All runtime configuration is read from environment variables:
//   MODEL_PATH - passed to the vosk.Model constructor
//   HOME       - base directory; recordings are watched under `${HOME}/recordings/`
//   YOUR_NAME  - the word searched for in each transcript
//   FILE_GLOB  - glob (relative to the recordings directory) of files to watch
const { MODEL_PATH, HOME, YOUR_NAME, FILE_GLOB } = process.env;
console.log(MODEL_PATH, HOME, YOUR_NAME, FILE_GLOB);

// Fail fast on missing configuration. Without this check an unset variable is
// interpolated as the literal string "undefined", so the script would silently
// watch a path that never matches or alert on the word "undefined".
{
  const missingVars = Object.entries({ MODEL_PATH, HOME, YOUR_NAME, FILE_GLOB })
    .filter(([, value]) => !value)
    .map(([key]) => key);
  if (missingVars.length > 0) {
    throw new Error(`Missing required environment variable(s): ${missingVars.join(', ')}`);
  }
}

const recordingsDir = `${HOME}/recordings/`;
const model = new vosk.Model(MODEL_PATH);
const listenPath = `${recordingsDir}${FILE_GLOB}`;
console.log('listenPath', listenPath);
const watcher = chokidar.watch(listenPath, { persistent: true });

// Tailing read streams currently open, keyed by the watched file path.
const audioStreams = {};
// While true, 'add' events are ignored; cleared by a timer shortly after launch.
let startup = true;
/**
 * Case-insensitive check for whether a transcript mentions a name.
 *
 * @param {string|undefined} transcript - Text produced by the recognizer.
 * @param {string|undefined} name - Name to look for.
 * @returns {boolean} true when both values are non-empty and `transcript`
 *   contains `name`, ignoring letter case.
 */
function mentionsName(transcript, name) {
  if (!transcript || !name) return false;
  // Speech models commonly emit lowercase text, so a capitalized name would
  // never match with a case-sensitive comparison.
  return transcript.toLowerCase().includes(name.toLowerCase());
}

watcher
  // A new recording appeared: tail it, decode the WAV stream and transcribe it.
  .on('add', (path) => {
    if (startup) {
      console.log('skipping due to startup ');
      return;
    }

    const file = pathParser.parse(path);
    // expects files to be named with J prefix eg. J2-blah-blah.wav
    const [jHost] = file.name.split('-');
    console.log(`New recording ${jHost} ${file.base}`);

    const wfReader = new wav.Reader();
    const wfReadable = new Readable().wrap(wfReader);
    // nodejs fs module will stop when it gets to EOF, so we use
    // TailingReadableStream to follow the tail of the file
    const stream = TailingReadableStream.createReadStream(path, { timeout: 0, highWaterMark: 4096 });

    // Stops following the file and forgets it, unless a newer stream has
    // already been registered under the same path.
    const stopTailing = () => {
      stream.destroy();
      if (audioStreams[path] === stream) delete audioStreams[path];
    };

    console.log(`about to setup ${jHost} ${file.base}`);
    wfReader.on('format', async (format) => {
      const { audioFormat, sampleRate, channels } = format;

      // Validate BEFORE allocating the recognizer so a rejected file does not
      // leak it, and stop tailing a file that will never be consumed.
      if (audioFormat !== 1 || channels !== 1) {
        console.error("Audio file must be WAV format mono PCM.");
        stopTailing();
        return;
      }

      const rec = new vosk.Recognizer({ model, sampleRate });
      // const speaker = new Speaker(format)
      try {
        // NOTE(review): this loop only finishes when wfReadable ends or errors;
        // confirm that destroying the tailing stream actually ends it.
        for await (const data of wfReadable) {
          const endOfSpeech = await rec.acceptWaveformAsync(data);
          if (!endOfSpeech) continue;

          // NOTE(review): partialResult() is kept to preserve existing
          // behavior; confirm whether result() is the intended call here.
          const { partial } = rec.partialResult();
          console.log(partial);
          rec.reset();

          if (mentionsName(partial, YOUR_NAME)) {
            notify({
              message: `${jHost} said your name`,
              sound: 'Glass',
            });
            // hear the output
            // wfReader.pipe(speaker)
          }
        }
      } catch (err) {
        // This runs inside an event handler, so nothing awaits the promise;
        // log here rather than leaving an unhandled rejection.
        console.error(`Transcription failed for ${file.base}`, err);
      } finally {
        // Always release the recognizer, even when the loop throws.
        rec.free();
      }
    });

    // An unhandled 'error' event would take down the whole watcher process.
    stream.on('error', (err) => {
      console.error(`Read error on ${file.base}`, err);
      stopTailing();
    });

    stream.pipe(wfReader);
    audioStreams[path] = stream;
  })
  // file changed, likely finished with the file
  .on('change', (path) => {
    const stream = audioStreams[path];
    if (!stream) return;
    stream.destroy();
    // Drop the reference so finished recordings do not accumulate in the map.
    delete audioStreams[path];
  });
// End the startup grace period five seconds after launch. While `startup` is
// true the 'add' handler logs and returns without processing the file
// (presumably so recordings already on disk at launch are not transcribed).
const endStartupPhase = () => {
  startup = false;
};
setTimeout(endStartupPhase, 5000);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This script listens for changes in your recordings directory and transcribes WAV files as they are being written. It listens for YOUR_NAME and pops up an alert telling you which job said your name.
Install dependencies:
You'll also need to save the model to the model directory in the same directory as transcribe.js. Find the models here
Record audio using timemachine. I have a custom build using PCM 16 bit audio as the output which would be required for this to work properly.
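File names must follow a particular format so the script can identify which job is saying your name: the job identifier comes first, followed by a hyphen (e.g. J2-blah-blah.wav).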
Run like this