Skip to content

Instantly share code, notes, and snippets.

@hrishioa
Created July 25, 2023 15:02
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hrishioa/0aa300edbf262116a7ea1bb550ebff3d to your computer and use it in GitHub Desktop.
Save hrishioa/0aa300edbf262116a7ea1bb550ebff3d to your computer and use it in GitHub Desktop.
Script for combining the output of diart and Whisper into a single speaker-labelled SRT file
// Hrishi Olickel
// https://olickel.com
// 25 July 2023
const fs = require('fs');
const readline = require('readline');
// Lookup table filled while the RTTM file is read: each key is a speaker
// turn's start time (seconds, stored as a toFixed(3) string) and each value is
// the display name of the speaker who starts talking at that time.
const speakers = {}; // Hold start time and speaker
// Display names, indexed by the number that follows "speaker" in the RTTM
// speaker label (e.g. index 0 is used for a label of "speaker0").
const speakerNames = ['Hrishi', 'Nick']; // Speaker names array
// Input: RTTM file that the speaker turns are read from.
const rttmFilename = 'podcastPart1.rttm';
// Input: SRT subtitle file whose dialogue lines get speaker names.
const srtFilename = 'podcastPart1.srt';
// Output: the merged SRT file, written with speaker names prefixed to dialogue.
const outputSrtFilename = 'podcastPart1Annotated.srt';
// Stream the RTTM file line by line. Each usable line contributes one entry to
// `speakers`, mapping the turn's start time to the speaker's display name.
const readRTTM = readline.createInterface({
  input: fs.createReadStream(rttmFilename),
  output: process.stdout,
  terminal: false
});

/**
 * Parses one RTTM line and records its speaker turn.
 *
 * Fields are whitespace separated. Field 3 is read as the turn's start time in
 * seconds and field 7 as the speaker label ("speaker<N>"), where N indexes into
 * `speakerNames`. Blank, truncated or non-numeric lines are logged and skipped
 * instead of crashing the script or polluting `speakers` with a "NaN" key.
 *
 * @param {string} line - One raw line from the RTTM file.
 */
readRTTM.on('line', function(line) {
  console.log('Reading RTTM line: ', line);

  // Split on runs of whitespace so tabs or repeated spaces do not shift the
  // field positions.
  const lineParts = line.trim().split(/\s+/);
  if (lineParts.length < 8) {
    console.log('Skipping malformed RTTM line: ', line);
    return;
  }

  const timeStart = parseFloat(lineParts[3]);
  const speakerIndex = parseInt(lineParts[7].replace('speaker', ''), 10);
  if (Number.isNaN(timeStart) || Number.isNaN(speakerIndex)) {
    console.log('Skipping malformed RTTM line: ', line);
    return;
  }

  // An index outside `speakerNames` yields undefined, which leaves the
  // matching subtitle cues unlabelled.
  const speaker = speakerNames[speakerIndex];
  speakers[timeStart.toFixed(3)] = speaker;
  console.log('Mapped speaker: ', speaker, ' to time start: ', timeStart.toFixed(3));
});
// Once the whole RTTM file has been read, stream the SRT file and write an
// annotated copy in which the first text line of every cue is prefixed with
// the name of the speaker active at the cue's start time.
readRTTM.on('close', function() {
  console.log('Finished reading RTTM file');

  const readSRT = readline.createInterface({
    input: fs.createReadStream(srtFilename),
    output: process.stdout,
    terminal: false
  });
  const writeStream = fs.createWriteStream(outputSrtFilename);

  // Speaker waiting to be prefixed onto the next non-blank line. It is set when
  // a timestamp line is seen and cleared after use, so only the first text
  // line of a cue is labelled.
  let currentSpeaker = '';

  readSRT.on('line', function(line) {
    console.log('Reading SRT line: ', line);

    // Build the output in a separate variable rather than reassigning the
    // handler's parameter.
    let outputLine = line;
    const match = line.match(/(\d{2}:\d{2}:\d{2},\d{3}) --> (\d{2}:\d{2}:\d{2},\d{3})/);

    if (match) {
      // Timestamp line: look up who is speaking when the cue starts.
      const timeStart = timeToSeconds(match[1]);
      console.log('Matched SRT timestamp, time start: ', timeStart);
      currentSpeaker = findSpeaker(timeStart);
      console.log('Found speaker: ', currentSpeaker, ' for time start: ', timeStart);
    } else if (currentSpeaker && line.trim().length > 0) {
      console.log('Inserting speaker name into dialogue: ', currentSpeaker);
      outputLine = `${currentSpeaker}: ${line}`;
      currentSpeaker = '';
    }

    console.log('Writing line to merged file: ', outputLine);
    writeStream.write(`${outputLine}\n`);
  });

  readSRT.on('close', function() {
    console.log('Finished reading SRT file');
    // Flush pending writes and release the output file handle.
    writeStream.end();
  });
});
/**
 * Converts an SRT-style timestamp ("HH:MM:SS,mmm") into seconds.
 *
 * @param {string} time - Timestamp whose fields are separated by ':' and whose
 *   fractional seconds follow a comma.
 * @returns {number} The timestamp as a (possibly fractional) number of seconds.
 */
function timeToSeconds(time) {
  const [hourField, minuteField, secondField] = time.split(':');
  console.log('Converting time to seconds: ', time);

  const hourSeconds = parseFloat(hourField) * 3600;
  const minuteSeconds = parseFloat(minuteField) * 60;
  // SRT uses a decimal comma; parseFloat needs a decimal point.
  const seconds = parseFloat(secondField.replace(',', '.'));

  return hourSeconds + minuteSeconds + seconds;
}
/**
 * Returns the speaker who is talking at the given time.
 *
 * The answer is the speaker of the latest turn in `speakers` whose start time
 * is at or before `timeStart`. This includes the final turn, so cues after the
 * last recorded turn start are attributed to the last speaker. A time earlier
 * than every turn is attributed to the earliest turn's speaker, the nearest
 * one available.
 *
 * @param {number} timeStart - Time to look up, in seconds.
 * @returns {string|undefined} The speaker's name, or undefined when no turns
 *   have been recorded (or the matching turn has no name stored).
 */
function findSpeaker(timeStart) {
  console.log('Searching for speaker for time start: ', timeStart);

  // Sort numerically so the lookup does not depend on the order in which the
  // turns were inserted; drop keys that are not valid numbers.
  const turns = Object.keys(speakers)
    .map(key => ({ start: parseFloat(key), speaker: speakers[key] }))
    .filter(turn => !Number.isNaN(turn.start))
    .sort((a, b) => a.start - b.start);

  if (turns.length === 0) {
    console.log('Speaker not found for time start: ', timeStart);
    return undefined;
  }

  // Walk forward while turns start at or before the requested time; the last
  // one reached is the active turn.
  let activeTurn = turns[0];
  for (const turn of turns) {
    if (turn.start > timeStart) {
      break;
    }
    activeTurn = turn;
  }

  console.log('Found speaker: ', activeTurn.speaker, ' for time start: ', timeStart);
  return activeTurn.speaker;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment