Created
August 14, 2023 16:44
-
-
Save tayiorbeii/9d8f5f9b1d0abc769cb82049584b5f46 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import "@johnlindquist/kit" | |
// Name: diarize-srt-john | |
// Collect the dropped inputs: one file whose name contains ".srt" and one
// whose name contains "diarization".
let files = await drop()
let srtFile = files.find(x => x.name.includes('.srt'))
let diarizationFile = files.find(x => x.name.includes('diarization'))
// Fail with a readable message instead of a TypeError on `undefined` when
// one of the two inputs was not dropped.
if (!srtFile || !diarizationFile) {
  throw new Error(
    "Expected two dropped files: an .srt file and a diarization file"
  )
}
// Base name for the generated markdown file.
let commonName = diarizationFile.name.replace('.srt', '')
// Work from the directory that holds the .srt file so the relative output
// path written later resolves next to it.
await cwd(srtFile.path.replace(srtFile.name, ""))
let contents = await readFile(diarizationFile.path, "utf-8")
// Normalize CRLF line endings so the newline-based splitting below behaves
// the same for files saved on Windows.
let lines = contents.replaceAll("\r\n", "\n").split("\n")
let srtContents = await readFile(srtFile.path, "utf-8")
// SRT entries are separated by a blank line.
let srtLines = srtContents.replaceAll("\r\n", "\n").split("\n\n")
// Step 1. Create blocks of speaker times
let sections = []

/**
 * Convert a timestamp such as "00:03:01.752" or "00:06:17,520" into
 * milliseconds since the start of the recording.
 *
 * Working in real milliseconds keeps differences between two timestamps
 * meaningful across minute and hour boundaries; simply concatenating the
 * digits preserves ordering but not distances.
 *
 * @param {string} ts - Timestamp as `[hh:]mm:ss.fff` or `[hh:]mm:ss,fff`.
 * @returns {number} Milliseconds, or NaN when `ts` holds no timestamp.
 */
let timestampToNumber = ts => {
  let match = ts.match(/(?:(\d+):)?(\d+):(\d+)[.,](\d+)/)
  if (!match) {
    return Number.NaN
  }
  let [, hours = "0", minutes, seconds, fraction] = match
  // Interpret the fraction as milliseconds regardless of how many digits
  // were written (".5" -> 500, ".7523" -> 752).
  let millis = Number(fraction.padEnd(3, "0").slice(0, 3))
  let totalSeconds =
    (Number(hours) * 60 + Number(minutes)) * 60 + Number(seconds)
  return totalSeconds * 1000 + millis
}
// Speaker label of the most recently processed diarization line; used to
// decide whether the next line continues the current section.
let prevSpeaker = ""

for (let line of lines) {
  // Sample line:
  // [ 00:03:01.752 -->  00:03:47.230] CW SPEAKER_01
  let match = line.match(/\s(\d.+\d)\s.+\s(\d.+\d)\].+(SPEAKER.+)/)
  // Blank lines (such as the one produced by a trailing newline) and any
  // other line that is not a diarization entry have nothing to parse; skip
  // them rather than crash on a null match.
  if (!match) {
    continue
  }

  let [, start, stop, rawSpeaker] = match
  // Labels are collapsed to two speakers: anything ending in "0" is
  // Speaker_0, everything else is Speaker_1. Trailing whitespace is removed
  // first so it cannot hide the final digit.
  let speaker = rawSpeaker.trimEnd().endsWith("0") ? "Speaker_0" : "Speaker_1"
  let startAsNumber = timestampToNumber(start)
  let stopAsNumber = timestampToNumber(stop)
  let duration = stopAsNumber - startAsNumber

  let prevSection = sections.at(-1)
  if (
    speaker === prevSpeaker &&
    startAsNumber > prevSection?.stopAsNumber
  ) {
    // Same speaker resuming after the previous section ended: extend that
    // section instead of starting a new one.
    // NOTE(review): duration becomes that of this latest line, not of the
    // whole merged span - confirm that is intended.
    prevSection.stop = stop
    prevSection.stopAsNumber = stopAsNumber
    prevSection.duration = duration
  } else {
    if (prevSection) {
      // Make the previous section end exactly where the new one begins.
      prevSection.stopAsNumber = startAsNumber
    }
    sections.push({
      speaker,
      duration,
      start,
      stop,
      startAsNumber,
      stopAsNumber,
      words: "",
    })
  }

  prevSpeaker = speaker
}
// Log the start/stop timestamps of every speaker section.
for (let { start, stop } of sections) {
  log({ start, stop })
}
// Step 2. Parse words into their timestamps
let wordRanges = []

// Sample SRT entry:
// 857
// 00:06:17,520 --> 00:06:18,400
// Neo4j,
for (let entry of srtLines) {
  let match = entry.match(/.*\n(\d.*\d)\s.*\s(\d.*)\n(.*)/)
  // An empty chunk (for example after a trailing blank line) or a malformed
  // entry yields no match; skip it rather than crash on a null match.
  if (!match) {
    continue
  }

  let [, start, stop, word] = match
  wordRanges.push({
    word,
    start,
    stop,
    startAsNumber: timestampToNumber(start),
    stopAsNumber: timestampToNumber(stop),
  })
}
// Step 3. Spread words into speaker blocks
// Index of the section currently receiving words. It only moves forward, so
// words are assigned in the order they appear.
let sectionIndex = 0

for (let range of wordRanges) {
  let section = sections.at(sectionIndex)
  // With no sections there is nowhere to put any word.
  if (!section) {
    break
  }
  let nextSection = sections.at(sectionIndex + 1)

  // Skip over very short sections, but never past the last one.
  // nextSection is refreshed on every step so the loop cannot run off the
  // end of the list and the check below compares against the section that
  // really follows the current one.
  while (section.duration < 1000 && nextSection) {
    sectionIndex++
    section = nextSection
    nextSection = sections.at(sectionIndex + 1)
  }

  // Hand over to the next section when this word ends after that section
  // has started, begins with a capital letter, and does not itself end
  // with sentence-final punctuation.
  while (
    nextSection?.startAsNumber &&
    range.stopAsNumber > nextSection.startAsNumber &&
    /^[A-Z]/.test(range.word.trim()) &&
    !/[\.\?!]$/.test(range.word)
  ) {
    sectionIndex++
    section = sections.at(sectionIndex)
    nextSection = sections.at(sectionIndex + 1)
  }

  section.words += range.word
}
// Render every section as a level-2 heading ("speaker start - stop")
// followed by that section's words, with a blank line between sections.
let markdown = sections
  .map(
    ({ speaker, start, stop, words }) =>
      `## ${speaker} ${start} - ${stop}\n${words.trim()}`
  )
  .join("\n\n")

// Written relative to the current working directory.
await writeFile(`${commonName}.md`, markdown)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment