Skip to content

Instantly share code, notes, and snippets.

@tayiorbeii
Created August 14, 2023 16:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tayiorbeii/9d8f5f9b1d0abc769cb82049584b5f46 to your computer and use it in GitHub Desktop.
Save tayiorbeii/9d8f5f9b1d0abc769cb82049584b5f46 to your computer and use it in GitHub Desktop.
import "@johnlindquist/kit"
// Name: diarize-srt-john

// Ask the user to drop two files: the transcript (.srt) and the diarization output.
let files = await drop()

// Pick the diarization file first and exclude it from the transcript search, so a
// diarization file whose name also contains ".srt" is never mistaken for the transcript.
let diarizationFile = files.find(x => x.name.includes('diarization'))
let srtFile = files.find(
  x => x !== diarizationFile && x.name.includes('.srt')
)
if (!srtFile || !diarizationFile) {
  // Fail with an actionable message instead of a TypeError on `undefined.name` below.
  throw new Error(
    'Expected two dropped files: a ".srt" transcript and a file with "diarization" in its name'
  )
}

// Base name of the generated markdown file.
// NOTE(review): this is derived from the diarization file's name, not the
// transcript's — confirm that is the intended output name.
let commonName = diarizationFile.name.replace('.srt', '')

// Work from the folder that contains the transcript.
await cwd(srtFile.path.replace(srtFile.name, ""))

// Diarization output: one entry per line.
let contents = await readFile(diarizationFile.path, "utf-8")
let lines = contents.split("\n")

// Transcript: cues are separated by a blank line.
let srtContents = await readFile(srtFile.path, "utf-8")
let srtLines = srtContents.split("\n\n")
// Step 1. Create blocks of speaker times
let sections = []

/**
 * Convert a timestamp such as "00:03:01.752" (or SRT-style "00:06:17,520")
 * into a number of milliseconds.
 *
 * The numeric groups are read right-to-left as milliseconds, seconds, minutes
 * and hours, so a timestamp without an hours group is still handled. Using real
 * milliseconds (rather than the digits glued together) keeps subtraction
 * meaningful across minute/hour boundaries, which the duration checks rely on.
 *
 * @param {string} ts - Timestamp text containing the numeric groups.
 * @returns {number} Milliseconds, or NaN when `ts` contains no digits.
 */
let timestampToNumber = ts => {
  let groups = ts.match(/\d+/g)
  if (!groups) {
    return NaN
  }
  let [millis = 0, seconds = 0, minutes = 0, hours = 0] = groups
    .map(Number)
    .reverse()
  return ((hours * 60 + minutes) * 60 + seconds) * 1000 + millis
}

// State carried from one diarization line to the next while building sections.
let prevSpeaker = ""
let prevStartAsNumber = 0
let prevStopAsNumber = 0
// Turn every diarization line into (or merge it into) a speaker section.
// Expected line shape:
// [ 00:03:01.752 --> 00:03:47.230] CW SPEAKER_01
lines.forEach(line => {
  let match = line.match(
    /\s(\d.+\d)\s.+\s(\d.+\d)\].+(SPEAKER.+)/
  )
  // Blank or malformed lines (for example the empty string left behind by a
  // trailing newline) have nothing to contribute; skip them instead of
  // crashing on a null match.
  if (!match) {
    return
  }

  let [, start, stop, rawSpeaker] = match
  // Only two speakers are distinguished: labels ending in "0" and everything else.
  // trimEnd() keeps trailing whitespace from hiding the final digit.
  let speaker = rawSpeaker.trimEnd().endsWith("0")
    ? "Speaker_0"
    : "Speaker_1"

  let startAsNumber = timestampToNumber(start)
  let stopAsNumber = timestampToNumber(stop)
  let duration = stopAsNumber - startAsNumber

  // Disabled filter for very short / overlapping segments:
  // if (duration < 500 || startAsNumber < prevStopAsNumber) {
  // return
  // }

  let prevSection = sections.at(-1)

  if (
    speaker === prevSpeaker &&
    startAsNumber > prevSection?.stopAsNumber
  ) {
    // Same speaker continuing after the previous section ended: extend it.
    prevSection.stop = stop
    prevSection.stopAsNumber = stopAsNumber
    // NOTE(review): this stores only the latest segment's duration, not the
    // duration of the whole merged section — confirm that is intended.
    prevSection.duration = duration
  } else {
    if (prevSection) {
      // Clamp the previous section so it ends where this one begins.
      prevSection.stopAsNumber = startAsNumber
    }

    sections.push({
      speaker,
      duration,
      start,
      stop,
      startAsNumber,
      stopAsNumber,
      words: "",
    })
  }

  prevSpeaker = speaker
  prevStartAsNumber = startAsNumber
  prevStopAsNumber = stopAsNumber
})
// Log the start/stop timestamps of every speaker section.
for (let { start, stop } of sections) {
  log({ start, stop })
}
// Step 2. Parse words into their timestamps
let wordRanges = []

// Each cue is expected to be an index line, a timing line and a text line:
// 857
// 00:06:17,520 --> 00:06:18,400
// Neo4j,
srtLines.forEach(cue => {
  let match = cue.match(
    /.*\n(\d.*\d)\s.*\s(\d.*)\n(.*)/
  )
  // Empty or malformed cues (for example the chunk left after the final blank
  // line of the file) are skipped instead of crashing on a null match.
  if (!match) {
    return
  }

  let [, start, stop, word] = match

  wordRanges.push({
    word,
    start,
    stop,
    startAsNumber: timestampToNumber(start),
    stopAsNumber: timestampToNumber(stop),
  })
})
// Step 3. Spread words into speaker blocks
// sectionIndex only ever moves forward: words are visited in order and each
// one is appended to the section that is current once the checks below ran.
let sectionIndex = 0
wordRanges.forEach(range => {
  let section = sections.at(sectionIndex)
  // Without any speaker sections there is nowhere to put the words.
  if (!section) {
    return
  }
  let nextSection = sections.at(sectionIndex + 1)

  // Skip sections whose duration is below 1000 (in timestampToNumber units)
  // as long as a later section exists. nextSection is refreshed on every step
  // so the loop stops at the last section instead of running past the end,
  // and the check below sees the section that really follows.
  while (section.duration < 1000 && nextSection) {
    sectionIndex++
    section = nextSection
    nextSection = sections.at(sectionIndex + 1)
  }

  // Move on to the next section when this word ends after that section has
  // started, begins with a capital letter, and does not end with sentence
  // punctuation.
  while (
    nextSection?.startAsNumber &&
    range.stopAsNumber > nextSection.startAsNumber &&
    range?.word?.trim().match(/^[A-Z]/) &&
    !range?.word?.match(/[\.\?!]$/)
  ) {
    sectionIndex++
    section = sections.at(sectionIndex)
    nextSection = sections.at(sectionIndex + 1)
  }

  section.words += range.word
})
// Render every section as a "## <speaker> <start> - <stop>" heading followed
// by its trimmed words on the next line; sections are separated by a blank line.
let markdown = sections
  .map(({ speaker, start, stop, words }) => {
    let heading = `## ${speaker} ${start} - ${stop}`
    return `${heading}\n${words.trim()}`
  })
  .join("\n\n")
// Write the rendered transcript to "<commonName>.md".
// (presumably resolved against the directory selected with cwd() earlier — verify)
await writeFile(`${commonName}.md`, markdown)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment