Last active
May 17, 2021 15:19
-
-
Save golanlevin/4381d340476309efcce24f7b691b09ff to your computer and use it in GitHub Desktop.
Utility for converting transcriptions produced by Zoom's speech-to-text, into WebVTT Caption files suitable for Vimeo.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Utility for converting transcriptions produced by Zoom's speech-to-text,
// into WebVTT Caption files suitable for Vimeo.
/*
Zoom transcription file looks like:
17:32:36 Thanks so much. Hi everybody, let me get my screen share going.
17:32:47 Right.
----------------------
Per https://developer.mozilla.org/en-US/docs/Web/API/WebVTT_API
VTT output file looks like:
WEBVTT

00:00:51.000 --> 00:01:02.000
Thanks so much. Hi everybody, let me get my screen share going.

00:01:02.000 --> 00:01:04.000
Right.
*/
void setup() { | |
// The filename of the transcription file produced by Zoom. | |
// Note: it contains timestamps in local time, eg. 17:30 (5:30pm). | |
// Should be located in the sketch's 'data' folder. | |
String inputFilename = "kate_compton.txt"; | |
// The number of seconds into the edited video file, where the | |
// actual words start. | |
int offsetSecondsInDestinationFile = 4; | |
String[] inputStrings; | |
inputStrings = loadStrings(inputFilename); | |
int nLines = inputStrings.length; | |
if (nLines > 0) { | |
String outputFilename = "data/"; | |
outputFilename += inputFilename.substring(0, inputFilename.lastIndexOf('.')); | |
outputFilename += ".vtt"; | |
println("Loading: data/" + inputFilename); | |
int initialClockSeconds = -1; | |
String line0 = inputStrings[0]; | |
if (isCaptionLineValid(line0)) { | |
String inputFileClockStartTime = line0.substring(0, line0.indexOf(' ')); | |
initialClockSeconds = getSecondsFromZoomTime(inputFileClockStartTime); | |
} else { | |
println("First line must be a valid input. Exiting with Problem A."); | |
return; | |
} | |
if (initialClockSeconds == -1) { | |
println("First line must be a valid input. Exiting with Problem B."); | |
return; | |
} | |
println("Creating: " + outputFilename); | |
PrintWriter myWriter = createWriter(outputFilename); | |
myWriter.println("WEBVTT"); | |
myWriter.println(""); | |
String prevVttTimeString = ""; | |
String prevCaptionString = ""; | |
int prevClockTimeSeconds = 0; | |
for (int i=0; i<nLines; i++) { | |
String ithLine = inputStrings[i]; | |
if (isCaptionLineValid(ithLine)) { | |
int indexOfFirstSpace = ithLine.indexOf(' '); | |
String ithClockStartTime = ithLine.substring(0, indexOfFirstSpace); | |
String captionText = ithLine.substring(indexOfFirstSpace+1, ithLine.length()); | |
// get the time (across the whole day, in seconds); | |
// compute the destination output (VTT caption file) time. | |
int ithClockTimeSeconds = getSecondsFromZoomTime(ithClockStartTime); | |
if (ithClockTimeSeconds != -1) { | |
ithClockTimeSeconds -= initialClockSeconds; | |
ithClockTimeSeconds += offsetSecondsInDestinationFile; | |
String ithVttTimeString = getVttTimeStringFromSeconds(ithClockTimeSeconds); | |
if (i > 0) { | |
String ithCaptionChunk = ""; | |
ithCaptionChunk += prevVttTimeString + " --> " + ithVttTimeString + "\n"; | |
ithCaptionChunk += prevCaptionString + "\n"; | |
ithCaptionChunk += "\n"; | |
myWriter.print(ithCaptionChunk); | |
} | |
// println(ithClockStartTime + "\t" + ithClockTimeSeconds + "\t" + ithVttTimeString); | |
prevVttTimeString = ithVttTimeString; | |
prevCaptionString = captionText; | |
prevClockTimeSeconds = ithClockTimeSeconds; | |
} | |
} | |
} | |
// last caption line has off-by-one issues. | |
String lastCaptionChunk = ""; | |
String bogusEndTimeString = getVttTimeStringFromSeconds(prevClockTimeSeconds+1); | |
lastCaptionChunk += prevVttTimeString + " --> " + bogusEndTimeString + "\n"; | |
lastCaptionChunk += prevCaptionString + "\n"; | |
lastCaptionChunk += "\n"; | |
myWriter.print(lastCaptionChunk); | |
myWriter.flush(); // Writes the remaining data to the file | |
myWriter.close(); // Finishes the file | |
println("Finished."); | |
exit(); // Stops the program | |
} | |
} | |
//--------------------------------------------- | |
String getVttTimeStringFromSeconds (int secs) { | |
// e.g. 00:10.000 or 00:00:10.000 | |
int s = secs % 60; | |
int m = (secs / 60) % 60; | |
int h = (secs / (60*60)); | |
String out = ""; | |
boolean bIncludeHours = true; | |
if (bIncludeHours) { | |
out = nf(h, 2) + ":"; | |
} | |
out += nf(m, 2) + ":"; | |
out += nf(s, 2) + ".000"; | |
return out; | |
} | |
//--------------------------------------------- | |
boolean isCaptionLineValid(String aLine) { | |
boolean out = false; | |
if (aLine != null) { | |
if (aLine.length() > 5) { | |
if (aLine.indexOf(':') > 0) { | |
out = true; | |
} | |
} | |
} | |
return out; | |
} | |
//--------------------------------------------- | |
int getSecondsFromZoomTime(String zoomTime) { | |
// zoomTime is a string from the Zoom transcript, such as "17:31:50" | |
// offsetSeconds is the number of seconds that the speech starts in the new/extracted video file. | |
// | |
int secs = -1; | |
if (zoomTime != null) { | |
int indexOfFirstColon = zoomTime.indexOf(':'); | |
int indexOfLastColon = zoomTime.lastIndexOf(':'); | |
if ((indexOfFirstColon != -1) && (indexOfLastColon != -1)) { | |
String hStrIn = zoomTime.substring(0, indexOfFirstColon); | |
String mStrIn = zoomTime.substring(indexOfFirstColon+1, indexOfLastColon); | |
String sStrIn = zoomTime.substring(indexOfLastColon+1, zoomTime.length()); | |
int hIn = Integer.parseInt(hStrIn); | |
int mIn = Integer.parseInt(mStrIn); | |
int sIn = Integer.parseInt(sStrIn); | |
secs = sIn + mIn*60 + hIn*60*60; | |
} | |
} | |
return secs; | |
} | |
void draw() { | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment