Skip to content

Instantly share code, notes, and snippets.

@jessvb
Last active April 8, 2023 10:03
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jessvb/a9e861ffc6512c4e0ca0c23d8a80bb44 to your computer and use it in GitHub Desktop.
Save jessvb/a9e861ffc6512c4e0ca0c23d8a80bb44 to your computer and use it in GitHub Desktop.
Processes Zoom transcriptions or .vtt transcripts (e.g., for thematic analysis) by removing timestamps and 'user avatar' labels, and replacing actual names with codenames (to deidentify the data).

How to use this file to process your Zoom transcription

  1. Copy process_zoom_transcript.js to your computer.
  2. Copy (e.g., from the Zoom web browser auto-scroll feature) your transcript into a text file called transcript.txt (case sensitive) in the same location where process_zoom_transcript.js is saved. (Note: DON'T use the official .vtt file.)
  3. Edit process_zoom_transcript.js to contain names and name-replacements in the namesAndReplacements variable. For example, if "Ada Lovelace" is in your transcript, update the variable like so:
const namesAndReplacements = [{
        name: 'Ada Lovelace',
        replacement: 'Whatever Codename You Want'
    },
    {
        name: 'Name 2',
        replacement: 'Codename 2'
    }
];
  4. Boot up a terminal and run the command, node process_zoom_transcript.js. (Note: you must have Node.js installed.)
  5. Your newly processed transcript should now be in a file called newTranscript.txt! 🎉
/**
* Use this to get a list of attendees' names based on who sent messages in the
* Zoom chat. Change the value of `chatFilename` to be the .txt file containing
* the Zoom chat messages.
*/
const fs = require('fs');
// The .txt file containing the Zoom chat messages to scan.
const chatFilename = 'chat.txt';
// Output file name, derived from the chat file name.
const namesSummaryFilename = `names_from_${chatFilename}`;
/**
 * Returns the first `numLines` lines of `txt`, joined back together with '\n'.
 *
 * @param {string} txt - Text to take the preview from.
 * @param {number} numLines - Maximum number of lines to keep.
 * @returns {string} At most `numLines` leading lines of `txt`.
 */
function getFirstLines(txt, numLines) {
    // Keep this local (`const`): an undeclared assignment would create a
    // global and throws a ReferenceError in strict mode / ES modules.
    const firstFewLines = txt.split('\n', numLines);
    return firstFewLines.join('\n');
}
console.log('Let\'s find the names of everyone in the meeting!\n');

// Read the chat file, print a short preview, collect every unique sender name
// (in order of first appearance) and write the list to `namesSummaryFilename`.
fs.readFile(chatFilename, 'utf8', (err, origChat) => {
    if (err) {
        console.log('Error reading file: ' + err);
        return;
    }

    console.log('---------------------------\n' +
        'Data incoming! Here\'s the first ten lines or so:');
    console.log(getFirstLines(origChat, 10));
    console.log('\n---------------------------\n' +
        'And here are the names captured from the entire chat:\n');

    // A name is whatever sits between ":<two digits><TAB>" (presumably the
    // seconds field of the chat timestamp) and the next colon on that line.
    const nameRegex = /(?::\d\d\t)(?<name>.*?)(?::)/g;

    // A Set keeps first-appearance order and dedupes reliably. A plain-object
    // lookup would treat names such as "constructor" as already seen (they
    // exist on Object.prototype) and would never dedupe an empty name.
    const uniqueNames = new Set();
    for (const nameGroup of origChat.matchAll(nameRegex)) {
        const zoomName = nameGroup.groups.name;
        console.log(zoomName);
        uniqueNames.add(zoomName);
    }

    const namesList = [...uniqueNames];
    // Same name -> name mapping that has always been printed for reference.
    const namesDict = Object.fromEntries(namesList.map((name) => [name, name]));
    console.log('Names Dictionary:');
    console.log(namesDict);

    // Export to file:
    fs.writeFile(namesSummaryFilename, namesList.join('\n'), (writeErr) => {
        // Do not claim success if the write failed.
        if (writeErr) {
            console.log('Error writing file: ' + writeErr);
            return;
        }
        console.log('---------------------------\n' +
            'Saved to file, ' + namesSummaryFilename + '.');
    });
});
/**
* Use this to get a list of attendees' names from the Zoom transcript. Note: In
* this case, you SHOULD use the .vtt file. (Unfortunately I coded the other
* `process_zoom_transcript.js` file differently from this file— Oops ;P )
* Change the value of `transcriptFilename` to be the name of your .vtt file.
*/
const fs = require('fs');
// The .vtt transcript file to scan for speaker names.
const transcriptFilename = 'transcript.vtt';
// Output file name: everything before the first '.' of the transcript name,
// prefixed with "names_from_" and given a .txt extension.
const namesSummaryFilename = `names_from_${transcriptFilename.split('.')[0]}.txt`;
/**
 * Returns the first `numLines` lines of `txt`, joined back together with '\n'.
 *
 * @param {string} txt - Text to take the preview from.
 * @param {number} numLines - Maximum number of lines to keep.
 * @returns {string} At most `numLines` leading lines of `txt`.
 */
function getFirstLines(txt, numLines) {
    // Keep this local (`const`): an undeclared assignment would create a
    // global and throws a ReferenceError in strict mode / ES modules.
    const firstFewLines = txt.split('\n', numLines);
    return firstFewLines.join('\n');
}
console.log('Let\'s find the names of everyone in the meeting!\n');

// Read the .vtt transcript, print a short preview, collect every unique
// speaker name (in order of first appearance) and write the list to
// `namesSummaryFilename`.
fs.readFile(transcriptFilename, 'utf8', (err, origTranscript) => {
    if (err) {
        console.log('Error reading file: ' + err);
        return;
    }

    console.log('---------------------------\n' +
        'Data incoming! Here\'s the first ten lines or so:');
    console.log(getFirstLines(origTranscript, 10));
    console.log('\n---------------------------\n' +
        'And here are the names captured from the entire transcript:\n');

    // A name is whatever sits between the end of a line finishing in
    // ".<three digits>" (the milliseconds of a cue timestamp) and the first
    // colon on the following line. `\r?\n` accepts both CRLF and LF files;
    // requiring `\r\n` found no names at all in LF-only transcripts.
    const nameRegex = /(?:\.\d\d\d\r?\n)(?<name>.*?)(?::)/g;

    // A Set keeps first-appearance order and dedupes reliably. A plain-object
    // lookup would treat names such as "constructor" as already seen (they
    // exist on Object.prototype) and would never dedupe an empty name.
    const uniqueNames = new Set();
    for (const nameGroup of origTranscript.matchAll(nameRegex)) {
        uniqueNames.add(nameGroup.groups.name);
    }

    const namesList = [...uniqueNames];
    // Same name -> name mapping that has always been printed for reference.
    const namesDict = Object.fromEntries(namesList.map((name) => [name, name]));
    console.log('Names Dictionary:');
    console.log(namesDict);

    // Export to file:
    fs.writeFile(namesSummaryFilename, namesList.join('\n'), (writeErr) => {
        // Do not claim success if the write failed.
        if (writeErr) {
            console.log('Error writing file: ' + writeErr);
            return;
        }
        console.log('---------------------------\n' +
            'Saved to file, ' + namesSummaryFilename + '.');
    });
});
/**
* Use this to clean your Zoom transcript and anonymize given names. Note that you should
* select and copy the Zoom transcript from online instead of downloading the official
* transcript (i.e., DON'T use the official .vtt file).
* Change the value of `namesAndReplacements` with the names of attendees and
* what you would like them to be replaced with (e.g., "Participant 1234").
*/
const fs = require('fs');
// Input: the transcript text copied from the Zoom web page.
const transcriptFilename = 'transcript.txt';
// Output: the cleaned, de-identified transcript.
const newTranscriptFilename = 'newTranscript.txt';
// One entry per attendee: `name` is the text to look for in the transcript,
// `replacement` is the codename written in its place.
const namesAndReplacements = [
    { name: 'Name 1', replacement: 'Codename 1' },
    { name: 'Name 2', replacement: 'Codename 2' },
];
/**
 * Returns the first `numLines` lines of `txt`, joined back together with '\n'.
 *
 * @param {string} txt - Text to take the preview from.
 * @param {number} numLines - Maximum number of lines to keep.
 * @returns {string} At most `numLines` leading lines of `txt`.
 */
function getFirstLines(txt, numLines) {
    // Keep this local (`const`): an undeclared assignment would create a
    // global and throws a ReferenceError in strict mode / ES modules.
    const firstFewLines = txt.split('\n', numLines);
    return firstFewLines.join('\n');
}
console.log('Let\'s get rid of those pesky time stamps and user avatars!\n');

// Read the copied transcript, strip timestamps and "user avatar" labels, swap
// each configured name for its codename, preview the result and write it to
// `newTranscriptFilename`.
fs.readFile(transcriptFilename, 'utf8', (err, origTranscript) => {
    if (err) {
        console.log('Error reading file: ' + err);
        return;
    }

    console.log('---------------------------\n' +
        'Data incoming! Here\'s the first ten lines or so:');
    console.log(getFirstLines(origTranscript, 10));
    console.log('\n---------------------------\n' +
        'And here\'s the new output:\n');

    // Replace the time stamps (an "HH:MM:SS" on a line of its own) with
    // spaces. `\r?\n` also handles files saved with Windows line endings.
    let newTranscript = origTranscript.replace(/\r?\n\d\d:\d\d:\d\d\r?\n/g, ' ');
    // Replace the words, "user avatar" with a newline:
    newTranscript = newTranscript.replace(/user avatar/g, '\n');

    // Replace all names with codenames plus a colon:
    for (const { name, replacement } of namesAndReplacements) {
        // An empty pattern matches between every character; skip it.
        if (!name) {
            continue;
        }
        // Names are literal text, not patterns: escape regex metacharacters so
        // e.g. "Dr. Smith (she/her)" matches exactly and "C++" does not throw.
        const escapedName = name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
        const nameRegex = new RegExp(escapedName, 'g');
        // A replacer function inserts the codename verbatim, so "$" sequences
        // in a codename are not interpreted as replacement patterns.
        newTranscript = newTranscript.replace(nameRegex, () => replacement + ':');
    }
    console.log(getFirstLines(newTranscript, 10) + '\n');

    // Export to file:
    fs.writeFile(newTranscriptFilename, newTranscript, (writeErr) => {
        // Do not claim success if the write failed.
        if (writeErr) {
            console.log('Error writing file: ' + writeErr);
            return;
        }
        console.log('---------------------------\n' +
            'Saved to file, ' + newTranscriptFilename + '.');
    });
});
/**
* Use this to get rid of timestamps from a .vtt file. See regexr.com to test
* out other regular expressions, if it isn't working.
*
* Usage:
* Change the value of `transcriptFilename` to be the name of your .vtt file.
* To run, use `node remove-timestams-vtt.js` in the terminal.
*/
const fs = require('fs');
// Input: the .vtt file whose timestamps should be removed.
const transcriptFilename = 'Transcript.vtt';
// Output: the file the timestamp-free text is written to.
const newTranscriptFilename = 'NewTranscript.txt';
/**
 * Returns the first `numLines` lines of `txt`, joined back together with '\n'.
 *
 * @param {string} txt - Text to take the preview from.
 * @param {number} numLines - Maximum number of lines to keep.
 * @returns {string} At most `numLines` leading lines of `txt`.
 */
function getFirstLines(txt, numLines) {
    // Keep this local (`const`): an undeclared assignment would create a
    // global and throws a ReferenceError in strict mode / ES modules.
    const firstFewLines = txt.split('\n', numLines);
    return firstFewLines.join('\n');
}
console.log('Let\'s get rid of those pesky time stamps!\n');

// Read the .vtt file, replace every "HH:MM:SS.mmm --> HH:MM:SS.mmm" cue line
// (plus the "- " that starts the following line) with a space, preview the
// result, strip the remaining newlines and write it to `newTranscriptFilename`.
fs.readFile(transcriptFilename, 'utf8', (err, origTranscript) => {
    if (err) {
        console.log('Error reading file: ' + err);
        return;
    }

    console.log('---------------------------\n' +
        'Data incoming! Here\'s the first ten lines or so:');
    console.log(getFirstLines(origTranscript, 10));
    console.log('\n---------------------------\n' +
        'And here\'s the new output:\n');

    // Replace the time stamps with spaces. The dot before the milliseconds is
    // escaped so it only matches a literal ".", and `\r?\n` accepts both CRLF
    // and LF line endings.
    let newTranscript = origTranscript.replace(
        /\r?\n\d\d:\d\d:\d\d\.\d\d\d --> \d\d:\d\d:\d\d\.\d\d\d\r?\n- /g, ' ');
    // Remove the **first** time stamp too (it has no newline in front of it
    // when it is the very first thing in the text):
    newTranscript = newTranscript.replace(
        /\d\d:\d\d:\d\d\.\d\d\d --> \d\d:\d\d:\d\d\.\d\d\d\r?\n- /g, ' ');
    console.log(getFirstLines(newTranscript, 10) + '\n');

    // Optional: Remove all newlines (including the "\r" of CRLF endings, which
    // would otherwise be left behind as stray characters).
    newTranscript = newTranscript.replace(/\r?\n/g, '');

    // Export to file:
    fs.writeFile(newTranscriptFilename, newTranscript, (writeErr) => {
        // Do not claim success if the write failed.
        if (writeErr) {
            console.log('Error writing file: ' + writeErr);
            return;
        }
        console.log('---------------------------\n' +
            'Saved to file, ' + newTranscriptFilename + '.');
    });
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment