jessvb/.TranscriptProcessing.md

## .TranscriptProcessing.md

      
    Raw
  

              .TranscriptProcessing.md
            
          
    How to use this file to process your Zoom transcription


Copy process_zoom_transcript.js to your computer.
Copy (e.g., from the Zoom web browser auto-scroll feature) your transcript into a text file called transcript.txt (case sensitive) in the same location where process_zoom_transcript.js is saved. (Note: DON'T use the official .vtt file.)
Edit process_zoom_transcript.js to contain names and name-replacements in the namesAndReplacements variable. For example, if "Ada Lovelace" is in your transcript, update the variable like so:

const namesAndReplacements = [{
        name: 'Ada Lovelace',
        replacement: 'Whatever Codename You Want'
    },
    {
        name: 'Name 2',
        replacement: 'Codename 2'
    }
];


Open a terminal in that folder and run the command `node process_zoom_transcript.js`. (Note: you must have Node.js installed.)
Your newly processed transcript should now be in a file called newTranscript.txt! 🎉


## names_list_from_chat.js
/**
 * Use this to get a list of attendees' names based on who sent messages in the
 * Zoom chat. Change the value of `chatFilename` to be the .txt file containing
 * the Zoom chat messages.
 */

const fs = require('fs');

const chatFilename = 'chat.txt';
const namesSummaryFilename = 'names_from_' + chatFilename;

/**
 * Returns the first `numLines` lines of `txt`, re-joined with '\n'.
 *
 * @param {string} txt - Text to take lines from; it is split on '\n'.
 * @param {number} numLines - Maximum number of lines to keep.
 * @returns {string} The leading lines of `txt` (all of it if it is shorter).
 */
function getFirstLines(txt, numLines) {
    // Declared with `const`: the bare assignment used before leaked an
    // implicit global and throws a ReferenceError in strict mode / ES modules.
    const firstFewLines = txt.split('\n', numLines);
    return firstFewLines.join('\n');
}

console.log('Let\'s find the names of everyone in the meeting!\n');

// Reads the chat file, prints a short preview, collects every distinct sender
// name in first-seen order, and writes the list (one name per line) to
// `namesSummaryFilename`.
fs.readFile(chatFilename, 'utf8', (err, origChat) => {
    if (err) {
        console.log('Error reading file: ' + err);
        return;
    }

    console.log('---------------------------\n' +
        'Data incoming! Here\'s the first ten lines or so:');
    console.log(getFirstLines(origChat, 10));

    console.log('\n---------------------------\n' +
        'And here are the names captured from the entire chat:\n');

    // Membership is tracked in a Set: the previous truthiness test on a plain
    // object re-added an empty name on every match and silently skipped names
    // that collide with Object.prototype members (e.g. "constructor").
    const seenNames = new Set();
    const namesDict = {};
    const namesList = [];
    // Captures the text between a ":dd<TAB>" sequence (presumably the end of
    // the message timestamp) and the next colon.
    const nameRegex = /(?::\d\d\t)(?<name>.*?)(?::)/g;
    for (const nameGroup of origChat.matchAll(nameRegex)) {
        const zoomName = nameGroup.groups.name;
        console.log(zoomName);

        if (!seenNames.has(zoomName)) {
            seenNames.add(zoomName);
            namesDict[zoomName] = zoomName;
            namesList.push(zoomName);
        }
    }

    console.log('Names Dictionary:');
    console.log(namesDict);

    // Export to file:
    fs.writeFile(namesSummaryFilename, namesList.join('\n'), (writeErr) => {
        if (writeErr) {
            // Do not claim success when the write failed.
            console.log('Error writing file: ' + writeErr);
            return;
        }
        console.log('---------------------------\n' +
            'Saved to file, ' + namesSummaryFilename + '.');
    });
});

## names_list_from_transcript.js
/**
 * Use this to get a list of attendees' names from the Zoom transcript. Note: In
 * this case, you SHOULD use the .vtt file. (Unfortunately I coded the other
 * `process_zoom_transcript.js` file differently from this file— Oops ;P )
 * Change the value of `transcriptFilename` to be the name of your .vtt file.
 */

const fs = require('fs');

const transcriptFilename = 'transcript.vtt';
const namesSummaryFilename = 'names_from_' + transcriptFilename.split('.')[0] + '.txt';

/**
 * Returns the first `numLines` lines of `txt`, re-joined with '\n'.
 *
 * @param {string} txt - Text to take lines from; it is split on '\n'.
 * @param {number} numLines - Maximum number of lines to keep.
 * @returns {string} The leading lines of `txt` (all of it if it is shorter).
 */
function getFirstLines(txt, numLines) {
    // Declared with `const`: the bare assignment used before leaked an
    // implicit global and throws a ReferenceError in strict mode / ES modules.
    const firstFewLines = txt.split('\n', numLines);
    return firstFewLines.join('\n');
}

console.log('Let\'s find the names of everyone in the meeting!\n');

// Reads the .vtt transcript, prints a short preview, collects every distinct
// speaker name in first-seen order, and writes the list (one name per line) to
// `namesSummaryFilename`.
fs.readFile(transcriptFilename, 'utf8', (err, origTranscript) => {
    if (err) {
        console.log('Error reading file: ' + err);
        return;
    }

    console.log('---------------------------\n' +
        'Data incoming! Here\'s the first ten lines or so:');
    console.log(getFirstLines(origTranscript, 10));

    console.log('\n---------------------------\n' +
        'And here are the names captured from the entire transcript:\n');

    // Membership is tracked in a Set: the previous truthiness test on a plain
    // object re-added an empty name on every match and silently skipped names
    // that collide with Object.prototype members (e.g. "constructor").
    const seenNames = new Set();
    const namesDict = {};
    const namesList = [];
    // Captures the text between ".ddd" + line break (the end of a cue timing
    // line) and the first colon on the following line. `\r?\n` accepts both
    // CRLF and LF files; the CRLF-only pattern found nothing in LF transcripts.
    const nameRegex = /(?:\.\d\d\d\r?\n)(?<name>.*?)(?::)/g;
    for (const nameGroup of origTranscript.matchAll(nameRegex)) {
        const zoomName = nameGroup.groups.name;

        if (!seenNames.has(zoomName)) {
            seenNames.add(zoomName);
            namesDict[zoomName] = zoomName;
            namesList.push(zoomName);
        }
    }

    console.log('Names Dictionary:');
    console.log(namesDict);

    // Export to file:
    fs.writeFile(namesSummaryFilename, namesList.join('\n'), (writeErr) => {
        if (writeErr) {
            // Do not claim success when the write failed.
            console.log('Error writing file: ' + writeErr);
            return;
        }
        console.log('---------------------------\n' +
            'Saved to file, ' + namesSummaryFilename + '.');
    });
});

## process_zoom_transcript.js
/**
 * Use this to clean your Zoom transcript and anonymize given names. Note that you should
 * select and copy the Zoom transcript from online instead of downloading the official
 * transcript (i.e., DON'T use the official .vtt file).
 * Change the value of `namesAndReplacements` with the names of attendees and
 * what you would like them to be replaced with (e.g., "Participant 1234").
 */

const fs = require('fs');

const transcriptFilename = 'transcript.txt';
const newTranscriptFilename = 'newTranscript.txt';
const namesAndReplacements = [{
        name: 'Name 1',
        replacement: 'Codename 1'
    },
    {
        name: 'Name 2',
        replacement: 'Codename 2'
    }
];

/**
 * Returns the first `numLines` lines of `txt`, re-joined with '\n'.
 *
 * @param {string} txt - Text to take lines from; it is split on '\n'.
 * @param {number} numLines - Maximum number of lines to keep.
 * @returns {string} The leading lines of `txt` (all of it if it is shorter).
 */
function getFirstLines(txt, numLines) {
    // Declared with `const`: the bare assignment used before leaked an
    // implicit global and throws a ReferenceError in strict mode / ES modules.
    const firstFewLines = txt.split('\n', numLines);
    return firstFewLines.join('\n');
}

console.log('Let\'s get rid of those pesky time stamps and user avatars!\n');

// Reads the copied transcript, strips the time stamps and "user avatar"
// markers, swaps every configured name for its replacement, prints a preview,
// and writes the result to `newTranscriptFilename`.
fs.readFile(transcriptFilename, 'utf8', (err, origTranscript) => {
    if (err) {
        console.log('Error reading file: ' + err);
        return;
    }

    console.log('---------------------------\n' +
        'Data incoming! Here\'s the first ten lines or so:');
    console.log(getFirstLines(origTranscript, 10));

    console.log('\n---------------------------\n' +
        'And here\'s the new output:\n');

    // Replace the time stamps with spaces:
    let newTranscript = origTranscript.replace(/\n\d\d:\d\d:\d\d\n/g, ' ');
    // Replace the words, "user avatar" with a newline:
    newTranscript = newTranscript.replace(/user avatar/g, '\n');
    // Replace all names with codenames plus a colon:
    namesAndReplacements.forEach((nameAndRepl) => {
        // An empty name would match between every character; skip it.
        if (!nameAndRepl.name) {
            return;
        }
        // split/join treats the name as literal text. Building a RegExp from
        // it mis-matched names containing characters such as "." or "(", and
        // `$` sequences in the replacement were expanded as patterns.
        newTranscript = newTranscript
            .split(nameAndRepl.name)
            .join(nameAndRepl.replacement + ':');
    });

    console.log(getFirstLines(newTranscript, 10) + '\n');

    // Export to file:
    fs.writeFile(newTranscriptFilename, newTranscript, (writeErr) => {
        if (writeErr) {
            // Do not claim success when the write failed.
            console.log('Error writing file: ' + writeErr);
            return;
        }
        console.log('---------------------------\n' +
            'Saved to file, ' + newTranscriptFilename + '.');
    });
});

## remove-vtt-timestamps.js
/**
 * Use this to get rid of timestamps from a .vtt file. See regexr.com to test
 * out other regular expressions, if it isn't working.
 *
 * Usage:
 * Change the value of `transcriptFilename` to be the name of your .vtt file.
 * To run, use `node remove-vtt-timestamps.js` in the terminal.
 */

const fs = require('fs');

const transcriptFilename = 'Transcript.vtt';
const newTranscriptFilename = 'NewTranscript.txt';

/**
 * Returns the first `numLines` lines of `txt`, re-joined with '\n'.
 *
 * @param {string} txt - Text to take lines from; it is split on '\n'.
 * @param {number} numLines - Maximum number of lines to keep.
 * @returns {string} The leading lines of `txt` (all of it if it is shorter).
 */
function getFirstLines(txt, numLines) {
    // Declared with `const`: the bare assignment used before leaked an
    // implicit global and throws a ReferenceError in strict mode / ES modules.
    const firstFewLines = txt.split('\n', numLines);
    return firstFewLines.join('\n');
}

console.log('Let\'s get rid of those pesky time stamps!\n');

// Reads the .vtt transcript, replaces every cue timing line (plus the "- "
// that starts the following line) with a space, prints a preview, removes the
// remaining line breaks, and writes the result to `newTranscriptFilename`.
fs.readFile(transcriptFilename, 'utf8', (err, origTranscript) => {
    if (err) {
        console.log('Error reading file: ' + err);
        return;
    }

    console.log('---------------------------\n' +
        'Data incoming! Here\'s the first ten lines or so:');
    console.log(getFirstLines(origTranscript, 10));

    console.log('\n---------------------------\n' +
        'And here\'s the new output:\n');

    // The millisecond separator is escaped (`\.`) so that only a literal dot
    // matches, and `\r?\n` accepts both CRLF and LF line endings.
    // Replace the time stamps with spaces:
    let newTranscript = origTranscript.replace(
        /\r?\n\d\d:\d\d:\d\d\.\d\d\d --> \d\d:\d\d:\d\d\.\d\d\d\r?\n- /g, ' ');
    // Remove the **first** time stamp too (it has no line break before it):
    newTranscript = newTranscript.replace(
        /\d\d:\d\d:\d\d\.\d\d\d --> \d\d:\d\d:\d\d\.\d\d\d\r?\n- /g, ' ');

    console.log(getFirstLines(newTranscript, 10) + '\n');

    // Optional: Remove all newlines
    newTranscript = newTranscript.replace(/\r?\n/g, '');

    // Export to file:
    fs.writeFile(newTranscriptFilename, newTranscript, (writeErr) => {
        if (writeErr) {
            // Do not claim success when the write failed.
            console.log('Error writing file: ' + writeErr);
            return;
        }
        console.log('---------------------------\n' +
            'Saved to file, ' + newTranscriptFilename + '.');
    });
});
	/**
	* Use this to get a list of attendees' names based on who sent messages in the
	* Zoom chat. Change the value of `chatFilename` to be the .txt file containing
	* the Zoom chat messages.
	*/

	const fs = require('fs');

	const chatFilename = 'chat.txt';
	const namesSummaryFilename = 'names_from_' + chatFilename;

	/**
	 * Returns the first `numLines` lines of `txt`, re-joined with '\n'.
	 *
	 * @param {string} txt - Text to take lines from; it is split on '\n'.
	 * @param {number} numLines - Maximum number of lines to keep.
	 * @returns {string} The leading lines of `txt` (all of it if it is shorter).
	 */
	function getFirstLines(txt, numLines) {
		// Declared with `const`: the bare assignment used before leaked an
		// implicit global and throws a ReferenceError in strict mode / ES modules.
		const firstFewLines = txt.split('\n', numLines);
		return firstFewLines.join('\n');
	}

	console.log('Let\'s find the names of everyone in the meeting!\n');

	// Reads the chat file, prints a short preview, collects every distinct
	// sender name in first-seen order, and writes the list (one name per line)
	// to `namesSummaryFilename`.
	fs.readFile(chatFilename, 'utf8', (err, origChat) => {
		if (err) {
			console.log('Error reading file: ' + err);
			return;
		}

		console.log('---------------------------\n' +
			'Data incoming! Here\'s the first ten lines or so:');
		console.log(getFirstLines(origChat, 10));

		console.log('\n---------------------------\n' +
			'And here are the names captured from the entire chat:\n');

		// Membership is tracked in a Set: the previous truthiness test on a
		// plain object re-added an empty name on every match and silently
		// skipped names that collide with Object.prototype members
		// (e.g. "constructor").
		const seenNames = new Set();
		const namesDict = {};
		const namesList = [];
		// Captures the text between a ":dd<TAB>" sequence (presumably the end
		// of the message timestamp) and the next colon.
		const nameRegex = /(?::\d\d\t)(?<name>.*?)(?::)/g;
		for (const nameGroup of origChat.matchAll(nameRegex)) {
			const zoomName = nameGroup.groups.name;
			console.log(zoomName);

			if (!seenNames.has(zoomName)) {
				seenNames.add(zoomName);
				namesDict[zoomName] = zoomName;
				namesList.push(zoomName);
			}
		}

		console.log('Names Dictionary:');
		console.log(namesDict);

		// Export to file:
		fs.writeFile(namesSummaryFilename, namesList.join('\n'), (writeErr) => {
			if (writeErr) {
				// Do not claim success when the write failed.
				console.log('Error writing file: ' + writeErr);
				return;
			}
			console.log('---------------------------\n' +
				'Saved to file, ' + namesSummaryFilename + '.');
		});
	});
	/**
	* Use this to get a list of attendees' names from the Zoom transcript. Note: In
	* this case, you SHOULD use the .vtt file. (Unfortunately I coded the other
	* `process_zoom_transcript.js` file differently from this file— Oops ;P )
	* Change the value of `transcriptFilename` to be the name of your .vtt file.
	*/

	const fs = require('fs');

	const transcriptFilename = 'transcript.vtt';
	const namesSummaryFilename = 'names_from_' + transcriptFilename.split('.')[0] + '.txt';

	/**
	 * Returns the first `numLines` lines of `txt`, re-joined with '\n'.
	 *
	 * @param {string} txt - Text to take lines from; it is split on '\n'.
	 * @param {number} numLines - Maximum number of lines to keep.
	 * @returns {string} The leading lines of `txt` (all of it if it is shorter).
	 */
	function getFirstLines(txt, numLines) {
		// Declared with `const`: the bare assignment used before leaked an
		// implicit global and throws a ReferenceError in strict mode / ES modules.
		const firstFewLines = txt.split('\n', numLines);
		return firstFewLines.join('\n');
	}

	console.log('Let\'s find the names of everyone in the meeting!\n');

	// Reads the .vtt transcript, prints a short preview, collects every
	// distinct speaker name in first-seen order, and writes the list (one name
	// per line) to `namesSummaryFilename`.
	fs.readFile(transcriptFilename, 'utf8', (err, origTranscript) => {
		if (err) {
			console.log('Error reading file: ' + err);
			return;
		}

		console.log('---------------------------\n' +
			'Data incoming! Here\'s the first ten lines or so:');
		console.log(getFirstLines(origTranscript, 10));

		console.log('\n---------------------------\n' +
			'And here are the names captured from the entire transcript:\n');

		// Membership is tracked in a Set: the previous truthiness test on a
		// plain object re-added an empty name on every match and silently
		// skipped names that collide with Object.prototype members
		// (e.g. "constructor").
		const seenNames = new Set();
		const namesDict = {};
		const namesList = [];
		// Captures the text between ".ddd" + line break (the end of a cue
		// timing line) and the first colon on the following line. `\r?\n`
		// accepts both CRLF and LF files; the CRLF-only pattern found nothing
		// in LF transcripts.
		const nameRegex = /(?:\.\d\d\d\r?\n)(?<name>.*?)(?::)/g;
		for (const nameGroup of origTranscript.matchAll(nameRegex)) {
			const zoomName = nameGroup.groups.name;

			if (!seenNames.has(zoomName)) {
				seenNames.add(zoomName);
				namesDict[zoomName] = zoomName;
				namesList.push(zoomName);
			}
		}

		console.log('Names Dictionary:');
		console.log(namesDict);

		// Export to file:
		fs.writeFile(namesSummaryFilename, namesList.join('\n'), (writeErr) => {
			if (writeErr) {
				// Do not claim success when the write failed.
				console.log('Error writing file: ' + writeErr);
				return;
			}
			console.log('---------------------------\n' +
				'Saved to file, ' + namesSummaryFilename + '.');
		});
	});
	/**
	* Use this to clean your Zoom transcript and anonymize given names. Note that you should
	* select and copy the Zoom transcript from online instead of downloading the official
	* transcript (i.e., DON'T use the official .vtt file).
	* Change the value of `namesAndReplacements` with the names of attendees and
	* what you would like them to be replaced with (e.g., "Participant 1234").
	*/

	const fs = require('fs');

	const transcriptFilename = 'transcript.txt';
	const newTranscriptFilename = 'newTranscript.txt';
	const namesAndReplacements = [{
	name: 'Name 1',
	replacement: 'Codename 1'
	},
	{
	name: 'Name 2',
	replacement: 'Codename 2'
	}
	];

	/**
	 * Returns the first `numLines` lines of `txt`, re-joined with '\n'.
	 *
	 * @param {string} txt - Text to take lines from; it is split on '\n'.
	 * @param {number} numLines - Maximum number of lines to keep.
	 * @returns {string} The leading lines of `txt` (all of it if it is shorter).
	 */
	function getFirstLines(txt, numLines) {
		// Declared with `const`: the bare assignment used before leaked an
		// implicit global and throws a ReferenceError in strict mode / ES modules.
		const firstFewLines = txt.split('\n', numLines);
		return firstFewLines.join('\n');
	}

	console.log('Let\'s get rid of those pesky time stamps and user avatars!\n');

	// Reads the copied transcript, strips the time stamps and "user avatar"
	// markers, swaps every configured name for its replacement, prints a
	// preview, and writes the result to `newTranscriptFilename`.
	fs.readFile(transcriptFilename, 'utf8', (err, origTranscript) => {
		if (err) {
			console.log('Error reading file: ' + err);
			return;
		}

		console.log('---------------------------\n' +
			'Data incoming! Here\'s the first ten lines or so:');
		console.log(getFirstLines(origTranscript, 10));

		console.log('\n---------------------------\n' +
			'And here\'s the new output:\n');

		// Replace the time stamps with spaces:
		let newTranscript = origTranscript.replace(/\n\d\d:\d\d:\d\d\n/g, ' ');
		// Replace the words, "user avatar" with a newline:
		newTranscript = newTranscript.replace(/user avatar/g, '\n');
		// Replace all names with codenames plus a colon:
		namesAndReplacements.forEach((nameAndRepl) => {
			// An empty name would match between every character; skip it.
			if (!nameAndRepl.name) {
				return;
			}
			// split/join treats the name as literal text. Building a RegExp
			// from it mis-matched names containing characters such as "." or
			// "(", and `$` sequences in the replacement were expanded as
			// patterns.
			newTranscript = newTranscript
				.split(nameAndRepl.name)
				.join(nameAndRepl.replacement + ':');
		});

		console.log(getFirstLines(newTranscript, 10) + '\n');

		// Export to file:
		fs.writeFile(newTranscriptFilename, newTranscript, (writeErr) => {
			if (writeErr) {
				// Do not claim success when the write failed.
				console.log('Error writing file: ' + writeErr);
				return;
			}
			console.log('---------------------------\n' +
				'Saved to file, ' + newTranscriptFilename + '.');
		});
	});