Skip to content

Instantly share code, notes, and snippets.

@hippietrail
Last active May 8, 2024 08:22
Show Gist options
  • Save hippietrail/af26d363d7f6ac37565c5074f04e4f5e to your computer and use it in GitHub Desktop.
Save hippietrail/af26d363d7f6ac37565c5074f04e4f5e to your computer and use it in GitHub Desktop.
TypeScript code to fetch one or more YouTube transcripts as plain text without an API key
import url from 'url';
import parse from 'html-dom-parser';
import { Element, Text } from 'domhandler';
import { decodeXML } from 'entities';
/**
 * Downloads the YouTube watch page for one video.
 *
 * The request goes to `https://www.youtube.com/watch` with the ID sent as the
 * `v` query parameter. The HTTP status is not inspected; whatever body the
 * server returns is handed back as text.
 *
 * @param videoID - Value placed in the `v` query parameter.
 * @returns The response body as a string.
 */
async function getHtmlByVideoID(videoID: string): Promise<string> {
  const watchPageUrl = url.format({
    protocol: 'https',
    hostname: 'www.youtube.com',
    pathname: 'watch',
    query: { v: videoID },
  });
  const response = await fetch(watchPageUrl);
  const html = await response.text();
  return html;
}
/** Subset of the `ytInitialPlayerResponse` JSON that this script reads. */
interface PlayerResponse {
  captions?: {
    playerCaptionsTracklistRenderer?: {
      captionTracks?: { baseUrl?: string }[];
    };
  };
}

/**
 * Returns the child at `index` of a parsed DOM node, or `undefined` when the
 * node is missing, has no children, or has no child at that position.
 */
function childAt(node: unknown, index: number): Element | undefined {
  const children = (node as Element | undefined)?.children;
  return children?.[index] as Element | undefined;
}

/** Returns the `data` string of a parsed text node, or `undefined` if there is none. */
function textData(node: unknown): string | undefined {
  const data = (node as Text | undefined)?.data;
  return typeof data === 'string' ? data : undefined;
}

/**
 * Pulls the `ytInitialPlayerResponse` object out of a watch page.
 *
 * @param html - Full HTML of the watch page.
 * @returns The parsed object, or `undefined` when the expected script text is
 *   not found at the expected position in the document.
 * @throws SyntaxError when the text after the marker is not valid JSON.
 */
function extractPlayerResponse(html: string): PlayerResponse | undefined {
  const marker = 'var ytInitialPlayerResponse = ';
  const dom = parse(html);
  // Same fixed path as before (dom[1] > child 1 > child 0 > child 0), but any
  // missing step yields undefined instead of a TypeError.
  const scriptText = textData(childAt(childAt(childAt(dom[1], 1), 0), 0));
  if (scriptText === undefined) return undefined;
  // Slice from where the marker actually is, not from index 0: the presence
  // check and the slice offset must agree.
  const markerAt = scriptText.indexOf(marker);
  if (markerAt === -1 || !scriptText.endsWith(';')) return undefined;
  const parsed = JSON.parse(scriptText.slice(markerAt + marker.length, -1)) as PlayerResponse | null;
  return parsed ?? undefined;
}

/**
 * Converts a caption-track XML document into decoded text lines.
 *
 * Children of the second top-level node are visited in order; each one
 * contributes the entity-decoded text of its first child. Children without
 * text are skipped.
 */
function transcriptLines(xml: string): string[] {
  const lines: string[] = [];
  const root = parse(xml)[1] as Element | undefined;
  for (const entry of root?.children ?? []) {
    const text = textData(childAt(entry, 0));
    if (text !== undefined) lines.push(decodeXML(text));
  }
  return lines;
}

/**
 * Prints the transcript of one video to stdout, given its watch-page HTML.
 *
 * Uses the first caption track listed in the player response. Recoverable
 * conditions (empty page, no player response, no captions) are reported on
 * stderr and return normally.
 *
 * @throws Error when the caption request returns a non-OK HTTP status.
 */
async function printTranscript(videoID: string, html: string): Promise<void> {
  if (!html) {
    console.error('empty page', videoID);
    return;
  }
  const playerResponse = extractPlayerResponse(html);
  if (playerResponse === undefined) {
    console.error('no player response', videoID);
    return;
  }
  const baseUrl = playerResponse.captions?.playerCaptionsTracklistRenderer?.captionTracks?.[0]?.baseUrl;
  if (typeof baseUrl !== 'string') {
    console.error('no captions', videoID);
    return;
  }
  const response = await fetch(baseUrl);
  if (!response.ok) {
    throw new Error(`caption request failed with HTTP ${response.status}`);
  }
  for (const line of transcriptLines(await response.text())) {
    console.log(line);
  }
}

/**
 * Entry point: prints the transcript of every video ID given on the command line.
 *
 * Watch pages are fetched in parallel; results are processed in argument order
 * so transcripts are printed in that order. IDs whose page fetch rejected are
 * retried in further rounds, up to `maxAttempts` rounds in total. A failure
 * while processing one video is logged and does not stop the remaining videos.
 */
async function main(): Promise<void> {
  // Upper bound on fetch rounds so a persistent failure (e.g. no network)
  // cannot spin forever.
  const maxAttempts = 3;
  let videoIDs = process.argv.slice(2);
  for (let attempt = 1; videoIDs.length > 0; attempt++) {
    const settledPromises = await Promise.allSettled(videoIDs.map(getHtmlByVideoID));
    const videoIDsToRetry: string[] = [];
    for (const [settledNum, settled] of settledPromises.entries()) {
      const videoID = `${videoIDs[settledNum]}`;
      if (settled.status === 'rejected') {
        console.error(`rejected ${videoID}`, settled.reason);
        videoIDsToRetry.push(videoID);
        continue;
      }
      try {
        await printTranscript(videoID, settled.value);
      } catch (error: unknown) {
        // Keep going: one malformed page or failed caption request should not
        // discard the other videos.
        console.error(`failed ${videoID}`, error);
      }
    }
    if (videoIDsToRetry.length > 0 && attempt >= maxAttempts) {
      console.error(`giving up after ${maxAttempts} attempts`, videoIDsToRetry.join(' '));
      return;
    }
    videoIDs = videoIDsToRetry;
  }
}
// Run the script. Any rejection from main() is logged and turned into a
// non-zero exit code instead of being left as an unhandled rejection.
main().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment