alexose/youtube2podcast.js

## youtube2podcast.js
/*
 * YouTube to Podcast using Node.js and not much else
 *
 * I got annoyed by all of the clunky and/or paid solutions to this problem, so I
 * thought I'd just roll my own.  My guess was that it'd be about 200 lines of code,
 * and I wasn't too far off.  It's not perfect but it works and it's relatively easy
 * to follow.
 *
 * No half-hour Docker installs or 20gb Go libraries to download.  Just throw it on a
 * free EC2 micro server and serve the files using nginx like the good ol days.  It
 * could probably work as a Lambda, too, without too much effort.
 *
 * Files are cached in the filesystem, so it's cheap to run as a cron job in order to
 * continuously check for updates.  If something goes wrong, just delete the /public
 * folder and run the script again.
 *
 * You'll need to `npm install -g youtube-dl-exec xml2js node-fetch` before running.
 * Or set everything up with its own directory and package.json file, idc
 *
 */

const https = require("https");
const ytdl = require("youtube-dl-exec");
const fs = require("fs");
const path = require("path");
const xml2js = require("xml2js");
const fetch = require("node-fetch");

// ID of the YouTube playlist to turn into a podcast feed (passed to fetchPlaylistItems).
const playlistId = "YOUR_YOUTUBE_PLAYLIST_ID";
// YouTube Data API v3 key; sent as the `key` query parameter when listing the playlist.
const apiKey = "YOUR_YOUTUBE_DATA_API_V3_KEY";
// Public URL prefix used to build each episode's enclosure URL: `${baseUrl}/<videoId>.mp3`.
const baseUrl = "https://example.com/podcasts/your_podcast";

// Start the run. `main` is a function declaration, so it is hoisted and callable here.
main();

/**
 * Runs the whole pipeline: list the playlist, make sure every entry has a local
 * audio file, then write the resulting RSS feed to ./public/rss.xml.
 *
 * @returns {Promise<void>} Resolves once the feed file has been written.
 */
async function main() {
    // First, get all playlist entries from the YouTube Data API V3
    const entries = await fetchPlaylistItems(playlistId, apiKey);

    // Next, go through each entry, map all necessary fields, and ensure we have a valid audio file to link to.
    // If we don't, use youtube-dl to grab the audio only, figure out the duration, and save it to a directory.
    const processedEntries = await processEntries(entries);

    // Finally, wrap the full list in a valid RSS feed, convert it to XML, and save to a file.
    const rssFeedXML = convertToRssFeed(processedEntries);

    // ./public is otherwise only created as a side effect of processing at least
    // one entry, so an empty playlist on a fresh checkout would fail with ENOENT.
    const feedPath = "./public/rss.xml";
    fs.mkdirSync(path.dirname(feedPath), {recursive: true});
    fs.writeFileSync(feedPath, rssFeedXML);

    console.log("Done!");
}

/**
 * Fetches every item of a YouTube playlist, following `nextPageToken` until the
 * API stops returning one.
 *
 * Any failure (network error, non-2xx status, bad JSON) is logged and terminates
 * the process with exit code 1, so callers never observe a rejected promise.
 *
 * @param {string} playlistId - Playlist to list.
 * @param {string} apiKey - YouTube Data API v3 key.
 * @param {string} [nextPageToken=""] - Page to start from; empty means the first page.
 * @returns {Promise<Array<object>>} All `items` of all fetched pages, in API order.
 */
async function fetchPlaylistItems(playlistId, apiKey, nextPageToken = "") {
    const collected = [];
    let pageToken = nextPageToken;

    try {
        do {
            const url = new URL("https://www.googleapis.com/youtube/v3/playlistItems");
            url.searchParams.set("part", "snippet");
            url.searchParams.set("maxResults", "50");
            url.searchParams.set("playlistId", playlistId);
            url.searchParams.set("key", apiKey);
            // Only send pageToken when we actually have one; the first request has none.
            if (pageToken) {
                url.searchParams.set("pageToken", pageToken);
            }

            const response = await fetch(url);
            if (!response.ok) {
                throw new Error(`HTTP error! Status: ${response.status}`);
            }

            const data = await response.json();
            // A response without `items` is treated as an empty page so callers
            // always receive an array.
            collected.push(...(data.items ?? []));
            pageToken = data.nextPageToken;
        } while (pageToken);
    } catch (error) {
        console.error("Error fetching playlist entries:", error);
        process.exit(1);
    }

    return collected;
}

/**
 * Processes playlist entries one at a time, in order, and keeps only the ones
 * for which processSingleEntry returned a truthy result.
 *
 * @param {Array<object>} entries - Raw playlist items.
 * @returns {Promise<Array<object>>} Successfully processed entries, in input order.
 */
async function processEntries(entries) {
    const kept = [];
    for (const rawEntry of entries) {
        // Sequential on purpose: each entry is awaited before the next one starts.
        const processed = await processSingleEntry(rawEntry);
        if (!processed) {
            continue;
        }
        kept.push(processed);
    }
    return kept;
}

/**
 * Turns one raw playlist item into a feed item backed by a local MP3.
 *
 * Maps the item, loads its video info through fetchInfo, downloads the audio with
 * youtube-dl when ./public/audio/<id>.mp3 is not present, and fills in the
 * duration and enclosure URL/length.
 *
 * @param {object} unprocessedEntry - Raw playlist item; falsy values are skipped.
 * @returns {Promise<object|null|undefined>} The feed item; `null` for a falsy
 *   input; `undefined` when no info is available for the video. A failed
 *   download terminates the process with exit code 1.
 */
async function processSingleEntry(unprocessedEntry) {
    if (!unprocessedEntry) {
        return null;
    }

    const entry = mapEntry(unprocessedEntry);
    const videoId = entry.guid;

    // Per-video audio and metadata files, both keyed by video id
    const audioDir = "./public/audio";
    const infoDir = "./public/info";
    const audioPath = path.join(audioDir, `${videoId}.mp3`);
    const infoPath = path.join(infoDir, `${videoId}.json`);

    for (const dir of [audioDir, infoDir]) {
        createDirIfMissing(dir);
    }

    // Cached info is used when present; null means the lookup failed
    // (probably a deleted video), so the entry is dropped.
    const info = await fetchInfo(videoId, infoPath);
    if (info === null) {
        return;
    }

    entry["itunes:duration"] = formatDuration(info.duration);

    const hasAudio = fs.existsSync(audioPath);
    if (hasAudio) {
        console.log(`Found audio for ${entry.title} (${audioPath}).`);
    } else {
        console.log(`No audio for ${entry.title} (${audioPath}). Downloading ${info.duration} seconds now...`);
        try {
            const downloadOptions = {
                extractAudio: true,
                audioFormat: "mp3",
                output: audioPath,
            };
            await ytdl(`http://www.youtube.com/watch?v=${videoId}`, downloadOptions);
        } catch (e) {
            console.error(`Failed to download audio for ${entry.title}:`, e);
            process.exit(1);
        }
    }

    // The enclosure needs the file size in bytes
    const {size} = fs.statSync(audioPath);
    const enclosureAttrs = entry.enclosure.$;
    enclosureAttrs.url = `${baseUrl}/${videoId}.mp3`;
    enclosureAttrs.length = size;

    return entry;
}

/**
 * Returns the youtube-dl metadata for a video, using a JSON file as a cache.
 *
 * When `infoPath` exists its parsed content is returned as-is. Otherwise the
 * metadata is fetched with youtube-dl and written to `infoPath`. If that fails,
 * the literal `null` is cached instead, so the video is not retried on later
 * runs (delete the file to force a retry).
 *
 * @param {string} id - YouTube video id.
 * @param {string} infoPath - Path of the cache file for this video.
 * @returns {Promise<object|null>} The metadata, or `null` when it could not be fetched.
 */
async function fetchInfo(id, infoPath) {
    if (fs.existsSync(infoPath)) {
        return JSON.parse(fs.readFileSync(infoPath, "utf8"));
    }

    // Declared locally: the undeclared assignment used before created an implicit
    // global, which is a ReferenceError in strict mode / ES modules.
    let info;
    try {
        info = await ytdl(`http://www.youtube.com/watch?v=${id}`, {dumpSingleJson: true});
        fs.writeFileSync(infoPath, JSON.stringify(info, null, 2));
    } catch (e) {
        // Handle deleted video, but leave a trace of why the entry is being skipped.
        console.warn(`Could not fetch info for ${id}; caching it as unavailable:`, e);
        info = null;
        fs.writeFileSync(infoPath, "null");
    }
    return info;
}

/**
 * Maps a raw playlist item to the object shape xml2js serialises as an RSS `<item>`.
 *
 * The enclosure URL/length and `itunes:duration` are placeholders here; they are
 * filled in later, once the audio file is known.
 *
 * @param {object} entry - Playlist item with a `snippet` (title, description,
 *   publishedAt, channelTitle, resourceId.videoId, optional thumbnails).
 * @returns {object} Feed item; `guid` holds the bare video id.
 */
function mapEntry(entry) {
    const {snippet} = entry;
    const {videoId} = snippet.resourceId;

    // Extract the highest quality thumbnail available. `maxres` is the largest
    // size the API offers and was previously never considered. A missing
    // thumbnails object no longer throws; it just leaves the image href undefined.
    const thumbnails = snippet.thumbnails ?? {};
    const thumbnailUrl =
        thumbnails.maxres?.url ||
        thumbnails.standard?.url ||
        thumbnails.high?.url ||
        thumbnails.medium?.url ||
        thumbnails.default?.url;

    return {
        title: snippet.title,
        "itunes:subtitle": snippet.title.substring(0, 100), // Short summary, adjust as needed
        description: snippet.description,
        "itunes:summary": snippet.description,
        pubDate: new Date(snippet.publishedAt).toUTCString(),
        link: `https://www.youtube.com/watch?v=${videoId}`,
        guid: videoId,
        "itunes:author": snippet.channelTitle,
        enclosure: {
            $: {
                type: "audio/mpeg",
                url: null, // Will be added later
                length: "0", // Will be added later
            },
        },
        "itunes:image": {
            $: {
                href: thumbnailUrl,
            },
        },
        "itunes:explicit": "no",
        "itunes:category": {
            $: {
                text: "Science",
            },
        },
        "itunes:keywords": "Carbon Capture, Climate, Environment",
        "itunes:duration": "00:00:00", // Will be added later
    };
}

/**
 * Wraps the processed entries in an RSS 2.0 channel (with the iTunes namespace)
 * and serialises the result with xml2js.
 *
 * The channel metadata below is placeholder text meant to be edited by hand.
 *
 * @param {Array<object>} entries - Feed items, emitted as the channel's `item` elements.
 * @returns {string} Whatever `xml2js.Builder#buildObject` produces for the feed object.
 */
function convertToRssFeed(entries) {
    const rootAttributes = {
        version: "2.0",
        "xmlns:itunes": "http://www.itunes.com/dtds/podcast-1.0.dtd",
    };

    const owner = {
        "itunes:name": "OWNER NAME",
        "itunes:email": "OWNER EMAIL",
    };

    // Key order is kept as-is since it determines element order in the output.
    const channel = {
        title: "YOUR TITLE",
        link: "LINK TO YOUTUBE PLAYLIST",
        description: "DESCRIPTION",
        language: "en-us",
        "itunes:author": "AUTHOR",
        "itunes:image": {$: {href: "IMAGE URL"}},
        "itunes:owner": owner,
        "itunes:explicit": "no",
        "itunes:category": {$: {text: "Science"}},
        item: entries,
    };

    const feed = {rss: {$: rootAttributes, channel}};
    const builder = new xml2js.Builder();
    return builder.buildObject(feed);
}

// Helper functions
/**
 * Formats a duration in seconds as `HH:MM:SS`.
 *
 * Fractional input is truncated to whole seconds; missing, non-numeric or
 * negative input is formatted as `00:00:00` instead of producing `NaN` fields.
 * Hours are not capped, so 100 hours yields `100:00:00`.
 *
 * @param {number|string|null|undefined} seconds - Duration in seconds.
 * @returns {string} Zero-padded `HH:MM:SS` string.
 */
function formatDuration(seconds) {
    const numeric = Number(seconds);
    const total = Number.isFinite(numeric) && numeric > 0 ? Math.floor(numeric) : 0;

    const pad = (value) => String(value).padStart(2, "0");

    const hours = Math.floor(total / 3600);
    const minutes = Math.floor((total % 3600) / 60);
    const secondsLeft = total % 60;

    return `${pad(hours)}:${pad(minutes)}:${pad(secondsLeft)}`;
}

/**
 * Creates `dir` (and any missing parents) unless something already exists at that path.
 *
 * @param {string} dir - Directory path to ensure.
 * @returns {void}
 */
function createDirIfMissing(dir) {
    const alreadyPresent = fs.existsSync(dir);
    if (alreadyPresent) {
        return;
    }
    fs.mkdirSync(dir, {recursive: true});
}
	/*
	* YouTube to Podcast using Node.js and not much else
	*
	* I got annoyed by all of the clunky and/or paid solutions to this problem, so I
	* thought I'd just roll my own. My guess was that it'd be about 200 lines of code,
	* and I wasn't too far off. It's not perfect but it works and it's relatively easy
	* to follow.
	*
	* No half-hour Docker installs or 20gb Go libraries to download. Just throw it on a
	* free EC2 micro server and serve the files using nginx like the good ol days. It
	* could probably work as a Lambda, too, without too much effort.
	*
	* Files are cached in the filesystem, so it's cheap to run as a cron job in order to
	* continuously check for updates. If something goes wrong, just delete the /public
	* folder and run the script again.
	*
	* You'll need to `npm install -g youtube-dl-exec xml2js node-fetch` before running.
	* Or set everything up with its own directory and package.json file, idc
	*
	*/

	const https = require("https");
	const ytdl = require("youtube-dl-exec");
	const fs = require("fs");
	const path = require("path");
	const xml2js = require("xml2js");
	const fetch = require("node-fetch");

	// ID of the YouTube playlist to turn into a podcast feed (passed to fetchPlaylistItems).
	const playlistId = "YOUR_YOUTUBE_PLAYLIST_ID";
	// YouTube Data API v3 key; sent as the `key` query parameter when listing the playlist.
	const apiKey = "YOUR_YOUTUBE_DATA_API_V3_KEY";
	// Public URL prefix used to build each episode's enclosure URL: `${baseUrl}/<videoId>.mp3`.
	const baseUrl = "https://example.com/podcasts/your_podcast";

	// Start the run. `main` is a function declaration, so it is hoisted and callable here.
	main();

	/**
	 * Runs the whole pipeline: list the playlist, make sure every entry has a local
	 * audio file, then write the resulting RSS feed to ./public/rss.xml.
	 *
	 * @returns {Promise<void>} Resolves once the feed file has been written.
	 */
	async function main() {
	    // First, get all playlist entries from the YouTube Data API V3
	    const entries = await fetchPlaylistItems(playlistId, apiKey);

	    // Next, go through each entry, map all necessary fields, and ensure we have a valid audio file to link to.
	    // If we don't, use youtube-dl to grab the audio only, figure out the duration, and save it to a directory.
	    const processedEntries = await processEntries(entries);

	    // Finally, wrap the full list in a valid RSS feed, convert it to XML, and save to a file.
	    const rssFeedXML = convertToRssFeed(processedEntries);

	    // ./public is otherwise only created as a side effect of processing at least
	    // one entry, so an empty playlist on a fresh checkout would fail with ENOENT.
	    const feedPath = "./public/rss.xml";
	    fs.mkdirSync(path.dirname(feedPath), {recursive: true});
	    fs.writeFileSync(feedPath, rssFeedXML);

	    console.log("Done!");
	}

	/**
	 * Fetches every item of a YouTube playlist, following `nextPageToken` until the
	 * API stops returning one.
	 *
	 * Any failure (network error, non-2xx status, bad JSON) is logged and terminates
	 * the process with exit code 1, so callers never observe a rejected promise.
	 *
	 * @param {string} playlistId - Playlist to list.
	 * @param {string} apiKey - YouTube Data API v3 key.
	 * @param {string} [nextPageToken=""] - Page to start from; empty means the first page.
	 * @returns {Promise<Array<object>>} All `items` of all fetched pages, in API order.
	 */
	async function fetchPlaylistItems(playlistId, apiKey, nextPageToken = "") {
	    const collected = [];
	    let pageToken = nextPageToken;

	    try {
	        do {
	            const url = new URL("https://www.googleapis.com/youtube/v3/playlistItems");
	            url.searchParams.set("part", "snippet");
	            url.searchParams.set("maxResults", "50");
	            url.searchParams.set("playlistId", playlistId);
	            url.searchParams.set("key", apiKey);
	            // Only send pageToken when we actually have one; the first request has none.
	            if (pageToken) {
	                url.searchParams.set("pageToken", pageToken);
	            }

	            const response = await fetch(url);
	            if (!response.ok) {
	                throw new Error(`HTTP error! Status: ${response.status}`);
	            }

	            const data = await response.json();
	            // A response without `items` is treated as an empty page so callers
	            // always receive an array.
	            collected.push(...(data.items ?? []));
	            pageToken = data.nextPageToken;
	        } while (pageToken);
	    } catch (error) {
	        console.error("Error fetching playlist entries:", error);
	        process.exit(1);
	    }

	    return collected;
	}

	/**
	 * Processes playlist entries one at a time, in order, and keeps only the ones
	 * for which processSingleEntry returned a truthy result.
	 *
	 * @param {Array<object>} entries - Raw playlist items.
	 * @returns {Promise<Array<object>>} Successfully processed entries, in input order.
	 */
	async function processEntries(entries) {
	    const kept = [];
	    for (const rawEntry of entries) {
	        // Sequential on purpose: each entry is awaited before the next one starts.
	        const processed = await processSingleEntry(rawEntry);
	        if (!processed) {
	            continue;
	        }
	        kept.push(processed);
	    }
	    return kept;
	}

	/**
	 * Turns one raw playlist item into a feed item backed by a local MP3.
	 *
	 * Maps the item, loads its video info through fetchInfo, downloads the audio with
	 * youtube-dl when ./public/audio/<id>.mp3 is not present, and fills in the
	 * duration and enclosure URL/length.
	 *
	 * @param {object} unprocessedEntry - Raw playlist item; falsy values are skipped.
	 * @returns {Promise<object|null|undefined>} The feed item; `null` for a falsy
	 *   input; `undefined` when no info is available for the video. A failed
	 *   download terminates the process with exit code 1.
	 */
	async function processSingleEntry(unprocessedEntry) {
	    if (!unprocessedEntry) {
	        return null;
	    }

	    const entry = mapEntry(unprocessedEntry);
	    const videoId = entry.guid;

	    // Per-video audio and metadata files, both keyed by video id
	    const audioDir = "./public/audio";
	    const infoDir = "./public/info";
	    const audioPath = path.join(audioDir, `${videoId}.mp3`);
	    const infoPath = path.join(infoDir, `${videoId}.json`);

	    for (const dir of [audioDir, infoDir]) {
	        createDirIfMissing(dir);
	    }

	    // Cached info is used when present; null means the lookup failed
	    // (probably a deleted video), so the entry is dropped.
	    const info = await fetchInfo(videoId, infoPath);
	    if (info === null) {
	        return;
	    }

	    entry["itunes:duration"] = formatDuration(info.duration);

	    const hasAudio = fs.existsSync(audioPath);
	    if (hasAudio) {
	        console.log(`Found audio for ${entry.title} (${audioPath}).`);
	    } else {
	        console.log(`No audio for ${entry.title} (${audioPath}). Downloading ${info.duration} seconds now...`);
	        try {
	            const downloadOptions = {
	                extractAudio: true,
	                audioFormat: "mp3",
	                output: audioPath,
	            };
	            await ytdl(`http://www.youtube.com/watch?v=${videoId}`, downloadOptions);
	        } catch (e) {
	            console.error(`Failed to download audio for ${entry.title}:`, e);
	            process.exit(1);
	        }
	    }

	    // The enclosure needs the file size in bytes
	    const {size} = fs.statSync(audioPath);
	    const enclosureAttrs = entry.enclosure.$;
	    enclosureAttrs.url = `${baseUrl}/${videoId}.mp3`;
	    enclosureAttrs.length = size;

	    return entry;
	}

	/**
	 * Returns the youtube-dl metadata for a video, using a JSON file as a cache.
	 *
	 * When `infoPath` exists its parsed content is returned as-is. Otherwise the
	 * metadata is fetched with youtube-dl and written to `infoPath`. If that fails,
	 * the literal `null` is cached instead, so the video is not retried on later
	 * runs (delete the file to force a retry).
	 *
	 * @param {string} id - YouTube video id.
	 * @param {string} infoPath - Path of the cache file for this video.
	 * @returns {Promise<object|null>} The metadata, or `null` when it could not be fetched.
	 */
	async function fetchInfo(id, infoPath) {
	    if (fs.existsSync(infoPath)) {
	        return JSON.parse(fs.readFileSync(infoPath, "utf8"));
	    }

	    // Declared locally: the undeclared assignment used before created an implicit
	    // global, which is a ReferenceError in strict mode / ES modules.
	    let info;
	    try {
	        info = await ytdl(`http://www.youtube.com/watch?v=${id}`, {dumpSingleJson: true});
	        fs.writeFileSync(infoPath, JSON.stringify(info, null, 2));
	    } catch (e) {
	        // Handle deleted video, but leave a trace of why the entry is being skipped.
	        console.warn(`Could not fetch info for ${id}; caching it as unavailable:`, e);
	        info = null;
	        fs.writeFileSync(infoPath, "null");
	    }
	    return info;
	}

	/**
	 * Maps a raw playlist item to the object shape xml2js serialises as an RSS `<item>`.
	 *
	 * The enclosure URL/length and `itunes:duration` are placeholders here; they are
	 * filled in later, once the audio file is known.
	 *
	 * @param {object} entry - Playlist item with a `snippet` (title, description,
	 *   publishedAt, channelTitle, resourceId.videoId, optional thumbnails).
	 * @returns {object} Feed item; `guid` holds the bare video id.
	 */
	function mapEntry(entry) {
	    const {snippet} = entry;
	    const {videoId} = snippet.resourceId;

	    // Extract the highest quality thumbnail available. `maxres` is the largest
	    // size the API offers and was previously never considered. A missing
	    // thumbnails object no longer throws; it just leaves the image href undefined.
	    // (The fallback chain uses real `||` operators; the escaped `\|\|` form is
	    // not valid JavaScript.)
	    const thumbnails = snippet.thumbnails ?? {};
	    const thumbnailUrl =
	        thumbnails.maxres?.url ||
	        thumbnails.standard?.url ||
	        thumbnails.high?.url ||
	        thumbnails.medium?.url ||
	        thumbnails.default?.url;

	    return {
	        title: snippet.title,
	        "itunes:subtitle": snippet.title.substring(0, 100), // Short summary, adjust as needed
	        description: snippet.description,
	        "itunes:summary": snippet.description,
	        pubDate: new Date(snippet.publishedAt).toUTCString(),
	        link: `https://www.youtube.com/watch?v=${videoId}`,
	        guid: videoId,
	        "itunes:author": snippet.channelTitle,
	        enclosure: {
	            $: {
	                type: "audio/mpeg",
	                url: null, // Will be added later
	                length: "0", // Will be added later
	            },
	        },
	        "itunes:image": {
	            $: {
	                href: thumbnailUrl,
	            },
	        },
	        "itunes:explicit": "no",
	        "itunes:category": {
	            $: {
	                text: "Science",
	            },
	        },
	        "itunes:keywords": "Carbon Capture, Climate, Environment",
	        "itunes:duration": "00:00:00", // Will be added later
	    };
	}

	/**
	 * Wraps the processed entries in an RSS 2.0 channel (with the iTunes namespace)
	 * and serialises the result with xml2js.
	 *
	 * The channel metadata below is placeholder text meant to be edited by hand.
	 *
	 * @param {Array<object>} entries - Feed items, emitted as the channel's `item` elements.
	 * @returns {string} Whatever `xml2js.Builder#buildObject` produces for the feed object.
	 */
	function convertToRssFeed(entries) {
	    const rootAttributes = {
	        version: "2.0",
	        "xmlns:itunes": "http://www.itunes.com/dtds/podcast-1.0.dtd",
	    };

	    const owner = {
	        "itunes:name": "OWNER NAME",
	        "itunes:email": "OWNER EMAIL",
	    };

	    // Key order is kept as-is since it determines element order in the output.
	    const channel = {
	        title: "YOUR TITLE",
	        link: "LINK TO YOUTUBE PLAYLIST",
	        description: "DESCRIPTION",
	        language: "en-us",
	        "itunes:author": "AUTHOR",
	        "itunes:image": {$: {href: "IMAGE URL"}},
	        "itunes:owner": owner,
	        "itunes:explicit": "no",
	        "itunes:category": {$: {text: "Science"}},
	        item: entries,
	    };

	    const feed = {rss: {$: rootAttributes, channel}};
	    const builder = new xml2js.Builder();
	    return builder.buildObject(feed);
	}

	// Helper functions
	/**
	 * Formats a duration in seconds as `HH:MM:SS`.
	 *
	 * Fractional input is truncated to whole seconds; missing, non-numeric or
	 * negative input is formatted as `00:00:00` instead of producing `NaN` fields.
	 * Hours are not capped, so 100 hours yields `100:00:00`.
	 *
	 * @param {number|string|null|undefined} seconds - Duration in seconds.
	 * @returns {string} Zero-padded `HH:MM:SS` string.
	 */
	function formatDuration(seconds) {
	    const numeric = Number(seconds);
	    const total = Number.isFinite(numeric) && numeric > 0 ? Math.floor(numeric) : 0;

	    const pad = (value) => String(value).padStart(2, "0");

	    const hours = Math.floor(total / 3600);
	    const minutes = Math.floor((total % 3600) / 60);
	    const secondsLeft = total % 60;

	    return `${pad(hours)}:${pad(minutes)}:${pad(secondsLeft)}`;
	}

	/**
	 * Creates `dir` (and any missing parents) unless something already exists at that path.
	 *
	 * @param {string} dir - Directory path to ensure.
	 * @returns {void}
	 */
	function createDirIfMissing(dir) {
	    const alreadyPresent = fs.existsSync(dir);
	    if (alreadyPresent) {
	        return;
	    }
	    fs.mkdirSync(dir, {recursive: true});
	}