jonbarrow/animeultima.js

## animeultima.js
const cloudscraper = require('cloudscraper'); // Bypass cloudfare
const { JSDOM } = require('jsdom'); // DOM access in Node
const async = require('async'); // async utils

// Captures the numeric ID from the `anime-id="..."` attribute of an anime details page.
// `\d+` (rather than `\d*`) so an empty attribute is skipped instead of matched with an empty ID
const animeIdRegex = /anime-id="(\d+)"/;
// Captures every `file: "..."` source; global so it can be used with `String.prototype.matchAll`.
// `[^"]*` stops at the closing quote, so several sources on the same line stay separate matches
const sourceRegex = /file: "([^"]*)"/g;
// Captures the `src` of the first iframe tag, wherever the attribute sits inside the tag
// (the leading `\s` keeps attributes such as `data-src` from matching)
const iframeRegex = /<iframe\b[^>]*?\ssrc="([^"]*)"/;

// URL list
const URL_BASE = 'https://animeultima.eu';
const SEARCH_URL = `${URL_BASE}/search?search`; // callers append `=<title>`
const EPISODE_LIST_URL = `${URL_BASE}/api/episodeList?animeId`; // callers append `=<animeId>`

/**
 * Main function: scrape animeultima for the direct stream URLs of one episode.
 *
 * Flow: search the site by title -> read the numeric anime ID from the matching
 * details page -> ask the episode-list API for the requested episode -> read the
 * mirror list from the episode page -> pull the `file: "..."` sources out of every
 * mirror that is hosted on the site itself.
 *
 * @param {object} kitsuDetails Kitsu-style anime object; only `attributes.titles`
 *   and `attributes.showType` are read.
 * @param {number|string} [episodeNumber=1] Episode to look up (always 1 for movies).
 * @returns {Promise<Array<{stream: string, server: string, dub: boolean}>|null>}
 *   The streams found (possibly empty, listed in mirror order), or `null` when no
 *   usable title, anime, anime ID or episode could be found.
 */
async function scrape(kitsuDetails, episodeNumber=1) {
	const { titles, showType } = kitsuDetails.attributes;

	// Movies don't have multiple episodes. Number() lets callers pass '3' as well as 3,
	// which the strict comparison against the API's episode numbers would otherwise reject
	const wantedEpisode = showType === 'movie' ? 1 : Number(episodeNumber);

	// Search endpoint uses a specific version of the titles
	const { en_us, en, en_jp, jp, ja_jp } = titles;
	const title = (en_jp || en || jp || en_us || ja_jp);

	if (!title) { // Nothing to search for
		return null;
	}

	// Titles contain spaces, '&', '/' and the like, so encode before building the query string
	const search = await cloudscraper.get(`${SEARCH_URL}=${encodeURIComponent(title)}`); // Get the search page HTML
	const searchDocument = new JSDOM(search).window.document;
	const searchResults = [...searchDocument.querySelectorAll('.anime-box')] // Map the HTML elements to objects for easier use later
		.map(element => element.querySelector('a'))
		.filter(Boolean) // A result box without a link cannot be followed
		.map(link => ({
			title: link.getAttribute('title') || '',
			href: link.href
		}));

	const anime = searchResults.find(result => result.title.includes(title)); // Search for the requested anime

	if (!anime) { // Return nothing if not found
		return null;
	}

	/*
		NOTE:
		There is another potential way to scrape this site that does NOT require using the API or search endpoints

		This site accepts URLs in the form of "https://animeultima.eu/a/SLUG/episode-NUMBER-[sub/dub]" which will
		redirect to the other URL format of "https://animeultima.eu/a/SLUG_SOME-ID/episode-NUMBER_SOME-ID-[sub/dub]"

		This means we do NOT need to know what those additional 2 IDs are in order to directly request an episode
		link, and it also cuts out the need to use the search endpoint to filter series and cuts out the API for
		episode listing

		However, the "SLUG" is NOT reliable or standard. It seems to accept Kitsu slugs for some shows and not others.
		For example it accepts `boku-no-hero-academia-2nd-season` for My Hero Season 2, which is the slug provided by
		Kitsu. But it does NOT accept `spirited-away`, the slug provided by Kitsu, for Spirited Away. In the case of
		Spirited Away it expects `sen-to-chihiro-no-kamikakushi` which is the en_jp title converted to lowercase and
		with spaces replaced by dashes. But even this does not stay reliable, as seen with My Hero Season 4 where the
		slug from Kitsu is `boku-no-hero-academia-4` and the en_jp slug is the same, but yet it expects the slug to be
		`boku-no-hero-academia-4th-season`

		So if you can figure out where this site is getting the slugs from, you can potentially skip all steps needed
		to find the anime and jump directly to episode pages
	*/

	// The url `${anime.href}/episode-${episodeNumber}-sub` also redirects correctly but throws 404 errors when using CF scrapers
	// Maybe different request modules better support this? It would remove the need for the API and animeId requests below

	const animePage = await cloudscraper.get(anime.href); // Request the main anime details page
	const animeIdData = animeIdRegex.exec(animePage); // Look for the ID

	// If ID was not found, return nothing
	if (!animeIdData || !animeIdData[1]) {
		return null;
	}

	// Extract the anime ID
	const animeId = animeIdData[1];

	// Get the episode list and search for the requested episode
	const episodeList = await cloudscraper.get(`${EPISODE_LIST_URL}=${animeId}`, { json: true });
	const episodes = (episodeList && Array.isArray(episodeList.episodes)) ? episodeList.episodes : [];
	const episode = episodes.find(({ episode_num }) => Number(episode_num) === wantedEpisode);

	// If the episode (or its page URL) was not found, return nothing
	if (!episode || !episode.urls || !episode.urls.sub) {
		return null;
	}

	// Even if there's multiple URLs (for sub and dub), each URL actually has links to both sub and dub (if exist)
	const episodePage = await cloudscraper.get(episode.urls.sub);
	const episodeDocument = new JSDOM(episodePage).window.document;

	// Get a list of embeds
	const embeds = [...episodeDocument.querySelectorAll('.server-selector .mirror-selector option')]
		.map(option => describeMirror(option));

	// Query all the embeds in parallel; each one yields its own (possibly empty) list of streams
	const streamsPerEmbed = await Promise.all(embeds.map(embed => fetchEmbedStreams(embed)));

	return streamsPerEmbed.flat();
}

// Turn one <option> of the mirror selector into { dub, server_type, url }
function describeMirror(option) {
	const url = option.value;
	const serverType = option.innerHTML.toLowerCase().split(': ').pop(); // Server type determines where to make the following request(s)
	const [lang, episodeId] = url.split('-').pop().split('/'); // The URL ends in "<lang>/<episodeId>"

	// If the server type is for the site ("au"engine and "au".ch = "a"nime"u"ltima) then store the embed
	// "faststream" also seems to be stored on the AU website, but under a different embed location (noticed with newer series)
	// If it matches neither, assume it's an iframe to a different service and keep the original URL
	let embedUrl = url;

	if (serverType === 'auengine' || serverType === 'au.ch') {
		embedUrl = `${URL_BASE}/e/${episodeId}`;
	} else if (serverType.includes('faststream')) {
		embedUrl = `${URL_BASE}/faststream/${episodeId}`;
	}

	return {
		dub: lang === 'dub', // check if the episode is dubbed
		server_type: serverType, // store the server type
		url: embedUrl
	};
}

// Fetch one embed and return the streams it exposes; never rejects, so one bad mirror cannot sink the rest
async function fetchEmbedStreams(embed) {
	const { server_type: server, dub, url } = embed;
	const isSiteHosted = server === 'auengine' || server === 'au.ch' || server.includes('faststream');

	if (!isSiteHosted) {
		// TODO: third-party embeds (services like Streamango and mp4upload) need their own scrapers:
		// fetch `url`, extract the iframe URL with `iframeRegex` and pass it to the matching scraper.
		// Until those exist the page is not requested at all, since its content would only be thrown away
		return [];
	}

	let html;

	try {
		html = await cloudscraper.get(url);
	} catch (error) {
		// Best effort: a mirror that is down simply contributes no streams
		return [];
	}

	if (typeof html !== 'string') {
		return [];
	}

	// The sources are stored in a JSON-like object in-line. There can be multiple streams per embed
	// (usually at least 2, one standard and one HLS), so collect all of them
	return [...html.matchAll(sourceRegex)].map(match => ({
		stream: match[1],
		server,
		dub
	}));
}

// Demo run: scrape episode 3 of Clannad using a hand-written stand-in for a Kitsu response
(async () => {
	console.time('Scrape Time');

	let streams;

	try {
		streams = await scrape({ // Fake Kitsu response
			attributes: {
				titles: {
					en_jp: 'Clannad',
				},
				showType: 'TV'
			}
		}, 3);
	} finally {
		console.timeEnd('Scrape Time'); // Stop the timer even when the scrape throws
	}

	console.log(streams);
})().catch(error => {
	// Report the failure instead of leaving an unhandled rejection behind
	console.error(error);
	process.exitCode = 1;
});
	const cloudscraper = require('cloudscraper'); // Bypass cloudfare
	const { JSDOM } = require('jsdom'); // DOM access in Node
	const async = require('async'); // async utils

	// Captures the numeric ID from the `anime-id="..."` attribute of an anime details page.
	// `\d+` (rather than `\d*`) so an empty attribute is skipped instead of matched with an empty ID
	const animeIdRegex = /anime-id="(\d+)"/;
	// Captures every `file: "..."` source; global so it can be used with `String.prototype.matchAll`.
	// `[^"]*` stops at the closing quote, so several sources on the same line stay separate matches
	const sourceRegex = /file: "([^"]*)"/g;
	// Captures the `src` of the first iframe tag, wherever the attribute sits inside the tag
	// (the leading `\s` keeps attributes such as `data-src` from matching)
	const iframeRegex = /<iframe\b[^>]*?\ssrc="([^"]*)"/;

	// URL list
	const URL_BASE = 'https://animeultima.eu';
	const SEARCH_URL = `${URL_BASE}/search?search`; // callers append `=<title>`
	const EPISODE_LIST_URL = `${URL_BASE}/api/episodeList?animeId`; // callers append `=<animeId>`

	/**
	 * Main function: scrape animeultima for the direct stream URLs of one episode.
	 *
	 * Flow: search the site by title -> read the numeric anime ID from the matching
	 * details page -> ask the episode-list API for the requested episode -> read the
	 * mirror list from the episode page -> pull the `file: "..."` sources out of every
	 * mirror that is hosted on the site itself.
	 *
	 * @param {object} kitsuDetails Kitsu-style anime object; only `attributes.titles`
	 *   and `attributes.showType` are read.
	 * @param {number|string} [episodeNumber=1] Episode to look up (always 1 for movies).
	 * @returns {Promise<Array<{stream: string, server: string, dub: boolean}>|null>}
	 *   The streams found (possibly empty, listed in mirror order), or `null` when no
	 *   usable title, anime, anime ID or episode could be found.
	 */
	async function scrape(kitsuDetails, episodeNumber=1) {
		const { titles, showType } = kitsuDetails.attributes;

		// Movies don't have multiple episodes. Number() lets callers pass '3' as well as 3,
		// which the strict comparison against the API's episode numbers would otherwise reject
		const wantedEpisode = showType === 'movie' ? 1 : Number(episodeNumber);

		// Search endpoint uses a specific version of the titles
		const { en_us, en, en_jp, jp, ja_jp } = titles;
		const title = (en_jp || en || jp || en_us || ja_jp);

		if (!title) { // Nothing to search for
			return null;
		}

		// Titles contain spaces, '&', '/' and the like, so encode before building the query string
		const search = await cloudscraper.get(`${SEARCH_URL}=${encodeURIComponent(title)}`); // Get the search page HTML
		const searchDocument = new JSDOM(search).window.document;
		const searchResults = [...searchDocument.querySelectorAll('.anime-box')] // Map the HTML elements to objects for easier use later
			.map(element => element.querySelector('a'))
			.filter(Boolean) // A result box without a link cannot be followed
			.map(link => ({
				title: link.getAttribute('title') || '',
				href: link.href
			}));

		const anime = searchResults.find(result => result.title.includes(title)); // Search for the requested anime

		if (!anime) { // Return nothing if not found
			return null;
		}

		/*
			NOTE:
			There is another potential way to scrape this site that does NOT require using the API or search endpoints

			This site accepts URLs in the form of "https://animeultima.eu/a/SLUG/episode-NUMBER-[sub/dub]" which will
			redirect to the other URL format of "https://animeultima.eu/a/SLUG_SOME-ID/episode-NUMBER_SOME-ID-[sub/dub]"

			This means we do NOT need to know what those additional 2 IDs are in order to directly request an episode
			link, and it also cuts out the need to use the search endpoint to filter series and cuts out the API for
			episode listing

			However, the "SLUG" is NOT reliable or standard. It seems to accept Kitsu slugs for some shows and not others.
			For example it accepts `boku-no-hero-academia-2nd-season` for My Hero Season 2, which is the slug provided by
			Kitsu. But it does NOT accept `spirited-away`, the slug provided by Kitsu, for Spirited Away. In the case of
			Spirited Away it expects `sen-to-chihiro-no-kamikakushi` which is the en_jp title converted to lowercase and
			with spaces replaced by dashes. But even this does not stay reliable, as seen with My Hero Season 4 where the
			slug from Kitsu is `boku-no-hero-academia-4` and the en_jp slug is the same, but yet it expects the slug to be
			`boku-no-hero-academia-4th-season`

			So if you can figure out where this site is getting the slugs from, you can potentially skip all steps needed
			to find the anime and jump directly to episode pages
		*/

		// The url `${anime.href}/episode-${episodeNumber}-sub` also redirects correctly but throws 404 errors when using CF scrapers
		// Maybe different request modules better support this? It would remove the need for the API and animeId requests below

		const animePage = await cloudscraper.get(anime.href); // Request the main anime details page
		const animeIdData = animeIdRegex.exec(animePage); // Look for the ID

		// If ID was not found, return nothing
		if (!animeIdData || !animeIdData[1]) {
			return null;
		}

		// Extract the anime ID
		const animeId = animeIdData[1];

		// Get the episode list and search for the requested episode
		const episodeList = await cloudscraper.get(`${EPISODE_LIST_URL}=${animeId}`, { json: true });
		const episodes = (episodeList && Array.isArray(episodeList.episodes)) ? episodeList.episodes : [];
		const episode = episodes.find(({ episode_num }) => Number(episode_num) === wantedEpisode);

		// If the episode (or its page URL) was not found, return nothing
		if (!episode || !episode.urls || !episode.urls.sub) {
			return null;
		}

		// Even if there's multiple URLs (for sub and dub), each URL actually has links to both sub and dub (if exist)
		const episodePage = await cloudscraper.get(episode.urls.sub);
		const episodeDocument = new JSDOM(episodePage).window.document;

		// Get a list of embeds
		const embeds = [...episodeDocument.querySelectorAll('.server-selector .mirror-selector option')]
			.map(option => describeMirror(option));

		// Query all the embeds in parallel; each one yields its own (possibly empty) list of streams
		const streamsPerEmbed = await Promise.all(embeds.map(embed => fetchEmbedStreams(embed)));

		return streamsPerEmbed.flat();
	}

	// Turn one <option> of the mirror selector into { dub, server_type, url }
	function describeMirror(option) {
		const url = option.value;
		const serverType = option.innerHTML.toLowerCase().split(': ').pop(); // Server type determines where to make the following request(s)
		const [lang, episodeId] = url.split('-').pop().split('/'); // The URL ends in "<lang>/<episodeId>"

		// If the server type is for the site ("au"engine and "au".ch = "a"nime"u"ltima) then store the embed
		// "faststream" also seems to be stored on the AU website, but under a different embed location (noticed with newer series)
		// If it matches neither, assume it's an iframe to a different service and keep the original URL
		let embedUrl = url;

		if (serverType === 'auengine' || serverType === 'au.ch') {
			embedUrl = `${URL_BASE}/e/${episodeId}`;
		} else if (serverType.includes('faststream')) {
			embedUrl = `${URL_BASE}/faststream/${episodeId}`;
		}

		return {
			dub: lang === 'dub', // check if the episode is dubbed
			server_type: serverType, // store the server type
			url: embedUrl
		};
	}

	// Fetch one embed and return the streams it exposes; never rejects, so one bad mirror cannot sink the rest
	async function fetchEmbedStreams(embed) {
		const { server_type: server, dub, url } = embed;
		const isSiteHosted = server === 'auengine' || server === 'au.ch' || server.includes('faststream');

		if (!isSiteHosted) {
			// TODO: third-party embeds (services like Streamango and mp4upload) need their own scrapers:
			// fetch `url`, extract the iframe URL with `iframeRegex` and pass it to the matching scraper.
			// Until those exist the page is not requested at all, since its content would only be thrown away
			return [];
		}

		let html;

		try {
			html = await cloudscraper.get(url);
		} catch (error) {
			// Best effort: a mirror that is down simply contributes no streams
			return [];
		}

		if (typeof html !== 'string') {
			return [];
		}

		// The sources are stored in a JSON-like object in-line. There can be multiple streams per embed
		// (usually at least 2, one standard and one HLS), so collect all of them
		return [...html.matchAll(sourceRegex)].map(match => ({
			stream: match[1],
			server,
			dub
		}));
	}

	// Demo run: scrape episode 3 of Clannad using a hand-written stand-in for a Kitsu response
	(async () => {
		console.time('Scrape Time');

		let streams;

		try {
			streams = await scrape({ // Fake Kitsu response
				attributes: {
					titles: {
						en_jp: 'Clannad',
					},
					showType: 'TV'
				}
			}, 3);
		} finally {
			console.timeEnd('Scrape Time'); // Stop the timer even when the scrape throws
		}

		console.log(streams);
	})().catch(error => {
		// Report the failure instead of leaving an unhandled rejection behind
		console.error(error);
		process.exitCode = 1;
	});