Skip to content

Instantly share code, notes, and snippets.

@gligoran
Last active May 3, 2022 08:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gligoran/386aafc50e446ccf581d42f91a7bc97f to your computer and use it in GitHub Desktop.
Save gligoran/386aafc50e446ccf581d42f91a7bc97f to your computer and use it in GitHub Desktop.
Slovenian Fairy Tales Downloader
#!/usr/bin/env node
const puppeteer = require('puppeteer');
const request = require('request');
const fs = require('fs');
const axios = require('axios');
const yargs = require('yargs');
const { hideBin } = require('yargs/helpers')
// URL fragment used by getMediaUrl() to recognise the media API request it intercepts.
const MEDIA_URL = 'https://api.rtvslo.si/ava/getMedia';
// Base address of a player page; downloadMp3() appends "/<id>" to it.
const FAIRY_TAIL_URL = 'https://ziv-zav.rtvslo.si/predvajaj/lahko-noc-otroci';
// Search endpoint queried by getFairyTales(), which adds a `pageSize` query parameter.
const FAIRY_TAIL_API_ENDPOINT = 'https://api.rtvslo.si/ava/getSearch2?client_id=82013fb3a531d5414f478747c1aca622&showId=54&clip=show&sort=date&order=desc'
/**
 * Opens a page in a Puppeteer-launched browser and captures the URL of the
 * request the page makes to the media API (any request whose URL contains
 * MEDIA_URL).
 *
 * The matching request is aborted instead of being sent, because only its URL
 * is needed. When several matching requests are made, the last one seen wins.
 *
 * @param {string} url - Address of the page to open.
 * @returns {Promise<string|null>} The captured request URL, or null when the
 *   page made no matching request.
 * @throws Propagates any error raised while launching or navigating; the
 *   browser is closed before the error reaches the caller.
 */
async function getMediaUrl(url) {
  let mp3Url = null;
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.setRequestInterception(true);
    page.on('request', (interceptedRequest) => {
      // Skip requests whose interception has already been resolved.
      if (interceptedRequest.isInterceptResolutionHandled()) {
        return;
      }
      if (interceptedRequest.url().includes(MEDIA_URL)) {
        mp3Url = interceptedRequest.url();
        interceptedRequest.abort();
      } else {
        interceptedRequest.continue();
      }
    });
    await page.goto(url);
  } finally {
    // Close the browser on every path so a failed navigation does not leave
    // a browser process running.
    await browser.close();
  }
  console.log({ mp3Url });
  return mp3Url;
}
/**
 * Loads a page and extracts the text of its
 * `<h1 class="funky-font h3">` heading from the rendered HTML.
 *
 * @param {string} url - Address of the page to open.
 * @returns {Promise<string>} The heading's inner text.
 * @throws {Error} When the page contains no such heading, or when the page
 *   cannot be loaded. The browser is closed in either case.
 */
async function getTitle(url) {
  const browser = await puppeteer.launch();
  let websiteContent;
  try {
    const page = await browser.newPage();
    await page.goto(url);
    websiteContent = await page.content();
  } finally {
    // Release the browser even when navigation or content retrieval fails.
    await browser.close();
  }
  const match = websiteContent.match(/<h1 class="funky-font h3">(.*?)<\/h1>/);
  if (match === null) {
    // Fail with a readable message instead of a TypeError on match[1].
    throw new Error(`No title heading found on ${url}`);
  }
  return match[1];
}
/**
 * Resolves the direct MP3 address for a player page.
 *
 * First captures the media API request URL via getMediaUrl(), then loads that
 * URL and reads the value of the first `"https":"..."` entry in the response,
 * cut off before its query string.
 *
 * @param {string} url - Address of the player page.
 * @returns {Promise<string|undefined>} The MP3 address without query string,
 *   or undefined when any step fails (the failure is logged, not thrown).
 */
async function getMp3Url(url) {
  try {
    const mediaUrl = await getMediaUrl(url);
    if (!mediaUrl) {
      // Nothing to load; avoid launching a second browser for a null URL.
      throw new Error(`No media request was captured for ${url}`);
    }
    const browser = await puppeteer.launch();
    let websiteContent;
    try {
      const page = await browser.newPage();
      await page.goto(mediaUrl);
      websiteContent = await page.content();
    } finally {
      // Release the browser even when loading the media response fails.
      await browser.close();
    }
    const match = websiteContent.match(/"https":"(.*?)\?/);
    if (match === null) {
      throw new Error(`No "https" entry found in the media response for ${url}`);
    }
    return match[1];
  } catch (err) {
    console.log("Could not resolve the browser instance => ", err);
    return undefined;
  }
}
/**
 * Downloads one recording into `downloads/<title>.mp3`.
 *
 * The returned promise settles only after the file has been fully written
 * (or the attempt has failed), so callers that await it really do wait for
 * the download. Failures are logged and never thrown.
 *
 * @param {string|number} id - Recording id appended to FAIRY_TAIL_URL.
 * @param {string} title - Recording title, used as the file name.
 * @returns {Promise<void>}
 */
async function downloadMp3(id, title) {
  const url = `${FAIRY_TAIL_URL}/${id}`;
  try {
    const mp3Url = await getMp3Url(url);
    if (!mp3Url) {
      throw new Error(`No MP3 URL could be resolved for ${url}`);
    }
    // The target directory may not exist on a fresh checkout.
    fs.mkdirSync('downloads', { recursive: true });
    // A path separator inside the title would otherwise point into a
    // directory that does not exist.
    const safeTitle = String(title).replace(/[\\/]/g, '_');
    const target = `downloads/${safeTitle}.mp3`;
    console.log(`${title} downloading...`);
    await new Promise((resolve, reject) => {
      const output = fs.createWriteStream(target);
      output.on('finish', resolve);
      output.on('error', reject);
      request
        .get(mp3Url)
        .on('error', (err) => {
          // Stop writing when the HTTP request itself fails.
          output.destroy();
          reject(err);
        })
        .pipe(output);
    });
  } catch (err) {
    console.log(err);
  }
}
// Example: downloadMp3(174867111, 'title'); // takes the recording id and a title, not a full URL
/**
 * Loads a page and collects every distinct run of nine consecutive digits
 * found in its rendered HTML, in order of first appearance.
 *
 * @param {string} url - Address of the page to scan.
 * @returns {Promise<string[]|undefined>} The distinct nine-digit strings
 *   (empty when none are present), or undefined when the page could not be
 *   loaded (the failure is logged, not thrown).
 */
async function scrapeFairyTaleIds(url) {
  try {
    const browser = await puppeteer.launch();
    let websiteContent;
    try {
      const page = await browser.newPage();
      await page.goto(url);
      websiteContent = await page.content();
    } finally {
      // Release the browser even when navigation or content retrieval fails.
      await browser.close();
    }
    const match = websiteContent.match(/\d{9}/g);
    return match === null ? [] : [...new Set(match)];
  } catch (err) {
    console.log('Could not resolve the browser instance => ', err);
    return undefined;
  }
}
/**
 * Fetches the newest recordings from the search endpoint.
 *
 * @param {number} [last=1] - Sent to the API as the `pageSize` parameter.
 * @returns {Promise<Array|undefined>} The `response.recordings` value of the
 *   API reply, or undefined when the request or its parsing fails (the error
 *   is written to stderr, not thrown).
 */
async function getFairyTales(last = 1) {
  const pending = axios.get(FAIRY_TAIL_API_ENDPOINT, {
    params: {
      pageSize: last
    }
  });
  try {
    const reply = await pending;
    return reply.data.response.recordings;
  } catch (failure) {
    console.error(failure);
  }
}
/**
 * Downloads the single most recent fairy tale.
 *
 * Does nothing except report the problem when the listing could not be
 * fetched or is empty.
 *
 * @returns {Promise<void>}
 */
async function downloadLastFairyTale() {
  const fairytales = await getFairyTales();
  // getFairyTales() yields undefined after a failed request.
  if (!Array.isArray(fairytales) || fairytales.length === 0) {
    console.error('No fairy tale is available to download.');
    return;
  }
  const [latest] = fairytales;
  await downloadMp3(latest.id, latest.title);
}
/**
 * Downloads the newest `last` fairy tales one after another.
 *
 * A 10-second pause is inserted between consecutive downloads; no pause
 * follows the final one. When the listing could not be fetched, the problem
 * is reported and nothing is downloaded.
 *
 * @param {number} last - How many of the newest recordings to request.
 * @returns {Promise<void>}
 */
async function downloadLatest(last) {
  const fairytales = await getFairyTales(last);
  // getFairyTales() yields undefined after a failed request.
  if (!Array.isArray(fairytales)) {
    console.error('The list of fairy tales could not be fetched.');
    return;
  }
  // Sequential on purpose: each iteration awaits the previous download.
  for (const [index, fairyTale] of fairytales.entries()) {
    await downloadMp3(fairyTale.id, fairyTale.title);
    const isLastItem = index === fairytales.length - 1;
    if (!isLastItem) {
      // Pause for 10 seconds before starting the next download.
      await new Promise((resolve) => setTimeout(resolve, 10000));
    }
  }
}
// Command-line entry point.
//   --latest     download only the most recent fairy tale
//   --last <n>   download the newest n fairy tales (default 10, capped at 50)
const argv = yargs(hideBin(process.argv)).argv;

// Report a rejected top-level promise instead of leaving it unhandled.
const reportFailure = (err) => {
  console.error(err);
  process.exitCode = 1;
};

if (typeof argv.latest !== 'undefined') {
  downloadLastFairyTale().catch(reportFailure);
} else {
  // Anything that is not a positive integer (absent, a bare `--last`,
  // zero, negative, text) falls back to the default of 10.
  const isValidCount = Number.isInteger(argv.last) && argv.last > 0;
  const last = isValidCount ? Math.min(argv.last, 50) : 10;
  downloadLatest(last).catch(reportFailure);
}
{
"name": "slovenian-fairy-tails-downloader",
"version": "1.0.0",
"bin": "index.js",
"dependencies": {
"axios": "^0.27.2",
"puppeteer": "^13.7.0",
"request": "^2.88.2",
"yargs": "^17.4.1"
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment