Last active
July 19, 2020 22:21
-
-
Save ThePendulum/089927824d15eb0bc5c4211e3b1a3ab1 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Browser-console scraper: paste while viewing the Game Boy GBS listing.
 *
 * Walks every listing page (1..last, as advertised by the pagination widget
 * of the current document), opens each game page linked from it, collects
 * the mass-download links found there and finally prints all collected URLs,
 * one per line, to the console.
 *
 * Listing pages are fetched in parallel; the game pages of a single listing
 * page are fetched one after another.
 *
 * The enclosing block keeps the declarations out of the console's global
 * scope so the snippet can be pasted more than once.
 */
{
  // Change these if the site layout changes or another listing is wanted.
  const listingUrl = 'https://www.zophar.net/music/gameboy-gbs';
  const pageUrlSelector = '.name a[href*="/music/gameboy-gbs/"]';
  const downloadUrlSelector = '#mass_download a';

  const parser = new DOMParser();

  // Fetch a URL and parse it as HTML; returns null (with a warning) on an
  // HTTP error so a single bad page does not poison the collected results.
  const fetchDocument = async (url) => {
    const res = await fetch(url);
    if (!res.ok) {
      console.warn(`Skipping ${url}: HTTP ${res.status}`);
      return null;
    }
    return parser.parseFromString(await res.text(), 'text/html');
  };

  const lastPageLink = document.querySelector('.pagination-end a');
  if (lastPageLink === null) {
    throw new Error('Could not find the last-page link (.pagination-end a); run this on the listing page.');
  }

  const lastPageNumber = Number(new URL(lastPageLink.href).searchParams.get('page'));
  if (!Number.isInteger(lastPageNumber) || lastPageNumber < 1) {
    throw new Error(`Could not read the last page number from ${lastPageLink.href}`);
  }

  // Pages are 1-based and page 1 is included: nothing else scrapes it.
  const pages = Array.from({ length: lastPageNumber }, (_, index) => index + 1);

  const urlsPerPage = await Promise.all(pages.map(async (pageNumber) => {
    // Build the query through URLSearchParams rather than string
    // concatenation so the page parameter is exactly the number.
    const pageUrl = new URL(listingUrl);
    pageUrl.searchParams.set('page', String(pageNumber));

    const pageDoc = await fetchDocument(pageUrl);
    console.log(`Please stand by, scraping page ${pageNumber}/${lastPageNumber}`);
    if (pageDoc === null) {
      return [];
    }

    const gameUrls = Array.from(pageDoc.querySelectorAll(pageUrlSelector), (anchor) => anchor.href);
    const downloadUrls = [];

    // Sequential on purpose: at most one game-page request in flight per
    // listing page.
    for (const [index, gameUrl] of gameUrls.entries()) {
      const gameDoc = await fetchDocument(gameUrl);
      console.log(`Please stand by, scraping ${index + 1}/${gameUrls.length} from page ${pageNumber}: ${gameUrl}`);
      if (gameDoc === null) {
        continue;
      }

      downloadUrls.push(...Array.from(gameDoc.querySelectorAll(downloadUrlSelector), (anchor) => anchor.href));
    }

    return downloadUrls;
  }));

  console.log(urlsPerPage.flat().join('\n'));
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment