@smileart · Created July 11, 2023 12:02
A script to scrape Digital Content Lists from Amazon
// NOTE: This is meant to be run in the DevTools console only, since it depends on the console helpers `copy`, `$`, `$$`, etc.
// NOTE: These backups are needed because after a new page is loaded, `$`, `$$`, etc. become undefined for the running script 🤷‍♂️
const selector = $
const selectorAll = $$
const clipboardCopy = copy

const allBooksList = []
// Collect the titles and authors visible on the current page
function scrapeBooks() {
  const titles = selectorAll('.digital_entity_title').map((b) => b.innerText)
  const authors = selectorAll('div[id^="content-author"]').map((a) => a.innerText)

  // NOTE: for Audible use this instead...
  // const authors = selectorAll('.digital_entity_details > .information_row').map((a) => a.innerText).filter((e) => e !== '')

  const books = []

  for (const [i, title] of titles.entries()) {
    console.log(`'${title}' by ${authors[i]}`)
    books.push(`- '${title}' by ${authors[i]}`)
  }

  return books
}
function sleep(ms) {
  return new Promise(resolve => setTimeout(resolve, ms))
}
// The last pagination link holds the total number of pages
const maxPage = Number(selectorAll('a[id^="page-"]').pop().textContent)
let currentPage = 0

while (currentPage < maxPage) {
  currentPage += 1
  selector(`#page-${currentPage}`).click()
  await sleep(2000) // give the page time to render before scraping it

  console.log(`Handling page #${currentPage}`)
  allBooksList.push(...scrapeBooks())
  console.log(`Scraped ${allBooksList.length} books so far...`)
}

// Put the whole list on the clipboard as plain-text bullet points
clipboardCopy(allBooksList.join("\n"))
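Usage sketch: open the Amazon page that lists your digital content (for Kindle books this is typically the "Manage Your Content and Devices" content list; for Audible, your Library page, with the Audible selector variant commented above), open the browser DevTools console, paste the whole script, and press Enter. Once the loop has clicked through every page, the collected list is on the clipboard, ready to paste anywhere.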