Skip to content

Instantly share code, notes, and snippets.

@iampaul83
Created May 24, 2018 09:51
Show Gist options
  • Save iampaul83/7d834c1de4a2d7332726ef10caf3deaf to your computer and use it in GitHub Desktop.
Save iampaul83/7d834c1de4a2d7332726ef10caf3deaf to your computer and use it in GitHub Desktop.
puppeteer amazon
const puppeteer = require('puppeteer');
/**
 * Script entry point: opens an Amazon search results page in a headless
 * browser, prints the parsed results of the first page, follows the
 * "next page" link and prints the parsed results of the second page.
 *
 * The browser is always closed, even when a step fails, and any failure is
 * reported on stderr with a non-zero exit code instead of surfacing as an
 * unhandled promise rejection.
 */
(async () => {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto('https://www.amazon.com/s/ref=nb_sb_noss_2?url=search-alias%3Daps&field-keywords=s9+plus&rh=i%3Aaps%2Ck%3As9+plus');
    // Optional debugging aids:
    // await page.screenshot({path: 'example.png'});
    // await page.pdf({path: 'page1.pdf', format: 'A4'});
    console.log(await parse(page));

    // Arm the navigation wait *before* the click is dispatched and await
    // both together; otherwise the navigation may already have happened by
    // the time we start waiting, and a failing click would go unnoticed.
    await Promise.all([
      page.waitForNavigation(),
      page.click('#pagnNextLink'),
    ]);
    // await page.pdf({path: 'page2.pdf', format: 'A4'});
    console.log(await parse(page));
  } finally {
    // Release the browser process regardless of success or failure.
    await browser.close();
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
/**
 * Extracts title/price pairs from the search results currently shown in `page`.
 *
 * @param {object} page - Object exposing `evaluate(fn)`; the callback is run
 *   where a global `document` is available.
 * @returns {*} Whatever `page.evaluate` returns for the callback; the callback
 *   itself yields one entry per visible result item, either
 *   `{ title, price }` or `{ error }` (the item's outer HTML) when the title
 *   or the price element could not be located.
 */
function parse(page) {
  // NOTE: the callback must stay self-contained (no references to outer
  // scope), so the selectors are declared inside it.
  return page.evaluate(() => {
    const ITEM_SELECTOR = '.s-result-item:not(.aok-hidden)';
    const TITLE_SELECTOR = 'h2';
    const PRICE_SELECTOR = '.a-fixed-left-grid-col.a-col-right > div:last-child > div.a-column.a-span7 span.a-offscreen';
    const PRICE_FALLBACK_SELECTOR = 'div > div > div > div.a-fixed-left-grid-col.a-col-right > div:last-child > div.a-column.a-span7 a > span:nth-child(2)';

    return Array.from(document.querySelectorAll(ITEM_SELECTOR)).map((item) => {
      const titleNode = item.querySelector(TITLE_SELECTOR);
      // Try the primary price location first, then the alternative layout.
      const priceNode = item.querySelector(PRICE_SELECTOR) || item.querySelector(PRICE_FALLBACK_SELECTOR);

      if (titleNode && priceNode) {
        return {
          title: titleNode.textContent,
          price: priceNode.textContent,
        };
      }
      // Keep the raw markup so unparsed items can be inspected later.
      return { error: item.outerHTML };
    });
  });
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment