Skip to content

Instantly share code, notes, and snippets.

@kami4ka
Last active December 21, 2021 18:08
Show Gist options
  • Save kami4ka/9a7d38d4e045f742949288acc8867a69 to your computer and use it in GitHub Desktop.
Save kami4ka/9a7d38d4e045f742949288acc8867a69 to your computer and use it in GitHub Desktop.
Amazon batch scraper with error handling. The ScrapingAnt API is used to get the data.
/**
* Amazon Batch Scraper - create a file with a list of keywords (one per line), and all found products will be scraped into a single CSV file
*
* Installation instructions:
* npm install "@scrapingant/amazon-proxy-scraper"
* npm install json2csv
*
*/
const ProductsScraper = require("@scrapingant/amazon-proxy-scraper");
const readline = require('readline');
const fs = require('fs');
const fsPromise = require('fs').promises;
const Json2csvParser = require('json2csv').Parser;
// Input file that scrapeFromCSV reads line by line; each line is used as one keyword.
// The output file name is derived from it: `results_${KEYWORDS_FILENAME}`.
const KEYWORDS_FILENAME = '12_14_21.csv';
// Passed to the scraper as its `number` option for every keyword.
const MAX_PRODUCTS_FOR_KEYWORD = 10;
// Passed to the scraper as its `apiKey` option; replace the placeholder before running.
const SCRAPINGANT_API_KEY = '<YOUR SCRAPINGANT API KEY>';

// scrapeFromCSV is a hoisted function declaration, so calling it before its
// definition in the file is fine.
scrapeFromCSV().then(
  () => console.log(`Results can be found at: results_${KEYWORDS_FILENAME}`),
  (err) => {
    console.error(err);
    // Report the failure through the exit status as well, so shells and CI
    // jobs can detect it; exitCode (rather than process.exit) lets pending
    // output flush before the process ends.
    process.exitCode = 1;
  }
);
/**
 * Reads KEYWORDS_FILENAME line by line, runs one ProductsScraper per keyword
 * and hands every collected product to writeDataToCSV, which receives
 * `results_${KEYWORDS_FILENAME}` as the target file name.
 *
 * Each line is trimmed before use; blank and whitespace-only lines are
 * skipped so that no scrape request is issued for an empty keyword.
 *
 * A keyword whose scrape throws is logged with console.error and skipped;
 * the remaining keywords are still processed.
 *
 * @returns {Promise<void>} Resolves after writeDataToCSV has completed.
 */
async function scrapeFromCSV() {
  const fileStream = fs.createReadStream(KEYWORDS_FILENAME);
  const rl = readline.createInterface({
    input: fileStream,
    // Always treat "\r\n" as a single line break.
    crlfDelay: Infinity,
  });

  const allProducts = [];
  for await (const line of rl) {
    const keyword = line.trim();
    if (keyword === '') {
      // Nothing to search for on an empty line.
      continue;
    }

    console.log(`Scraping keyword: ${keyword}`);
    const scraper = new ProductsScraper({
      apiKey: SCRAPINGANT_API_KEY,
      keyword,
      number: MAX_PRODUCTS_FOR_KEYWORD,
    });

    try {
      const products = await scraper.startScraping();
      allProducts.push(...products);
    } catch (e) {
      // Best effort: one failing keyword must not abort the whole batch.
      console.error(e);
    }
  }

  await writeDataToCSV(`results_${KEYWORDS_FILENAME}`, allProducts);
}
/**
 * Converts a list of product records to CSV text with json2csv and writes
 * that text to the given file.
 *
 * @param {string} filename - Path of the file to write.
 * @param {Array<Object>} productsList - Records passed to the json2csv parser.
 * @returns {Promise<void>} The promise returned by fs.promises.writeFile.
 */
async function writeDataToCSV(filename, productsList) {
  // Columns requested from the parser, in output order.
  const columns = [
    'title',
    'price',
    'savings',
    'rating',
    'reviews-count',
    'score',
    'url',
    'is-sponsored',
    'is-amazon-choice',
    'is-discounted',
    'before-discount',
    'amazon-id',
    'thumbnail',
    'high-res-image',
    'short-description',
    'full-description',
  ];
  const csvText = new Json2csvParser({ fields: columns }).parse(productsList);
  return fsPromise.writeFile(filename, csvText);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment