Skip to content

Instantly share code, notes, and snippets.

@kami4ka
Last active August 30, 2021 08:21
Show Gist options
  • Save kami4ka/038dc5bbb0a5846ebe6355f1cbe95164 to your computer and use it in GitHub Desktop.
Save kami4ka/038dc5bbb0a5846ebe6355f1cbe95164 to your computer and use it in GitHub Desktop.
Scrape Etsy products information using ScrapingAnt web scraping API
/**
* Get data from Etsy search
*
* ScrapingAnt allows you to scrape for free using proxy servers
*
* npm install @scrapingant/scrapingant-client
* npm install cheerio
**/
const ScrapingAntClient = require('@scrapingant/scrapingant-client');
const cheerio = require('cheerio');
const client = new ScrapingAntClient({ apiKey: "<SCRAPINGANT_API_KEY>" });
const searchTerm = 'cookies cutter';
const maxPages = 10;

/**
 * Script entry point.
 *
 * Scrapes the Etsy home page once to obtain cookies, then scrapes search
 * result pages 1..maxPages for `searchTerm` one after another, reusing those
 * cookies, and prints every parsed listing to stdout.
 *
 * Any failure (for example a rejected `client.scrape` call) is reported on
 * stderr and the process exit code is set to 1, instead of leaving the
 * promise returned by the async function unhandled.
 */
(async () => {
  const results = [];

  // We need to get the cookies first to prevent the block
  const mainPageResponse = await client.scrape('https://etsy.com', { proxy_country: 'US' });

  // Pages are requested sequentially, each one reusing the cookies from the home page.
  for (let currentPage = 1; currentPage <= maxPages; currentPage++) {
    const response = await client.scrape(getSearchString(searchTerm, currentPage), {
      proxy_country: 'US',
      cookies: mainPageResponse.cookies,
    });
    // getDataFromPage is synchronous, so its result can be spread directly.
    results.push(...getDataFromPage(response.content));
  }

  // scraped results
  console.log(results);
})().catch((error) => {
  // Without this handler a failed request surfaces as an unhandled promise rejection.
  console.error('Etsy scraping failed:', error);
  process.exitCode = 1;
});
/**
 * Build the Etsy search URL for a search term and an optional page number.
 *
 * @param {string} term - Raw search phrase; it is percent-encoded here.
 * @param {number} [page] - Page number. When falsy (omitted, 0, ...), no
 *   pagination parameters are appended.
 * @returns {string} The search URL.
 */
function getSearchString(term, page) {
  // Encode the term so spaces and reserved characters (&, =, #, ...) cannot
  // break the URL or inject extra query parameters.
  const query = encodeURIComponent(term);
  const pagePart = page ? `&page=${page}&ref=pagination` : '';
  return `https://www.etsy.com/search?q=${query}${pagePart}`;
}
/**
 * Extract listing data from the HTML of an Etsy search results page.
 *
 * Every `div.v2-listing-card` element produces one object with the keys
 * `title`, `link`, `isDiscounted`, `price` and `currency`.
 *
 * @param {string} html - Markup of a search results page.
 * @returns {Array<Object>} One entry per listing card, in document order.
 */
function getDataFromPage(html) {
  const $ = cheerio.load(html);

  return $('div.v2-listing-card')
    .toArray()
    .map((element) => {
      const card = $(element);

      // Strip runs of two or more whitespace characters, then any remaining newlines.
      const rawTitle = card.find('h3.v2-listing-card__title').text();
      const title = rawTitle.replace(/\s\s+/g, '').replace(/\n/g, '');

      const link = card.find('a.listing-link').attr('href');

      // A promotion price element marks the card as discounted.
      const isDiscounted = card.find('p.search-collage-promotion-price').length > 0;

      // Discounted cards nest the currency value one <span> deeper.
      let priceSelector = 'p.wt-text-title-01 > span.currency-value';
      if (isDiscounted) {
        priceSelector = 'p.wt-text-title-01 > span > span.currency-value';
      }
      const price = card.find(priceSelector).text();

      const currency = card.find('span.currency-symbol').first().text();

      return { title, link, isDiscounted, price, currency };
    });
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment