Skip to content

Instantly share code, notes, and snippets.

@jkantr
Last active January 2, 2021 02:58
Show Gist options
  • Save jkantr/dc65ead838bd38129bc98d7609ad653b to your computer and use it in GitHub Desktop.
Save jkantr/dc65ead838bd38129bc98d7609ad653b to your computer and use it in GitHub Desktop.
const Promise = require('bluebird');
const fetch = require('node-fetch');
const cheerio = require('cheerio');
/**
 * Fetches the Region I landing page and collects the link targets found in
 * its associations carousel.
 *
 * @returns {Promise<string[]>} Resolves to the `href` of every `<a>` that is a
 *   direct child of an `<li>` directly under `#associationscarousel`, as
 *   absolute URLs. Anchors without a usable `href` are skipped.
 *   Rejects when the request fails or the server answers with a non-2xx status.
 */
const getAllUrls = () => {
  const indexUrl = 'http://regioni.usyouthsoccer.org';

  return fetch(indexUrl)
    .then((res) => {
      // Without this check an error page would be parsed as if it were the
      // real landing page and silently yield an empty list.
      if (!res.ok) {
        throw new Error(`Request for ${indexUrl} failed with HTTP ${res.status}`);
      }
      return res.text();
    })
    .then((html) => {
      const $ = cheerio.load(html);
      return $('#associationscarousel')
        .find('> li > a')
        .toArray()
        .map((link) => $(link).attr('href'))
        // `attr` returns undefined for anchors that have no href; those
        // cannot be fetched later, so drop them here.
        .filter((href) => typeof href === 'string' && href.trim() !== '')
        // The HTTP client needs absolute URLs, so resolve relative links
        // against the page they were found on.
        .map((href) => new URL(href, indexUrl).href);
    });
};
/**
 * Downloads each page and extracts the text of its page-heading element.
 *
 * @param {string[]} urls - Absolute URLs of the pages to scrape.
 * @returns {Promise<string[]>} Resolves to one heading string per URL: the text
 *   of `#CT_PageHeading_pnlPageHeading` with line breaks and tabs removed and
 *   surrounding whitespace trimmed. Rejects if any request fails or answers
 *   with a non-2xx status.
 */
const scrapeAllUrls = (urls) => {
  return Promise.map(urls, (url) => {
    return fetch(url)
      .then((res) => {
        // Fail loudly, naming the URL, instead of scraping an error page and
        // returning an empty or misleading heading.
        if (!res.ok) {
          throw new Error(`Request for ${url} failed with HTTP ${res.status}`);
        }
        return res.text();
      })
      .then((html) => {
        const $ = cheerio.load(html);
        // Strip newlines, carriage returns and tabs so the heading is a single line.
        return $('#CT_PageHeading_pnlPageHeading').text().replace(/(\r\n|\n|\r|\t)/gm,"").trim();
      });
  // Cap the number of simultaneous requests so the target host is not flooded.
  }, { concurrency: 6 });
};
// Entry point: collect the association links, scrape each page's heading,
// and print the resulting list.
getAllUrls()
  .then(scrapeAllUrls)
  .then(console.log)
  .catch((err) => {
    // Without a handler a failed request surfaces only as an unhandled
    // rejection; report it and signal failure through the exit code.
    console.error(err);
    process.exitCode = 1;
  });
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment