Skip to content

Instantly share code, notes, and snippets.

@adityashedge
Created January 30, 2017 19:46
Show Gist options
  • Save adityashedge/8b43c129d4d4ca1d94b04b0bf9f3b64e to your computer and use it in GitHub Desktop.
Save adityashedge/8b43c129d4d4ca1d94b04b0bf9f3b64e to your computer and use it in GitHub Desktop.
A JavaScript crawler that prints the description and NPM package of every 'nodeschool' workshop.
/*
"dependencies": {
"cheerio": "^0.22.0",
"request": "^2.79.0"
}
*/
const request = require('request');
const cheerio = require('cheerio');
// Address of the page that the crawler is pointed at.
const SITE_URL = 'https://nodeschool.io';

// A 100-character dashed rule printed ahead of each entry in the output.
const SEPARATOR = ''.padEnd(100, '-');
/**
 * Fetches a page and prints selected text from its `.workshopper` elements.
 *
 * @constructor
 * @param {string} url - Address of the page to request.
 */
function Crawler(url) {
  this.url = url;

  /**
   * Requests `this.url` and, for every `.workshopper` element that has an
   * `id` attribute, logs one entry per `<code>` element whose parent carries
   * the `workshopper` class: a separator line, the combined text of the
   * element's `<p>` descendants, and the text of that `<code>` element.
   *
   * Failures (transport errors or non-2xx responses) are logged to the
   * console rather than thrown; nothing is returned.
   */
  this.start = function() {
    request(this.url, function(error, response, body) {
      if (error) {
        console.log('Error:', error.message);
        return;
      }
      // An error page would otherwise be parsed as if it were the real page
      // and silently produce no output.
      if (response && (response.statusCode < 200 || response.statusCode >= 300)) {
        console.log('Error:', `unexpected HTTP status ${response.statusCode}`);
        return;
      }

      const $ = cheerio.load(body);
      const elements = $('.workshopper');
      for (let i = 0; i < elements.length; i++) {
        // Declared locally: the undeclared assignment used previously leaked
        // a global and throws a ReferenceError in strict mode / ES modules.
        const $ele = $(elements[i]);
        if (!$ele.attr('id')) {
          continue;
        }

        const npmCodes = $ele.find('code');
        for (let j = 0; j < npmCodes.length; j++) {
          const $npmCode = $(npmCodes[j]);
          // Skip <code> elements nested deeper inside other markup.
          if (!$npmCode.parent().hasClass('workshopper')) {
            continue;
          }
          console.log(SEPARATOR);
          console.log($ele.find('p').text());
          console.log($npmCode.text());
        }
      }
    });
  };
}
// Script entry point: crawl SITE_URL and print the results.
// `const` avoids creating an implicit global (a ReferenceError in strict mode).
const crawler = new Crawler(SITE_URL);
crawler.start();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment