Skip to content

Instantly share code, notes, and snippets.

@shmidtelson
Last active January 28, 2019 19:58
Show Gist options
  • Save shmidtelson/6ddaaa50f367051a254cc5b240297589 to your computer and use it in GitHub Desktop.
Save shmidtelson/6ddaaa50f367051a254cc5b240297589 to your computer and use it in GitHub Desktop.
Irvin parser
var cheerio = require('cheerio'),
needle = require('needle'),
tress = require('tress'),
resolve = require('url'),
fs = require('fs');
// Shared crawl state:
//   targetURL - listing page the crawl starts from; also the base that
//               pagination links are resolved against.
//   results   - `href` values collected from the fetched pages, in the
//               order they are found.
var targetURL = 'https://icobench.com/icos',
    results = [];
// URLs already handed to the worker, keyed by URL string. Every fetched page
// enqueues its `.num>a` links again, so without this record the same pages
// would be fetched (and their links collected) over and over.
var visited_urls = Object.create(null);

/**
 * Queue worker: downloads one page, appends the `href` of every
 * `.image_box>a` anchor to `results`, and enqueues every `.num>a` link
 * (resolved against `targetURL`) on `q`.
 *
 * A URL that was already processed is skipped without a request.
 *
 * @param {string} url - Page to download.
 * @param {function(Error=)} callback - Invoked exactly once when the job is
 *     finished; receives the request error if the download failed.
 */
var get_data = function (url, callback) {
    if (visited_urls[url]) {
        callback();
        return;
    }
    visited_urls[url] = true;

    needle.get(url, function (err, res) {
        if (err) {
            // Throwing from this asynchronous callback cannot be caught by
            // the caller and would abort the whole crawl; report the failure
            // through the job callback instead.
            console.error('Failed to fetch ' + url + ': ' + err.message);
            callback(err);
            return;
        }
        var $ = cheerio.load(res.body);
        $('.image_box>a').each(function () {
            results.push($(this).attr('href'));
        });
        $('.num>a').each(function () {
            var href = $(this).attr('href');
            if (!href) {
                // Anchor without an href: nothing to follow.
                return;
            }
            // `resolve` is bound to the whole `url` module (not to its
            // `resolve` function), so the function is reached as a property.
            q.push(resolve.resolve(targetURL, href));
        });
        callback();
    });
};
// Job queue that runs `get_data` as its worker with a concurrency of 10.
// Declared with `var` so it is not an implicit global (assigning to an
// undeclared name throws in strict mode and in ES modules).
var q = tress(get_data, 10);

// Once the queue reports drain, write everything collected in `results` to
// ./icons.json as JSON indented by 4 spaces.
q.drain = function () {
    fs.writeFileSync('./icons.json', JSON.stringify(results, null, 4));
};
// Seed the queue: the bare listing URL first, then explicit `?page=N` URLs
// for N = 1..399, logging each paginated URL as it is enqueued.
q.push(targetURL);
for (var page = 1; page < 400; page += 1) {
    var page_url = targetURL + '?page=' + page;
    console.log(page_url);
    q.push(page_url);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment