Skip to content

Instantly share code, notes, and snippets.

@melbourne2991
Created March 18, 2014 06:36
Show Gist options
  • Save melbourne2991/9614672 to your computer and use it in GitHub Desktop.
Save melbourne2991/9614672 to your computer and use it in GitHub Desktop.
var request = require('request');
var cheerio = require('cheerio');
var fs = require('fs');
var readline = require('readline');
var stream = require('stream');
/*
 * Command-line entry point. The first argument selects the mode:
 *   -links  walk the paginated "just-sold" listing and append every listing
 *           href found on each page to links.txt
 *   -pages  read links.txt line by line and request each listing page
 * Any other value prints a short hint.
 */

// File that -links appends to and -pages reads from.
var LINKS_FILE = 'links.txt';
// Origin that the relative hrefs stored in LINKS_FILE are resolved against.
var SITE_ROOT = 'https://example.com';
// Highest listing page number that getNext() will request.
var MAX_PAGES = 3;

/**
 * Print a failure to stderr and flag the process as failed.
 *
 * Used instead of `throw` inside asynchronous callbacks, where a throw cannot
 * be caught by the caller and would abort every other request in flight.
 *
 * @param {string} context  Short description of what was being attempted.
 * @param {*} err           Error object (or any value) describing the failure.
 */
function reportError(context, err) {
  var detail = err && err.message ? err.message : err;
  console.error(context + ': ' + detail);
  process.exitCode = 1;
}

/**
 * Fetch one listing page, append the hrefs it contains to LINKS_FILE and then
 * continue with the following page while a "next" pagination element exists
 * and MAX_PAGES has not been reached.
 *
 * @param {string} url     Address of the listing page to fetch.
 * @param {number} [page]  Number of the page `url` points at; defaults to 1.
 */
function getNext(url, page) {
  var count = page || 1;

  console.log('called');
  request(url, function (err, resp, body) {
    if (err) {
      reportError('Request for ' + url + ' failed', err);
      return;
    }

    // Declared locally so the parsed document does not leak into a global.
    var $ = cheerio.load(body);
    var links = [];

    $('#listings > li').each(function () {
      var href = $('.largetitle a', this).attr('href');
      if (href) {
        links.push(href);
      }
    });

    var hasNext = $('.pagination .next').length > 0;

    // Moves on to the following page, or announces the end of the crawl.
    function proceed() {
      if (hasNext && count < MAX_PAGES) {
        var nextPage = count + 1;
        getNext(SITE_ROOT + '/just-sold?page=' + nextPage + '&price_min=0', nextPage);
      } else {
        console.log('scrape complete');
      }
    }

    if (links.length === 0) {
      proceed();
      return;
    }

    // One append per page, with a callback: the write order in the file is
    // then the page order, and a write failure is reported instead of lost.
    fs.appendFile(LINKS_FILE, links.join('\n') + '\n', function (writeErr) {
      if (writeErr) {
        reportError('Could not append to ' + LINKS_FILE, writeErr);
        return;
      }
      proceed();
    });
  });
}

/**
 * Read LINKS_FILE line by line and request the listing page each line refers
 * to. Every requested URL is printed as soon as its request is issued.
 */
function fetchPages() {
  var instream = fs.createReadStream(LINKS_FILE);
  var rl = readline.createInterface({ input: instream });

  // A missing or unreadable file would otherwise be an unhandled 'error' event.
  instream.on('error', function (err) {
    reportError('Could not read ' + LINKS_FILE, err);
    rl.close();
  });

  rl.on('line', function (line) {
    var path = line.trim();
    // Skip blank lines so they do not turn into a request for the site root.
    if (!path) {
      return;
    }

    var url = SITE_ROOT + path;
    request(url, function (err, resp, body) {
      if (err) {
        // Report and carry on: the remaining links are still worth fetching.
        reportError('Request for ' + url + ' failed', err);
        return;
      }
      var $ = cheerio.load(body);
      // CSV format -- revenue,profit,date,niche,site type,monetization,domain registration date,implementation
      // TODO: field extraction is not implemented yet; `$` is loaded but unused.
    });
    console.log(url);
  });
}

var mode = process.argv[2];

if (mode === '-links') {
  getNext(SITE_ROOT + '/just-sold?price_min=0', 1);
  // Emits an empty line, kept so the console output is unchanged.
  console.log();
} else if (mode === '-pages') {
  fetchPages();
} else {
  console.log('Please specify valid option');
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment