Skip to content

Instantly share code, notes, and snippets.

@whoeverest
Created April 14, 2015 21:02
Show Gist options
  • Save whoeverest/2e1d3771b4d489cd932c to your computer and use it in GitHub Desktop.
Save whoeverest/2e1d3771b4d489cd932c to your computer and use it in GitHub Desktop.
// HTTP request headers sent with every search POST. They present the crawler
// as a desktop Firefox browser with an existing session on mjs.bg.
var headers = {
  'Host': 'mjs.bg',
  'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:30.0) Gecko/20100101 Firefox/30.0',
  'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
  'Accept-Language': 'en-US,en;q=0.5',
  'Accept-Encoding': 'gzip, deflate',
  'DNT': '1',
  'Referer': 'http://mjs.bg/emsg/index.php',
  // The session cookie goes stale (an older value previously had to be
  // swapped out here), so a fresh one can be supplied through the
  // MPBG_COOKIE environment variable without editing the source. The
  // last known value remains the default.
  'Cookie': process.env.MPBG_COOKIE ||
    '__atuvc=5%7C10%2C4%7C11%2C3%7C12%2C6%7C13%2C3%7C14; __atuvs=55292a2c65ac7d41000; MPBG_SESSION=lugfgvglauqf9ncigln1jl5u71',
  'Connection': 'keep-alive',
  'Cache-Control': 'max-age=0'
};
// npm install bluebird cheerio request range
// node crawly.js
var Promise = require('bluebird');
var cheerio = require('cheerio');
var request = Promise.promisifyAll(require('request'));
var range = require('range');
// Upper bound on simultaneously in-flight requests. Starting every POST at
// once opens thousands of sockets and hammers the server.
var MAX_CONCURRENT_REQUESTS = 10;

/**
 * Submits the search form for one case number and extracts the case text
 * from the returned page.
 *
 * @param {number} n - Case number sent as the `number` form field.
 * @returns {Promise<{n: number, text: string}>} The case number together
 *   with the text content of the `#container .case` element(s).
 */
function fetchCase(n) {
  return request.postAsync({
    url: 'http://mjs.bg/emsg/index.php',
    headers: headers,
    // `headers` advertises gzip/deflate support; without this flag the
    // body is handed back still compressed and cheerio parses garbage.
    gzip: true,
    form: {action: 'search', year: 2014, number: n}
  }).spread(function(resp, body) {
    var $ = cheerio.load(body);
    var text = $('#container .case').text();
    // Progress goes to stderr so stdout carries only the final result.
    console.error("Downloaded case", n);
    console.error(text);
    return {n: n, text: text};
  });
}

// Promise.map keeps the results in input order while capping concurrency.
var texts = Promise.map(range(1, 10000), fetchCase, {
  concurrency: MAX_CONCURRENT_REQUESTS
});

texts.then(function(results) {
  // Serialize explicitly: console.log(array) goes through util.inspect,
  // which abbreviates long arrays and would drop most of the results.
  console.log(JSON.stringify(results, null, 2));
}).catch(function(err) {
  // A failed request rejects the whole crawl; report it and signal failure
  // to the shell instead of leaving the rejection unhandled.
  console.error('Crawl failed:', err);
  process.exitCode = 1;
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment