Skip to content

Instantly share code, notes, and snippets.

@gasolin
Created April 24, 2014 02:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gasolin/11239472 to your computer and use it in GitHub Desktop.
Save gasolin/11239472 to your computer and use it in GitHub Desktop.
var request = require("request");
var cheerio = require("cheerio");
// One cookie jar shared by every request issued through the client below.
var jar = request.jar();

// Options applied to every call made through the re-bound `request`.
var requestDefaults = {
  maxRedirects: 10,
  followRedirect: true,
  jar: jar
};

// Re-bind `request` to a pre-configured client so callers need not repeat
// the jar and redirect settings on each call.
var request = request.defaults(requestDefaults);
/**
 * Utility function that downloads a URL and invokes
 * callback with the data.
 *
 * The callback is always invoked exactly once. It receives the response body
 * when the GET request succeeds with HTTP status 200, and `null` when the
 * request errors or answers with any other status. Failures are reported on
 * stderr instead of being dropped, so a caller that chains further work from
 * the callback is never left waiting.
 *
 * @param {string} url - Address to fetch with a GET request.
 * @param {function(?string): void} callback - Receives the body, or `null`
 *     on failure.
 */
function download(url, callback) {
  request.get(url, function(error, response, body) {
    if (error) {
      console.error("download failed for " + url + ":", error);
      callback(null);
      return;
    }
    if (response.statusCode !== 200) {
      console.error("download of " + url + " returned HTTP " + response.statusCode);
      callback(null);
      return;
    }
    callback(body);
  });
}
// DO THE JOB

// Target site and the pieces used to build each page address.
var baseUrl = "http://axe-level-1.herokuapp.com";
var levelUrl = "/lv3/";
var urlNext = "?page=next";

// Accumulates one object per scraped table row.
var resultJson = [];
// Property names assigned to a row's cells, by cell position.
var column_title = ["town", "village", "name"];

// Work queue consumed by scheduler() via pop().
var queue = [];
// Entries below TOTAL are fetched as pages; the entry equal to TOTAL makes
// scheduler() print the results instead.
var TOTAL = 76;

// Hit the site root first (the response is ignored), then fill the queue
// with TOTAL..0 so that pop() hands out 0, 1, ..., TOTAL in that order.
// NOTE(review): presumably this first request establishes the session
// cookie that "?page=next" relies on — confirm.
request({url: baseUrl}, function () {
  var entry = TOTAL;
  while (entry >= 0) {
    queue.push(entry);
    entry -= 1;
  }
  scheduler();
});
/**
 * Takes the next entry off the queue and acts on it.
 *
 * An entry below TOTAL is logged and triggers a page fetch: entry 0 requests
 * the level URL itself, every other entry requests the "next page" URL.
 * Any other value (including an exhausted queue) prints the collected
 * results as JSON.
 */
function scheduler() {
  var pageIndex = queue.pop();

  // Not a page number: the work is done, so emit everything gathered so far.
  if (!(pageIndex < TOTAL)) {
    console.log(JSON.stringify(resultJson));
    return;
  }

  console.log(pageIndex);
  // Only the very first page is addressed without the "next" query string.
  var query = pageIndex === 0 ? "" : urlNext;
  parse(baseUrl + levelUrl + query);
}
/**
 * Downloads the page at `path`, appends one object per table row (except
 * the first row) to resultJson, then hands control back to scheduler().
 *
 * scheduler() is called whether or not the download produced any data.
 *
 * @param {string} path - Full URL of the page to scrape.
 */
function parse(path) {
  // Builds an object from a row's <td> cells, keyed by column_title position.
  function rowToPerson($, row) {
    var person = {};
    $(row).find("td").each(function(cellIndex, cell) {
      person[column_title[cellIndex]] = $(cell).text();
    });
    return person;
  }

  download(path, function(html) {
    if (html) {
      var $ = cheerio.load(html);
      $("tr").each(function(rowIndex, row) {
        // The first row is skipped (it is not treated as data).
        if (rowIndex < 1) {
          return;
        }
        resultJson.push(rowToPerson($, row));
      });
    }
    scheduler();
  });
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment