Skip to content

Instantly share code, notes, and snippets.

@gasolin
Created April 24, 2014 08:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gasolin/11246500 to your computer and use it in GitHub Desktop.
Save gasolin/11246500 to your computer and use it in GitHub Desktop.
var request = require("request");
var cheerio = require("cheerio");
// Cookie jar referenced by the shared request options below.
var jar = request.jar();

// Headers sent with each download; the user-agent string is a mobile
// Firefox 26 identifier.
var headers = {
  "user-agent": "Mozilla/5.0 (Mobile; rv:26.0) Gecko/26.0 Firefox/26.0"
};

// Shared option object handed to request.get(); `url` and
// `headers.Referer` are filled in per call by download().
var options = {
  headers: headers,
  jar: jar,
  maxRedirects: 10,
  followRedirect: true
};

// URL of the most recent download() call; sent as the next Referer.
var last_url;
/**
 * Downloads a URL using the shared `options` object and passes the
 * response body to `callback`.
 *
 * The URL of the previous call (if any) is sent as the Referer header, and
 * the current URL is remembered for the next call.
 *
 * @param {string} url - URL to fetch.
 * @param {function(string)} callback - Invoked with the response body when
 *   the request succeeds with HTTP 200. It is NOT invoked when the request
 *   errors or returns any other status; those cases are reported on stderr
 *   so a stalled scrape can be diagnosed.
 */
function download(url, callback) {
  // Fill headers: the previously requested URL becomes the Referer.
  if (last_url) {
    options.headers.Referer = last_url;
  }
  last_url = url;
  options.url = url;
  request.get(options, function (error, response, body) {
    if (error) {
      // Previously swallowed silently; surface the failure instead.
      console.error("download failed for " + url + ": " + (error.message || error));
      return;
    }
    if (response.statusCode !== 200) {
      console.error("download of " + url + " returned HTTP " + response.statusCode);
      return;
    }
    callback(body);
  });
}
// ---- Scraper state and kickoff ----

// Site root and the path of the level being scraped.
var baseUrl = "http://axe-level-4.herokuapp.com";
var levelUrl = "/lv4/";

// Accumulates one object per parsed table row across all pages.
var resultJson = [];

// Property names given to a row's cells, by column position.
var column_title = ["town", "village", "name"];

// hrefs collected from the landing page; consumed in order by scheduler().
var queue = [];

// Request the site root first, then start on the landing page. The outcome
// of this first request is not inspected.
request({ url: baseUrl }, function onRootFetched() {
  landing();
});
/**
 * Fetches the level's landing page, collects the href of every anchor into
 * `queue`, logs the queue, and starts the scheduler.
 *
 * Does nothing after the page loads if `queue` already has entries.
 */
function landing() {
  var landingUrl = baseUrl + levelUrl;
  download(landingUrl, function (html) {
    var $ = cheerio.load(html);
    // Only seed the queue once.
    if (queue.length !== 0) {
      return;
    }
    $("a").each(function (index, anchor) {
      queue.push($(anchor).attr("href"));
    });
    console.log(queue);
    scheduler();
  });
}
// Index of the next `queue` entry to parse.
var count = 0;

/**
 * Advances the scrape by one step.
 *
 * While unvisited entries remain in `queue`, starts parsing the next one
 * (parse() calls back into scheduler() when it finishes). Once every entry
 * has been handed to parse(), prints the accumulated `resultJson` as JSON.
 *
 * The bound is strictly `count < queue.length`: the previous `<=` read one
 * element past the end of the queue and requested ".../undefined".
 */
function scheduler() {
  if (count < queue.length) {
    var path = queue[count];
    count += 1;
    parse(baseUrl + levelUrl + path);
    return;
  }
  // Queue exhausted: all pages have been handed to parse(), emit the result.
  console.log(JSON.stringify(resultJson));
}
/**
 * Downloads one page, converts every table row except the first into an
 * object keyed by `column_title`, appends those objects to `resultJson`,
 * and then hands control back to scheduler().
 *
 * @param {string} path - Full URL of the page to fetch.
 */
function parse(path) {
  // Turns the rows of an HTML page into entries of resultJson.
  function collectRows(html) {
    var $ = cheerio.load(html);
    $("tr").each(function (rowIndex, row) {
      // Row 0 is skipped (treated as the header row).
      if (rowIndex < 1) {
        return;
      }
      var record = {};
      $(row).find("td").each(function (cellIndex, cell) {
        var key = column_title[cellIndex];
        record[key] = $(cell).text();
      });
      resultJson.push(record);
    });
  }

  download(path, function (html) {
    console.log(path);
    if (html) {
      collectRows(html);
    }
    scheduler();
  });
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment