Skip to content

Instantly share code, notes, and snippets.

@billynyh
Created February 20, 2013 12:46
Show Gist options
  • Save billynyh/4995295 to your computer and use it in GitHub Desktop.
Save billynyh/4995295 to your computer and use it in GitHub Desktop.
var $ = require('jquery');
var http = require('http');
var Sync = require('sync');
var URIjs = require('./URI.js'); // http://medialize.github.com/URI.js/
var fs = require('fs');
// Verbosity switches, read every time a logger is called:
// DEBUG gates Log.d, INFO gates Log.i. Log.e is never gated.
var DEBUG = false;
var INFO = true;

/**
 * Minimal console logger with three levels. Every level writes through
 * console.log; the levels differ only in which flag (if any) gates them.
 */
var Log = {
    /** Debug message: printed only while DEBUG is truthy. */
    d: function (str){
        if (!DEBUG){ return; }
        console.log(str);
    },

    /** Informational message: printed only while INFO is truthy. */
    i: function (str){
        if (!INFO){ return; }
        console.log(str);
    },

    /** Error message: always printed. */
    e: function (str){
        console.log(str);
    }
};
/**
 * Fetch a URL over HTTP and hand the complete response body to `callback`.
 *
 * The whole body is accumulated in memory as a single string. The HTTP
 * status code is not inspected: whatever body the server sends is returned.
 *
 * @param {string} url - Address passed straight to http.get.
 * @param {function(?Error, string=)} callback - Node-style callback, invoked
 *     exactly once: `(null, html)` when the response ends, or `(err)` when
 *     the request or the response stream emits 'error'.
 */
function get_page(url, callback){
    Log.d("get_page " + url);

    // Both the request and the response can report failures, and a response
    // may still emit 'end' after an error; make sure the callback fires once.
    var finished = false;
    function done(err, html){
        if (finished){ return; }
        finished = true;
        callback(err, html);
    }

    http.get(url, function(res){
        var html = "";
        // Decode in the stream so a multi-byte UTF-8 character that is split
        // across two chunks is not corrupted by per-chunk Buffer-to-string
        // conversion.
        res.setEncoding("utf8");
        res.on('data', function(chunk){
            html += chunk;
        }).on('error', done).on('end', function(){
            done(null, html);
        });
    }).on('error', done); // connection-level failures (DNS, refused, reset)
}
/**
 * Crawl the notice landing page, then every page linked from its shortcut
 * menu.
 *
 * Side effects on the shared `Scout` object:
 *   - pushes the landing page's notices onto Scout.pages under label "_root";
 *   - replaces Scout.menu with one {title, url, label} entry per
 *     ".noticeshortcut a" link found on the landing page;
 *   - calls crawl_page for each menu entry, which appends further pages.
 *
 * Uses get_page.sync, so it is expected to run inside the Sync(...) wrapper
 * (presumably the fiber-based blocking call form from the `sync` module).
 */
function crawl_notice_home(){
    Log.d("crawl_notice_home");
    var url = "http://www.scout.org.hk/chi/notice/";
    var html = get_page.sync(null, url);

    // Collect the shortcut menu; each link is resolved against the landing URL.
    var menu = [];
    $(html).find(".noticeshortcut a").each(function(){
        var a = $(this);
        var href = a.attr("href");
        var uri = new URIjs(href);
        menu.push({
            'title': a.text(),
            // menu targets are directories, hence the trailing slash
            'url': uri.absoluteTo(url).toString() + "/",
            // the raw href doubles as the page's label
            'label': href
        });
    });

    // The landing page itself carries notices; record them as the root page.
    Scout.pages.push({
        "notices" : parse_page(html, url),
        "label" : "_root",
        "title" : "通告"
    });
    Scout.menu = menu;

    // Crawl every menu target too; without this loop only the landing page
    // would ever be recorded and crawl_page would never run.
    for (var i = 0, n = menu.length; i < n; i++){
        crawl_page(menu[i].label, menu[i].title, menu[i].url);
    }
}
/**
 * Download one notice page, parse it, and append the result to Scout.pages.
 *
 * @param {string} label - Identifier stored with the page record.
 * @param {string} title - Title stored with the page record.
 * @param {string} url - Page address; also the base for resolving its links.
 */
function crawl_page(label, title, url){
    Log.d("crawl_page " + url);
    // get_page.sync returns the body directly instead of taking a callback.
    var body = get_page.sync(null, url);
    var record = {
        "notices" : parse_page(body, url),
        "label" : label,
        "title" : title
    };
    Scout.pages.push(record);
}
/**
 * Extract the notices listed in a page's ".noticecontent" table.
 *
 * Only rows that contain a link are reported; rows without one (such as the
 * header row) are skipped.
 *
 * @param {string} html - Page markup.
 * @param {string} base_url - Address the page came from, used to turn each
 *     link's href into an absolute URL.
 * @returns {Array<{date: string, title: string, url: string}>} One entry per
 *     linked row, in document order: text of the first cell, text of the
 *     second cell, and the absolute URL of the row's link.
 */
function parse_page(html, base_url){
    Log.d("parse_page " + base_url);
    var notices = [];
    $(html).find(".noticecontent tr").each(function(){
        var row = $(this);
        var link = row.find("a");
        if (!link.length){
            // no link in this row: it is a header, not a notice
            return;
        }
        var cells = row.find("td");
        var target = new URIjs(link.attr("href"));
        notices.push({
            'date' : cells.eq(0).text(),
            'title' : cells.eq(1).text(),
            'url' : target.absoluteTo(base_url).toString()
        });
    });
    return notices;
}
/**
 * Completion handler passed to fs.writeFile: logs the error if there is one,
 * otherwise logs "done".
 *
 * @param {*} err - Falsy on success, otherwise the failure to report.
 */
function writeFileCallback(err){
    if (!err){
        Log.i("done");
        return;
    }
    Log.e(err);
}
// Shared crawl result, filled in by the crawl_* functions and then dumped
// to disk as JSON: `pages` holds one record per crawled page, `menu` the
// shortcut links found on the landing page.
var Scout = {
    pages: [],
    menu: []
};

// Entry point. The crawl runs inside Sync(...) because the crawl functions
// fetch pages through get_page.sync.
Sync(function(){
    var fname = "./scout_hk.json";
    var encoding = "utf8";

    Log.i("start crawl");
    crawl_notice_home();

    Log.i("write file " + fname);
    // 4-space indentation keeps the output file readable
    var json = JSON.stringify(Scout, null, 4);
    fs.writeFile(fname, json, encoding, writeFileCallback);

    /*
    Disabled: separate dump of the menu only.
    fname = "./scout_menu.json";
    Log.i("write file " + fname);
    fs.writeFile(fname, unescape(encodeURIComponent(JSON.stringify(Scout.menu))), encoding, writeFileCallback);
    */
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment