Skip to content

Instantly share code, notes, and snippets.

@myjian
Last active August 29, 2015 14:01
Show Gist options
  • Save myjian/290f1531dd873b362e5a to your computer and use it in GitHub Desktop.
Save myjian/290f1531dd873b362e5a to your computer and use it in GitHub Desktop.
CCSP-HW4
var request = require('request');
var cheerio = require('cheerio');
var fs = require('fs');
// Listing pages 1 through 5 of the real-time news section, requested in this order.
var appledailyURL = [1, 2, 3, 4, 5].map(function(pageNumber){
    return 'http://www.appledaily.com.tw/realtimenews/section/new/' + pageNumber;
});

// Path of the JSON file that finish() writes the grouped result to.
var categorizedNewsFile = 'appledaily.json';

// Number of responses handled so far; also used as the index of the next URL to request.
var retrieved = 0;

// Every news item collected so far, in the order it was parsed.
var news = [];

// Distinct category names, in the order they were first encountered.
var categories = [];
/**
 * Callback passed to `request` for each listing page.
 *
 * Counts the response, and on a successful (HTTP 200) reply parses the body
 * with cheerio, appending one entry to `news` per `li.rtddt` element and
 * recording any category name not yet present in `categories`. A failed
 * request is reported on stderr instead of being dropped silently. In every
 * case the next request is scheduled 100 ms later, so one bad page does not
 * stop the crawl.
 *
 * @param {?Error} err - Error reported by `request`, if any.
 * @param {Object} response - Response object; only `statusCode` is read.
 * @param {string} body - Response body, parsed as HTML.
 */
function responseHandler(err, response, body){
    retrieved++;
    if (!err && response.statusCode === 200){
        var $ = cheerio.load(body);
        $('li.rtddt').each(function(i, element){
            var category = $(this).find('h2').text();
            var url = $(this).children('a').attr('href');
            // The date is read from a <time> inside the element preceding this item's parent.
            var date = $(this).parent().prev().children('time').text();
            var time = $(this).find('time').text();
            var title = $(this).find('font').text();
            var hasVideo = $(this).hasClass('hsv');
            var newsItem = {category: category, title: title, date: date, url: url, time: time, video: hasVideo};
            news.push(newsItem);
            if (categories.indexOf(category) === -1){
                categories.push(category);
            }
        });
    }
    else {
        // Previously a failed page vanished without a trace; report it so that
        // incomplete output can be diagnosed, but keep crawling.
        var reason = err ? (err.message || String(err)) : 'HTTP ' + response.statusCode;
        console.error('Failed to retrieve page ' + retrieved + ': ' + reason);
    }
    // `retrieved` now equals the index of the next URL (or the list length when done).
    setTimeout(function(){sendRequest(retrieved)}, 100);
}
/**
 * Requests the listing page at position `i` of `appledailyURL`, handing the
 * reply to `responseHandler`. When `i` equals the number of URLs there is
 * nothing left to fetch and `finish()` is called instead.
 *
 * @param {number} i - Index into `appledailyURL` of the page to request.
 */
function sendRequest(i){
    if (i !== appledailyURL.length){
        request(appledailyURL[i], responseHandler);
        return;
    }
    finish();
}
// Start the crawl by requesting the first URL in appledailyURL.
sendRequest(0);
/**
 * Groups the collected `news` by category, logs the current date and the
 * category holding the most items, and writes the grouped result to
 * `categorizedNewsFile` as JSON indented by four spaces.
 *
 * Each output entry has the shape `{category, news: [...], news_count}`,
 * where every news element keeps only `title`, `url`, `time` and `video`.
 */
function finish(){
    // One bucket per category, in the order the categories were first seen.
    var grouped = categories.map(function(name){
        return {category: name, news: []};
    });

    for (var n = 0; n < news.length; n++){
        var item = news[n];
        var bucket = grouped[categories.indexOf(item.category)];
        bucket.news.push({title: item.title, url: item.url, time: item.time, video: item.video});
    }

    // Strict '>' keeps the earliest category when several share the top count.
    var topIndex = 0;
    var topCount = 0;
    for (var g = 0; g < grouped.length; g++){
        var size = grouped[g].news.length;
        grouped[g].news_count = size;
        if (size > topCount){
            topIndex = g;
            topCount = size;
        }
    }

    console.log(new Date());
    console.log('數量最多的分類為 [' + categories[topIndex] + '],共有 ' + topCount + ' 則新聞');
    fs.writeFileSync(categorizedNewsFile, JSON.stringify(grouped, null, 4));
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment