Skip to content

Instantly share code, notes, and snippets.

@DoubleShift
Created October 23, 2015 12:41
Show Gist options
  • Save DoubleShift/1e10253e0d32b18f2739 to your computer and use it in GitHub Desktop.
Save DoubleShift/1e10253e0d32b18f2739 to your computer and use it in GitHub Desktop.
/**
* Copyright (c) 2014 Meizu bigertech, All rights reserved.
* http://www.bigertech.com/
* @author liuxing
* @date 15/3/16
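* @description Helpers for working with RSS feeds: list a feed's posts, read feed-level site info, and fetch each post's article body.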
*
*/
// Third-party dependencies, followed by the default field whitelists that
// fetchRss (postOptions) and siteInfo (siteInfoOption) fall back to.
var Promise = require('bluebird'),
FeedParser = require('feedparser'),
_ = require('lodash'),
request = require('request'),
read = require('node-readability'),
iconv = require('iconv-lite'),
Iconv = require('iconv').Iconv,
BufferHelper = require('bufferhelper'),
// Item fields kept by fetchRss when the caller passes no `options` list.
postOptions = ['title', 'description', 'summary', 'date', 'link',
'guid', 'author', 'comments', 'origlink', 'image', 'source', 'categories', 'enclosures'],
// Feed-level (meta) fields kept by siteInfo when the caller passes no `options` list.
siteInfoOption = ['title', 'description', 'date', 'link', 'xmlurl', 'author', 'favicon', 'copyright', 'generator', 'image'];
/**
 * Decode a raw feed body into text.
 *
 * When `charset` is given it is used as-is (an unsupported label makes
 * iconv-lite throw, which the caller turns into a rejection). Otherwise the
 * candidates are tried in order: the `charset` parameter of the Content-Type
 * header, the `encoding` attribute of the XML declaration, and finally UTF-8.
 *
 * @param {Buffer} body - complete response body
 * @param {string} [contentType] - value of the Content-Type response header
 * @param {string} [charset] - encoding forced by the caller
 * @returns {string} decoded document
 */
function decodeFeedBody(body, contentType, charset) {
    if (charset) {
        return iconv.decode(body, charset);
    }
    var candidates = [];
    var fromHeader = /charset\s*=\s*["']?([^"';\s]+)/i.exec(contentType || '');
    if (fromHeader) {
        candidates.push(fromHeader[1]);
    }
    // The XML declaration is plain ASCII in the encodings handled here, so a
    // byte-for-byte ("binary") peek at the start of the body is sufficient.
    var prolog = body.slice(0, 256).toString('binary');
    var fromProlog = /<\?xml[^>]*\sencoding\s*=\s*["']([^"']+)["']/i.exec(prolog);
    if (fromProlog) {
        candidates.push(fromProlog[1]);
    }
    for (var i = 0; i < candidates.length; i++) {
        try {
            return iconv.decode(body, candidates[i]);
        } catch (err) {
            // Label not recognised by iconv-lite: fall through to the next candidate.
        }
    }
    return iconv.decode(body, 'utf-8');
}

/**
 * Fetch a feed and return all of its posts.
 *
 * The response body is buffered, decoded once and then handed to the feed
 * parser in a single write, so the parser never sees the document twice and
 * non-UTF-8 feeds (e.g. gb2312) are decoded before parsing.
 *
 * @param {string} url - feed URL
 * @param {string[]} [options] - item fields to keep; defaults to `postOptions`
 * @param {string} [charset] - force this encoding instead of detecting it
 * @returns {Promise} resolves with an array of posts reduced to `options`;
 *   rejects on request errors, a non-200 status, decoding errors or parser errors
 */
function fetchRss(url, options, charset) {
    options = options || postOptions;
    return new Promise(function (resolve, reject) {
        var posts = [];
        var req = request(url, {timeout: 10000, pool: false});
        req.setMaxListeners(50);
        // Some feeds do not respond without user-agent and accept headers.
        req.setHeader('user-agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36');
        req.setHeader('accept', 'text/html,application/xhtml+xml');
        var feedparser = new FeedParser();

        req.on('error', reject);
        req.on('response', function (res) {
            if (res.statusCode !== 200) {
                return this.emit('error', new Error('Bad status code'));
            }
            var chunks = [];
            res.on('data', function (chunk) {
                chunks.push(chunk);
            });
            res.on('end', function () {
                try {
                    var body = Buffer.concat(chunks);
                    var contentType = (res.headers || {})['content-type'];
                    feedparser.end(decodeFeedBody(body, contentType, charset));
                } catch (err) {
                    reject(err);
                }
            });
        });

        feedparser.on('error', reject);
        feedparser.on('end', function () {
            resolve(posts);
        });
        feedparser.on('readable', function () {
            var item;
            while ((item = this.read())) {
                posts.push(_.pick(item, options));
            }
        });
    });
}
/**
 * Fetch a feed and return its feed-level (site) information.
 *
 * The metadata is captured from whichever source shows up first: the
 * parser's `meta` event, the `meta` attached to a parsed item, or the
 * parser's own `meta` property once parsing has ended. Relying on items
 * alone would yield nothing for a feed that contains no posts.
 *
 * @param {string} url - feed URL
 * @param {string[]} [options] - meta fields to keep; defaults to `siteInfoOption`
 * @returns {Promise} resolves with the picked meta fields plus `feedurl`
 *   (the URL that was requested); rejects on request errors, a non-200
 *   status or parser errors
 */
function siteInfo(url, options) {
    options = options || siteInfoOption;
    return new Promise(function (resolve, reject) {
        var rss;
        var req = request(url, {timeout: 10000, pool: false});
        req.setMaxListeners(50);
        // Some feeds do not respond without user-agent and accept headers.
        req.setHeader('user-agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36');
        req.setHeader('accept', 'text/html,application/xhtml+xml');
        var feedparser = new FeedParser();

        // Record the site info once; later calls are ignored.
        function capture(meta) {
            if (rss || !meta) {
                return;
            }
            rss = _.pick(meta, options);
            rss.feedurl = url; // URL of the feed itself
            resolve(rss);
        }

        req.on('error', reject);
        req.on('response', function (res) {
            if (res.statusCode !== 200) {
                return this.emit('error', new Error('Bad status code'));
            }
            this.pipe(feedparser);
        });

        feedparser.on('error', reject);
        feedparser.on('meta', capture);
        feedparser.on('readable', function () {
            var item;
            // Drain every item so the parser can reach its end.
            while ((item = this.read())) {
                capture(item.meta);
            }
        });
        feedparser.on('end', function () {
            capture(this.meta);
            // No-op when already settled; otherwise resolves with undefined.
            resolve(rss);
        });
    });
}
/**
 * Fill in `content` for every post by fetching the article behind its link.
 *
 * Posts are processed one after another. For each post the extracted article
 * content is used when available; otherwise the post's `description`, then
 * its `summary`, is used. The posts are modified in place.
 *
 * @param {Object[]} posts - posts carrying at least a `link`
 * @returns {Promise} resolves with the same `posts` array once every post has
 *   a `content` value; rejects as soon as one article fetch fails
 */
function fetchAllContent(posts) {
    return Promise.each(posts, function (post) {
        return getCleanBody(post.link).then(function (article) {
            // Read the content before closing: closing releases the parsed page.
            var body = article && article.content;
            post.content = body || post.description || post.summary;
            if (article && typeof article.close === 'function') {
                // node-readability asks callers to close articles to avoid leaks.
                article.close();
            }
        });
    }).then(function () {
        return posts;
    });
}
/**
 * Load a feed's posts and then attach the article body to each of them.
 *
 * @param {string} url - feed URL
 * @returns {Promise} resolves with whatever `fetchAllContent` yields for the
 *   posts returned by `fetchRss(url)`
 */
function getAllByUrl(url) {
    var pendingPosts = fetchRss(url);
    return pendingPosts.then(fetchAllContent);
}
/**
 * Extract the readable article behind a link.
 *
 * Wraps the callback-style `read(link, callback)` call in a promise.
 *
 * @param {string} link - URL of the page to read
 * @returns {Promise} resolves with the article passed to the callback;
 *   rejects with the callback's error when one is reported
 */
function getCleanBody(link) {
    return new Promise(function (resolve, reject) {
        read(link, function (err, article) {
            return err ? reject(err) : resolve(article);
        });
    });
}
// Public API of this module.
var publicApi = {};
publicApi.fetchRss = fetchRss;
publicApi.siteInfo = siteInfo;
publicApi.fetchAllContent = fetchAllContent;
publicApi.getCleanBody = getCleanBody;
publicApi.getAllByUrl = getAllByUrl;
module.exports = publicApi;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment