Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
javascript: detect next page
/**
* JavaScript-level implementation of next-page detection. Supports the HTML5
* form (rel="next"); linear time complexity according to the original author.
*
* @author wangshijun <wangshijun2010@gmail.com>
* @package default
* @subpackage default
* @copyright (c) wangshijun2010@gmail.com
* @todo Implement this in a OOP style
*/
var outNextPageArray = []; // HTML5 rel="next" anchors plus every anchor containing a "next" keyword
var nextRelArray = []; // anchors marked as "next page" the HTML5 way
var nextPageArray = []; // detected "next page" link elements
var nextChapterArray = []; // "next chapter" link elements
var nextAllArray = []; // anchors whose text contains a "next" keyword and is at most 5 characters long
var nextOtherArray = []; // "next"-keyword anchors that are in neither nextPageArray nor nextChapterArray
/**
 * Detect "next page" links among all <a> elements of the current document.
 *
 * Every anchor is visited once:
 *  - anchors whose rel attribute contains the token "next" go to nextRelArray;
 *  - anchors whose normalized label (lower-cased, whitespace removed) is at
 *    most 5 characters long and contains a "next" keyword go to nextAllArray,
 *    and are additionally filed under nextPageArray, nextChapterArray or
 *    nextOtherArray.
 * The label is the anchor text or, when that is blank, the alt/title of the
 * first <img> inside the anchor.
 *
 * The module-level result arrays are emptied first, so calling this function
 * again does not accumulate duplicates from an earlier run.
 *
 * @returns {string} whatever getAllNextHrefJSON() returns for the collected
 *     anchors (nextRelArray followed by nextAllArray).
 */
function detectNextPage() {
    // Labels that identify a "next page" link.
    var nextPageText = [
        'next', 'next page', 'old', 'older', 'earlier',
        '下页', '下頁', '下一页', '下一页>', '下一页&gt;',
        '[下页]', '下一頁', '下一頁>', '下一頁&gt;',
        '后一页', '后一頁', '翻下页', '翻下頁',
        '后页', '后頁', '下翻', '下一个', '下一张',
        '下一幅', '下一节', '下一条', '下图'];
    // Labels that identify a "next chapter" link.
    var nextChapterText = ['下章', '下一章', '下篇', '下一篇', '后一章', '后一篇', '下组图']; //, '>','›'
    // Keywords a short label must contain to be considered at all.
    var nextAllText = ['下一', 'next', '下页', '下頁', '后一', '翻下', '后页', '后頁', '下翻', '下图', '下章', '下篇'];
    // Longest normalized label that is still treated as a navigation link.
    var maxLabelLength = 5;

    // Lower-case the text and strip all whitespace; tolerates null/undefined.
    function simplfyText(text) {
        return String(text || '').toLowerCase().replace(/\s/g, '');
    }

    // True when the normalized label is exactly one of the "next page" labels.
    function isNextPageText(text) {
        return nextPageText.indexOf(text) !== -1;
    }

    // True when the normalized label is exactly one of the "next chapter" labels.
    function isNextChapterText(text) {
        return nextChapterText.indexOf(text) !== -1;
    }

    // True when the element's rel attribute lists the "next" link type.
    // rel is a space-separated token list, so rel="next nofollow" also counts.
    function isHtml5Next(el) {
        if (!el.hasAttribute('rel')) {
            return false;
        }
        var tokens = String(el.getAttribute('rel')).toLowerCase().split(/\s+/);
        return tokens.indexOf('next') !== -1;
    }

    // True when the normalized label is short and contains a "next" keyword.
    function isContainNext(text) {
        if (text.length > maxLabelLength) {
            return false;
        }
        for (var k = 0; k < nextAllText.length; k++) {
            if (text.indexOf(nextAllText[k]) >= 0) {
                return true;
            }
        }
        return false;
    }

    // Normalized label of an anchor: its text, or the alt/title of its first
    // image when the text is blank (image-only links).
    function getAnchorLabel(el) {
        var label = simplfyText(el.textContent);
        if (!label) {
            var image = el.getElementsByTagName('img')[0];
            if (image) {
                label = simplfyText(image.alt || el.title || image.title);
            }
        }
        return label;
    }

    // Append every detected anchor of innerArray to the global output array.
    function push2outArray(innerArray) {
        for (var k = 0; k < innerArray.length; k++) {
            outNextPageArray.push(innerArray[k]);
        }
    }

    // Start from a clean slate so repeated calls do not pile up stale anchors.
    var results = [outNextPageArray, nextRelArray, nextPageArray,
        nextChapterArray, nextAllArray, nextOtherArray];
    for (var r = 0; r < results.length; r++) {
        results[r].length = 0;
    }

    var anchors = document.getElementsByTagName("a");
    for (var i = 0, l = anchors.length; i < l; i++) {
        var anchor = anchors[i];

        // HTML5-style next page marker.
        if (isHtml5Next(anchor)) {
            nextRelArray.push(anchor);
        }

        // Keyword-based detection; classification uses the same normalized
        // label as the filter so case, whitespace and image labels agree.
        var label = getAnchorLabel(anchor);
        if (!isContainNext(label)) {
            continue;
        }
        nextAllArray.push(anchor);
        if (isNextPageText(label)) {
            nextPageArray.push(anchor);
        } else if (isNextChapterText(label)) {
            nextChapterArray.push(anchor);
        } else {
            nextOtherArray.push(anchor);
        }
    }

    push2outArray(nextRelArray);
    push2outArray(nextAllArray);
    return getAllNextHrefJSON();
}
/**
 * Callback for the application layer, to be invoked after the links have been
 * handed back to it: highlights every anchor held in outNextPageArray by
 * calling displayAnchorAt() for each index in order.
 */
function displayAllAnchor() {
    var total = outNextPageArray.length;
    var index = 0;
    while (index < total) {
        displayAnchorAt(index);
        index += 1;
    }
}
/**
 * Collect the href attribute of every anchor in outNextPageArray.
 *
 * @returns {Array} the getAttribute('href') value of each anchor, in the same
 *     order as outNextPageArray.
 */
function getAllNextHref() {
    var hrefs = [];
    var index = 0;
    while (index < outNextPageArray.length) {
        var anchor = outNextPageArray[index];
        hrefs[index] = anchor.getAttribute('href');
        index += 1;
    }
    return hrefs;
}
/**
 * Serialize the hrefs returned by getAllNextHref() as a JSON array of strings.
 *
 * Each value is coerced to a string and then encoded with JSON.stringify, so
 * hrefs containing quotes, backslashes or control characters still produce
 * valid JSON.
 *
 * @returns {string} JSON text such as '["/page/2","/page/3"]', or '[]' when
 *     no anchors were collected.
 */
function getAllNextHrefJSON() {
    var hrefs = getAllNextHref();
    var strings = [];
    for (var i = 0; i < hrefs.length; i++) {
        // Keep every entry a JSON string, including a missing (null) href.
        strings.push(String(hrefs[i]));
    }
    return JSON.stringify(strings);
}
/**
 * Highlight the anchor stored at index loc of outNextPageArray.
 *
 * @param {number} loc index into outNextPageArray; an index with no anchor is
 *     ignored.
 */
function displayAnchorAt(loc) {
    var anchor = outNextPageArray[loc];
    if (!anchor) {
        return;
    }
    // Some sites (e.g. jike.com) style their "next page" link so that the
    // colour only takes effect with !important. Set just the colour so other
    // inline styles already on the anchor are kept.
    anchor.style.setProperty('color', '#388114', 'important');
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.