Created
August 30, 2012 09:59
-
-
Save wangshijun/3525238 to your computer and use it in GitHub Desktop.
javascript: detect next page
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * JavaScript-level implementation of "next page" link detection. Also
 * supports the HTML5 rel="next" form. Time complexity: O(n).
 *
 * @author wangshijun <wangshijun2010@gmail.com>
 * @package default
 * @subpackage default
 * @copyright (c) wangshijun2010@gmail.com
 * @todo Implement this in an OOP style
 */
// Final result list: the HTML5 rel="next" links plus every link whose text
// contains a "next" keyword.
var outNextPageArray = [];

// Links flagged with the HTML5 "next" relation.
var nextRelArray = [];

// Detected "next page" link elements.
var nextPageArray = [];

// Detected "next chapter" link elements.
var nextChapterArray = [];

// Links whose text contains a "next" keyword and is at most 5 characters long.
var nextAllArray = [];

// Keyword matches that belong to neither nextPageArray nor nextChapterArray.
var nextOtherArray = [];
/**
 * Detect "next page" links in the current document.
 *
 * Walks every <a> element once and records:
 *   - links whose rel attribute contains the HTML5 "next" token, and
 *   - links whose normalized text (lower-cased, whitespace removed) is at
 *     most 5 characters long and contains one of the "next" keywords.
 *
 * Results are written to the file-level arrays nextRelArray, nextAllArray,
 * nextPageArray, nextChapterArray, nextOtherArray and outNextPageArray. All
 * of them are emptied at the start of each call, so calling this function
 * repeatedly does not accumulate stale or duplicated entries.
 *
 * outNextPageArray receives the rel="next" links first, followed by the
 * keyword matches that were not already listed as rel="next" links.
 *
 * @return {string} the value returned by getAllNextHrefJSON()
 */
function detectNextPage() {
    // Exact link texts meaning "next page".
    // NOTE(review): candidates are pre-filtered by isContainNext(), so entries
    // that are longer than 5 characters or contain no keyword from nextAllText
    // (e.g. 'next page', 'old', 'older', 'earlier') can never match. Confirm
    // whether the pre-filter should be relaxed for exact matches.
    var nextPageText = [
        'next', 'next page', 'old', 'older', 'earlier',
        '下页', '下頁', '下一页', '下一页>',
        '[下页]', '下一頁', '下一頁>',
        '后一页', '后一頁', '翻下页', '翻下頁',
        '后页', '后頁', '下翻', '下一个', '下一张',
        '下一幅', '下一节', '下一条', '下图'];

    // Exact link texts meaning "next chapter".
    var nextChapterText = ['下章', '下一章', '下篇', '下一篇', '后一章', '后一篇', '下组图'];

    // Keywords a short link text must contain to be treated as a candidate.
    var nextAllText = ['下一', 'next', '下页', '下頁', '后一', '翻下', '后页', '后頁', '下翻', '下图', '下章', '下篇'];

    // Lower-case the text and strip all whitespace; tolerates null/undefined.
    function simplifyText(text) {
        return String(text || '').toLowerCase().replace(/\s/g, '');
    }

    // True when `list` holds `value` as an exact entry.
    function contains(list, value) {
        return list.indexOf(value) !== -1;
    }

    // True when the element's rel attribute lists the "next" token. rel is a
    // space-separated token list, so "nofollow next" must match as well.
    function isHtml5Next(el) {
        var rel = el.getAttribute('rel');
        if (!rel) {
            return false;
        }
        return contains(rel.toLowerCase().split(/\s+/), 'next');
    }

    // True when the (already normalized) text is at most 5 characters long
    // and contains one of the "next" keywords.
    function isContainNext(text) {
        if (text.length > 5) {
            return false;
        }
        for (var k = 0, total = nextAllText.length; k < total; k++) {
            if (text.indexOf(nextAllText[k]) >= 0) {
                return true;
            }
        }
        return false;
    }

    // Normalized label of a link: its text content, or - when that is empty
    // or whitespace only and the link wraps an image - the image alt text,
    // the link title, or the image title.
    function getLinkText(anchor) {
        var label = simplifyText(anchor.textContent);
        if (label) {
            return label;
        }
        var image = anchor.getElementsByTagName('img')[0];
        if (!image) {
            return '';
        }
        return simplifyText(image.alt || anchor.title || image.title);
    }

    // Empty the shared arrays in place (keeping their identity) so that a
    // second call starts from a clean state.
    var buckets = [outNextPageArray, nextRelArray, nextPageArray,
        nextChapterArray, nextAllArray, nextOtherArray];
    for (var b = 0; b < buckets.length; b++) {
        buckets[b].length = 0;
    }

    var anchors = document.getElementsByTagName('a');
    // Keyword matches that are not rel="next" links; kept separately so a link
    // matching both rules is reported only once.
    var keywordOnly = [];

    for (var i = 0, count = anchors.length; i < count; i++) {
        var anchor = anchors[i];

        var hasRelNext = isHtml5Next(anchor);
        if (hasRelNext) {
            nextRelArray.push(anchor);
        }

        var text = getLinkText(anchor);
        if (!isContainNext(text)) {
            continue;
        }
        nextAllArray.push(anchor);
        if (!hasRelNext) {
            keywordOnly.push(anchor);
        }

        // Classify using the same normalized text that selected the link.
        if (contains(nextPageText, text)) {
            nextPageArray.push(anchor);
        } else if (contains(nextChapterText, text)) {
            nextChapterArray.push(anchor);
        } else {
            nextOtherArray.push(anchor);
        }
    }

    Array.prototype.push.apply(outNextPageArray, nextRelArray);
    Array.prototype.push.apply(outNextPageArray, keywordOnly);

    return getAllNextHrefJSON();
}
/**
 * Callback for the application layer, to be invoked after the detected links
 * have been handed back to it: highlights every entry of outNextPageArray by
 * calling displayAnchorAt() with each index in order.
 */
function displayAllAnchor() {
    // The former `len >= 0` guard was always true, so the loop runs directly;
    // an empty array simply yields zero iterations.
    for (var i = 0, len = outNextPageArray.length; i < len; i++) {
        displayAnchorAt(i);
    }
}
/**
 * Collect the href attribute of every detected link.
 *
 * @return {Array} a new array with one entry per element of outNextPageArray,
 *     in the same order; each entry is that element's getAttribute('href')
 *     result.
 */
function getAllNextHref() {
    return outNextPageArray.map(function (anchor) {
        return anchor.getAttribute('href');
    });
}
/**
 * Serialize the hrefs returned by getAllNextHref() as a JSON array of strings.
 *
 * Uses JSON.stringify so that quotes, backslashes and control characters in
 * an href are escaped and the result is always valid JSON. Each entry is
 * coerced to a string first, matching the previous string-concatenation
 * output for ordinary hrefs.
 *
 * @return {string} JSON text such as '["/page/2","next.html"]', or '[]' when
 *     there are no hrefs.
 */
function getAllNextHrefJSON() {
    var hrefs = getAllNextHref();
    var asStrings = [];
    for (var i = 0; i < hrefs.length; i++) {
        asStrings.push(String(hrefs[i]));
    }
    return JSON.stringify(asStrings);
}
/**
 * Highlight the detected link stored at index `loc` of outNextPageArray by
 * setting its text color to #388114.
 *
 * The color is set with "important" priority because the "next page" CSS of
 * sites such as jike.com would otherwise override it. Only the color property
 * is touched, so other inline styles on the link are preserved. An index
 * that does not refer to a styled element is ignored.
 *
 * @param {number} loc index into outNextPageArray
 */
function displayAnchorAt(loc) {
    var anchor = outNextPageArray[loc];
    if (!anchor || !anchor.style) {
        return;
    }
    anchor.style.setProperty('color', '#388114', 'important');
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment