Instantly share code, notes, and snippets.
Created
August 3, 2013 14:39
-
Star
(0)
0
You must be signed in to star a gist -
Fork
(0)
0
You must be signed in to fork a gist
-
Save shurain/6146690 to your computer and use it in GitHub Desktop.
Readability for Naver blog. Ripped off from brettterpstra.com/share/readability.js
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Version string of this Readability build.
var readabilityVersion = "2";
// Presentation presets; they are applied below as CSS class names.
var readStyle = 'style-ebook';
var readSize = 'size-medium';
var readMargin = 'margin-wide';

/**
 * Bootstrap: strips the page's scripts, extracts the article with
 * grabArticle(), and replaces the whole body with a #readOverlay wrapper
 * containing a #readInner wrapper around the article.
 *
 * Evaluates to the overlay's innerHTML.
 */
(function () {
  var keepScript = /readability|[Cc]lippability/;

  // Remove all existing scripts so they don't cause conflicts.
  // getElementsByTagName returns a live collection, so walk it backwards:
  // removing while walking forwards would skip every other element.
  var docscripts = document.getElementsByTagName('script');
  for (var k = docscripts.length - 1; k >= 0; k--) {
    var script = docscripts[k];
    if (script.parentNode && !keepScript.test(script.src || '')) {
      script.parentNode.removeChild(script);
    }
  }

  // Load jQuery and use it for a second script sweep once it is available.
  var head = document.getElementsByTagName('head')[0] || document.documentElement;
  var gjs = document.createElement('SCRIPT');
  gjs.type = 'text/javascript';
  // https so the request is not blocked as mixed content on secure pages.
  gjs.src = "https://ajax.googleapis.com/ajax/libs/jquery/1.3.2/jquery.min.js";
  // Handlers are attached before insertion so a cached load cannot be missed.
  gjs.onload = gjs.onreadystatechange = function () {
    // onreadystatechange also fires for intermediate states; wait for the end.
    var state = gjs.readyState;
    if (state && state != 'loaded' && state != 'complete') { return; }
    if (typeof window.jQuery != 'function') { return; }
    gjs.onload = gjs.onreadystatechange = null; // run the sweep only once
    window.jQuery('script').each(function () {
      if (!this.src.match(/readability|[Cc]lippability|jquery\.min\.js$/)) {
        window.jQuery(this).remove();
      }
    });
  };
  head.appendChild(gjs);

  // Extract the article before touching the body: if extraction throws,
  // the page content is still intact.
  var article = grabArticle();

  // For totally hosed HTML, add a body node when none can be found. This has
  // to happen before document.body is dereferenced below.
  if (document.body == null) {
    document.body = document.createElement("body");
  }

  var objOverlay = document.createElement("div");
  var objinnerDiv = document.createElement("div");
  objOverlay.id = "readOverlay";
  objinnerDiv.id = "readInner";

  // Apply user-selected styling:
  document.body.className = readStyle;
  objOverlay.className = readStyle;
  objinnerDiv.className = readMargin + " " + readSize;

  objinnerDiv.appendChild(article);    // place the article inside the inner div
  objOverlay.appendChild(objinnerDiv); // insert the inner div into the overlay

  // Replace the page content with the overlay.
  document.body.innerHTML = "";
  document.body.insertBefore(objOverlay, document.body.firstChild);

  return objOverlay.innerHTML;
})();
/**
 * Collects the descendants of a node that carry a given CSS class.
 *
 * The class must match a whole whitespace-delimited token, so "number" does
 * not match "line-number", and regex metacharacters in the name are taken
 * literally.
 *
 * @param {string} classname - class token to look for.
 * @param {Object} [node] - root to search under; defaults to the first <body>.
 * @returns {Array} matching elements, in the order getElementsByTagName("*")
 *     yields them.
 */
function getElementsByClassName(classname, node) {
  var root = node || document.getElementsByTagName("body")[0];
  var matches = [];
  // Escape the name so characters such as "." or "+" are not read as regex syntax.
  var escaped = String(classname).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  var tokenRe = new RegExp("(?:^|\\s)" + escaped + "(?:\\s|$)");
  var els = root.getElementsByTagName("*");
  for (var i = 0, count = els.length; i < count; i++) {
    var cls = els[i].className;
    // className may be an object exposing baseVal instead of a plain string.
    if (typeof cls != "string") {
      cls = (cls && typeof cls.baseVal == "string") ? cls.baseVal : "";
    }
    if (tokenRe.test(cls)) {
      matches.push(els[i]);
    }
  }
  return matches;
}
/**
 * Extracts the post container (#postListBody), strips page styling and
 * clutter from it, and wraps it together with an <h1> title.
 *
 * Side effects: rewrites the container's markup in place, disables external
 * stylesheets whose href does not contain "readability", blanks <style>
 * elements (except on legacy IE), and installs docOnKeyup as the document's
 * keyup handler.
 *
 * @returns {Object} a detached DIV holding the title followed by the
 *     cleaned container.
 * @throws {Error} when the page has no #postListBody element; this is raised
 *     before any page mutation.
 */
function grabArticle() {
  var topDiv = document.getElementById("postListBody");
  if (!topDiv) {
    throw new Error("grabArticle: element #postListBody not found");
  }

  // Removes every element under root carrying the class. The collection is
  // live, so it is walked backwards to avoid skipping entries while removing.
  function removeByClass(root, className) {
    var found = root.getElementsByClassName(className);
    for (var n = found.length - 1; n >= 0; n--) {
      if (found[n] && found[n].parentNode) {
        found[n].parentNode.removeChild(found[n]);
      }
    }
  }

  var articleContent = document.createElement("DIV");
  var articleTitle = document.createElement("H1");

  // Replace doubled-up <br> tags with paragraph breaks and drop <font> tags.
  // Regex literals are used so that \s really means whitespace (inside a
  // string passed to RegExp, "\s" collapses to a plain "s").
  topDiv.innerHTML = topDiv.innerHTML
    .replace(/<br(?:\s[^>]*)?\/?>\s*<br(?:\s[^>]*)?\/?>/gi, "</p><p>")
    .replace(/<\/?font(?:\s[^>]*)?>/gi, "");

  // Use the document title as the heading. It is inserted as a text node so
  // a "<" or "&" in the title is shown literally instead of parsed as markup.
  articleTitle.appendChild(document.createTextNode(document.title));
  articleContent.appendChild(articleTitle);

  // Disable every external stylesheet except Readability's own.
  for (var k = 0; k < document.styleSheets.length; k++) {
    var sheet = document.styleSheets[k];
    if (sheet.href != null && sheet.href.indexOf("readability") == -1) {
      sheet.disabled = true;
    }
  }

  // Blank all inline <style> elements (not done on legacy IE).
  if (navigator.appName != "Microsoft Internet Explorer") {
    var styleTags = document.getElementsByTagName("style");
    for (var l = 0; l < styleTags.length; l++) {
      styleTags[l].textContent = "";
    }
  }

  topDiv = killCodeSpans(topDiv); // strips <span> tags, keeping their content
  cleanStyles(topDiv);            // removes all style attributes
  topDiv = killDivs(topDiv);      // removes DIVs that look like clutter
  topDiv = killBreaks(topDiv);    // collapses runs of <br> into one

  // Clean out junk from the container, just in case. <object> and <h2>
  // elements are intentionally left in place.
  topDiv = clean(topDiv, "form");
  topDiv = clean(topDiv, "table", 8);
  topDiv = clean(topDiv, "h1");
  topDiv = clean(topDiv, "iframe");

  // Drop elements carrying these blog-specific classes.
  removeByClass(topDiv, "blogapp_area");
  removeByClass(topDiv, "post-btn");

  // No footer is appended to the article.
  articleContent.appendChild(topDiv);

  document.onkeyup = docOnKeyup;
  return articleContent;
}
/**
 * Keyboard handler for the reading overlay.
 *
 *   Escape      - reloads the page
 *   Left arrow  - dark colours
 *   Right arrow - light colours
 *   Delete      - grey background with blue links
 *
 * Any other key is ignored. Colours are applied to #readOverlay and to every
 * link inside it.
 *
 * @param {Object} [ev] - the keyup event; falls back to window.event when the
 *     browser does not pass one.
 */
function docOnKeyup(ev) {
  var e = ev || window.event;
  var keyID = e ? e.keyCode : null;
  var bgcolor, fgcolor, acolor;

  switch (keyID) {
    case 27: // escape
      document.location.reload(true);
      return; // the page is going away; nothing to recolour
    case 37: // left arrow
      bgcolor = "#222";
      fgcolor = "#F3EFCE";
      acolor = "#A19F89";
      break;
    case 39: // right arrow
      bgcolor = "#fff";
      fgcolor = "#333";
      acolor = "#276F78";
      break;
    case 46: // delete
      bgcolor = "#eee";
      fgcolor = "#333";
      acolor = "blue"; // a colour keyword takes no "#" prefix
      break;
    default:
      // Unhandled key: leave the current colours alone.
      return;
  }

  var overlay = document.getElementById("readOverlay");
  if (!overlay) {
    return;
  }
  overlay.style.backgroundColor = bgcolor;
  overlay.style.color = fgcolor;

  var alinks = overlay.getElementsByTagName('a');
  for (var lc = 0; lc < alinks.length; lc++) {
    alinks[lc].style.color = acolor;
  }
}
/**
 * Returns the text of a node.
 *
 * Detects the available property rather than sniffing the browser name:
 * textContent is used when the node provides it as a string, otherwise
 * innerText.
 *
 * @param {Object} e - node to read.
 * @returns {string} the node's text, or "" when neither property holds a string.
 */
function getInnerText(e) {
  if (typeof e.textContent == "string") {
    return e.textContent;
  }
  if (typeof e.innerText == "string") {
    return e.innerText;
  }
  return "";
}
/**
 * Counts the pieces produced by splitting a node's text on a separator,
 * i.e. the number of separator occurrences plus one.
 *
 * @param {Object} e - node whose text is read via getInnerText.
 * @param {string} [s] - separator; a comma is used when omitted or empty.
 * @returns {number} number of pieces after the split.
 */
function getCharCount(e, s) {
  var separator = s ? s : ",";
  var pieces = getInnerText(e).split(separator);
  return pieces.length;
}
/**
 * Removes the style attribute from a node and from every element below it.
 *
 * @param {Object} [e] - root node; defaults to document.
 */
function cleanStyles(e) {
  var root = e || document;

  // Remove the root's own style, if it supports attributes at all
  // (the document node, for instance, does not).
  if (typeof root.removeAttribute == 'function') {
    root.removeAttribute('style');
  }

  // Recurse into element children only (nodeType 1); the recursive call
  // strips each child's own attribute, so it is removed exactly once.
  for (var cur = root.firstChild; cur != null; cur = cur.nextSibling) {
    if (cur.nodeType == 1) {
      cleanStyles(cur);
    }
  }
}
/**
 * Removes DIVs under e that look like page clutter rather than article text.
 *
 * A DIV is removed when either:
 *   - its id or class matches one of the stop-word patterns
 *     (comment, share, footer, or a leading "ad"), or
 *   - its text splits into fewer than 10 comma-separated pieces, it is
 *     dominated by images/list items/links (or tagged comment/share), it
 *     holds no pre/code/embed/object/"<!-- sphereit" content, and it has at
 *     least one paragraph and exactly one image.
 *
 * @param {Object} e - container element; modified in place.
 * @returns {Object} the same container, for chaining.
 */
function killDivs(e) {
  // Built once instead of once per DIV.
  var stopwords = [/comment/, /share/, /footer/, /^ad/];

  // getElementsByTagName is live; copy it so removals cannot shift indexes
  // underneath the loop and no DIV is looked up again after being removed.
  var live = e.getElementsByTagName("div");
  var divs = [];
  for (var n = 0; n < live.length; n++) {
    divs.push(live[n]);
  }

  // Backwards, so nested DIVs are judged before the DIVs that contain them.
  for (var i = divs.length - 1; i >= 0; i--) {
    var div = divs[i];
    if (!div.parentNode) {
      continue;
    }

    var divId = div.id;
    var divClass = div.className;

    var isStopword = false;
    for (var sw = 0; sw < stopwords.length && !isStopword; sw++) {
      isStopword = stopwords[sw].test(divId) || stopwords[sw].test(divClass);
    }

    var isClutter = false;
    // Fewer than 10 comma-separated pieces is a bad sign...
    if (!isStopword && getCharCount(div) < 10) {
      var p = div.getElementsByTagName("p").length;
      var img = div.getElementsByTagName("img").length;
      var li = div.getElementsByTagName("li").length;
      var a = div.getElementsByTagName("a").length;
      var embed = div.getElementsByTagName("embed").length;
      var object = div.getElementsByTagName("object").length;
      var pre = div.getElementsByTagName("pre").length;
      var code = div.getElementsByTagName("code").length;
      var sphereit = div.innerHTML.indexOf("<!-- sphereit") == -1 ? 0 : 1;

      // ...and more non-paragraph elements than paragraphs, or other
      // ominous signs, with nothing worth preserving inside.
      var ominous = img > p || li > p || a > p || p == 0 ||
        divId.match("comment") != null || divClass.match("comment") != null ||
        divId.match("share") != null || divClass.match("share") != null;
      var nothingToKeep = pre == 0 && code == 0 && embed == 0 &&
        object == 0 && sphereit == 0;

      // NOTE(review): the upstream test was written `!p == 0 && img == 1`,
      // which evaluates as "p is non-zero and img is 1". That evaluation is
      // kept as-is; confirm it is the intended rule.
      isClutter = ominous && nothingToKeep && p != 0 && img == 1;
    }

    if (isStopword || isClutter) {
      div.parentNode.removeChild(div);
    }
  }
  return e;
}
/**
 * Collapses each run of <br> tags, together with the whitespace that follows
 * them, into a single "<br />".
 *
 * Matching is case-insensitive and tolerates attributes on the tag.
 * "&nbsp;" counts as whitespace between breaks, because innerHTML
 * serializes non-breaking spaces as that entity.
 *
 * @param {Object} e - element whose innerHTML is rewritten in place.
 * @returns {Object} the same element, for chaining.
 */
function killBreaks(e) {
  e.innerHTML = e.innerHTML.replace(
    /(?:<br(?:\s[^>]*)?\/?>(?:\s|&nbsp;?)*)+/gi,
    '<br />'
  );
  return e;
}
/**
 * Strips opening and closing <span> tags from an element's markup while
 * keeping the content they wrapped.
 *
 * Matching is case-insensitive and limited to tags named exactly "span", so
 * elements whose names merely start with "span" are left alone.
 *
 * @param {Object} e - element whose innerHTML is rewritten in place.
 * @returns {Object} the same element, for chaining.
 */
function killCodeSpans(e) {
  e.innerHTML = e.innerHTML.replace(/<\/?\s?span(?:\s[^>]*)?>/gi, "");
  return e;
}
/**
 * Removes descendants of e with the given tag name that fall below a size
 * threshold.
 *
 * For "table" the size is the number of <td> cells inside the table. For
 * any other tag it is the number of space-separated pieces of the element's
 * text, and <pre> elements are never removed.
 *
 * @param {Object} e - container element; modified in place.
 * @param {string} tags - tag name to look for.
 * @param {number} [minWords] - elements smaller than this are removed;
 *     defaults to 1000000, which in practice removes every match.
 * @returns {Object} the same container, for chaining.
 */
function clean(e, tags, minWords) {
  var threshold = minWords || 1000000;
  var isTable = tags == "table";

  // getElementsByTagName is live; copy it first so that removing an element
  // does not make the loop skip the one after it.
  var live = e.getElementsByTagName(tags);
  var targets = [];
  for (var n = 0; n < live.length; n++) {
    targets.push(live[n]);
  }

  for (var y = 0; y < targets.length; y++) {
    var target = targets[y];
    var tooSmall;
    if (isTable) {
      tooSmall = target.getElementsByTagName('td').length < threshold;
    } else {
      // tagName is upper-case in HTML documents, so normalise before comparing.
      tooSmall = getCharCount(target, " ") < threshold &&
        String(target.tagName).toLowerCase() != 'pre';
    }
    if (tooSmall && target.parentNode) {
      target.parentNode.removeChild(target);
    }
  }
  return e;
}
/**
 * Replaces every element with the given tag name by a bold <span>
 * (style "font-weight:bold") that receives the element's children.
 *
 * The search is scoped to e when e can be searched, and to the whole
 * document otherwise.
 *
 * @param {Object} e - container to search; returned unchanged as a reference.
 * @param {string} tagId - tag name of the elements to replace.
 * @returns {Object} e, for chaining.
 */
function convert(e, tagId) {
  var scope = (e && typeof e.getElementsByTagName == "function") ? e : document;

  // Copy the live collection: replacing an element removes it from the list,
  // which would otherwise make the loop skip its neighbour.
  var live = scope.getElementsByTagName(tagId);
  var elems = [];
  for (var n = 0; n < live.length; n++) {
    elems.push(live[n]);
  }

  for (var y = 0; y < elems.length; y++) {
    var oldNode = elems[y];
    var parent = oldNode.parentNode;
    if (!parent) {
      continue;
    }
    var newNode = document.createElement("span");
    newNode.setAttribute("style", "font-weight:bold");
    // appendChild moves the node out of oldNode, so keep taking the first
    // child until none are left; indexing forward would leave half behind.
    while (oldNode.firstChild) {
      newNode.appendChild(oldNode.firstChild);
    }
    parent.replaceChild(newNode, oldNode);
  }
  return e;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment