Skip to content

Instantly share code, notes, and snippets.

@shurain
Created August 3, 2013 14:39
Show Gist options
  • Save shurain/6146690 to your computer and use it in GitHub Desktop.
Save shurain/6146690 to your computer and use it in GitHub Desktop.
Readability for Naver blog. Ripped off from brettterpstra.com/share/readability.js
// Reader configuration. The three read* values are applied below as CSS class
// names on <body>, the overlay and the inner container.
var readabilityVersion = "2";
var readStyle = 'style-ebook';
var readSize = 'size-medium';
var readMargin = 'margin-wide';

/*
 * Bootstrap: strips the page's scripts, loads jQuery for a second script
 * sweep, replaces the whole <body> with the extracted article wrapped in
 * #readOverlay > #readInner, and evaluates to the overlay's inner HTML.
 */
(function () {
  // Scripts whose src matches this pattern are left in place.
  var keepScript = /readability|[Cc]lippability/;

  // Remove all existing scripts so they don't cause conflicts. The collection
  // is live, so walk it backwards: removing while walking forwards would skip
  // the element that slides into the freed slot.
  var docscripts = document.getElementsByTagName('script');
  for (var k = docscripts.length - 1; k >= 0; k--) {
    var script = docscripts[k];
    if (script.src != null && !keepScript.test(script.src) && script.parentNode) {
      script.parentNode.removeChild(script);
    }
  }

  // Load jQuery and use it for a second sweep once it is available.
  var gjs = document.createElement('SCRIPT');
  var sweepDone = false;
  gjs.type = 'text/javascript';
  // Handlers are attached before the element is inserted so a cached load
  // cannot fire before we are listening.
  gjs.onload = gjs.onreadystatechange = function () {
    // onreadystatechange also fires for intermediate states where jQuery is
    // not defined yet; only act on a finished load, and only once.
    var state = gjs.readyState;
    if (sweepDone || (state && state !== 'loaded' && state !== 'complete')) {
      return;
    }
    var jq = window.jQuery;
    if (typeof jq !== 'function') {
      return;
    }
    sweepDone = true;
    jq('script').each(function () {
      if (!this.src.match(/readability|[Cc]lippability|jquery\.min\.js$/)) {
        jq(this).remove();
      }
    });
  };
  // https: a plain-http script is blocked as mixed content on https pages.
  gjs.src = "https://ajax.googleapis.com/ajax/libs/jquery/1.3.2/jquery.min.js";
  (document.getElementsByTagName('head')[0] || document.documentElement).appendChild(gjs);

  var objOverlay = document.createElement("div");
  var objinnerDiv = document.createElement("div");
  objOverlay.id = "readOverlay";
  objinnerDiv.id = "readInner";

  // Apply user-selected styling to the containers we own.
  objOverlay.className = readStyle;
  objinnerDiv.className = readMargin + " " + readSize;

  // Get the article and nest it: overlay > inner > article.
  objinnerDiv.appendChild(grabArticle());
  objOverlay.appendChild(objinnerDiv);

  // For totally hosed HTML, create the body node before anything touches it.
  if (document.body == null) {
    var newBody = document.createElement("body");
    document.body = newBody;
  }
  document.body.className = readStyle;

  // Replace the page content with the overlay.
  document.body.innerHTML = "";
  document.body.insertBefore(objOverlay, document.body.firstChild);

  // NOTE(review): the function's result is not used within this file; it is
  // kept in case the script is run through an evaluator that reads it — confirm.
  return objOverlay.innerHTML;
})();
/**
 * Collect the descendants of `node` that carry the class `classname`.
 *
 * Class names are compared as whole whitespace-separated tokens, so looking
 * for "toolbar" does not match "my-toolbar", and characters that are special
 * in regular expressions are treated literally.
 *
 * @param {string} classname - Single class name to look for.
 * @param {Element} [node] - Root to search under; defaults to the first <body>.
 * @returns {Array} Matching elements, in the order getElementsByTagName("*") yields them.
 */
function getElementsByClassName(classname, node) {
  var root = node || document.getElementsByTagName("body")[0];
  var wanted = ' ' + classname + ' ';
  var matches = [];
  var els = root.getElementsByTagName("*");
  for (var i = 0, j = els.length; i < j; i++) {
    var cls = els[i].className;
    // Some elements (e.g. SVG) expose className as an object with baseVal.
    if (cls && typeof cls !== 'string') {
      cls = cls.baseVal;
    }
    if (typeof cls !== 'string' || cls === '') {
      continue;
    }
    // Pad with spaces and normalise whitespace so an indexOf on " name "
    // only ever hits complete tokens.
    var padded = ' ' + cls.replace(/[\t\r\n\f]/g, ' ') + ' ';
    if (padded.indexOf(wanted) !== -1) {
      matches.push(els[i]);
    }
  }
  return matches;
}
/**
 * Extract the post container (#postListBody), clean it up, and return a new
 * <div> holding an <h1> with the document title followed by the cleaned
 * container.
 *
 * Side effects: rewrites the container's markup in place, disables every
 * linked stylesheet whose href does not contain "readability", blanks all
 * <style> elements (except when navigator.appName reports Internet Explorer),
 * and installs docOnKeyup as document.onkeyup.
 *
 * @returns {HTMLElement} Detached <div> containing the title and the article.
 * @throws {Error} When the page has no element with id "postListBody".
 */
function grabArticle() {
  // Remove every descendant of `root` carrying `className`. The collection is
  // live, so it is walked backwards to avoid skipping elements.
  function removeByClass(root, className) {
    var found = root.getElementsByClassName(className);
    for (var i = found.length - 1; i >= 0; i--) {
      found[i].parentNode.removeChild(found[i]);
    }
  }

  var topDiv = document.getElementById("postListBody");
  if (!topDiv) {
    throw new Error("Readability: element #postListBody was not found on this page");
  }
  var articleContent = document.createElement("DIV");
  var articleTitle = document.createElement("H1");

  // Replace all doubled-up <BR> tags with <P> tags, and remove fonts.
  // A regex literal is used on purpose: inside a string literal "\s" collapses
  // to a plain "s", which made the old pattern swallow literal "s" characters.
  topDiv.innerHTML = topDiv.innerHTML
    .replace(/<br\s*\/?>\s*<br\s*\/?>/gi, "</p><p>")
    .replace(/<\/?font[^>]*>/gi, '');

  // Use the document title as the heading. It is inserted as text so that
  // characters such as "<" or "&" in the title are not parsed as markup.
  articleTitle.appendChild(document.createTextNode(document.title));
  articleContent.appendChild(articleTitle);

  // Disable all linked stylesheets except readability's own.
  for (var k = 0; k < document.styleSheets.length; k++) {
    var sheet = document.styleSheets[k];
    if (sheet.href != null && sheet.href.lastIndexOf("readability") == -1) {
      sheet.disabled = true;
    }
  }

  // Blank all <style> elements (not doing this on IE).
  if (navigator.appName != "Microsoft Internet Explorer") {
    var styleTags = document.getElementsByTagName("style");
    for (var l = 0; l < styleTags.length; l++) {
      styleTags[l].textContent = "";
    }
  }

  topDiv = killCodeSpans(topDiv); // removes span tags
  cleanStyles(topDiv);            // removes all style attributes
  topDiv = killDivs(topDiv);      // removes divs that look like non-content
  topDiv = killBreaks(topDiv);    // collapses runs of <br /> into one

  // Clean out junk from the container just in case.
  topDiv = clean(topDiv, "form");
  topDiv = clean(topDiv, "table", 8);
  topDiv = clean(topDiv, "h1");
  topDiv = clean(topDiv, "iframe");

  // Page widgets identified by class name.
  removeByClass(topDiv, 'blogapp_area');
  removeByClass(topDiv, 'post-btn');

  // No footer is built: it was never attached to the article, and creating
  // its <img> markup only caused needless image requests.
  articleContent.appendChild(topDiv);

  document.onkeyup = docOnKeyup;
  return articleContent;
}
/**
 * Keyboard handler for the reader overlay.
 *
 *   Escape (27)      reloads the page.
 *   Left arrow (37)  applies the dark colour scheme.
 *   Right arrow (39) applies the light colour scheme.
 *   Delete (46)      applies the grey colour scheme.
 *
 * Any other key is ignored. Colours are applied to #readOverlay and to every
 * link inside it; nothing happens if the overlay is not in the document.
 *
 * @param {Event} [ev] - Key event; falls back to window.event when the
 *     browser does not pass one to the handler.
 */
function docOnKeyup(ev) {
  var keyEvent = ev || window.event;
  var keyID = keyEvent ? keyEvent.keyCode : null;

  if (keyID === 27) { // escape
    document.location.reload(true);
    return;
  }

  var schemes = {
    37: { bg: "#222", fg: "#F3EFCE", link: "#A19F89" }, // left arrow
    39: { bg: "#fff", fg: "#333", link: "#276F78" },    // right arrow
    // The link colour is the keyword "blue"; "#blue" is not a valid colour
    // and was silently dropped by the browser.
    46: { bg: "#eee", fg: "#333", link: "blue" }        // delete
  };
  var scheme = schemes[keyID];
  if (!scheme) {
    return; // not a key we handle: leave the current colours alone
  }

  var overlay = document.getElementById("readOverlay");
  if (!overlay) {
    return;
  }
  overlay.style.backgroundColor = scheme.bg;
  overlay.style.color = scheme.fg;

  var alinks = overlay.getElementsByTagName('a');
  for (var lc = 0; lc < alinks.length; lc++) {
    alinks[lc].style.color = scheme.link;
  }
}
/**
 * Get the text of a node in a cross-browser way.
 *
 * Uses feature detection rather than navigator.appName: textContent is
 * preferred when the node provides it as a string, innerText is the fallback.
 *
 * @param {Node} e - Node to read.
 * @returns {string} The node's text, or "" when neither property is available.
 */
function getInnerText(e) {
  if (e && typeof e.textContent === 'string') {
    return e.textContent;
  }
  if (e && typeof e.innerText === 'string') {
    return e.innerText;
  }
  // Always hand back a string so callers can call string methods on the result.
  return '';
}
/**
 * Count the pieces obtained by splitting an element's text on a separator,
 * i.e. the number of separator occurrences plus one. Despite the name this
 * is not a count of characters.
 *
 * @param {Node} e - Element whose text (via getInnerText) is inspected.
 * @param {string} [s] - Separator; a comma is used when omitted or empty.
 * @returns {number} Number of pieces.
 */
function getCharCount(e, s) {
  var separator = s ? s : ",";
  var pieces = getInnerText(e).split(separator);
  return pieces.length;
}
/**
 * Recursively strip the "style" attribute from a node and from every element
 * beneath it.
 *
 * @param {Node} [e] - Root of the subtree; the document is used when omitted.
 */
function cleanStyles(e) {
  var root = e || document;

  // The root only loses its style when it actually offers removeAttribute
  // (the document node, for example, does not).
  if (typeof root.removeAttribute == 'function') {
    root.removeAttribute('style');
  }

  // Visit each child in sibling order; only element nodes (nodeType 1) are
  // cleaned and descended into.
  for (var child = root.firstChild; child != null; child = child.nextSibling) {
    if (child.nodeType != 1) {
      continue;
    }
    child.removeAttribute("style");
    cleanStyles(child);
  }
}
/**
 * Remove <div> elements under `e` that look like non-article content.
 *
 * A div is removed when either rule applies:
 *   1. Low-text rule: its text splits into fewer than 10 comma-separated
 *      pieces, it contains no <pre>, <code>, <embed>, <object> or
 *      "<!-- sphereit" marker, it shows an "ominous" sign (more images, list
 *      items or links than paragraphs, no paragraphs, or "comment"/"share" in
 *      its id or class), and it has at least one paragraph and exactly one
 *      image.
 *   2. Stop-word rule: its id or class matches comment, share, footer or ^ad.
 *
 * Each div is removed at most once.
 *
 * @param {Element} e - Root element whose descendant divs are examined.
 * @returns {Element} The same element `e`.
 */
function killDivs(e) {
  // Compiled once instead of once per div.
  var stopwords = [/comment/, /share/, /footer/, /^ad/];

  function countTags(node, tag) {
    return node.getElementsByTagName(tag).length;
  }

  // True when the id or class matches any stop-word pattern.
  function hasStopword(divId, divClass) {
    for (var sw = 0; sw < stopwords.length; sw++) {
      if (stopwords[sw].test(divId) || stopwords[sw].test(divClass)) {
        return true;
      }
    }
    return false;
  }

  // True when the low-text rule described above applies to `div`.
  function isLowTextJunk(div, divId, divClass) {
    // Fewer than 10 comma-separated pieces is treated as a bad sign.
    if (getCharCount(div) >= 10) {
      return false;
    }
    // Never remove divs holding code or embedded media.
    if (countTags(div, "pre") != 0 || countTags(div, "code") != 0 ||
        countTags(div, "embed") != 0 || countTags(div, "object") != 0 ||
        div.innerHTML.indexOf("<!-- sphereit") != -1) {
      return false;
    }
    var p = countTags(div, "p");
    var img = countTags(div, "img");
    var li = countTags(div, "li");
    var a = countTags(div, "a");
    var ominous = img > p || li > p || a > p || p == 0 ||
        /comment|share/.test(divId) || /comment|share/.test(divClass);
    // NOTE(review): the final guard mirrors the long-standing condition
    // `!p == 0 && img == 1`, i.e. "has paragraphs and exactly one image".
    // It may have been meant as !(p == 0 && img == 1) — confirm the intent.
    return ominous && p != 0 && img == 1;
  }

  var divsList = e.getElementsByTagName("div");
  // Traverse backwards so nodes can be removed from the live collection
  // without disturbing the entries still to be visited.
  for (var i = divsList.length - 1; i >= 0; i--) {
    var div = divsList[i];
    var divId = div.id;
    var divClass = div.className;
    // Decide first, remove once: after a removal divsList[i] refers to a
    // different div, so a second removeChild would hit the wrong element.
    if (isLowTextJunk(div, divId, divClass) || hasStopword(divId, divClass)) {
      div.parentNode.removeChild(div);
    }
  }
  return e;
}
/**
 * Collapse every run of <br> tags (with any whitespace or &nbsp; entities
 * between or after them) in the element's markup into a single "<br />".
 *
 * @param {Element} e - Element whose innerHTML is rewritten in place.
 * @returns {Element} The same element `e`.
 */
function killBreaks(e) {
  var breakRun = /(<br\s*\/?>(\s|&nbsp;?)*){1,}/g;
  var markup = e.innerHTML;
  e.innerHTML = markup.replace(breakRun, '<br />');
  return e;
}
/**
 * Strip opening and closing <span> tags from the element's markup, keeping
 * the content that was inside them.
 *
 * @param {Element} e - Element whose innerHTML is rewritten in place.
 * @returns {Element} The same element `e`.
 */
function killCodeSpans(e) {
  var spanTag = /<\/?\s?span(?:[^>]+)?>/g;
  var markup = e.innerHTML;
  e.innerHTML = markup.replace(spanTag, "");
  return e;
}
/**
 * Remove descendants of `e` with tag name `tags` that are too small.
 *
 * For "table" the size is the number of <td> cells the table contains; for
 * any other tag it is the number of space-separated pieces of the element's
 * text (via getCharCount). Elements whose size is below `minWords` are
 * removed. <pre> elements are never removed.
 *
 * @param {Element} e - Root element to clean.
 * @param {string} tags - Tag name to look for.
 * @param {number} [minWords=1000000] - Minimum size to keep an element; the
 *     default effectively removes every match.
 * @returns {Element} The same element `e`.
 */
function clean(e, tags, minWords) {
  var threshold = minWords || 1000000;
  var countCells = tags == "table";
  var targetList = e.getElementsByTagName(tags);

  // The collection is live: walk it backwards so that removing an element
  // never shifts an unvisited one past the loop index.
  for (var y = targetList.length - 1; y >= 0; y--) {
    var target = targetList[y];
    var size;
    if (countCells) {
      size = target.getElementsByTagName('td').length;
    } else {
      // tagName is upper-case in HTML documents, so compare case-insensitively.
      if (String(target.tagName).toLowerCase() == 'pre') {
        continue;
      }
      size = getCharCount(target, " ");
    }
    if (size < threshold) {
      target.parentNode.removeChild(target);
    }
  }
  return e;
}
/**
 * Replace every `tagId` element with a bold <span> that takes over all of
 * the element's child nodes, in their existing order.
 *
 * @param {Element} e - Root to search under. When it cannot be searched
 *     (missing, or without getElementsByTagName) the whole document is used.
 * @param {string} tagId - Tag name of the elements to convert.
 * @returns {Element} The same `e` that was passed in.
 */
function convert(e, tagId) {
  var scope = (e && e.getElementsByTagName) ? e : document;
  var elems = scope.getElementsByTagName(tagId);

  // The collection is live and replaceChild drops the element from it, so
  // walk backwards; a forward walk would skip every other element.
  for (var y = elems.length - 1; y >= 0; y--) {
    var elem = elems[y];
    var parent = elem.parentNode;
    var newNode = document.createElement("span");
    newNode.setAttribute("style", "font-weight:bold");

    // childNodes is live too: appendChild moves a node out of `elem`, so an
    // index loop loses half of the children. Drain from the front instead.
    while (elem.firstChild) {
      newNode.appendChild(elem.firstChild);
    }
    parent.replaceChild(newNode, elem);
  }
  return e;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment