Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
/**
 * Narrows the page down to its largest text container.
 *
 * Starting from every div/article/section under document.body, each pass picks
 * the candidate holding the most non-whitespace text, removes every other
 * candidate that is not nested inside it, and then repeats the search among the
 * containers nested inside the winner.
 *
 * Destructive: losing candidates are detached from the live document.
 *
 * @param {number} loops Maximum number of narrowing passes. A missing or
 *     non-positive value performs no passes and leaves the page untouched.
 * @returns {?Element} The deepest container that was selected, or null when no
 *     pass found a container with text.
 */
window.readable = function(loops) {
  'use strict';

  var CONTAINER_SELECTOR = 'div, article, section';

  /**
   * Picks the candidate with the most non-whitespace text and removes every
   * candidate that is neither the winner nor nested inside it.
   *
   * @param {NodeList|Array} elements Candidate containers.
   * @returns {?Element} The winner, or null when no candidate has any text
   *     (in which case nothing is removed).
   */
  function extractBiggestNode(elements) {
    var maxLength = 0;
    var biggest = null;
    var i;
    var candidate;
    var length;

    for (i = 0; i < elements.length; i++) {
      candidate = elements[i];
      // A string pattern in replace() only touches the first match, so use a
      // global regex to ignore all whitespace when scoring.
      length = candidate.textContent.replace(/\s+/g, '').length;
      if (length > maxLength) {
        maxLength = length;
        biggest = candidate;
      }
    }

    // Without a winner there is nothing to keep; bail out rather than wiping
    // every candidate from the page.
    if (!biggest) {
      return null;
    }

    for (i = 0; i < elements.length; i++) {
      candidate = elements[i];
      if (candidate !== biggest && !biggest.contains(candidate) && candidate.parentNode) {
        candidate.parentNode.removeChild(candidate);
      }
    }
    return biggest;
  }

  var best = null;
  var candidates = document.body.querySelectorAll(CONTAINER_SELECTOR);

  for (var pass = 0; pass < loops; pass++) {
    var winner = extractBiggestNode(candidates);
    if (!winner) {
      // Descent bottomed out: keep the result of the previous pass.
      break;
    }
    best = winner;
    candidates = winner.querySelectorAll(CONTAINER_SELECTOR);
  }

  if (best) {
    console.log(best.textContent, best.textContent.length, best);
  }
  return best;
};
// v2
// Tested and working on: theverge.com, techcrunch.com
// Known to fail on: pandodaily.com
/**
 * Strips the page down to its main text container.
 *
 * Among the div/section/article elements nested inside `element`, the one with
 * the longest textContent is selected (ignoring empty containers and containers
 * whose own text is less than a tenth of the summed text of the containers
 * nested inside them). If the summed container text of `element` exceeds the
 * summed container text of the selected element by more than
 * `length of element's text * lengthMultiplier`, the function recurses into the
 * selected element with the multiplier raised by 0.2.
 *
 * When it stops descending, every element under document.body that is not the
 * final element, one of its ancestors, or one of its descendants is removed.
 *
 * Destructive: removed elements are detached from the live document.
 *
 * @param {Element=} element Root to search in; defaults to document.body.
 * @param {number=} lengthMultiplier Descent threshold as a fraction of the
 *     root's text length; defaults to 0.1.
 * @returns {Element} The element the search ended on.
 */
window.cleanUp = function cleanUp(element, lengthMultiplier) {
  'use strict';

  var CONTAINER_SELECTOR = 'div,section,article';
  var ALMOST_EMPTY_RATIO = 0.1;
  var MULTIPLIER_STEP = 0.2;
  var MIN_MULTIPLIER = 0.1;

  /** Sums textContent lengths of all containers nested inside `root`. */
  function nestedTextLength(root) {
    var nested = root.querySelectorAll(CONTAINER_SELECTOR);
    var total = 0;
    for (var k = 0; k < nested.length; k++) {
      total += nested[k].textContent.length;
    }
    return total;
  }

  var root = element || document.body;
  var multiplier = lengthMultiplier || 0.1;
  console.log('Running loop with multiplier: ' + multiplier, root);

  var containers = root.querySelectorAll(CONTAINER_SELECTOR);
  var rootChildLength = 0;
  var longestElement = null;
  var longestLength = 0;
  var longestChildLength = 0;

  for (var i = 0; i < containers.length; i++) {
    var container = containers[i];
    var containerLength = container.textContent.length;

    // The root's nested-text total covers every container, including the
    // ones skipped below.
    rootChildLength += containerLength;

    // Skip non-text elements
    if (containerLength === 0) {
      continue;
    }

    var containerChildLength = nestedTextLength(container);

    // Skip 'almost empty' elements
    if (containerLength < containerChildLength * ALMOST_EMPTY_RATIO) {
      continue;
    }

    // Strict '>' keeps the first container in document order on ties.
    if (!longestElement || containerLength > longestLength) {
      longestElement = container;
      longestLength = containerLength;
      longestChildLength = containerChildLength;
    }
  }

  if (longestElement) {
    var length = root.textContent.length;
    var lengthDiff = length - longestLength;
    var childLengthDiff = rootChildLength - longestChildLength;

    console.log(root, ' -> ', longestElement);
    console.log('length: ' + length);
    console.log('longestLength: ' + longestLength);
    console.log('childLength: ' + rootChildLength);
    console.log('longestChildLength: ' + longestChildLength);
    console.log('lengthDiff: ' + lengthDiff);
    console.log('childLengthDiff: ' + childLengthDiff);

    if (childLengthDiff > length * multiplier) {
      var nextMultiplier = multiplier + MULTIPLIER_STEP;
      if (nextMultiplier < MIN_MULTIPLIER) {
        nextMultiplier = MIN_MULTIPLIER;
      }
      // Recurse through the function's own name so the descent does not
      // depend on the global binding.
      return cleanUp(longestElement, nextMultiplier);
    }
  }

  // Drop everything that is not on the path to, or inside of, the chosen root.
  var allElements = document.body.querySelectorAll('*');
  for (var j = 0; j < allElements.length; j++) {
    var candidate = allElements[j];
    // contains() is inclusive, so this keeps the root, its ancestors and its
    // descendants.
    if (candidate.contains(root) || root.contains(candidate)) {
      continue;
    }
    if (candidate.parentNode) {
      candidate.parentNode.removeChild(candidate);
    }
  }

  console.log('Ended on: ', root);
  console.log('End');
  return root;
};
// Run the v2 clean-up right away with its defaults (document.body, multiplier 0.1).
window.cleanUp();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment