Skip to content

Instantly share code, notes, and snippets.

@SimplGy
Last active November 5, 2018 04:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save SimplGy/a03294eb4e3a8d36e54df8b8014c88c7 to your computer and use it in GitHub Desktop.
Save SimplGy/a03294eb4e3a8d36e54df8b8014c88c7 to your computer and use it in GitHub Desktop.
This counts up all the text (innerText characters, not words) at each level of the DOM tree.
// Procedure: run the analysis on the whole page, starting from <body>.
// The stats tree that countWords returns is discarded here; countWords also
// writes each visited element's label to its `title` attribute.
countWords(document.body);
/**
 * Count the text inside an element and, recursively, inside each child element.
 *
 * Parents include all the text of their children. Counts are characters of the
 * trimmed innerText, not words. Additionally, notes how much of the text comes
 * "before" this node in depth-first DOM order. Only text belonging to earlier
 * sibling *elements* is added to the "before" figure; text nodes sitting
 * directly in a parent between its child elements are not.
 *
 * The purpose of this was to find which subtrees of the DOM have most of the text.
 *
 * Side effect: the label built for every visited element is also written to
 * that element's `title` attribute.
 *
 * The output looks like:
 *   "div: 8264 (22.55%) inside. 433 (1.19%) before)": [
 *   "p: 292 (0.8%) inside. 433 (1.19%) before)": [
 *   "span: 72 (0.2%) inside. 433 (1.19%) before)": []
 *   "span: 119 (0.33%) inside. 505 (1.38%) before)": []
 *   "a: 21 (0.06%) inside. 624 (1.71%) before)": [
 *   "span: 21 (0.06%) inside. 624 (1.71%) before)": []
 *   "span: 77 (0.22%) inside. 645 (1.76%) before)": []
 *
 * @param {Element} el - Root of the subtree to measure.
 * @param {number} [max] - Character total that percentages are relative to.
 *   When omitted, the (untrimmed) length of `el.innerText` is used.
 * @param {number} [charsSeen=0] - Characters already counted before `el`.
 * @returns {Object|undefined} `{ [label]: childStats[] }`, or `undefined` when
 *   `el` is missing or `shouldStop(el)` says to skip it.
 */
function countWords(el, max, charsSeen = 0) {
  // Guard before touching el.innerText: computing the default for `max` in the
  // parameter list would throw on a null/undefined element before this check ran.
  if (el == null || shouldStop(el)) {
    return undefined;
  }

  // Only an omitted `max` falls back to this element's own text length.
  const total = max === undefined ? el.innerText.length : max;

  const innerText = cleanText(el);
  const len = innerText.length;

  // If there aren't many letters, show them; otherwise show a count
  const desc = (len > 0 && len < 42)
    ? `'${innerText}'`
    : len;

  // What % of total text does this node have under it, and before it?
  const inside = toPercent(len / total);
  const before = toPercent(charsSeen / total);

  // The trailing ")" is part of the established label format; kept as-is.
  const label = `${nameOf(el)}: ${desc} (${inside}%) inside. ${charsSeen} (${before}%) before)`;
  el.setAttribute('title', label);

  const kidStats = [];
  // Running "before" offset for the children; starts at this node's own offset.
  let seen = charsSeen;
  for (const kid of Array.from(el.children)) {
    const stats = countWords(kid, total, seen); // Recurse
    if (!stats) { continue; } // skipped children contribute nothing
    kidStats.push(stats);
    seen += cleanText(kid).length;
  }

  return { [label]: kidStats };
}
/**
 * Short display name for an element: its tag name in lower case.
 *
 * @param {{tagName: string}} el - Element (or element-like object) to name.
 * @returns {string} Lower-cased tag name.
 */
function nameOf(el) {
  const { tagName } = el;
  const lowered = tagName.toLowerCase();
  return `${lowered}`;
}
/**
 * Decide whether traversal should skip an element.
 *
 * @param {?{tagName: string, innerText: ?string}} el - Candidate element.
 * @returns {boolean} true when `el` is null/undefined, is a SCRIPT or IMG tag,
 *   or has a missing or empty `innerText`; false otherwise.
 */
function shouldStop(el) {
  if (el == null) {
    return true;
  }

  // Non-content tags (matched against the upper-case tagName).
  // TODO: save image paths
  const tag = el.tagName;
  if (tag === 'SCRIPT' || tag === 'IMG') {
    return true;
  }

  // Nothing to count when there is no text at all.
  const text = el.innerText;
  return text == null || text.length === 0;
}
/**
 * Trimmed `innerText` of an element, or '' when there is none.
 *
 * @param {?{innerText: ?string}} [el={}] - Element to read. null and undefined
 *   are both tolerated (the parameter default alone only covers undefined).
 * @returns {string} `el.innerText` with surrounding whitespace removed, or ''
 *   when the element or its text is missing.
 */
function cleanText(el = {}) {
  if (el == null) {
    return '';
  }
  return (el.innerText || '').trim();
}
/**
 * Convert a fraction to a percentage, rounded UP to two decimal places.
 *
 * The scaled value is first cut to 12 significant digits so binary
 * floating-point noise cannot push an exact result up by 0.01
 * (0.07 * 10000 evaluates to 700.0000000000001, which a bare Math.ceil
 * would turn into 7.01 instead of 7).
 *
 * @param {number} decimal - Fraction to convert, e.g. 0.2255.
 * @returns {number} Percentage with at most two decimals, e.g. 22.55.
 */
function toPercent(decimal) {
  const scaled = Number((decimal * 10000).toPrecision(12));
  return Math.ceil(scaled) / 100;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment