Last active
November 5, 2018 04:54
-
-
Save SimplGy/a03294eb4e3a8d36e54df8b8014c88c7 to your computer and use it in GitHub Desktop.
This counts up all the text (innerText characters, despite the "words" in the name) at each level of the DOM tree.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Procedure:
// Entry point: measure the whole page, starting at <body>. The function is
// declared further down; function declarations are hoisted, so calling it
// here is valid. The returned tree is not stored in a variable; countWords
// also writes each node's label to that element's `title` attribute.
countWords(document.body);
/**
 * Count the text inside each element and report it as a nested tree.
 *
 * Despite the name, the unit is characters: every element is measured by the
 * length of its trimmed `innerText`, so parents include all the text of their
 * children. Each node additionally notes how much text comes "before" it in
 * depth-first DOM order. That running total only grows by the text of child
 * elements that were themselves reported; text sitting directly in a parent
 * between its child elements is not added to it.
 *
 * The purpose of this is to find which subtrees of the DOM have most of the
 * text.
 *
 * Side effect: every reported element gets its label written to its `title`
 * attribute.
 *
 * The output looks roughly like this (illustrative; an element with fewer
 * than 42 characters shows its quoted text in place of the count):
 *
 *   "div: 8264 (22.55%) inside. 433 (1.19%) before)": [
 *     "p: 292 (0.8%) inside. 433 (1.19%) before)": [
 *       "span: 72 (0.2%) inside. 433 (1.19%) before)": []
 *       "span: 119 (0.33%) inside. 505 (1.38%) before)": []
 *       "a: 21 (0.06%) inside. 624 (1.71%) before)": [
 *         "span: 21 (0.06%) inside. 624 (1.71%) before)": []
 *       ]
 *       "span: 77 (0.22%) inside. 645 (1.76%) before)": []
 *     ]
 *   ]
 *
 * @param {Element} el - Root of the subtree to measure.
 * @param {number} [max] - Character count that represents 100%. Defaults to
 *   the (untrimmed) `innerText` length of `el`, or 0 when `el` has none.
 * @param {number} [charsSeen=0] - Characters already counted before `el`.
 * @returns {Object<string, Array>|undefined} A single-key object mapping this
 *   node's label to the array of its children's results, or `undefined` when
 *   `shouldStop` rejects the node (missing, non-content tag, or no text).
 */
function countWords(el, max = ((el && el.innerText) || '').length, charsSeen = 0) {
  if (shouldStop(el)) {
    return undefined;
  }

  const innerText = cleanText(el);
  const len = innerText.length;

  // If there aren't many letters, show them; otherwise show a count
  const desc = (len > 0 && len < 42) ? `'${innerText}'` : len;

  // What % of the total text is `count`? Guarded so that an explicit max of 0
  // reports 0% instead of NaN.
  const shareOf = (count) => toPercent(max > 0 ? count / max : 0);

  const label = `${nameOf(el)}: ${desc} (${shareOf(len)}%) inside. ${charsSeen} (${shareOf(charsSeen)}%) before)`;
  el.setAttribute('title', label);

  const kidStats = [];
  // Keep a local running total rather than reassigning the parameter.
  let seenSoFar = charsSeen;
  for (const kid of Array.from(el.children)) {
    const stats = countWords(kid, max, seenSoFar); // Recurse
    if (!stats) { continue; } // skipped kids contribute nothing
    kidStats.push(stats);
    seenSoFar += cleanText(kid).length;
  }
  return { [label]: kidStats };
}
/**
 * Display name for an element: its tag name in lower case.
 *
 * @param {Element} el - Element whose `tagName` is read.
 * @returns {string} The lower-cased tag name, e.g. `div`.
 */
function nameOf(el) {
  return el.tagName.toLowerCase();
}
/**
 * Decide whether an element should be left out of the report.
 *
 * @param {Element|null|undefined} el - Candidate element.
 * @returns {boolean} `true` when `el` is missing, is a SCRIPT or IMG tag, or
 *   has no `innerText` (absent or empty); `false` otherwise.
 */
function shouldStop(el) {
  if (el == null) { return true; }

  // Is one of the non-content tags
  const tagsWithNoContent = ['SCRIPT', 'IMG']; // TODO: save image paths
  if (tagsWithNoContent.includes(el.tagName)) { return true; }

  // Contains no text. innerText is read a single time because browsers
  // compute it from the rendered layout, which makes each access costly.
  const text = el.innerText;
  return text == null || text.length === 0;
}
/**
 * Text of an element with surrounding whitespace removed.
 *
 * @param {Element|null} [el] - Element whose `innerText` is read.
 * @returns {string} The trimmed `innerText`, or `''` when `el` is missing or
 *   its `innerText` is not a string.
 */
function cleanText(el = {}) {
  // The default parameter only covers `undefined`; handle `null` explicitly.
  const text = el == null ? '' : el.innerText;
  return typeof text === 'string' ? text.trim() : '';
}
/**
 * Show a fraction as a percentage, rounded UP to two decimal places.
 *
 * Rounding up means any non-zero share shows as at least 0.01 rather than 0.
 * The scaled value is first trimmed to 12 significant digits so floating-point
 * noise (e.g. 0.07 * 10000 === 700.0000000000001) is not rounded up into a
 * whole extra hundredth.
 *
 * @param {number} decimal - Fraction of the total, e.g. 0.25 for 25%.
 * @returns {number} The percentage, e.g. 25; NaN when `decimal` is NaN.
 */
function toPercent(decimal) {
  const hundredths = Number((decimal * 10000).toPrecision(12));
  return Math.ceil(hundredths) / 100;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment