Skip to content

Instantly share code, notes, and snippets.

@AyoAlfonso
Forked from mwunsch/text_nodes.js
Created October 30, 2022 14:48
Show Gist options
  • Save AyoAlfonso/1cda6f355692f0fc8b2f3f9507b781f0 to your computer and use it in GitHub Desktop.
Save AyoAlfonso/1cda6f355692f0fc8b2f3f9507b781f0 to your computer and use it in GitHub Desktop.
Get the text nodes out of a document, ignoring those inside elements whose text is unlikely to be valuable (like <script> tags) and those containing only whitespace.
/**
 * Collect the text nodes under `document.body` that are likely to carry
 * meaningful content.
 *
 * A text node is left out when:
 *   - its parent is one of the ignored elements (SCRIPT, STYLE, NOSCRIPT,
 *     OPTION, TEXTAREA), or
 *   - its value is empty or consists only of whitespace.
 *
 * @returns {Array} The accepted text nodes, in the order the TreeWalker
 *     visits them. An empty array is returned when `createTreeWalker` is
 *     unavailable or the document has no body yet.
 */
function getLegitTextNodes() {
  // Without a walker implementation or a root to walk there is nothing to
  // collect. The body check covers scripts that run before <body> exists.
  if (!document.createTreeWalker || !document.body) return [];

  // Parent elements whose text is code, raw markup or form data, not content.
  var ignoredParents = ['SCRIPT', 'STYLE', 'NOSCRIPT', 'OPTION', 'TEXTAREA'];
  var textNodes = [];

  var walker = document.createTreeWalker(
    document.body,
    NodeFilter.SHOW_TEXT,
    function excludeBlacklistedNodes(node) {
      var parent = node.parentNode;
      // nodeName is upper-cased so lower-case names are matched as well.
      if (parent && ignoredParents.indexOf(String(parent.nodeName).toUpperCase()) >= 0) {
        return NodeFilter.FILTER_REJECT;
      }
      // A regex test avoids depending on String.prototype.trim being present.
      if (!/\S/.test(node.nodeValue)) return NodeFilter.FILTER_SKIP;
      return NodeFilter.FILTER_ACCEPT;
    },
    // Legacy fourth argument (entity reference expansion), kept from the
    // original call for older engines.
    false
  );

  while (walker.nextNode()) textNodes.push(walker.currentNode);
  return textNodes;
}
// usage:
// getLegitTextNodes().forEach(function (node, i) { console.log(node.nodeValue) })
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment