Skip to content

Instantly share code, notes, and snippets.

@crismanNoble
Created September 5, 2012 18:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save crismanNoble/3641428 to your computer and use it in GitHub Desktop.
Save crismanNoble/3641428 to your computer and use it in GitHub Desktop.
Get frequency of words on a webpage.
//TODOs: 1) cover more than just <p> elements, and count only words. 2) strip out &amp; entities and similar markup. 3) order by most-used first
//via: https://www.squarefree.com/bookmarklets/seo.html
javascript:/*
 * Word-frequency bookmarklet.
 *
 * Walks every text node of the current document (skipping the contents of
 * SCRIPT and STYLE elements), lower-cases the text, splits it on whitespace
 * and common punctuation, tallies each word, and writes the result into a
 * newly opened window as "<count>:<word>" lines, most frequent first.
 *
 * Only block comments are used so the code still works if the lines are
 * joined into a single bookmark URL.
 */
(function () {
  var tally = {};   /* " " + word -> number of occurrences */
  var rows = [];    /* [count, key] pairs used for sorting */
  var total = 0;    /* total number of words seen */
  var splitter = /[\s\(\)\:\,\.;\<\>\&\'\"]/;
  var hasOwn = Object.prototype.hasOwnProperty;
  var key, i, html, popup;

  /* Recursively counts the words in node and all of its descendants. */
  function visit(node) {
    var tag = node.tagName;
    var parts, word, wordKey, j, child;
    if (node.nodeType == 3) {
      parts = node.data.toLowerCase().split(splitter);
      /* Index loop, not for...in: pages that extend Array.prototype would
         otherwise have those extensions counted as words. */
      for (j = 0; j < parts.length; ++j) {
        word = parts[j];
        if (word) {
          /* The leading space keeps words such as "constructor" from
             colliding with properties inherited from Object.prototype. */
          wordKey = " " + word;
          tally[wordKey] = tally[wordKey] ? tally[wordKey] + 1 : 1;
          ++total;
        }
      }
    }
    if (tag != "SCRIPT" && tag != "STYLE") {
      for (j = 0; (child = node.childNodes[j]); ++j) {
        visit(child);
      }
    }
  }

  visit(document);

  for (key in tally) {
    /* Ignore enumerable properties the page may have added to Object.prototype. */
    if (hasOwn.call(tally, key)) {
      rows.push([tally[key], key]);
    }
  }

  /* Highest count first; ties are broken alphabetically by word. */
  rows.sort(function (a, b) {
    var diff = b[0] - a[0];
    return diff ? diff : ((b[1] < a[1]) ? 1 : -1);
  });

  /* Words cannot contain <, >, &, ' or " because the splitter removes them,
     so they are written into the markup as-is. */
  html = "<h3>" + total + " words</h3>";
  for (i = 0; i < rows.length; ++i) {
    html += rows[i][0] + ":" + rows[i][1] + "<br>";
  }

  popup = open();
  if (!popup) {
    /* open() yields null when the browser blocks the pop-up. */
    alert("Word count: the results window was blocked by the browser.");
    return;
  }
  /* Explicit document reference instead of the deprecated with statement. */
  popup.document.write(html);
  popup.document.close();
})()
//via: http://stackoverflow.com/questions/4367986/how-to-get-all-textnodes-in-html-document-from-specific-tags-using-javascript
//solving number 1:
/**
 * Collects, in document order, every text node under `root` that has an
 * ancestor element (at or below `root`) whose nodeName matches one of the
 * given tag names. Matching is case-insensitive.
 *
 * @param {Node} root - node at which the traversal starts.
 * @param {string[]} tagNamesArray - element names whose text should be collected.
 * @returns {Node[]} the matching text nodes.
 */
function getTextNodes(root, tagNamesArray) {
  var tagPattern = new RegExp("^(" + tagNamesArray.join("|") + ")$", "i");
  var collected = [];

  // Depth-first walk; `withinMatch` records whether an ancestor already matched.
  function walk(current, withinMatch) {
    if (current.nodeType == 3 && withinMatch) {
      collected.push(current);
      return;
    }
    if (current.nodeType != 1) {
      return;
    }
    var childrenWithinMatch = withinMatch || tagPattern.test(current.nodeName);
    var kid = current.firstChild;
    while (kid) {
      walk(kid, childrenWithinMatch);
      kid = kid.nextSibling;
    }
  }

  // The root itself starts outside of any matching element.
  walk(root, false);
  return collected;
}
// Collect every text node under <body> that sits inside a <blockquote>, <em>, <h4>, <h6> or <p> element.
var textNodes = getTextNodes(document.body, ["blockquote","em","h4","h6","p"]);
//via: http://oreilly.com/javascript/excerpts/javascript-good-parts/awful-parts.html#object
//and: http://stackoverflow.com/questions/3479776/help-me-write-a-bookmarklete-that-counts-word-frequency
/**
 * Counts how often each word occurs in the text of the page's <p> elements.
 *
 * The text of all paragraphs is lower-cased and split on runs of whitespace,
 * commas and periods.
 *
 * @returns {Object} map from lower-cased word to its number of occurrences.
 */
function countWordFrequency(){
  var pars = document.getElementsByTagName('p');
  var chunks = [];
  var i;
  for (i = 0; i < pars.length; i++) {
    // Read rendered text rather than innerHTML so tags, attributes and
    // entities such as &amp; are not counted as words. innerText is the
    // fallback for browsers without textContent.
    chunks.push(pars[i].textContent || pars[i].innerText || '');
  }
  // Join with a space so the last word of one paragraph does not fuse with
  // the first word of the next.
  var words = chunks.join(' ').toLowerCase().split(/[\s,.]+/);
  var freq = {};
  for (i = 0; i < words.length; i++) {
    var word = words[i];
    // Splitting leaves an empty token when the text starts or ends with a
    // separator; that is not a word.
    if (word === '') {
      continue;
    }
    // A plain truthiness test would misfire on names inherited from
    // Object.prototype, e.g. the word "constructor".
    if (typeof freq[word] === 'number') {
      freq[word] += 1;
    } else {
      freq[word] = 1;
    }
  }
  return freq;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment