Skip to content

Instantly share code, notes, and snippets.

@Bevilacqua
Created February 11, 2017 13:32
Show Gist options
  • Save Bevilacqua/50cfcd26579ce4386ae46bd4b6c3692d to your computer and use it in GitHub Desktop.
Save Bevilacqua/50cfcd26579ce4386ae46bd4b6c3692d to your computer and use it in GitHub Desktop.
Search between an HTML document's body tags and find the most common words
// Entry point: jQuery invokes this callback once the DOM is ready. It shows a
// confirmation dialog and then kicks off the word-frequency scan of the page.
$(function handleDomReady() {
  alert("DOM loaded");
  parseHTML();
});
/**
 * Tallies how often each word-like token occurs in the markup of the page's
 * <body> element and logs the 100 most frequent tokens.
 *
 * A token is kept only when it consists solely of letters, digits, hyphens
 * and underscores; anything else (tags, punctuation, empty strings produced
 * by repeated spaces) is skipped and reported with a "skip" log line.
 * Counting is case-sensitive.
 *
 * Relies on the globals `$` (jQuery) and `alert`.
 *
 * @returns {Array<Array>} Up to 100 `[token, count]` pairs, highest count
 *   first — the same array that is written to the console.
 */
function parseHTML() {
  // Rejects any token containing a character outside [A-Za-z0-9-_].
  var nonWordChar = /[^A-Za-z0-9\-_]/;

  var content = $('body').html();
  // Blocking debug dialog kept from the original implementation.
  alert(content);

  // Flatten line breaks and pad tag delimiters with spaces so that tags are
  // split away from the text that touches them.
  content = content.replace(/(?:\r\n|\r|\n)/g, ' ');
  content = content.replace(/</g, " <");
  content = content.replace(/>/g, "> ");
  alert(content);

  var divided = content.split(" ");
  alert(divided.toString());

  // A prototype-less object has no inherited keys, so tokens such as
  // "constructor" or "toString" start from zero instead of colliding with
  // Object.prototype members.
  var hash = Object.create(null);
  divided.forEach(function(element) {
    if (hash[element] !== undefined) {
      hash[element] += 1;
    } else if (element === '' || nonWordChar.test(element)) {
      console.log("skip");
    } else {
      hash[element] = 1;
    }
  });

  var items = Object.keys(hash).map(function(key) {
    return [key, hash[key]];
  });
  // Most frequent first.
  items.sort(function(first, second) {
    return second[1] - first[1];
  });

  var top = items.slice(0, 100);
  console.log(top);
  return top;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment