Skip to content

Instantly share code, notes, and snippets.

@calvinf
Created December 10, 2010 08:53
Show Gist options
  • Save calvinf/735990 to your computer and use it in GitHub Desktop.
Save calvinf/735990 to your computer and use it in GitHub Desktop.
Utility Functions, a Test Class, and test cases for counting words in input text
/*
 * 3 test cases
 * Each line constructs a WordCountTestCase with: the paragraph to analyse,
 * the expected highest word-frequency count, the words expected to appear
 * among the most frequent words, and a test name. Constructing the object
 * runs the checks immediately and logs "<name>: Passed." or "<name>: Failed."
 * to the console.
 * NOTE(review): these statements run at load time and need both
 * WordCountTestCase and Utils to be defined already; in this listing Utils
 * is assigned further down, so confirm the actual load order.
 */
/* leading/trailing whitespace, mixed case and trailing punctuation */
var lazy = new WordCountTestCase(' The lazy brown dog jumped over the fuzzy dog! ', 2, ['the','dog'], 'Lazy Dog Test');
/* long multi-sentence paragraph */
var lorem = new WordCountTestCase('Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat. Ut wisi enim ad minim veniam, quis nostrud exerci tation ullamcorper suscipit lobortis nisl ut aliquip ex ea commodo consequat. Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. Nam liber tempor cum soluta nobis eleifend option congue nihil imperdiet doming id quod mazim placerat facer possim assum. Typi non habent claritatem insitam; est usus legentis in iis qui facit eorum claritatem. Investigationes demonstraverunt lectores legere me lius quod ii legunt saepius. Claritas est etiam processus dynamicus, qui sequitur mutationem consuetudium lectorum. Mirum est notare quam littera gothica, quam nunc putamus parum claram, anteposuerit litterarum formas humanitatis per seacula quarta decima et quinta decima. Eodem modo typi, qui nunc nobis videntur parum clari, fiant sollemnes in futurum.', 4, ['qui', 'in'], 'Lorem Ipsum Test');
/* text containing quotes, slashes, parentheses and hyphenated words */
var slashdot = new WordCountTestCase('Meshach writes "Amidst all the angst about Google taking away the caps lock key from Chrome it now appears that is not the case. With one small change any user can change the Modifier Key from a Search key to a Caps Lock key. Peace has been restored..."If there must be such a thing as a Caps Lock key on conventional keyboards, I wish it could be banished (along with the Insert/Delete pair) to a hard-to-fumble-upon switch on the bottom of the keyboard or laptop.', 7, ['the'], 'Slashdot Paragraph Test');
/*
 * WordCountTestCase Class
 * Runs Utils.wordFrequencyCount on a paragraph and checks the outcome as
 * soon as the object is constructed; the verdict is logged to the console.
 *
 * Expects:
 *   paragraph     - text to analyse
 *   expectedCount - integer, expected count of the most frequent word(s)
 *   expectedWords - array of words that must be among the most frequent
 *                   words (optional; a missing value is treated as [])
 *   testName      - label used in the console output (defaults to 'Test')
 *
 * Exposes:
 *   results - the object returned by Utils.wordFrequencyCount
 *   passed  - boolean, true when both the count and the word checks succeed
 */
function WordCountTestCase(paragraph, expectedCount, expectedWords, testName) {
    this.testName = testName || 'Test';
    this.paragraph = paragraph;
    this.expectedCount = expectedCount;
    // Fall back to an empty list so testWords() can always read .length.
    this.expectedWords = expectedWords || [];
    this.results = Utils.wordFrequencyCount(this.paragraph);

    // A primitive returned from a constructor invoked with `new` is
    // discarded, so the verdict is kept on the instance where callers can
    // actually read it.
    this.passed = this.testCount() && this.testWords();

    if (this.passed) {
        console.log('%s: Passed.', this.testName);
    } else {
        console.log('%s: Failed.', this.testName);
    }

    // Kept from the original; has no effect when invoked with `new`.
    return this.passed;
}

/*
 * test count of words
 * returns true when results.count is strictly equal to expectedCount,
 * otherwise logs the mismatch and returns false
 */
WordCountTestCase.prototype.testCount = function() {
    if (this.expectedCount !== this.results.count) {
        console.log('Test failure: Expected count (%s) does not match.', this.expectedCount);
        return false;
    }
    return true;
};

/*
 * test expected words
 * returns true when every expected word is present in results.words;
 * logs the first missing word and returns false otherwise
 */
WordCountTestCase.prototype.testWords = function() {
    for (var i = 0, len = this.expectedWords.length; i < len; i++) {
        // Utils.indexOf reports a missing item as -1. The former
        // `typeof index === undefined` guard compared a string with
        // undefined and could never be true, so it has been dropped.
        if (Utils.indexOf(this.results.words, this.expectedWords[i]) === -1) {
            console.log('Test failure: Expected word (' + this.expectedWords[i] + ') not found.');
            return false;
        }
    }
    return true;
};
/* Utils: namespace object holding the string and array helpers below */
var Utils = {};

/*
 * trim whitespace
 * returns text (converted with toString) without leading and trailing
 * whitespace; null and undefined yield ''
 */
Utils.trim = function(text) {
    // `== null` matches both null and undefined, so neither reaches toString().
    return text == null ? '' : text.toString().replace(/^\s+/, '').replace(/\s+$/, '');
};

/*
 * lowercase, remove leading & trailing non-word characters
 * null and undefined yield ''
 */
Utils.filter = function(text) {
    return text == null ? '' : text.toString().toLowerCase().replace(/^\W+/, '').replace(/\W+$/, '');
};

/*
 * count highest frequency words
 * Words are runs of \w characters, compared case-insensitively.
 * return object w/ count and words array:
 *   count - number of occurrences of the most frequent word(s), 0 when the
 *           text contains no words
 *   words - the lowercased word(s) that occur `count` times, in the order
 *           in which each one reached that count
 */
Utils.wordFrequencyCount = function(text) {
    text = text ? Utils.trim(text) : '';
    var words = text.split(/\W+/); //split on 1 or more non-word characters
    var unique = {}; //hash for count of unique words
    var highest = { //used to track highest as we go, avoid additional loop
        count : 0,
        words : []
    };
    for (var i = 0, len = words.length; i < len; i++) {
        var word = Utils.filter(words[i]); //lowercases word
        // split() produces '' when the text starts or ends with non-word
        // characters (or is empty); that is not a word, so skip it.
        if (word === '') {
            continue;
        }
        // Prefix the key so words such as "constructor", "toString" or
        // "__proto__" cannot collide with members inherited from
        // Object.prototype.
        var key = '$' + word;
        var seen = (unique[key] || 0) + 1;
        unique[key] = seen;
        if (seen > highest.count) {
            // new leader: restart the list with just this word
            highest.count = seen;
            highest.words = [word];
        } else if (seen === highest.count) {
            // ties with the current leader(s)
            highest.words.push(word);
        }
    }
    return highest;
};

/*
 * array indexOf function (since not all browsers have)
 * returns the index of the first element strictly equal to item, or -1
 */
Utils.indexOf = function(arr, item) {
    for (var i = 0, length = arr.length; i < length; i++) {
        if (arr[i] === item) {return i;}
    }
    return -1;
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment