Created
December 10, 2010 08:53
-
-
Save calvinf/735990 to your computer and use it in GitHub Desktop.
Utility Functions, a Test Class, and test cases for counting words in input text
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * 3 test cases
 * Each WordCountTestCase is constructed with:
 *   (paragraph, expected highest frequency, expected most-frequent words, test name)
 * Constructing a case runs it immediately and logs the outcome to the console.
 */
var lazy = new WordCountTestCase(' The lazy brown dog jumped over the fuzzy dog! ', 2, ['the','dog'], 'Lazy Dog Test');
var lorem = new WordCountTestCase('Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat. Ut wisi enim ad minim veniam, quis nostrud exerci tation ullamcorper suscipit lobortis nisl ut aliquip ex ea commodo consequat. Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. Nam liber tempor cum soluta nobis eleifend option congue nihil imperdiet doming id quod mazim placerat facer possim assum. Typi non habent claritatem insitam; est usus legentis in iis qui facit eorum claritatem. Investigationes demonstraverunt lectores legere me lius quod ii legunt saepius. Claritas est etiam processus dynamicus, qui sequitur mutationem consuetudium lectorum. Mirum est notare quam littera gothica, quam nunc putamus parum claram, anteposuerit litterarum formas humanitatis per seacula quarta decima et quinta decima. Eodem modo typi, qui nunc nobis videntur parum clari, fiant sollemnes in futurum.', 4, ['qui', 'in'], 'Lorem Ipsum Test');
var slashdot = new WordCountTestCase('Meshach writes "Amidst all the angst about Google taking away the caps lock key from Chrome it now appears that is not the case. With one small change any user can change the Modifier Key from a Search key to a Caps Lock key. Peace has been restored..."If there must be such a thing as a Caps Lock key on conventional keyboards, I wish it could be banished (along with the Insert/Delete pair) to a hard-to-fumble-upon switch on the bottom of the keyboard or laptop.', 7, ['the'], 'Slashdot Paragraph Test');
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * WordCountTestCase Class
 * Runs a single word-frequency test as soon as it is constructed.
 *
 * Expects:
 *   paragraph     - text handed to Utils.wordFrequencyCount
 *   expectedCount - integer, expected highest word frequency
 *   expectedWords - array of words that must appear among the most frequent
 *                   words (optional, defaults to an empty array)
 *   testName      - label used in the console output (optional, defaults to 'Test')
 *
 * The outcome is logged to the console and stored on this.passed.
 * The boolean is also returned for callers that invoke the function without
 * `new`; when called with `new` JavaScript discards a primitive return value,
 * so read this.passed instead.
 */
function WordCountTestCase(paragraph, expectedCount, expectedWords, testName) {
    this.testName = testName || 'Test';
    this.paragraph = paragraph;
    this.expectedCount = expectedCount;
    // a missing word list means "no words to verify" rather than a crash in testWords
    this.expectedWords = expectedWords || [];
    this.results = Utils.wordFrequencyCount(this.paragraph);

    // the word check is skipped when the count check has already failed
    this.passed = this.testCount() && this.testWords();

    if (this.passed) {
        console.log('%s: Passed.', this.testName);
    } else {
        console.log('%s: Failed.', this.testName);
    }
    return this.passed;
}

/*
 * test count of words
 * returns true when the highest frequency found equals expectedCount,
 * otherwise logs the failure and returns false
 */
WordCountTestCase.prototype.testCount = function() {
    if (this.expectedCount !== this.results.count) {
        console.log('Test failure: Expected count (%s) does not match.', this.expectedCount);
        return false;
    }
    return true;
};

/*
 * test expected words
 * returns true when every expected word is present in results.words,
 * otherwise logs the first missing word and returns false
 */
WordCountTestCase.prototype.testWords = function() {
    for (var i = 0, len = this.expectedWords.length; i < len; i++) {
        var index = Utils.indexOf(this.results.words, this.expectedWords[i]);
        // compare against the value undefined itself: typeof yields a string,
        // so `typeof index === undefined` could never be true
        if (index === undefined || index === -1) {
            console.log('Test failure: Expected word (' + this.expectedWords[i] + ') not found.');
            return false;
        }
    }
    return true;
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * Utils: namespace object holding the string and array helpers below
 */
var Utils = {};

/*
 * trim whitespace
 * null and undefined yield ''; any other value is converted with toString()
 * before leading and trailing whitespace is removed
 */
Utils.trim = function(text) {
    // == null matches both null and undefined
    if (text == null) {
        return '';
    }
    return text.toString().replace(/^\s+/, '').replace(/\s+$/, '');
};

/*
 * lowercase, remove leading & trailing non-word characters
 * null and undefined yield ''
 */
Utils.filter = function(text) {
    if (text == null) {
        return '';
    }
    return text.toString().toLowerCase().replace(/^\W+/, '').replace(/\W+$/, '');
};

/*
 * count highest frequency words
 * return object w/ count (highest frequency seen) and words array
 * (the lowercased words sharing that frequency, in the order they reached it)
 * falsy or word-less input returns { count: 0, words: [] }
 */
Utils.wordFrequencyCount = function(text) {
    var trimmed = text ? Utils.trim(text) : '';
    var words = trimmed.split(/\W+/); //split on 1 or more non-word characters
    var unique = {}; //hash for count of unique words, keyed by '#' + word
    var highest = { //used to track highest as we go, avoid additional loop
        count : 0,
        words : []
    };

    for (var i = 0, len = words.length; i < len; i++) {
        var word = Utils.filter(words[i]); //lowercases word

        // split() yields '' next to leading/trailing punctuation and for
        // empty input; that is not a word and must not be tallied
        if (word === '') {
            continue;
        }

        // the '#' prefix keeps words such as 'constructor' or '__proto__'
        // from colliding with members inherited from Object.prototype
        var key = '#' + word;
        unique[key] = (unique[key] || 0) + 1;

        if (unique[key] > highest.count) {
            highest.count = unique[key];
            highest.words = [word];
        } else if (unique[key] === highest.count) {
            highest.words.push(word);
        }
    }
    return highest;
};

/*
 * array indexOf function (since not all browsers have)
 * returns the index of the first element strictly equal to item, or -1
 * a null or undefined array is treated as empty
 */
Utils.indexOf = function(arr, item) {
    if (arr == null) {
        return -1;
    }
    for (var i = 0, length = arr.length; i < length; i++) {
        if (arr[i] === item) {
            return i;
        }
    }
    return -1;
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment