Created
October 3, 2019 01:47
-
-
Save gusmcnair/a67946c96feb01b41c4a1ad526703443 to your computer and use it in GitHub Desktop.
Grokking assignment: Most frequent word
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Breaks a string into lowercase word tokens, sorted in default string order.
 *
 * Tokens are separated by runs of spaces and the punctuation characters
 * , ! . " ; : and -. Any other character (for example ? or a newline) stays
 * part of the token it appears in.
 *
 * @param {string} rawString - Text to tokenize.
 * @returns {string[]} Sorted, lowercased, non-empty tokens.
 */
function getTokens(rawString) {
  // Lowercase first so "The" and "the" end up as the same token.
  const lowered = rawString.toLowerCase();
  const pieces = lowered.split(/[ ,!.";:-]+/);
  // A leading or trailing delimiter leaves an empty string behind; drop those.
  const tokens = pieces.filter((piece) => Boolean(piece));
  // Default sort compares strings by UTF-16 code units.
  tokens.sort();
  return tokens;
}
/**
 * Finds the word that occurs most often in a piece of text.
 *
 * The text is tokenized with getTokens. When several words share the highest
 * count, the one that comes first in the token list wins.
 *
 * @param {string} text - Text to analyze.
 * @returns {string|undefined} The most frequent word, or undefined when the
 *   text contains no words.
 */
function mostFrequentWord(text) {
  const words = getTokens(text);

  // A Map is used instead of a plain object so that words such as
  // "constructor" or "__proto__" cannot collide with inherited properties,
  // and so that numeric-looking words keep their insertion order.
  const wordFrequencies = new Map();

  // for...of visits exactly the tokens that exist; an index loop that runs
  // one step too far would count a phantom "undefined" word.
  for (const word of words) {
    // Stored counts are always >= 1, so `|| 0` only applies to unseen words.
    wordFrequencies.set(word, (wordFrequencies.get(word) || 0) + 1);
  }

  let currentMaxKey;
  let currentMaxCount = 0;

  // Strict > keeps the earliest word on ties, because Map iteration follows
  // insertion order.
  for (const [word, count] of wordFrequencies) {
    if (count > currentMaxCount) {
      currentMaxKey = word;
      currentMaxCount = count;
    }
  }

  return currentMaxKey;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment