Skip to content

Instantly share code, notes, and snippets.

@kynatro
Created November 30, 2023 19:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kynatro/8434fa85133562e1a4d3a8cf907bdcaf to your computer and use it in GitHub Desktop.
Save kynatro/8434fa85133562e1a4d3a8cf907bdcaf to your computer and use it in GitHub Desktop.
Word count and sort module
/**
 * Tokens that `words()` filters out when `removeIgnoredWords` is enabled.
 *
 * The empty string is listed so that tokens left empty by splitting or by
 * punctuation trimming are discarded together with the stop words.
 */
export const IGNORE_WORDS = [
  '',
  'a',
  'an',
  'and',
  'as',
  'be',
  'by',
  'for',
  'has',
  'he',
  'her',
  'hers',
  'him',
  'his',
  'in',
  'it',
  'of',
  'or',
  'our',
  'ours',
  'the',
  'their',
  'theirs',
  'this',
  'to',
  'us',
  'was',
  'we',
  'were',
];
/**
 * Sort counted words by their count in descending order
 *
 * Words with equal counts are ordered alphabetically (case-insensitively,
 * ascending). The input dictionary is not modified.
 *
 * Note: the result is a plain Object, so JavaScript's property enumeration
 * rules still apply: keys that look like non-negative integers (e.g. "2023")
 * are always enumerated first, in ascending numeric order, regardless of
 * their count.
 *
 * @param {Object} wordCounts Dictionary of words and counts
 * @returns {Object} Sorted dictionary of word counts
 */
export function sortByCount(wordCounts) {
  const orderedWords = Object.keys(wordCounts).sort((a, b) => {
    // Sort by count in descending order
    if (wordCounts[a] < wordCounts[b]) {
      return 1;
    }
    if (wordCounts[a] > wordCounts[b]) {
      return -1;
    }

    // Sort alphabetically in ascending order for like counts
    const lowerA = a.toLowerCase();
    const lowerB = b.toLowerCase();
    if (lowerA > lowerB) {
      return 1;
    }
    if (lowerA < lowerB) {
      return -1;
    }
    return 0;
  });

  // Collect into a prototype-less object first: assigning a key such as
  // "__proto__" on a regular object would hit the inherited setter and the
  // word would silently disappear from the result.
  const sorted = Object.create(null);
  for (const word of orderedWords) {
    sorted[word] = wordCounts[word];
  }

  // Spread copies own properties as plain data properties (same order) into
  // an ordinary object, so callers still receive a regular dictionary.
  return { ...sorted };
}
/**
 * Split text into words
 *
 * Lower-cases the text unless `caseSensitive` is set, splits it on runs of
 * whitespace (spaces, tabs, newlines), strips leading and trailing
 * punctuation from every token and, unless disabled, removes every token
 * listed in IGNORE_WORDS. Punctuation inside a token (e.g. "don't",
 * "stop-me") is kept.
 *
 * When `removeIgnoredWords` is false, tokens that end up empty (for example
 * a token made only of punctuation) are kept as empty strings.
 *
 * @param {String} text Corpus of text to split
 * @param {Object} [options]
 * @param {Boolean} [options.caseSensitive=false] Respect case sensitivity
 * @param {Boolean} [options.removeIgnoredWords=true] Remove IGNORE_WORDS entries from the returned Array
 * @returns {Array} Array of words
 */
export function words(text, options = {}) {
  // Default each option individually so that passing only one of them does
  // not silently reset the other to `undefined`.
  const { caseSensitive = false, removeIgnoredWords = true } = options;

  // Anything that is not a letter, combining mark, digit or underscore counts
  // as punctuation. The `g` flag is required so BOTH the leading and the
  // trailing run are removed; `u` enables the Unicode property escapes so
  // non-ASCII letters (e.g. "é") are not treated as punctuation.
  const edgePunctuation = /^[^\p{L}\p{M}\p{N}_]+|[^\p{L}\p{M}\p{N}_]+$/gu;

  const tokens = (caseSensitive ? text : text.toLowerCase())
    .split(/\s+/)
    .map(token => token.replace(edgePunctuation, ''));

  if (!removeIgnoredWords) {
    return tokens;
  }

  // Built per call so later changes to the exported IGNORE_WORDS array are honoured
  const ignored = new Set(IGNORE_WORDS);
  return tokens.filter(token => !ignored.has(token));
}
/**
 * Count unique instances of words
 *
 * Keys of the returned dictionary appear in order of first occurrence
 * (subject to JavaScript's rule that integer-like keys are enumerated first).
 *
 * @param {Array} words Array of words to count uniquely
 * @returns {Object} Dictionary of word counts
 */
export function wordCounts(words) {
  // Tally on a prototype-less object so words that collide with inherited
  // Object.prototype members ("constructor", "__proto__", ...) start from 0
  // instead of picking up the inherited value.
  const tally = Object.create(null);
  for (const word of words) {
    tally[word] = (tally[word] || 0) + 1;
  }

  // Copy into an ordinary object so callers keep receiving a plain dictionary
  return { ...tally };
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment