Skip to content

Instantly share code, notes, and snippets.

@zgover
Last active August 8, 2020 05:53
Show Gist options
  • Save zgover/8130eb3b7dd6f915316435e8f0cf657e to your computer and use it in GitHub Desktop.
Save zgover/8130eb3b7dd6f915316435e8f0cf657e to your computer and use it in GitHub Desktop.
/**
* @license
* Copyright Gover Construction LLC. All Rights Reserved.
*
* Use of this source code is governed by an MIT-style license that can be
* found in the LICENSE file at https://gist.github.com/zgover/678d51ffc2477d2e714052d7154f784b
*/
/**
* Removes stop words (e.g. the, we, you, did, a, etc.) from
* the provided text.
*
* The text is coerced with `String()`, split on the separator, and every
* token whose lowercase form appears in `stopWords` is dropped. Matching is
* exact: a token with attached punctuation (e.g. "the,") is not treated as a
* stop word, and empty tokens produced by consecutive separators are kept.
*
* @param {String} str the text to remove stop words from
* @param {Object} [opt] the configuration options
* @param {String} [opt.separator=' '] the character between two potential stop words
* @returns {String[]} the remaining words, in their original order and casing;
*   an empty array when `str` is null or undefined
**/
export default function removeStopWords(str, opt = {}) {
  // A missing input is treated as empty text; String(null) would otherwise
  // yield the literal word "null".
  if (str === null || str === undefined) {
    return []
  }

  // `opt || {}` tolerates an explicit null; the destructuring default covers
  // both a missing and an explicitly undefined separator.
  const { separator = ' ' } = opt || {}

  // Constant-time lookups instead of a linear scan per token. Rebuilt on every
  // call so changes callers make to the exported `stopWords` array are honored.
  const stopWordSet = new Set(stopWords)

  return String(str)
    .split(separator)
    .filter((word) => !stopWordSet.has(word.toLowerCase()))
}
/**
* Lowercase English stop words that `removeStopWords` filters out.
* Entries are listed alphabetically and grouped here by initial letter;
* contractions are stored with a plain ASCII apostrophe.
*/
export const stopWords = [
  // a
  "a", "about", "above", "after", "again", "against", "all", "am", "an", "and", "any", "are", "as", "at",
  // b
  "be", "because", "been", "before", "being", "below", "between", "both", "but", "by",
  // c - d
  "could", "did", "do", "does", "doing", "down", "during",
  // e - f
  "each", "few", "for", "from", "further",
  // h
  "had", "has", "have", "having", "he", "he'd", "he'll", "he's", "her", "here", "here's", "hers", "herself",
  "him", "himself", "his", "how", "how's",
  // i
  "i", "i'd", "i'll", "i'm", "i've", "if", "in", "into", "is", "it", "it's", "its", "itself",
  // l - m
  "let's", "me", "more", "most", "my", "myself",
  // n - o
  "nor", "of", "on", "once", "only", "or", "other", "ought", "our", "ours", "ourselves", "out", "over", "own",
  // s
  "same", "she", "she'd", "she'll", "she's", "should", "so", "some", "such",
  // t
  "than", "that", "that's", "the", "their", "theirs", "them", "themselves", "then", "there", "there's",
  "these", "they", "they'd", "they'll", "they're", "they've", "this", "those", "through", "to", "too",
  // u - v
  "under", "until", "up", "very",
  // w
  "was", "we", "we'd", "we'll", "we're", "we've", "were", "what", "what's", "when", "when's", "where",
  "where's", "which", "while", "who", "who's", "whom", "why", "why's", "with", "would",
  // y
  "you", "you'd", "you'll", "you're", "you've", "your", "yours", "yourself", "yourselves",
]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment