Skip to content

Instantly share code, notes, and snippets.

@dimkir
Created January 10, 2019 13:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dimkir/64ef1ec07271b561754c686e94851de2 to your computer and use it in GitHub Desktop.
Save dimkir/64ef1ec07271b561754c686e94851de2 to your computer and use it in GitHub Desktop.
Some stopwords from the current tokenizer
'use strict';
/**
 * Stopwords grouped under the "system" label: punctuation, very short
 * fragments, bare numbers and junk input.
 */
const system = [
  // Punctuation, one- or two-character fragments and bare numbers.
  '.', 's', 'm', 'v', 't', '1', 'p', '13', 'll', 'd', 're',
  // Marked as a typo by the original author.
  'doesn',
  // Was part of the question, which is why people typed it.
  'tuesday',
  // Too broad to carry meaning ('anything' was flagged as broad).
  'anything', 'nothing', 'etc',
  // Filler strings and a misspelling of "delivered".
  'asdf', 'asfasdf', 'dsafasdf', 'devlivered',
];
/**
 * Stopwords grouped under the "particles" label: articles, pronouns,
 * prepositions, conjunctions, auxiliaries and similar filler words.
 *
 * Every word appears exactly once; a repeated 'we' entry was removed.
 */
const particles = [
  'a', 'and', 'the', 'with', 'to', 'or', 'of', 'go', 'my', 'for',
  'out', 'in', 'some', 'i', 'on', 'then', 'up', 'at', 'be', 'it',
  'if', 'is', 'before', 'this', 'after', 'an', 'next', 'that', 'not', 'are',
  'as', 'over', 'few', 'maybe', 'either', 'so', 'we', 'will', 'followed', 'by',
  'probably', 'new', 'but',
  // Flagged with "??" by the original author.
  'depending',
  'when', 'they', 'cheeky', 'always', 'something', 'its', 'them', 'yes',
  'neither', 'our', 'can', 'sometimes', 'mid', 'otherwise', 'usually', 'from',
  'such', 'do', 'try', 'why', 'sure', 'you', 'just', 'while', 'us', 'every',
  'normally', 'generally', 'kinda', 'myself', 'would', 'who', 'their', 'ones',
  'there', 'off', 'been', 'has', 'got', 'between', 'one', 'does',
];
/** Stopwords grouped under the "adverbs" label. */
const adverbs = ['ideally', 'depends', 'currently'];
/**
 * Stopwords grouped under the "adjectives" label.
 * Both the misspelling 'choosen' and the correct 'chosen' are listed.
 */
const adjectives = [
  'old', 'quick', 'absolute', 'last', 'light', 'hearty', 'little',
  'choosen', 'chosen',
];
/** Stopwords ending in "-ing". */
const ing = [
  'avoiding', 'staying', 'catching', 'indulging', 'getting', 'having',
  'making', 'heading', 'hanging', 'going', 'watching',
];
/** Stopwords naming days, parts of the day, time spans and seasons. */
const calendar = [
  'weekend', 'wednesday', 'tuesday', 'day', 'evening', 'days', 'night',
  'week', 'hour', 'midweek', 'months', 'year', 'winter', 'summer',
];
/**
 * Miscellaneous stopwords grouped under the "needy" label.
 * Entries the original author flagged with "??" keep that note.
 */
const needy = [
  'delivered', 'take', 'bit', 'headed', 'toss', 'need', 'prefer', 'quality',
  'enjoy', 'born', 'neon', 'relevant', 'see', 'garbage', 'match', 'front',
  'beat', 'meet', 'fire', 'time', 'watch', 'glass', 'amount', 'junk',
  'judge', 'exciting', 'wild', 'casual', 'red', 'back',
  // ?? (flagged as questionable)
  'nice', 'good',
  'make', 'feet', 'long', 'lovely', 'grab', 'fuss', 'get', 'put',
  'have', 'head', 'like', 'catch', 'spend',
  // ?? can they want to do work?
  'work',
  'straight', 'early', 'ideal', 'two', 'boring', 'dried', 'different',
];
/**
 * Stopword groups keyed by group name.
 * Only the `system`, `particles` and `adverbs` groups are included here.
 */
const stopwords = { system, particles, adverbs };
/**
 * Flat list of every stopword from all groups (meaningless one-grams).
 *
 * A word can be listed in more than one group (for example 'tuesday' is in
 * both `system` and `calendar`), so the combined list is passed through a
 * `Set` to keep each word once. `Set` preserves insertion order, so words
 * stay in first-occurrence order.
 */
const stopwordsArray = [
  ...new Set([
    ...system,
    ...particles,
    ...adverbs,
    ...adjectives,
    ...ing,
    ...calendar,
    ...needy,
  ]),
];
// Public API: the grouped stopword lists and the flattened list.
// NOTE: `stopwordsFilter` was left commented out of the exports.
module.exports = { stopwords, stopwordsArray };
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment