Skip to content

Instantly share code, notes, and snippets.

@westc
Last active November 29, 2023 05:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save westc/e014d95e7c5e916a3413211852c37f91 to your computer and use it in GitHub Desktop.
Save westc/e014d95e7c5e916a3413211852c37f91 to your computer and use it in GitHub Desktop.
A JavaScript class that can be used in a rudimentary search engine to test and find all matches for search terms entered.
/**
 * Compiles a search query into regular expressions and uses them to test
 * strings and to collect all matches inside them.
 *
 * Query syntax, as implemented by `#updateRegExps()`:
 * - Terms are separated by whitespace; a double-quoted run (`"foo bar"`) is a
 *   single term, and whitespace inside it matches any run of whitespace.
 * - A term prefixed with `+` must be found in the value.
 * - A term prefixed with `-` must NOT be found in the value.
 * - Among the remaining (unprefixed) and `+` terms, at least one must match;
 *   these are also the terms reported by `matchAll()`.
 * - `*` acts as a wildcard and `\x` passes `x` through behind a backslash.
 * - Matching is case-insensitive and Unicode-aware (`i` and `u` flags).
 */
class Searcher {
  /** @type {string} */
  #terms;
  /** @type {boolean} */
  #matchWordStart;
  /** @type {boolean} */
  #matchWordEnd;
  /** @type {?RegExp} Matches when any `-` term is present; `null` if none. */
  #rgxFullNeg;
  /** @type {?RegExp} Matches when every `+` term is present; `null` if none. */
  #rgxFullPos;
  /** @type {RegExp} Global regex matching any unprefixed or `+` term. */
  #rgxPartial;

  /**
   * @param {string} terms
   *   The search query.
   * @param {object} options
   * @param {?boolean=} options.matchWordStart
   *   Optional. Defaults to `true`. When true, a term that begins with a
   *   letter or number may not be preceded by a letter or number.
   * @param {?boolean=} options.matchWordEnd
   *   Optional. Defaults to `false`. When true, a term that ends with a
   *   letter or number may not be followed by a letter or number.
   */
  constructor(terms, options) {
    const opts = Object(options);
    this.#terms = terms;
    this.#matchWordStart = opts.matchWordStart !== false;
    this.#matchWordEnd = !!opts.matchWordEnd;
    this.#updateRegExps();
  }

  /**
   * Rebuilds the three internal regular expressions from the current terms
   * and word-boundary settings.
   */
  #updateRegExps() {
    this.#rgxFullNeg = null;
    this.#rgxFullPos = null;
    this.#rgxPartial = null;

    const nonModdedTargets = [];
    const plusTargets = [];
    const minusTargets = [];

    // Coerce once, outside the loop: a missing or non-string query is treated
    // as an empty one instead of throwing on `.trim()`.
    const query = String(this.#terms ?? '').trim();

    // Group 1: optional +/- modifier. Group 2: whole term. Group 3: the
    // content of a double-quoted term (undefined for unquoted terms).
    const rgxTerm = /([-+])?("([^"]*)"|\S+)/g;

    for (let match; (match = rgxTerm.exec(query)); ) {
      const [, mod, rawTerm, quoted] = match;
      const source = quoted == null ? rawTerm : quoted;

      // Translate the term into regex source. The alternatives are, in order:
      // start-of-term before a letter/number, end-of-term after a
      // letter/number, backslash escape, asterisk with optional surrounding
      // whitespace, whitespace run, and regex metacharacters to be escaped.
      const target = source.replace(
        /^(?=([\p{L}\p{N}]))|(?<=([\p{L}\p{N}]))$|\\(.)|(\s?\*\s?)|(\s+)|[.+?^$\[\]\\{}()|]/gu,
        (m, startWithWord, endsWithWord, escapedChar, asterisk, whitespace, index, whole) => {
          if (startWithWord) {
            return this.#matchWordStart ? '(?<!\\p{L}|\\p{N})' : '';
          }
          if (endsWithWord) {
            return this.#matchWordEnd ? '(?!\\p{L}|\\p{N})' : '';
          }
          if (whitespace) {
            return '\\s+';
          }
          if (!asterisk) {
            // NOTE(review): an escaped character is emitted verbatim behind a
            // backslash, so sequences such as `\d` reach the RegExp
            // constructor as-is - confirm this is intended.
            return '\\' + (escapedChar || m);
          }

          // Wildcard handling. Inside a non-empty quoted term a free-standing
          // asterisk may span several whitespace-separated words.
          const multipleMod = quoted ? '(\\s+\\S+)*' : '';
          const isStarter = index === 0 || asterisk.charAt(0) !== '*';
          const isFinisher = index + 1 === whole.length || asterisk.slice(-1) !== '*';
          if (!isStarter) {
            return '\\S*';
          }
          return '(?:\\s|)\\S+' + multipleMod + (isFinisher ? '' : '(?:\\s|$)');
        }
      );

      if (mod === '+') {
        plusTargets.push(target);
      } else if (mod === '-') {
        minusTargets.push(target);
      } else {
        nonModdedTargets.push(target);
      }
    }

    // With no positive term at all, fall back to a pattern that matches the
    // start of any string so that only the `-` terms decide the outcome.
    if (plusTargets.length + nonModdedTargets.length === 0) {
      nonModdedTargets.push('^');
    }
    if (minusTargets.length) {
      this.#rgxFullNeg = new RegExp(minusTargets.join('|'), 'iu');
    }
    if (plusTargets.length) {
      // One lookahead per `+` term, so all of them must occur somewhere.
      this.#rgxFullPos = new RegExp(plusTargets.map((t) => `(?=^[^]*${t})`).join(''), 'iu');
    }
    this.#rgxPartial = new RegExp([...nonModdedTargets, ...plusTargets].join('|'), 'giu');
  }

  set matchWordStart(matchWordStart) {
    this.#matchWordStart = matchWordStart;
    this.#updateRegExps();
  }

  get matchWordStart() {
    return this.#matchWordStart;
  }

  set matchWordEnd(matchWordEnd) {
    this.#matchWordEnd = matchWordEnd;
    this.#updateRegExps();
  }

  get matchWordEnd() {
    return this.#matchWordEnd;
  }

  set terms(terms) {
    this.#terms = terms;
    this.#updateRegExps();
  }

  get terms() {
    return this.#terms;
  }

  /**
   * Reports whether `value` satisfies the query: no `-` term is present,
   * every `+` term is present, and at least one positive term matches.
   *
   * @param {string} value
   * @returns {boolean}
   */
  test(value) {
    if (this.#rgxFullNeg?.test(value)) {
      return false;
    }
    if (this.#rgxFullPos?.test(value) === false) {
      return false;
    }
    // The partial regex is global, so `.test()` resumes from `lastIndex`.
    // Reset it so that every call scans from the start of `value`.
    this.#rgxPartial.lastIndex = 0;
    return this.#rgxPartial.test(value);
  }

  /**
   * Returns every match of the positive terms in `value`, or an empty array
   * when `value` is rejected by a `-` term or lacks a `+` term.
   *
   * @param {string} value
   * @returns {RegExpMatchArray[]}
   */
  matchAll(value) {
    if (this.#rgxFullNeg?.test(value) || this.#rgxFullPos?.test(value) === false) {
      return [];
    }
    // `String.prototype.matchAll` starts from the regex's current
    // `lastIndex`; reset it so a preceding `test()` cannot hide matches.
    this.#rgxPartial.lastIndex = 0;
    return [...value.matchAll(this.#rgxPartial)];
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment