Skip to content

Instantly share code, notes, and snippets.

@espretto
Last active May 29, 2024 08:33
Show Gist options
  • Save espretto/41e8e0d78377ce100fc1 to your computer and use it in GitHub Desktop.
Save espretto/41e8e0d78377ce100fc1 to your computer and use it in GitHub Desktop.
ts: fuzzy search
import escapeRegExp from "lodash.escaperegexp";
/**
* used to create regular expressions that match if all needle-chars appear in
* the same order w/ any number of inbetween chars.
*/
export function fuzzyRegExp(needle: string) {
return new RegExp(needle.split("").map(escapeRegExp).join(".*?"), "i");
}
const isASCIIPrintable = (char: string) => {
const charCode = char.charCodeAt(0);
return 32 <= charCode && charCode < 127;
};
/**
* if the "starter" of the given character's NFKD normalization is an ASCII printable, return it.
* otherwise return the given character.
*
* A "starter" is the first char with a "canonical combining class value" of zero.
* @see https://unicode.org/reports/tr15/#Norm_Forms
*/
const extractASCIIStarter = (char: string) => {
const starter = char.normalize("NFKD").charAt(0);
return isASCIIPrintable(starter) ? starter : char;
};
/**
* the needle is found if all needle-chars appear in the same order w/ any
* number of inbetween chars.
*
* Any composed forms in the haystack will be decomposed to their NFKD normalization and reduced to
* their "starter" characters or first characters in case of ligatures IF these are ASCII printable.
*
* @returns match-array or null if not matched.
* match-array items at even indices hold matched needle-chars.
* match-array items at odd indices hold inbetween chars.
*/
export function fuzzyMatch(
needle: string,
haystack: string,
strict: boolean = false,
) {
const needles = needle.toLowerCase();
const nHaystack = haystack.toLowerCase().split("").map(extractASCIIStarter);
// indices of both matched and unmatched substrings (cf. \b in regex)
const boundaries = [0];
let anchor = 0;
for (let i = 0; i < needles.length; i++) {
const seeker = nHaystack.indexOf(needles.charAt(i), anchor);
if (seeker < 0) {
if (strict) return null;
else continue;
}
if (seeker > anchor) boundaries.push(anchor, seeker);
anchor = seeker + 1;
}
if (anchor < nHaystack.length) boundaries.push(anchor);
boundaries.push(nHaystack.length);
const matchArray = [];
for (let i = 1; i < boundaries.length; i++) {
matchArray.push(haystack.substring(boundaries[i - 1], boundaries[i]));
}
return matchArray;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment