espretto/fuzzySearch.ts

## fuzzySearch.ts
import escapeRegExp from "lodash.escaperegexp";

/**
 * used to create regular expressions that match if all needle-chars appear in
 * the same order w/ any number of inbetween chars.
 */
export function fuzzyRegExp(needle: string) {
  return new RegExp(needle.split("").map(escapeRegExp).join(".*?"), "i");
}

const isASCIIPrintable = (char: string) => {
  const charCode = char.charCodeAt(0);
  return 32 <= charCode && charCode < 127;
};

/**
 * if the "starter" of the given character's NFKD normalization is an ASCII printable, return it.
 * otherwise return the given character.
 *
 * A "starter" is the first char with a "canonical combining class value" of zero.
 * @see https://unicode.org/reports/tr15/#Norm_Forms
 */
const extractASCIIStarter = (char: string) => {
  const starter = char.normalize("NFKD").charAt(0);
  return isASCIIPrintable(starter) ? starter : char;
};

/**
 * the needle is found if all needle-chars appear in the same order w/ any
 * number of inbetween chars.
 *
 * Any composed forms in the haystack will be decomposed to their NFKD normalization and reduced to
 * their "starter" characters or first characters in case of ligatures IF these are ASCII printable.
 *
 * @returns match-array or null if not matched.
 *   match-array items at even indices hold matched needle-chars.
 *   match-array items at odd indices hold inbetween chars.
 */
export function fuzzyMatch(
  needle: string,
  haystack: string,
  strict: boolean = false,
) {
  const needles = needle.toLowerCase();
  const nHaystack = haystack.toLowerCase().split("").map(extractASCIIStarter);

  // indices of both matched and unmatched substrings (cf. \b in regex)
  const boundaries = [0];

  let anchor = 0;
  for (let i = 0; i < needles.length; i++) {
    const seeker = nHaystack.indexOf(needles.charAt(i), anchor);

    if (seeker < 0) {
      if (strict) return null;
      else continue;
    }
    if (seeker > anchor) boundaries.push(anchor, seeker);

    anchor = seeker + 1;
  }

  if (anchor < nHaystack.length) boundaries.push(anchor);
  boundaries.push(nHaystack.length);

  const matchArray = [];
  for (let i = 1; i < boundaries.length; i++) {
    matchArray.push(haystack.substring(boundaries[i - 1], boundaries[i]));
  }

  return matchArray;
}
	import escapeRegExp from "lodash.escaperegexp";

	/**
	* used to create regular expressions that match if all needle-chars appear in
	* the same order w/ any number of inbetween chars.
	*/
	export function fuzzyRegExp(needle: string) {
	return new RegExp(needle.split("").map(escapeRegExp).join(".*?"), "i");
	}

	const isASCIIPrintable = (char: string) => {
	const charCode = char.charCodeAt(0);
	return 32 <= charCode && charCode < 127;
	};

	/**
	* if the "starter" of the given character's NFKD normalization is an ASCII printable, return it.
	* otherwise return the given character.
	*
	* A "starter" is the first char with a "canonical combining class value" of zero.
	* @see https://unicode.org/reports/tr15/#Norm_Forms
	*/
	const extractASCIIStarter = (char: string) => {
	const starter = char.normalize("NFKD").charAt(0);
	return isASCIIPrintable(starter) ? starter : char;
	};

	/**
	* the needle is found if all needle-chars appear in the same order w/ any
	* number of inbetween chars.
	*
	* Any composed forms in the haystack will be decomposed to their NFKD normalization and reduced to
	* their "starter" characters or first characters in case of ligatures IF these are ASCII printable.
	*
	* @returns match-array or null if not matched.
	* match-array items at even indices hold matched needle-chars.
	* match-array items at odd indices hold inbetween chars.
	*/
	export function fuzzyMatch(
	needle: string,
	haystack: string,
	strict: boolean = false,
	) {
	const needles = needle.toLowerCase();
	const nHaystack = haystack.toLowerCase().split("").map(extractASCIIStarter);

	// indices of both matched and unmatched substrings (cf. \b in regex)
	const boundaries = [0];

	let anchor = 0;
	for (let i = 0; i < needles.length; i++) {
	const seeker = nHaystack.indexOf(needles.charAt(i), anchor);

	if (seeker < 0) {
	if (strict) return null;
	else continue;
	}
	if (seeker > anchor) boundaries.push(anchor, seeker);

	anchor = seeker + 1;
	}

	if (anchor < nHaystack.length) boundaries.push(anchor);
	boundaries.push(nHaystack.length);

	const matchArray = [];
	for (let i = 1; i < boundaries.length; i++) {
	matchArray.push(haystack.substring(boundaries[i - 1], boundaries[i]));
	}

	return matchArray;
	}