Skip to content

Instantly share code, notes, and snippets.

@CDillinger
Last active July 25, 2023 09:17
Show Gist options
  • Save CDillinger/2aa02128f840bdca90340ce08ee71bc2 to your computer and use it in GitHub Desktop.
Save CDillinger/2aa02128f840bdca90340ce08ee71bc2 to your computer and use it in GitHub Desktop.
C# Implementation of Fuzzy Match
// LICENSE
//
// This software is dual-licensed to the public domain and under the following
// license: you are granted a perpetual, irrevocable license to copy, modify,
// publish, and distribute this file as you see fit.
using System;
using System.Collections.Generic;
public static class FuzzyMatcher
{
/// <summary>
/// Does a fuzzy search for a pattern within a string.
/// </summary>
/// <param name="stringToSearch">The string to search for the pattern in.</param>
/// <param name="pattern">The pattern to search for in the string.</param>
/// <returns>true if each character in pattern is found sequentially within stringToSearch; otherwise, false.</returns>
public static bool FuzzyMatch(string stringToSearch, string pattern)
{
var patternIdx = 0;
var strIdx = 0;
var patternLength = pattern.Length;
var strLength = stringToSearch.Length;
while (patternIdx != patternLength && strIdx != strLength)
{
if (char.ToLower(pattern[patternIdx]) == char.ToLower(stringToSearch[strIdx]))
++patternIdx;
++strIdx;
}
return patternLength != 0 && strLength != 0 && patternIdx == patternLength;
}
/// <summary>
/// Does a fuzzy search for a pattern within a string, and gives the search a score on how well it matched.
/// </summary>
/// <param name="stringToSearch">The string to search for the pattern in.</param>
/// <param name="pattern">The pattern to search for in the string.</param>
/// <param name="outScore">The score which this search received, if a match was found.</param>
/// <returns>true if each character in pattern is found sequentially within stringToSearch; otherwise, false.</returns>
public static bool FuzzyMatch(string stringToSearch, string pattern, out int outScore)
{
// Score consts
const int adjacencyBonus = 5; // bonus for adjacent matches
const int separatorBonus = 10; // bonus if match occurs after a separator
const int camelBonus = 10; // bonus if match is uppercase and prev is lower
const int leadingLetterPenalty = -3; // penalty applied for every letter in stringToSearch before the first match
const int maxLeadingLetterPenalty = -9; // maximum penalty for leading letters
const int unmatchedLetterPenalty = -1; // penalty for every letter that doesn't matter
// Loop variables
var score = 0;
var patternIdx = 0;
var patternLength = pattern.Length;
var strIdx = 0;
var strLength = stringToSearch.Length;
var prevMatched = false;
var prevLower = false;
var prevSeparator = true; // true if first letter match gets separator bonus
// Use "best" matched letter if multiple string letters match the pattern
char? bestLetter = null;
char? bestLower = null;
int? bestLetterIdx = null;
var bestLetterScore = 0;
var matchedIndices = new List<int>();
// Loop over strings
while (strIdx != strLength)
{
var patternChar = patternIdx != patternLength ? pattern[patternIdx] as char? : null;
var strChar = stringToSearch[strIdx];
var patternLower = patternChar != null ? char.ToLower((char)patternChar) as char? : null;
var strLower = char.ToLower(strChar);
var strUpper = char.ToUpper(strChar);
var nextMatch = patternChar != null && patternLower == strLower;
var rematch = bestLetter != null && bestLower == strLower;
var advanced = nextMatch && bestLetter != null;
var patternRepeat = bestLetter != null && patternChar != null && bestLower == patternLower;
if (advanced || patternRepeat)
{
score += bestLetterScore;
matchedIndices.Add((int)bestLetterIdx);
bestLetter = null;
bestLower = null;
bestLetterIdx = null;
bestLetterScore = 0;
}
if (nextMatch || rematch)
{
var newScore = 0;
// Apply penalty for each letter before the first pattern match
// Note: Math.Max because penalties are negative values. So max is smallest penalty.
if (patternIdx == 0)
{
var penalty = Math.Max(strIdx * leadingLetterPenalty, maxLeadingLetterPenalty);
score += penalty;
}
// Apply bonus for consecutive bonuses
if (prevMatched)
newScore += adjacencyBonus;
// Apply bonus for matches after a separator
if (prevSeparator)
newScore += separatorBonus;
// Apply bonus across camel case boundaries. Includes "clever" isLetter check.
if (prevLower && strChar == strUpper && strLower != strUpper)
newScore += camelBonus;
// Update pattern index IF the next pattern letter was matched
if (nextMatch)
++patternIdx;
// Update best letter in stringToSearch which may be for a "next" letter or a "rematch"
if (newScore >= bestLetterScore)
{
// Apply penalty for now skipped letter
if (bestLetter != null)
score += unmatchedLetterPenalty;
bestLetter = strChar;
bestLower = char.ToLower((char)bestLetter);
bestLetterIdx = strIdx;
bestLetterScore = newScore;
}
prevMatched = true;
}
else
{
score += unmatchedLetterPenalty;
prevMatched = false;
}
// Includes "clever" isLetter check.
prevLower = strChar == strLower && strLower != strUpper;
prevSeparator = strChar == '_' || strChar == ' ';
++strIdx;
}
// Apply score for last match
if (bestLetter != null)
{
score += bestLetterScore;
matchedIndices.Add((int)bestLetterIdx);
}
outScore = score;
return patternIdx == patternLength;
}
}
@CDillinger
Copy link
Author

This was created after reading Forrest Smith's blog post on reverse engineering Sublime Text's Fuzzy Match. Forrest originally wrote a C++ and JavaScript implementation which can be found in this repository. Links to implementations written by other users in different languages can be found linked in the repository's readme as well.

These are the signatures of the functions included in this code:

public static bool FuzzyMatch(string stringToSearch, string pattern);
public static bool FuzzyMatch(string stringToSearch, string pattern, out int outScore);

If you would like to change these methods to extension methods, just add a this keyword before the first parameter:

public static bool FuzzyMatch(this string stringToSearch, string pattern);
public static bool FuzzyMatch(this string stringToSearch, string pattern, out int outScore);

@jrkd
Copy link

jrkd commented Oct 14, 2016

Hey CDillinger,

Many thanks for this. don't suppose you have any performance comparisons against the C++ version?

Jono

@tajmone
Copy link

tajmone commented Jul 19, 2019

Hi @CDillinger,

just wanted to let you know that I've added your C# port to a new project built around Forrest Smith's FTS Fuzzy Match:

@Codename2200
Copy link

Hey CDillinger,
I found your code really helpful. Btw, there was an update on this article (https://www.forrestthewoods.com/blog/reverse_engineering_sublime_texts_fuzzy_match/)
Update — 2/18/2017

It would be great also if you could apply the recursive match.

Thanks!

@tajmone
Copy link

tajmone commented May 14, 2020

It would be great also if you could apply the recursive match.

I agree, it would be great. And I'll be happy to include the updated algorithm in in the fuzzy-search repository.

@Codename2200
Copy link

Codename2200 commented May 15, 2020

I made the recursive version but it doesn't work.

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace Tests
{
    public static class Fuzzy
    {
        const int SEQUENTIAL_BONUS = 15;              // bonus for adjacent matches
        const int SEPARATOR_BONUS = 30;               // bonus if match occurs after a separator
        const int CAMEL_BONUS = 30;                   // bonus if match is uppercase and prev is lower
        const int FIRST_LETTER_BONUS = 15;            // bonus if the first letter is matched
        const int LEADING_LETTER_PENALTY = -5;        // penalty applied for every letter in str before the first match
        const int MAX_LEADING_LETTER_PENALTY = -15;   // maximum penalty for leading letters
        const int UNMATCHED_LETTER_PENALTY = -1;      // penalty for every letter that doesn't matter

        public static bool FuzzyMatchRecursive(string pattern, string target, out int outScore)
        {
            int recursionCount = 0;
            int recursionLimit = 10;
            List<int> matches = new List<int>();
            int maxMatches = 256;

            return FuzzyMatchRecursive(
                pattern,                    // string to Search
                target,                     // target string
                0,                          // patternCurIndex
                0,                          // strCurrIndex
                null,                       // srcMatces
                matches,                    // matches
                maxMatches,                 // maxMatches
                0,                          // nextMatch
                recursionCount,             // recursionCount
                recursionLimit,             // recursionLimit
                out outScore);
        }

        public static bool FuzzyMatchRecursive(
            string pattern,
            string str,
            int patternCurIndex,
            int strCurrIndex,
            List<int> srcMatces,
            List<int> matches,
            int maxMatches,
            int nextMatch,
            int recursionCount,
            int recursionLimit,
            out int outScore)
        {
            outScore = 0;

            // Return if recursion limit is reached.
            if (++recursionCount >= recursionLimit)
                return false;

            // Return if we reached ends of strings.
            if (patternCurIndex == pattern.Length || strCurrIndex == str.Length)
                return false;

            // Recursion params
            bool recursiveMatch = false;
            List<int> bestRecursiveMatches = new List<int>();
            int bestRecursiveScore = 0;

            // Loop through pattern and str looking for a match.
            bool firstMatch = true;

            while (patternCurIndex < pattern.Length && strCurrIndex < str.Length)
            {
                // Match found.
                var patternChar = char.ToLower(pattern[patternCurIndex]);
                var strChar = char.ToLower(str[strCurrIndex]);

                if (patternChar == strChar)
                {
                    if (nextMatch > maxMatches)
                        return false;

                    if (firstMatch && srcMatces != null)
                    {
                        matches = srcMatces;
                        firstMatch = false;
                    }

                    List<int> recursiveMatches = new List<int>();
                    var ismatched = FuzzyMatchRecursive(
                        pattern,
                        str,
                        patternCurIndex,
                        strCurrIndex + 1,
                        matches,
                        recursiveMatches,
                        maxMatches,
                        nextMatch,
                        recursionCount,
                        recursionLimit,
                        out int recursiveScore);

                    if (ismatched)
                    {
                        // Pick best recursive score.
                        if (!recursiveMatch || recursiveScore > bestRecursiveScore)
                        {
                            bestRecursiveMatches = recursiveMatches;
                            bestRecursiveScore = recursiveScore;
                        }
                        recursiveMatch = true;
                    }

                    nextMatch++;
                    matches.Add(strCurrIndex);

                    ++patternCurIndex;
                }

                ++strCurrIndex;
            }

            var matched = patternCurIndex == pattern.Length;

            if (matched)
            {
                outScore = 100;

                // Apply leading letter penalty
                int penalty = LEADING_LETTER_PENALTY * matches[0];
                penalty = penalty < MAX_LEADING_LETTER_PENALTY ? MAX_LEADING_LETTER_PENALTY : penalty;
                outScore += penalty;

                //Apply unmatched penalty
                int unmatched = str.Length - nextMatch;
                outScore += UNMATCHED_LETTER_PENALTY * unmatched;

                for (int i = 0; i < nextMatch; i++)
                {
                    int currIdx = matches[i];

                    if (i > 0)
                    {
                        int prevIdx = matches[i - 1];
                        if (currIdx == prevIdx + 1)
                        {
                            outScore += SEQUENTIAL_BONUS;
                        }
                    }

                    // Check for bonuses based on neighbor character value.
                    if (currIdx > 0)
                    {
                        // Camel case
                        char neighbor = str[currIdx - 1];
                        char curr = str[currIdx];
                        if (neighbor == char.ToLower(neighbor) && curr == char.ToUpper(curr))
                        {
                            outScore += CAMEL_BONUS;
                        }
                        bool isNeighbourSeparator = neighbor == '_' || neighbor == ' ';
                        if (isNeighbourSeparator)
                        {
                            outScore += SEPARATOR_BONUS;
                        }
                    }
                    else
                    {
                        // First letter
                        outScore += FIRST_LETTER_BONUS;
                    }
                }

                // Return best result
                if (recursiveMatch && (!matched || bestRecursiveScore > outScore))
                {
                    // Recursive score is better than "this"
                    matches = bestRecursiveMatches;
                    outScore = bestRecursiveScore;
                    return true;
                }
                else if (matched)
                {
                    // "this" score is better than recursive
                    return true;
                }
                else
                {
                    return false;
                }
            }

            return false;
        }

        public static string FuzzyMatchRecursive(string pattern, string[] targets)
        {
            if (!string.IsNullOrEmpty(pattern) && targets?.Length > 0)
            {
                Dictionary<string, int> distances = new Dictionary<string, int>();

                foreach (var item in targets)
                {
                    var result = FuzzyMatchRecursive(pattern, item, out int score);
                    distances.Add(item, score);
                }

                return distances.Aggregate((l, r) => l.Value > r.Value ? l : r).Key;
            }

            return pattern;
        }

        public static void Test()
        {
            string[] targets = new[] {
                "Internal Hire or Backfill",
                "OTE Neutral",
                "Funded by Business Leader Budget",
                "Funded by Merit Budget"
            };

            string pattern = "Intern Conversion";
            string pattern2 = "Internal Hire";

            var result = FuzzyMatchRecursive(pattern, targets);
            var result2 = FuzzyMatchRecursive(pattern2, targets);

            if (result != "Internal Hire or Backfill")
                throw new Exception("No match found!");

            if (result2 != "Internal Hire or Backfill")
                throw new Exception("No match found!");
        }
    }
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment