Skip to content

Instantly share code, notes, and snippets.

@CDillinger CDillinger/FuzzyMatch.cs
Last active Sep 25, 2019

Embed
What would you like to do?
C# Implementation of Fuzzy Match
// LICENSE
//
// This software is dual-licensed to the public domain and under the following
// license: you are granted a perpetual, irrevocable license to copy, modify,
// publish, and distribute this file as you see fit.
using System;
using System.Collections.Generic;
public static class FuzzyMatcher
{
/// <summary>
/// Does a fuzzy search for a pattern within a string.
/// </summary>
/// <param name="stringToSearch">The string to search for the pattern in.</param>
/// <param name="pattern">The pattern to search for in the string.</param>
/// <returns>true if each character in pattern is found sequentially within stringToSearch; otherwise, false.</returns>
public static bool FuzzyMatch(string stringToSearch, string pattern)
{
var patternIdx = 0;
var strIdx = 0;
var patternLength = pattern.Length;
var strLength = stringToSearch.Length;
while (patternIdx != patternLength && strIdx != strLength)
{
if (char.ToLower(pattern[patternIdx]) == char.ToLower(stringToSearch[strIdx]))
++patternIdx;
++strIdx;
}
return patternLength != 0 && strLength != 0 && patternIdx == patternLength;
}
/// <summary>
/// Does a fuzzy search for a pattern within a string, and gives the search a score on how well it matched.
/// </summary>
/// <param name="stringToSearch">The string to search for the pattern in.</param>
/// <param name="pattern">The pattern to search for in the string.</param>
/// <param name="outScore">The score which this search received, if a match was found.</param>
/// <returns>true if each character in pattern is found sequentially within stringToSearch; otherwise, false.</returns>
public static bool FuzzyMatch(string stringToSearch, string pattern, out int outScore)
{
// Score consts
const int adjacencyBonus = 5; // bonus for adjacent matches
const int separatorBonus = 10; // bonus if match occurs after a separator
const int camelBonus = 10; // bonus if match is uppercase and prev is lower
const int leadingLetterPenalty = -3; // penalty applied for every letter in stringToSearch before the first match
const int maxLeadingLetterPenalty = -9; // maximum penalty for leading letters
const int unmatchedLetterPenalty = -1; // penalty for every letter that doesn't matter
// Loop variables
var score = 0;
var patternIdx = 0;
var patternLength = pattern.Length;
var strIdx = 0;
var strLength = stringToSearch.Length;
var prevMatched = false;
var prevLower = false;
var prevSeparator = true; // true if first letter match gets separator bonus
// Use "best" matched letter if multiple string letters match the pattern
char? bestLetter = null;
char? bestLower = null;
int? bestLetterIdx = null;
var bestLetterScore = 0;
var matchedIndices = new List<int>();
// Loop over strings
while (strIdx != strLength)
{
var patternChar = patternIdx != patternLength ? pattern[patternIdx] as char? : null;
var strChar = stringToSearch[strIdx];
var patternLower = patternChar != null ? char.ToLower((char)patternChar) as char? : null;
var strLower = char.ToLower(strChar);
var strUpper = char.ToUpper(strChar);
var nextMatch = patternChar != null && patternLower == strLower;
var rematch = bestLetter != null && bestLower == strLower;
var advanced = nextMatch && bestLetter != null;
var patternRepeat = bestLetter != null && patternChar != null && bestLower == patternLower;
if (advanced || patternRepeat)
{
score += bestLetterScore;
matchedIndices.Add((int)bestLetterIdx);
bestLetter = null;
bestLower = null;
bestLetterIdx = null;
bestLetterScore = 0;
}
if (nextMatch || rematch)
{
var newScore = 0;
// Apply penalty for each letter before the first pattern match
// Note: Math.Max because penalties are negative values. So max is smallest penalty.
if (patternIdx == 0)
{
var penalty = Math.Max(strIdx * leadingLetterPenalty, maxLeadingLetterPenalty);
score += penalty;
}
// Apply bonus for consecutive bonuses
if (prevMatched)
newScore += adjacencyBonus;
// Apply bonus for matches after a separator
if (prevSeparator)
newScore += separatorBonus;
// Apply bonus across camel case boundaries. Includes "clever" isLetter check.
if (prevLower && strChar == strUpper && strLower != strUpper)
newScore += camelBonus;
// Update pattern index IF the next pattern letter was matched
if (nextMatch)
++patternIdx;
// Update best letter in stringToSearch which may be for a "next" letter or a "rematch"
if (newScore >= bestLetterScore)
{
// Apply penalty for now skipped letter
if (bestLetter != null)
score += unmatchedLetterPenalty;
bestLetter = strChar;
bestLower = char.ToLower((char)bestLetter);
bestLetterIdx = strIdx;
bestLetterScore = newScore;
}
prevMatched = true;
}
else
{
score += unmatchedLetterPenalty;
prevMatched = false;
}
// Includes "clever" isLetter check.
prevLower = strChar == strLower && strLower != strUpper;
prevSeparator = strChar == '_' || strChar == ' ';
++strIdx;
}
// Apply score for last match
if (bestLetter != null)
{
score += bestLetterScore;
matchedIndices.Add((int)bestLetterIdx);
}
outScore = score;
return patternIdx == patternLength;
}
}
@CDillinger

This comment has been minimized.

Copy link
Owner Author

commented Apr 15, 2016

This was created after reading Forrest Smith's blog post on reverse engineering Sublime Text's Fuzzy Match. Forrest originally wrote a C++ and JavaScript implementation which can be found in this repository. Links to implementations written by other users in different languages can be found linked in the repository's readme as well.

These are the signatures of the functions included in this code:

public static bool FuzzyMatch(string stringToSearch, string pattern);
public static bool FuzzyMatch(string stringToSearch, string pattern, out int outScore);

If you would like to change these methods to extension methods, just add a this keyword before the first parameter:

public static bool FuzzyMatch(this string stringToSearch, string pattern);
public static bool FuzzyMatch(this string stringToSearch, string pattern, out int outScore);
@jrkd

This comment has been minimized.

Copy link

commented Oct 14, 2016

Hey CDillinger,

Many thanks for this. don't suppose you have any performance comparisons against the C++ version?

Jono

@tajmone

This comment has been minimized.

Copy link

commented Jul 19, 2019

Hi @CDillinger,

just wanted to let you know that I've added your C# port to a new project built around Forrest Smith's FTS Fuzzy Match:

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.