-
-
Save Varriount/ba8d95461e23762d2683426b83cc90b6 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# A Fuzzy Match implementation inspired by the sublime text fuzzy match algorithm | |
# as described here: https://blog.forrestthewoods.com/reverse-engineering-sublime-text-s-fuzzy-match-4cffeed33fdb | |
# Heavily modified to provide more subjectively useful results | |
# for on the Nim manual. | |
# | |
import strutils | |
import math | |
import macros | |
const | |
MaxUnmatchedLeadingChar = 3 | |
## Maximum number of times the penalty for unmatched leading chars is applied. | |
HeadingScaleFactor = 0.5 | |
## The score from before the colon Char is multiplied by this. | |
## This is to weight function signatures and descriptions over module titles. | |
type | |
ScoreCard = enum | |
StartMatch = -100 ## Start matching. | |
LeadingCharDiff = -3 ## An unmatched, leading character was found. | |
CharDiff = -1 ## An unmatched character was found. | |
CharMatch = 0 ## A matched character was found. | |
ConsecutiveMatch = 5 ## A consecutive match was found. | |
LeadingCharMatch = 10 ## The character matches the begining of the | |
## string or the first character of a word | |
## or camel case boundry. | |
WordBoundryMatch = 20 ## The last ConsecutiveCharMatch that | |
## immediately precedes the end of the string, | |
## end of the pattern, or a LeadingCharMatch. | |
proc fuzzyMatch*(pattern, str: cstring) : tuple[score: int, matched: bool] = | |
var | |
scoreState = StartMatch | |
headerMatched = false | |
unmatchedLeadingCharCount = 0 | |
consecutiveMatchCount = 0 | |
strIndex = 0 | |
patIndex = 0 | |
score = 0 | |
template transition(nextState) = | |
scoreState = nextState | |
score += ord(scoreState) | |
while (strIndex < str.len) and (patIndex < pattern.len): | |
var | |
patternChar = pattern[patIndex].toLowerAscii | |
strChar = str[strIndex].toLowerAscii | |
# Ignore certain characters | |
if patternChar in {'_', ' ', '.'}: | |
patIndex += 1 | |
continue | |
if strChar in {'_', ' ', '.'}: | |
strIndex += 1 | |
continue | |
# Since this algorithm will be used to search against Nim documentation, | |
# the below logic prioritizes headers. | |
if not headerMatched and strChar == ':': | |
headerMatched = true | |
scoreState = StartMatch | |
score = toInt(floor(HeadingScaleFactor * toFloat(score))) | |
patIndex = 0 | |
strIndex += 1 | |
continue | |
if strChar == patternChar: | |
case scoreState | |
of StartMatch, WordBoundryMatch: | |
scoreState = LeadingCharMatch | |
of CharMatch: | |
transition(ConsecutiveMatch) | |
of LeadingCharMatch, ConsecutiveMatch: | |
consecutiveMatchCount += 1 | |
scoreState = ConsecutiveMatch | |
score += ord(ConsecutiveMatch) * consecutiveMatchCount | |
if scoreState == LeadingCharMatch: | |
score += ord(LeadingCharMatch) | |
var onBoundary = (patIndex == high(pattern)) | |
if not onBoundary: | |
let | |
nextPatternChar = toLowerAscii(pattern[patIndex + 1]) | |
nextStrChar = toLowerAscii(str[strIndex + 1]) | |
onBoundary = ( | |
nextStrChar notin {'a'..'z'} and | |
nextStrChar != nextPatternChar | |
) | |
if onBoundary: | |
transition(WordBoundryMatch) | |
of CharDiff, LeadingCharDiff: | |
var isLeadingChar = ( | |
str[strIndex - 1] notin Letters or | |
str[strIndex - 1] in {'a'..'z'} and | |
str[strIndex] in {'A'..'Z'} | |
) | |
if isLeadingChar: | |
scoreState = LeadingCharMatch | |
#a non alpha or a camel case transition counts as a leading char. | |
# Transition the state, but don't give the bonus yet; wait until we verify a consecutive match. | |
else: | |
transition(CharMatch) | |
patIndex += 1 | |
else: | |
case scoreState | |
of StartMatch: | |
transition(LeadingCharDiff) | |
of ConsecutiveMatch: | |
transition(CharDiff) | |
consecutiveMatchCount = 0 | |
of LeadingCharDiff: | |
if unmatchedLeadingCharCount < MaxUnmatchedLeadingChar: | |
transition(LeadingCharDiff) | |
unmatchedLeadingCharCount += 1 | |
else: | |
transition(CharDiff) | |
strIndex += 1 | |
result = ( | |
score: max(0, score), | |
matched: (score > 0), | |
) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment