Varriount/fuzzy.nim Secret

## fuzzy.nim
# A Fuzzy Match implementation inspired by the sublime text fuzzy match algorithm
# as described here: https://blog.forrestthewoods.com/reverse-engineering-sublime-text-s-fuzzy-match-4cffeed33fdb
# Heavily modified to provide more subjectively useful results
# for on the Nim manual.
#
import strutils
import math
import macros


const
  MaxUnmatchedLeadingChar = 3
  ## Maximum number of times the penalty for unmatched leading chars is applied.

  HeadingScaleFactor = 0.5
  ## The score from before the colon Char is multiplied by this.
  ## This is to weight function signatures and descriptions over module titles.


type
  ScoreCard = enum
    StartMatch           = -100 ## Start matching.
    LeadingCharDiff      = -3   ## An unmatched, leading character was found.
    CharDiff             = -1   ## An unmatched character was found.
    CharMatch            = 0    ## A matched character was found.
    ConsecutiveMatch     = 5    ## A consecutive match was found.
    LeadingCharMatch     = 10   ## The character matches the begining of the
                                ## string or the first character of a word
                                ## or camel case boundry.
    WordBoundryMatch     = 20   ## The last ConsecutiveCharMatch that
                                ## immediately precedes the end of the string,
                                ## end of the pattern, or a LeadingCharMatch.


proc fuzzyMatch*(pattern, str: cstring) : tuple[score: int, matched: bool] =
  var
    scoreState = StartMatch
    headerMatched = false
    unmatchedLeadingCharCount = 0
    consecutiveMatchCount = 0
    strIndex = 0
    patIndex = 0
    score = 0

  template transition(nextState) =
    scoreState = nextState
    score += ord(scoreState)

  while (strIndex < str.len) and (patIndex < pattern.len):
    var
      patternChar = pattern[patIndex].toLowerAscii
      strChar     = str[strIndex].toLowerAscii

    # Ignore certain characters
    if patternChar in {'_', ' ', '.'}:
      patIndex += 1
      continue
    if strChar in {'_', ' ', '.'}:
      strIndex += 1
      continue

    # Since this algorithm will be used to search against Nim documentation,
    # the below logic prioritizes headers.
    if not headerMatched and strChar == ':':
      headerMatched = true
      scoreState = StartMatch
      score = toInt(floor(HeadingScaleFactor * toFloat(score)))
      patIndex = 0
      strIndex += 1
      continue

    if strChar == patternChar:
      case scoreState
      of StartMatch, WordBoundryMatch:
        scoreState = LeadingCharMatch

      of CharMatch:
        transition(ConsecutiveMatch)

      of LeadingCharMatch, ConsecutiveMatch:
        consecutiveMatchCount += 1
        scoreState = ConsecutiveMatch
        score += ord(ConsecutiveMatch) * consecutiveMatchCount

        if scoreState == LeadingCharMatch:
          score += ord(LeadingCharMatch)

        var onBoundary = (patIndex == high(pattern))
        if not onBoundary:
          let
            nextPatternChar = toLowerAscii(pattern[patIndex + 1])
            nextStrChar     = toLowerAscii(str[strIndex + 1])

          onBoundary = (
            nextStrChar notin {'a'..'z'} and
            nextStrChar != nextPatternChar
          )

        if onBoundary:
          transition(WordBoundryMatch)

      of CharDiff, LeadingCharDiff:
        var isLeadingChar = (
          str[strIndex - 1] notin Letters or
          str[strIndex - 1] in {'a'..'z'} and
          str[strIndex] in {'A'..'Z'}
        )

        if isLeadingChar:
          scoreState = LeadingCharMatch
          #a non alpha or a camel case transition counts as a leading char.
          # Transition the state, but don't give the bonus yet; wait until we verify a consecutive match.
        else:
          transition(CharMatch)
      patIndex += 1

    else:
      case scoreState
      of StartMatch:
        transition(LeadingCharDiff)

      of ConsecutiveMatch:
        transition(CharDiff)
        consecutiveMatchCount = 0

      of LeadingCharDiff:
        if unmatchedLeadingCharCount < MaxUnmatchedLeadingChar:
          transition(LeadingCharDiff)
        unmatchedLeadingCharCount += 1

      else:
        transition(CharDiff)

    strIndex += 1

  result = (
    score:   max(0, score),
    matched: (score > 0),
  )
	# A Fuzzy Match implementation inspired by the sublime text fuzzy match algorithm
	# as described here: https://blog.forrestthewoods.com/reverse-engineering-sublime-text-s-fuzzy-match-4cffeed33fdb
	# Heavily modified to provide more subjectively useful results
	# for on the Nim manual.
	#
	import strutils
	import math
	import macros


	const
	MaxUnmatchedLeadingChar = 3
	## Maximum number of times the penalty for unmatched leading chars is applied.

	HeadingScaleFactor = 0.5
	## The score from before the colon Char is multiplied by this.
	## This is to weight function signatures and descriptions over module titles.


	type
	ScoreCard = enum
	StartMatch = -100 ## Start matching.
	LeadingCharDiff = -3 ## An unmatched, leading character was found.
	CharDiff = -1 ## An unmatched character was found.
	CharMatch = 0 ## A matched character was found.
	ConsecutiveMatch = 5 ## A consecutive match was found.
	LeadingCharMatch = 10 ## The character matches the begining of the
	## string or the first character of a word
	## or camel case boundry.
	WordBoundryMatch = 20 ## The last ConsecutiveCharMatch that
	## immediately precedes the end of the string,
	## end of the pattern, or a LeadingCharMatch.


	proc fuzzyMatch*(pattern, str: cstring) : tuple[score: int, matched: bool] =
	var
	scoreState = StartMatch
	headerMatched = false
	unmatchedLeadingCharCount = 0
	consecutiveMatchCount = 0
	strIndex = 0
	patIndex = 0
	score = 0

	template transition(nextState) =
	scoreState = nextState
	score += ord(scoreState)

	while (strIndex < str.len) and (patIndex < pattern.len):
	var
	patternChar = pattern[patIndex].toLowerAscii
	strChar = str[strIndex].toLowerAscii

	# Ignore certain characters
	if patternChar in {'_', ' ', '.'}:
	patIndex += 1
	continue
	if strChar in {'_', ' ', '.'}:
	strIndex += 1
	continue

	# Since this algorithm will be used to search against Nim documentation,
	# the below logic prioritizes headers.
	if not headerMatched and strChar == ':':
	headerMatched = true
	scoreState = StartMatch
	score = toInt(floor(HeadingScaleFactor * toFloat(score)))
	patIndex = 0
	strIndex += 1
	continue

	if strChar == patternChar:
	case scoreState
	of StartMatch, WordBoundryMatch:
	scoreState = LeadingCharMatch

	of CharMatch:
	transition(ConsecutiveMatch)

	of LeadingCharMatch, ConsecutiveMatch:
	consecutiveMatchCount += 1
	scoreState = ConsecutiveMatch
	score += ord(ConsecutiveMatch) * consecutiveMatchCount

	if scoreState == LeadingCharMatch:
	score += ord(LeadingCharMatch)

	var onBoundary = (patIndex == high(pattern))
	if not onBoundary:
	let
	nextPatternChar = toLowerAscii(pattern[patIndex + 1])
	nextStrChar = toLowerAscii(str[strIndex + 1])

	onBoundary = (
	nextStrChar notin {'a'..'z'} and
	nextStrChar != nextPatternChar
	)

	if onBoundary:
	transition(WordBoundryMatch)

	of CharDiff, LeadingCharDiff:
	var isLeadingChar = (
	str[strIndex - 1] notin Letters or
	str[strIndex - 1] in {'a'..'z'} and
	str[strIndex] in {'A'..'Z'}
	)

	if isLeadingChar:
	scoreState = LeadingCharMatch
	#a non alpha or a camel case transition counts as a leading char.
	# Transition the state, but don't give the bonus yet; wait until we verify a consecutive match.
	else:
	transition(CharMatch)
	patIndex += 1

	else:
	case scoreState
	of StartMatch:
	transition(LeadingCharDiff)

	of ConsecutiveMatch:
	transition(CharDiff)
	consecutiveMatchCount = 0

	of LeadingCharDiff:
	if unmatchedLeadingCharCount < MaxUnmatchedLeadingChar:
	transition(LeadingCharDiff)
	unmatchedLeadingCharCount += 1

	else:
	transition(CharDiff)

	strIndex += 1

	result = (
	score: max(0, score),
	matched: (score > 0),
	)