Skip to content

Instantly share code, notes, and snippets.

@Boorj
Created November 6, 2018 13:19
Show Gist options
  • Save Boorj/f6075ea3b996dbd033c98f8120b9e058 to your computer and use it in GitHub Desktop.
Save Boorj/f6075ea3b996dbd033c98f8120b9e058 to your computer and use it in GitHub Desktop.
Excel trigram (3-gram) comparison function
' Regex-based search and replace built on the late-bound VBScript.RegExp object.
'
' text       - value to process; it is copied into a String before matching.
' strPattern - regular expression (VBScript.RegExp syntax). Matching is
'              case-sensitive, global (every match) and not multi-line.
' strReplace - replacement text handed to RegExp.Replace.
'
' Returns the processed string. The input is returned unchanged when the
' pattern is empty or does not match anything.
Function preg_replace(text, strPattern As String, strReplace As String)
    Dim result As String
    Dim matcher As Object

    result = text

    Set matcher = CreateObject("VBScript.RegExp")
    matcher.Pattern = strPattern
    matcher.IgnoreCase = False
    matcher.MultiLine = False
    matcher.Global = True

    ' An empty pattern means "leave the text alone".
    If Len(strPattern) > 0 Then
        If matcher.Test(result) Then
            result = matcher.Replace(result, strReplace)
        End If
    End If

    preg_replace = result
End Function
' Lists the trigrams (3-character windows) of a value.
'
' The value is padded with two underscores on each side, then every window of
' three characters starting at positions 1 .. Len(str) + 2 is taken.
'
' Returns the windows as one string in which each window is preceded by a
' single space, e.g. "abc" -> " __a _ab abc bc_ c__".
Function TRIGRAM(str)
    Dim padded As String
    Dim total As Long
    Dim pos As Long
    Dim windows() As String

    padded = "__" & str & "__"
    total = Len(str) + 2          ' always at least 2, so the array is never empty

    ReDim windows(1 To total)
    For pos = 1 To total
        windows(pos) = Mid(padded, pos, 3)
    Next pos

    ' The leading space is part of the format: each window is " " plus 3 chars.
    TRIGRAM = " " & Join(windows, " ")
End Function
' Compares two strings by their trigrams and reports how much of the second
' string is NOT covered by the first.
'
' Both inputs are lower-cased and stripped of the characters
' ( ) . space , / \ + * - " ' before TRIGRAM is applied to them.
' For every trigram of str1_, at most one occurrence of that trigram is
' removed from the trigram list of str2_.
'
' Returns a string "<left>/<base>" where <left> is the number of trigrams of
' str2_ that remain after the removal and <base> is the trigram count of
' str2_ minus 2.
Function TRIGRAM_COMPARE(str1_ As String, str2_ As String)
    Const NOISE_PATTERN As String = "[(). ,/\\+*\-""']+"

    Dim gramsA As String
    Dim gramsB As String
    Dim remaining As String
    Dim gram As Variant
    Dim totalB

    gramsA = TRIGRAM(preg_replace(LCase(str1_), NOISE_PATTERN, ""))
    gramsB = TRIGRAM(preg_replace(LCase(str2_), NOISE_PATTERN, ""))

    ' Strike out, one occurrence at a time, each trigram that str1_ also has.
    remaining = gramsB
    For Each gram In Split(Trim(gramsA), " ")
        remaining = Replace(remaining, " " & gram, "", 1, 1)
    Next gram

    ' Every entry is a space plus three characters, hence the division by 4.
    totalB = Len(gramsB) / 4
    TRIGRAM_COMPARE = (Len(remaining) / 4) & "/" & (totalB - 2)
End Function
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment