Last active
November 8, 2022 19:23
-
-
Save KBeDevel/25d608dfe32417067be337ab29a0c3a5 to your computer and use it in GitHub Desktop.
Similarity calculator for string inputs implemented in TypeScript
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Get the similarity between two strings using the Jaro-Winkler metric.
 * Based on {@link https://gist.github.com/sumn2u/0e0b5d9505ad096284928a987ace13fb#file-jaro-wrinker-js}
 *
 * Strings that are equal after trimming surrounding whitespace are reported as
 * a perfect match. Every other comparison is case-sensitive and is performed on
 * the untrimmed inputs.
 *
 * @param firstString - First string to compare.
 * @param secondString - Second string to compare.
 * @param config - Optional output settings.
 * @returns The similarity as a float between 0 and 100, or between 0 and 1
 * when `config.asRatio` is true.
 *
 * @example
 * calculateStringsSimilarity('MARTHA', 'MARHTA', { asRatio: true }) // ≈ 0.9611
 */
export const calculateStringsSimilarity = (
  firstString: string,
  secondString: string,
  config?: {
    /**
     * If true, the function will return the similarity as a float between 0 and 1.
     * If false or omitted, the function will return the similarity as a float between 0 and 100.
     */
    asRatio?: boolean
  }
): number => {
  const scale = config?.asRatio ? 1 : 100
  if (firstString.trim() === secondString.trim()) return scale

  const firstLength = firstString.length
  const secondLength = secondString.length

  // Two characters only count as a match when their positions differ by at
  // most `range`. Clamp to 0 so very short inputs do not get a negative window.
  const range = Math.max(
    0,
    Math.floor(Math.max(firstLength, secondLength) / 2) - 1
  )

  const matchesInFirstString = new Array<boolean>(firstLength).fill(false)
  const matchesInSecondString = new Array<boolean>(secondLength).fill(false)
  let matchesFound = 0

  for (let firstIndex = 0; firstIndex < firstLength; firstIndex++) {
    const low = Math.max(0, firstIndex - range)
    // Clamp to the last valid index of the second string.
    const high = Math.min(firstIndex + range, secondLength - 1)
    for (let secondIndex = low; secondIndex <= high; secondIndex++) {
      if (
        !matchesInSecondString[secondIndex] &&
        firstString.charAt(firstIndex) === secondString.charAt(secondIndex)
      ) {
        matchesInFirstString[firstIndex] = true
        matchesInSecondString[secondIndex] = true
        matchesFound++
        // Each character may be matched at most once.
        break
      }
    }
  }

  if (matchesFound === 0) return 0

  // Walk the matched characters of both strings in order. Every position where
  // the k-th matched characters differ is half of a transposition.
  let halfTranspositions = 0
  let secondCursor = 0
  for (let firstIndex = 0; firstIndex < firstLength; firstIndex++) {
    if (!matchesInFirstString[firstIndex]) continue
    // Skip unmatched characters; both strings hold the same number of matched
    // characters, so a matched one is always found before the end.
    while (secondCursor < secondLength && !matchesInSecondString[secondCursor]) {
      secondCursor++
    }
    if (firstString.charAt(firstIndex) !== secondString.charAt(secondCursor)) {
      halfTranspositions++
    }
    secondCursor++
  }

  // Jaro similarity.
  let weight =
    (matchesFound / firstLength +
      matchesFound / secondLength +
      (matchesFound - halfTranspositions / 2) / matchesFound) /
    3

  // Winkler adjustment: reward a shared prefix of up to four characters, but
  // only for pairs that are already reasonably similar.
  const scoreScalingFactor = 0.1
  const boostThreshold = 0.7
  const maxPrefixLength = Math.min(4, firstLength, secondLength)
  if (weight > boostThreshold) {
    let lengthPrefix = 0
    while (
      lengthPrefix < maxPrefixLength &&
      firstString.charAt(lengthPrefix) === secondString.charAt(lengthPrefix)
    ) {
      lengthPrefix++
    }
    weight = weight + lengthPrefix * scoreScalingFactor * (1 - weight)
  }

  return weight * scale
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment