Last active
September 5, 2016 16:58
-
-
Save fsschmitt/fdb4ec07b05ef948e2d347fade402192 to your computer and use it in GitHub Desktop.
SimilarityUtils library: compares two strings and, based on the Sørensen–Dice coefficient, calculates the percentage of similarity between them.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * SimilarityUtils Library
 * Compares two strings and, based on the Sørensen–Dice coefficient over their
 * character bigrams, calculates how similar they are as a ratio between
 * 0 (no bigram in common) and 1 (identical bigrams).
 *
 * Usage:
 *   SimilarityUtils.compare("string", "stringg").toFixed(2); // "0.91"
 ***/
var SimilarityUtils = {
  /**
   * Get the bigrams (adjacent character pairs) of a word, in order and with
   * duplicates kept, e.g. "String" translates into ["St","tr","ri","in","ng"].
   * Case is preserved. A string shorter than two characters has no bigram, so
   * the string itself (possibly "") is returned as the only element; the
   * result is therefore never empty.
   *
   * @param {string} string1 - The word to split.
   * @returns {string[]} The bigrams of the word.
   ***/
  bigramSet: function (string1) {
    var arr = [];
    if (string1.length > 1) {
      for (var index = 1; index < string1.length; index++) {
        arr.push(string1.substring(index - 1, index + 1));
      }
    } else {
      arr.push(string1);
    }
    return arr;
  },

  /**
   * Get the number of direct index matches in two arrays, i.e. the number of
   * positions at which both arrays hold the same value (strict equality).
   * Only positions present in both arrays are compared.
   *
   * @param {Array} arr1 - First array.
   * @param {Array} arr2 - Second array.
   * @returns {number} Count of positions where arr1[i] === arr2[i].
   ***/
  numElementMatch: function (arr1, arr2) {
    var numOfMatches = 0;
    var limit = Math.min(arr1.length, arr2.length);
    for (var index = 0; index < limit; index++) {
      if (arr1[index] === arr2[index]) {
        numOfMatches++;
      }
    }
    return numOfMatches;
  },

  /**
   * Get the number of elements two arrays of strings have in common,
   * regardless of position (multiset intersection): an element that occurs
   * m times in arr1 and n times in arr2 contributes min(m, n).
   *
   * @param {string[]} arr1 - First array.
   * @param {string[]} arr2 - Second array.
   * @returns {number} Size of the multiset intersection.
   ***/
  numSharedElements: function (arr1, arr2) {
    // Prototype-less object so keys such as "constructor" cannot collide
    // with inherited properties.
    var remaining = Object.create(null);
    var index;
    for (index = 0; index < arr1.length; index++) {
      remaining[arr1[index]] = (remaining[arr1[index]] || 0) + 1;
    }
    var shared = 0;
    for (index = 0; index < arr2.length; index++) {
      // Consume one occurrence per match so duplicates are not counted twice.
      if (remaining[arr2[index]] > 0) {
        remaining[arr2[index]]--;
        shared++;
      }
    }
    return shared;
  },

  /**
   * Calculate the similarity of two words based on the Sørensen–Dice
   * coefficient: 2 * |shared bigrams| / (|bigrams of 1| + |bigrams of 2|).
   *
   * Shared bigrams are counted independently of their position, so an
   * insertion or deletion near the start of a word does not wipe out the
   * score. The divisor is never zero because bigramSet always returns at
   * least one element.
   *
   * @param {string} string1 - First word.
   * @param {string} string2 - Second word.
   * @returns {number} Similarity between 0 and 1.
   ***/
  compare: function (string1, string2) {
    var arr1 = this.bigramSet(string1);
    var arr2 = this.bigramSet(string2);
    var numOfMatches = this.numSharedElements(arr1, arr2);
    return (2 * numOfMatches) / (arr1.length + arr2.length);
  }
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment