Skip to content

Instantly share code, notes, and snippets.

@Joopmicroop
Last active June 1, 2018 16:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Joopmicroop/770ab436806da9476c9ced0d0c91960e to your computer and use it in GitHub Desktop.
Save Joopmicroop/770ab436806da9476c9ced0d0c91960e to your computer and use it in GitHub Desktop.
compare sentences / equality fitness function
/**
 * Scores how similar two strings are by comparing their tokens as multisets.
 *
 * The strings are split into tokens (single characters by default, or
 * space-separated words), the number of tokens common to both is counted
 * (duplicates count once per pairing), and two coefficients are derived:
 *   - Sorensen-Dice: 2 * matchCount / (tokens in str0 + tokens in str1)
 *   - Jaccard index: dice / (2 - dice)
 *
 * @param {string} str0 - First string to compare.
 * @param {string} str1 - Second string to compare.
 * @param {Object} [opt] - Optional settings.
 * @param {boolean} [opt.caseSensitive] - Pass false to lower-case both
 *   strings before comparing; any other value keeps the comparison
 *   case-sensitive.
 * @param {string} [opt.split] - Pass 'word' to tokenize on a single space;
 *   any other value tokenizes per character (UTF-16 code unit).
 * @returns {{matchCount: number, matches: Object, sorensenDice: number, jaccardIndex: number}}
 *   matchCount is the number of paired tokens, matches maps each paired
 *   token to how many times it was paired, and both coefficients lie in
 *   [0, 1]. Two inputs that both produce no tokens score 1.
 */
function calculateStringFitness(str0,str1,opt){
    // Increments counts[key] looking only at own properties, so tokens such as
    // "constructor" or "__proto__" are counted like any other token instead of
    // colliding with members inherited from Object.prototype.
    function incrementCount(counts, key){
        var current = Object.prototype.hasOwnProperty.call(counts, key) ? counts[key] : 0;
        Object.defineProperty(counts, key, {
            value: current + 1,
            writable: true,
            enumerable: true,
            configurable: true
        });
    }

    // Counts the multiset intersection of two token arrays with a merge-style
    // walk over sorted copies of them.
    function countMatches(a0, a1){
        // Sort copies so the caller's arrays are left untouched.
        var sorted0 = a0.slice().sort();
        var sorted1 = a1.slice().sort();
        var matches = 0;
        var pos0 = 0;
        var pos1 = 0;
        var matchObj = {};
        // A pre-checked loop: an empty token list must not be inspected at all,
        // otherwise undefined === undefined would be counted as a match.
        while ( pos0 < sorted0.length && pos1 < sorted1.length ) {
            var token0 = sorted0[pos0];
            var token1 = sorted1[pos1];
            if (token0 === token1) {
                incrementCount(matchObj, token0);
                matches++; pos0++; pos1++;
            }
            // String "<" orders by UTF-16 code units, the same order the
            // default sort() above produced, so the walk stays in step.
            else if (token0 < token1) pos0++;
            else pos1++;
        }
        return {
            matchCount:matches,
            matches:matchObj
        };
    }

    // Tokenizes both strings and computes the Sorensen-Dice coefficient.
    function sorensenDice(str0, str1, opt){
        // Loose comparison kept on purpose: callers that pass 0 instead of
        // false keep getting case-insensitive matching.
        if(opt && opt.caseSensitive == false){
            str0 = str0.toLowerCase();
            str1 = str1.toLowerCase();
        }
        var splitChar = (opt && opt.split == 'word') ? ' ':'';
        var a0 = str0.split(splitChar);
        var a1 = str1.split(splitChar);
        var match = countMatches(a0, a1);
        var totalTokens = a0.length + a1.length;
        // No tokens on either side means the inputs are identical (both empty);
        // score 1 rather than dividing by zero. This agrees with word mode,
        // where two empty strings each yield one empty token and score 1.
        var dsc = (totalTokens === 0) ? 1 : (2.0*(match.matchCount))/totalTokens;
        return {
            matchCount: match.matchCount,
            matches: match.matches,
            simFactor: dsc
        };
    }

    // calculate sorensenDice aka match coefficient
    // https://en.wikipedia.org/wiki/S%C3%B8rensen%E2%80%93Dice_coefficient
    var sd = sorensenDice(str0,str1,opt);
    // calculate jaccardIndex from sorensenDice aka overlap coefficient
    // https://en.wikipedia.org/wiki/Jaccard_index
    // simFactor never exceeds 1, so the denominator is always >= 1.
    var ji = sd.simFactor/(2-sd.simFactor);
    return {
        matchCount: sd.matchCount,
        matches: sd.matches,
        sorensenDice: sd.simFactor,
        jaccardIndex: ji
    };
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment