Created
February 14, 2018 10:27
-
-
Save Tusko/dd57ce6777984c3751a51f4836e0e09e to your computer and use it in GitHub Desktop.
ClosestWord.js
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Largest sift3 distance at which closestWord still accepts its best
// candidate; anything farther away is reported as "no match" (false).
var threshold = 3;
/**
 * Approximate distance between two strings using the sift3 algorithm.
 * sift3: http://siderite.blogspot.com/2007/04/super-fast-and-accurate-string-distance.html
 *
 * @param {?string} s1 - First string; null, undefined and '' are treated as empty.
 * @param {?string} s2 - Second string; null, undefined and '' are treated as empty.
 * @returns {number} 0 when both inputs are empty, the other string's length
 *   when exactly one is empty, otherwise the average of the two lengths minus
 *   the number of characters matched during the scan (may be fractional).
 */
function distancer(s1, s2) {
  var isEmpty1 = s1 == null || s1.length === 0;
  var isEmpty2 = s2 == null || s2.length === 0;
  if (isEmpty1) return isEmpty2 ? 0 : s2.length;
  if (isEmpty2) return s1.length;

  var c = 0; // shared cursor advanced once per iteration
  var offset1 = 0; // extra shift applied to s1 after a mismatch
  var offset2 = 0; // extra shift applied to s2 after a mismatch
  var lcs = 0; // number of positions that matched
  var maxOffset = 5; // how far ahead to look for a re-sync point

  while ((c + offset1 < s1.length) && (c + offset2 < s2.length)) {
    if (s1.charAt(c + offset1) === s2.charAt(c + offset2)) {
      lcs++;
    } else {
      // Mismatch: drop the current shifts and look ahead in each string for
      // the character the other string has at the cursor.
      offset1 = 0;
      offset2 = 0;
      for (var i = 0; i < maxOffset; i++) {
        if ((c + i < s1.length) && (s1.charAt(c + i) === s2.charAt(c))) {
          offset1 = i;
          break;
        }
        if ((c + i < s2.length) && (s1.charAt(c) === s2.charAt(c + i))) {
          offset2 = i;
          break;
        }
      }
    }
    c++;
  }

  return (s1.length + s2.length) / 2 - lcs;
}
/**
 * Find the entry of `matches` that is closest to `string`.
 *
 * An entry strictly equal to `string` is returned immediately. Otherwise the
 * entry with the smallest `distancer` value wins (the earliest one on ties),
 * provided that value does not exceed `threshold`.
 *
 * @param {string} string - Word to look up.
 * @param {Array<string>} matches - Candidate words; falsy entries are skipped.
 * @returns {string|boolean} The closest candidate, or false when an argument
 *   is falsy, there is no usable candidate, or the best one is farther away
 *   than `threshold`.
 */
function closestWord(string, matches) {
  if (!string || !matches) return false;

  var distance;
  var match;
  for (var i = 0; i < matches.length; i++) {
    var candidate = matches[i];
    // Skip empty/null entries rather than ending the scan at the first one,
    // so candidates listed after them are still considered.
    if (!candidate) continue;
    if (string === candidate) return string;

    var measurement = distancer(string, candidate);
    // Compare against undefined explicitly so that a best distance of 0 is
    // not mistaken for "nothing recorded yet".
    if (distance === undefined || measurement < distance) {
      distance = measurement;
      match = candidate;
    }
  }

  // No usable candidate, or the best one is too far away.
  if (match === undefined || distance > threshold) return false;
  return match;
}
/*
Usage:
closestWord('dag', ['dog', 'cat', 'bird']); // returns 'dog'
*/
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment