Skip to content

Instantly share code, notes, and snippets.

@Techcable
Last active May 7, 2019 22:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save Techcable/5b8729666eaef878534ea490e624ad84 to your computer and use it in GitHub Desktop.
Save Techcable/5b8729666eaef878534ea490e624ad84 to your computer and use it in GitHub Desktop.
Javascript Damerau Levenshtein Similarity metrics
function damerau_levenshtein(first, second) {
"use strict";
// TODO: Support non-BMP characters (like that's ever going to happen)
if (first == second) return 0;
var firstLen = first.length;
var secondLen = second.length;
if (firstLen == 0) return secondLen;
if (secondLen == 0) return firstLen;
var distances = [];
for (var i = 0; i < firstLen + 2; i++) {
distances.push(Array(secondLen + 2).fill(0));
}
const maxDistance = firstLen + secondLen;
distances[0][0] = maxDistance;
for (var i = 0; i < firstLen + 1; i++) {
distances[i + 1][0] = maxDistance;
distances[i + 1][1] = i;
}
for (var j = 0; j < secondLen + 1; j++) {
distances[0][j + 1] = maxDistance;
distances[1][j + 1] = j;
}
var chars = new Map();
for (var i = 1; i < firstLen + 1; i++) {
var db = 0;
for (var j = 1; j < secondLen + 1; j++) {
var k = chars.get(second.charAt(j - 1));
if (typeof k == 'undefined') {
k = 0;
}
const l = db;
var cost = 1;
if (first[i - 1] == second[j - 1]) {
cost = 0;
db = j;
}
const substitutionCost = distances[i][j] + cost;
const insertionCost = distances[i][j + 1] + 1;
const deletionCost = distances[i + 1][j] + 1;
const transpositionCost = distances[k][l] +
(i - k -1) + 1 + (j - l - 1);
distances[i + 1][j + 1] = Math.min(
substitutionCost,
insertionCost,
deletionCost,
transpositionCost
);
}
chars.set(first[i - 1], i);
}
return distances[firstLen + 1][secondLen + 1];
}
console.assert(damerau_levenshtein("abcdefghijkl", "bacedfgihjlk") == 4);
console.assert(damerau_levenshtein("ab", "a") == 1);
console.assert(damerau_levenshtein("The quick brown fox jumped over the angry dog.", "Lehem ipsum dolor sit amet, dicta latine an eam.") == 36)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment