Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
Levenshtein distance between two given strings implemented in JavaScript and usable as a Node.js module
/*
Copyright (c) 2011 Andrei Mackenzie
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
// Compute the edit distance between the two given strings
exports.getEditDistance = function(a, b){
if(a.length == 0) return b.length;
if(b.length == 0) return a.length;
var matrix = [];
// increment along the first column of each row
var i;
for(i = 0; i <= b.length; i++){
matrix[i] = [i];
}
// increment each column in the first row
var j;
for(j = 0; j <= a.length; j++){
matrix[0][j] = j;
}
// Fill in the rest of the matrix
for(i = 1; i <= b.length; i++){
for(j = 1; j <= a.length; j++){
if(b.charAt(i-1) == a.charAt(j-1)){
matrix[i][j] = matrix[i-1][j-1];
} else {
matrix[i][j] = Math.min(matrix[i-1][j-1] + 1, // substitution
Math.min(matrix[i][j-1] + 1, // insertion
matrix[i-1][j] + 1)); // deletion
}
}
}
return matrix[b.length][a.length];
};
mohsen1 commented Oct 14, 2012

A faster implementation

function levenshteinDistance (s, t) {
    if (!s.length) return t.length;
    if (!t.length) return s.length;

    return Math.min(
        levenshteinDistance(s.substr(1), t) + 1,
        levenshteinDistance(t.substr(1), s) + 1,
        levenshteinDistance(s.substr(1), t.substr(1)) + (s[0] !== t[0] ? 1 : 0)
    ) + 1;
}
mohsen1 commented Oct 14, 2012

A faster implementation

function levenshteinDistance (s, t) {
    if (!s.length) return t.length;
    if (!t.length) return s.length;

    return Math.min(
        levenshteinDistance(s.substr(1), t) + 1,
        levenshteinDistance(t.substr(1), s) + 1,
        levenshteinDistance(s.substr(1), t.substr(1)) + (s[0] !== t[0] ? 1 : 0)
    ) + 1;
}
canassa commented Oct 16, 2012

@mohsen1

Your implementation is not faster: http://jsperf.com/levenshtein-distance-2

Owner

Someone asked me to be explicit about how this code is licensed, so I added an MIT license header.

cowdude commented Mar 6, 2013

Thanks for the implementation. Does wonders, exactly what I needed for my NodeJS script.

Sorry if this is dumb question, but what exactly is the use case?

Owner

@dotnetCarpenter I've used this as part of a simple suggestion system. It works by calculating edit distances between some input string and all known acceptable inputs. If the edit distance of any is within an acceptable range, the system can offer a suggestion - e.g. input 'JvaaScript' - "Did you mean 'JavaScript'?"

anfurny commented Mar 22, 2015

Also mhosen's implementation is incorrect. Provides distance between "house" and "house" as 5, should be 0.

achatha commented Aug 28, 2015

mhosen's implementation is actually computing hamming distance and NOT Levenshtein.

This is great. Thanks!

If I use this in my own MIT licensed github project... then what do I need to do to attribute the code?

bdelespierre commented Jun 8, 2016 edited

the exact same algorithm, condensed for your courtesy:

String.prototype.levenstein = function(string) {
    var a = this, b = string + "", m = [], i, j, min = Math.min;

    if (!(a && b)) return (b || a).length;

    for (i = 0; i <= b.length; m[i] = [i++]);
    for (j = 0; j <= a.length; m[0][j] = j++);

    for (i = 1; i <= b.length; i++) {
        for (j = 1; j <= a.length; j++) {
            m[i][j] = b.charAt(i - 1) == a.charAt(j - 1)
                ? m[i - 1][j - 1]
                : m[i][j] = min(
                    m[i - 1][j - 1] + 1, 
                    min(m[i][j - 1] + 1, m[i - 1 ][j]))
        }
    }

    return m[b.length][a.length];
}

@bdelespierre - Thanks - that is a useful snippet, although there is a slight mistake when compared with the original -- missed the + 1 for the deletion case -- here is your snippet updated with that small fix

String.prototype.levenstein = function(string) {
    var a = this, b = string + "", m = [], i, j, min = Math.min;

    if (!(a && b)) return (b || a).length;

    for (i = 0; i <= b.length; m[i] = [i++]);
    for (j = 0; j <= a.length; m[0][j] = j++);

    for (i = 1; i <= b.length; i++) {
        for (j = 1; j <= a.length; j++) {
            m[i][j] = b.charAt(i - 1) == a.charAt(j - 1)
                ? m[i - 1][j - 1]
                : m[i][j] = min(
                    m[i - 1][j - 1] + 1, 
                    min(m[i][j - 1] + 1, m[i - 1 ][j] + 1))
        }
    }

    return m[b.length][a.length];
}
nahidakbar commented Jun 16, 2016 edited

Cache matrix for even faster performance.

Thanks for sharing :)

rd4k1 commented Aug 19, 2016 edited

Having four loops was really bugging me.
Also, Math.min can take more than two numbers

var levenshtein = function(a, b){
    if(!a || !b) return (a || b).length;
    var m = [];
    for(var i = 0; i <= b.length; i++){
        m[i] = [i];
        if(i === 0) continue;
        for(var j = 0; j <= a.length; j++){
            m[0][j] = j;
            if(j === 0) continue;
            m[i][j] = b.charAt(i - 1) == a.charAt(j - 1) ? m[i - 1][j - 1] : Math.min(
                m[i-1][j-1] + 1,
                m[i][j-1] + 1,
                m[i-1][j] + 1
            );
        }
    }
    return m[b.length][a.length];
};

Here a version that only needs O(min(m,n)) memory, instead of the original's O(m*n):

var levenshtein = function(a, b) {
  if(a.length == 0) return b.length; 
  if(b.length == 0) return a.length;

  // swap to save some memory O(min(a,b)) instead of O(a)
  if(a.length > b.length) {
    var tmp = a;
    a = b;
    b = tmp;
  }

  var row = [];
  // init the row
  for(var i = 0; i <= a.length; i++){
    row[i] = i;
  }

  // fill in the rest
  for(var i = 1; i <= b.length; i++){
    var prev = i;
    for(var j = 1; j <= a.length; j++){
      var val;
      if(b.charAt(i-1) == a.charAt(j-1)){
        val = row[j-1]; // match
      } else {
        val = Math.min(row[j-1] + 1, // substitution
                       prev + 1,     // insertion
                       row[j] + 1);  // deletion
      }
      row[j - 1] = prev;
      prev = val;
    }
    row[a.length] = prev;
  }

  return row[a.length];
}

Runtime should be the same. Code is also licensed as MIT, same as OP.

kigiri commented Nov 26, 2016

I did some test and it turns out caching Math.min or passing it more than 2 arguments was a huge preformance loss (60% slower on V8)
My guess is that v8 has some highly optimised Math.min that takes only 2 args and he isn't detecting it if we don't call min from Math.
Caching the .length of the strings was actualy slower, == vs === made no significative differences.

so after some iterations this was the fastests i was able to get :

const levenshtein = (a, b) => {
  if (a.length === 0) return b.length
  if (b.length === 0) return a.length
  let tmp, i, j, prev, val
  // swap to save some memory O(min(a,b)) instead of O(a)
  if (a.length > b.length) {
    tmp = a
    a = b
    b = tmp
  }

  row = Array(a.length + 1)
  // init the row
  for (i = 0; i <= a.length; i++) {
    row[i] = i
  }

  // fill in the rest
  for (i = 1; i <= b.length; i++) {
    prev = i
    for (j = 1; j <= a.length; j++) {
      if (b[i-1] === a[j-1]) {
        val = row[j-1] // match
      } else {
        val = Math.min(row[j-1] + 1, // substitution
              Math.min(prev + 1,     // insertion
                       row[j] + 1))  // deletion
      }
      row[j - 1] = prev
      prev = val
    }
    row[a.length] = prev
  }
  return row[a.length]
}

Which is mainly just milto-mirdita version with the 2 Math.min, hoisting the variables, and fixed length array Array(a.length + 1)
wierdly enough it does a significative difference.
still MIT

rksm commented Jan 14, 2017

@kigiri

row isn't declared.

I suggest let tmp, i, j, prev, val => let tmp, i, j, prev, val, row;

1db8k commented Feb 9, 2017

Here is the jsperf test of @kigiri's 50% faster solution with @rksm's row variable declaration.

DerekZiemba commented Apr 13, 2017 edited

Here's a JSBench of almost every implementation on this post. I managed to cut another 5% off kigiri's fastest in everything except firefox.
https://jsperf.com/levenshtein-distance-bench/1

Below is the fastest. It's based off of kirgiri's fastest but improves it by caching array lengths and reducing and reusing variables where possible.

function dziemba_levenshtein(a, b){
	var tmp;
	if (a.length === 0) { return b.length; }
	if (b.length === 0) { return a.length; }
	if (a.length > b.length) { tmp = a; a = b; b = tmp; }

	var i, j, res, alen = a.length, blen = b.length, row = Array(alen);
	for (i = 0; i <= alen; i++) { row[i] = i; }

	for (i = 1; i <= blen; i++) {
		res = i;
		for (j = 1; j <= alen; j++) {
			tmp = row[j - 1];
			row[j - 1] = res;
			res = b[i - 1] === a[j - 1] ? tmp : Math.min(tmp + 1, Math.min(res + 1, row[j] + 1));
		}
	}
	return res;
}
mo202 commented Apr 15, 2017

Thanks for all the hard work guys, very helpful!

J0-nas commented Apr 18, 2017

@DerekZiemba

I'm afraid there is a bug in your code. For my example ("badbadnotgood", "s") it returned 2 instead of 13.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment