Skip to content

Instantly share code, notes, and snippets.

@Imxset21
Last active August 29, 2015 14:06
Show Gist options
  • Save Imxset21/214a26a39e25bdacebc7 to your computer and use it in GitHub Desktop.
Save Imxset21/214a26a39e25bdacebc7 to your computer and use it in GitHub Desktop.
Portable C99 Implementation of Damerau–Levenshtein Distance Algorithm for Strings
/*
Copyright (c) 2014 Pedro Rittner
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <stdlib.h>
#include <stdio.h>
#include <inttypes.h>
#include <assert.h>
#include <string.h>
#define d(i,j) dd[(i) * (m+2) + (j)]
#define min(x,y) ((x) < (y) ? (x) : (y))
#define min3(a,b,c) ((a) < (b) ? min((a),(c)) : min((b),(c)))
#define min4(a,b,c,d) ((a) < (b) ? min3((a),(c),(d)) : min3((b),(c),(d)))
int64_t dldist2(const char* s, const char* t, int64_t n, int64_t m)
{
int64_t cost = 0;
const int64_t INFINITY = n + m;
// Sanity checks
assert(s != NULL);
assert(t != NULL);
assert(n > 0);
assert(m > 0);
// Allocate our matrices/vectors
int64_t* restrict DA = (int64_t*) calloc(256, sizeof(int64_t));
int64_t* restrict dd = (int64_t*) calloc((n + 2) * (m + 2), sizeof(int64_t));
d(0,0) = INFINITY;
for (int64_t i = 0; i < n + 1; i++)
{
d(i + 1, 1) = i;
d(i + 1, 0) = INFINITY;
}
for (int64_t j = 0; j < m + 1; j++)
{
d(1, j + 1) = j;
d(0, j + 1) = INFINITY;
}
for (int64_t i = 1; i < n + 1; i++)
{
int64_t DB = 0;
for(int64_t j = 1; j < m + 1; j++)
{
int64_t i1 = DA[t[j-1]];
int64_t j1 = DB;
cost = (s[i-1] == t[j-1]) ? 0 : 1;
if(cost==0)
{
DB = j;
}
d(i+1, j+1) =
min4(d(i,j)+cost,
d(i + 1, j) + 1,
d(i, j + 1) + 1,
d(i1, j1) + (i - i1 - 1) + 1 + (j - j1 -1));
}
DA[s[i-1]] = i;
}
// Final cost is edge of matrix
cost = d(n + 1, m + 1);
free(dd);
free(DA);
return cost;
}
int main()
{
const char* str1 = "sitting";
const char* str2 = "kitten";
const int64_t cost = dldist2(str1, str2, strlen(str1), strlen(str2));
assert(cost == 3); // Actual cost: see https://en.wikipedia.org/wiki/Levenshtein_distance#Example
printf("cost: %" PRId64 "\n", cost);
return EXIT_SUCCESS;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment