Last active
October 5, 2018 22:05
-
-
Save Rostepher/818fd0ce41d0c18154ae to your computer and use it in GitHub Desktop.
A working implementation of the Damerau-Levenshtein distance in C.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// based on the implementation found at: | |
// https://stackoverflow.com/questions/10727174/damerau-levenshtein-distance-edit-distance-with-transposition-c-implementation | |
// | |
// still deciphering the alogrithm from the cryptic vairable names, hopefully thie implementation below | |
// is easier to read and understand. | |
#include <assert.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#define EQ(a, b) ((a) == (b)) | |
#define MIN(a, b) ((a) < (b)) ? (a) : (b) | |
#define MIN4(a, b, c, d) MIN(MIN(a, b), MIN(c, d)) | |
static void print_matrix(unsigned **matrix, size_t m_rows, size_t m_cols); | |
/** | |
* Calculates and returns the Damerau-Levenshtein distance of two non NULL | |
* strings. | |
* | |
* @param str1 first non NULL string | |
* @param str2 second non NULL string | |
* @param alpha_size size of the alphabet | |
* | |
* @returns Damerau-Levenshtein distance of str1 and str2 | |
*/ | |
unsigned damerau_levenshtein(const char *str1, | |
const char *str2, | |
const unsigned alpha_size) | |
{ | |
// strings cannot be NULL | |
assert(str1 != NULL); | |
assert(str2 != NULL); | |
// calculate size of strings | |
size_t str1_len = strlen(str1); | |
size_t str2_len = strlen(str2); | |
// handle cases where one or both strings are empty | |
if (str1_len == 0) | |
return str2_len; | |
if (str2_len == 0) | |
return str1_len; | |
const unsigned INFINITY = str1_len + str2_len; | |
unsigned row, col; | |
// create "dictionary" | |
unsigned *dict = calloc(alpha_size, sizeof(unsigned)); | |
size_t m_rows = str1_len + 2; // matrix rows | |
size_t m_cols = str2_len + 2; // matrix cols | |
// matrix to hold computed values | |
unsigned **matrix = malloc(m_rows * sizeof(unsigned *)); | |
for (unsigned i = 0; i < m_rows; i++) | |
matrix[i] = calloc(m_cols, sizeof(unsigned)); | |
print_matrix(matrix, m_rows, m_cols); | |
// set all the starting values and add all characters to the dict | |
matrix[0][0] = INFINITY; | |
for (row = 1; row < m_rows; row++) { | |
matrix[row][0] = INFINITY; | |
matrix[row][1] = row - 1; | |
} | |
for (col = 1; col < m_cols; col++) { | |
matrix[0][col] = INFINITY; | |
matrix[1][col] = col - 1; | |
} | |
print_matrix(matrix, m_rows, m_cols); | |
unsigned db; // no idea what this is | |
unsigned i, k; // also no idea what these are | |
unsigned cost; | |
// fill in the matrix | |
for (row = 1; row <= str1_len; row++) { | |
db = 0; | |
for (col = 1; col <= str2_len; col++) { | |
i = dict[(unsigned) str2[col - 1]]; | |
k = db; | |
cost = EQ(str1[row - 1], str2[col - 1]) ? 0 : 1; | |
if (cost == 0) | |
db = col; | |
matrix[row + 1][col + 1] = MIN4( | |
matrix[row][col] + cost, | |
matrix[row + 1][col] + 1, | |
matrix[row][col + 1] + 1, | |
matrix[i][k] + (row - i - 1) + (col - k - 1) + 1 | |
); | |
print_matrix(matrix, m_rows, m_cols); | |
} | |
dict[(unsigned) str1[row - 1]] = row; | |
} | |
unsigned result = matrix[m_rows - 1][m_cols - 1]; | |
// free dict | |
free(dict); | |
// free matrix | |
for (unsigned i = 0; i < m_rows; i++) | |
free(matrix[i]); | |
free(matrix); | |
return result; | |
} | |
#include <stdio.h> | |
static void print_matrix(unsigned **matrix, size_t m_rows, size_t m_cols) | |
{ | |
unsigned i, k; | |
for (i = 0; i < m_rows; i++) { | |
for (k = 0; k < m_cols; k++) { | |
printf("%02d ", matrix[i][k]); | |
} | |
printf("\n"); | |
} | |
printf("\n"); | |
} | |
#define ALPHABET_SIZE 256 | |
int main(int argc, char **argv) | |
{ | |
assert(argc > 2); | |
char *str1 = argv[1]; | |
char *str2 = argv[2]; | |
printf("DL(\"%s\", \"%s\") = %d\n", str1, str2, | |
damerau_levenshtein(str1, str2, ALPHABET_SIZE)); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment