Last active
May 4, 2021 17:52
-
-
Save gandroz/19b39b7240aec08bd92c7a06f2174107 to your computer and use it in GitHub Desktop.
Levenshtein distance matrix construction
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdlib.h> | |
#include <stdio.h> | |
#include <string.h> | |
#define MIN3(a, b, c) ((a) < (b) ? ((a) < (c) ? (a) : (c)) : ((b) < (c) ? (b) : (c))) | |
int* min_edit_distance_tab(char *source, char *target, int ins_cost, int del_cost, int rep_cost) | |
{ | |
/* | |
Input: | |
source: a string corresponding to the string you are starting with | |
target: a string corresponding to the string you want to end with | |
ins_cost: an integer setting the insert cost | |
del_cost: an integer setting the delete cost | |
rep_cost: an integer setting the replace cost | |
Output: | |
D: a matrix of len(source)+1 by len(target)+1 containing minimum edit distances | |
*/ | |
// use deletion and insert cost as 1 | |
int nb_row = strlen(source)+1; | |
int nb_col = strlen(target)+1; | |
int row, col; | |
int r_cost; | |
int replace, delete, insert; | |
// distance matrix | |
int D[nb_row * nb_col]; | |
// initialize cost matrix | |
D[0] = 0; | |
for (row=1; row<nb_row; row++) | |
D[row*nb_col] = D[(row-1)*nb_col] + del_cost; | |
for (col=1; col<nb_col; col++) | |
D[col] = D[col-1] + ins_cost; | |
for (row=1; row<nb_row; row++) | |
for (col=1; col<nb_col; col++) | |
{ | |
// Intialize r_cost to the 'replace' cost that is passed into this function | |
r_cost = rep_cost; | |
// Check to see if source character at the previous row | |
// matches the target character at the previous column, | |
if(source[row-1] == target[col-1]) | |
{ | |
// Update the replacement cost to 0 if source and target are the same | |
r_cost = 0; | |
} | |
// Update the cost at row, col based on previous entries in the cost matrix | |
// Refer to the equation calculate for D[i,j] (the minimum of three calculated costs) | |
replace = D[(row-1)*nb_col + col-1] + r_cost; | |
delete = D[(row-1)*nb_col + col] + del_cost; | |
insert = D[row*nb_col + col-1] + ins_cost; | |
D[row*nb_col + col] = MIN3(replace, delete, insert); | |
} | |
return D; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment