Last active
February 9, 2022 21:59
-
-
Save stivio00/964f1d820d276e0e6f278cce5df13da5 to your computer and use it in GitHub Desktop.
LINQ method for Edit Operations (inser, delet subst) using Levenshtein algo.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//Levenshtein distance. ->. "HolA".EditDifferences("Hola").Count() | |
//Optimal edit operations ->. "HolA".EditDifferences("Hola").Reverse() | |
// returns the list of operations as a 2-tuple (op_string, item) | |
// where op_string is "+" for insert, "-" deletions and "~" substitution | |
public static IEnumerable<(string, TSource)> EditDifferences<TSource>(this IEnumerable<TSource> input, IEnumerable<TSource> old){ | |
TSource[] s1 = input.ToArray(), s2 = old.ToArray(); //cache for random access | |
int[,] matrix = new int[s1.Count()+1, s2.Count()+1]; | |
//Levenshtein matrix | |
for (int i = 0; i <= s1.Count(); i++) { | |
for (int j = 0; j <= s2.Count(); j++) { | |
if (Math.Min(i, j) == 0) matrix[i,j] = Math.Max(i, j); | |
else | |
matrix[i,j] = new[] { | |
matrix[i-1, j] + 1, | |
matrix[i, j-1] + 1, | |
matrix[i-1, j-1] + (s1[i-1].Equals(s2[j-1]) ? 0 : 1) | |
}.Min(); | |
} | |
} | |
// Backtracking (non w.) | |
int ii = input.Count(), jj = old.Count(); | |
while (ii >= 1 && jj >=1){ | |
if (matrix[ii-1,jj-1] < matrix[ii,jj]){ | |
ii--; jj--; | |
yield return ("~"+s2[jj], s1[ii] ); | |
} else if (matrix[ii,jj-1] < matrix[ii,jj]){ | |
jj--; | |
yield return ("+", s2[jj]); | |
} else if (matrix[ii-1,jj] < matrix[ii,jj]){ | |
ii--; | |
yield return ("-", s1[ii]); | |
} else if (matrix[ii,jj] == matrix[ii,jj]){ | |
ii--; jj--; | |
} else {break;} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment