Skip to content

Instantly share code, notes, and snippets.

@stivio00
Last active February 9, 2022 21:59
Show Gist options
  • Save stivio00/964f1d820d276e0e6f278cce5df13da5 to your computer and use it in GitHub Desktop.
Save stivio00/964f1d820d276e0e6f278cce5df13da5 to your computer and use it in GitHub Desktop.
LINQ method for Edit Operations (inser, delet subst) using Levenshtein algo.
//Levenshtein distance. ->. "HolA".EditDifferences("Hola").Count()
//Optimal edit operations ->. "HolA".EditDifferences("Hola").Reverse()
// returns the list of operations as a 2-tuple (op_string, item)
// where op_string is "+" for insert, "-" deletions and "~" substitution
public static IEnumerable<(string, TSource)> EditDifferences<TSource>(this IEnumerable<TSource> input, IEnumerable<TSource> old){
TSource[] s1 = input.ToArray(), s2 = old.ToArray(); //cache for random access
int[,] matrix = new int[s1.Count()+1, s2.Count()+1];
//Levenshtein matrix
for (int i = 0; i <= s1.Count(); i++) {
for (int j = 0; j <= s2.Count(); j++) {
if (Math.Min(i, j) == 0) matrix[i,j] = Math.Max(i, j);
else
matrix[i,j] = new[] {
matrix[i-1, j] + 1,
matrix[i, j-1] + 1,
matrix[i-1, j-1] + (s1[i-1].Equals(s2[j-1]) ? 0 : 1)
}.Min();
}
}
// Backtracking (non w.)
int ii = input.Count(), jj = old.Count();
while (ii >= 1 && jj >=1){
if (matrix[ii-1,jj-1] < matrix[ii,jj]){
ii--; jj--;
yield return ("~"+s2[jj], s1[ii] );
} else if (matrix[ii,jj-1] < matrix[ii,jj]){
jj--;
yield return ("+", s2[jj]);
} else if (matrix[ii-1,jj] < matrix[ii,jj]){
ii--;
yield return ("-", s1[ii]);
} else if (matrix[ii,jj] == matrix[ii,jj]){
ii--; jj--;
} else {break;}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment