Skip to content

Instantly share code, notes, and snippets.

@superlucky8848
Last active November 2, 2015 12:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save superlucky8848/20a490c7865e2f41f41d to your computer and use it in GitHub Desktop.
Save superlucky8848/20a490c7865e2f41f41d to your computer and use it in GitHub Desktop.
Compair two List<string> find minium changes from source to target. (Add, remove and modify count as a change), return one of the optimal change steps.
using System;
using System.Collections.Generic;
using System.Text;
namespace ProofReadingCorrect
{
enum EditType
{
NONE = 0,
CHANGE = 1,
ADD = 2,
REMOVE = 3
}
class EditLog
{
public EditType editType; //编辑类型
public int fromId; //原串编辑位置(用于CHANGE,ADD,REMOVE)
public int toId; //目标串编辑位置(用于CHANEG,ADD)
public EditLog(EditType _editType, int _formId, int _toId)
{
editType = _editType;
fromId = _formId;
toId = _toId;
}
}
class StringDistance
{
/// <summary>
/// Get minium change distance from source to target.(Use Lavenshtein Distance Algorithm)
/// </summary>
/// <param name="source">Source String</param>
/// <param name="target">Target String</param>
/// <returns>min distance.</returns>
public static int GetMinDistanceStr(string source, string target)
{
if (source.Length == 0) return target.Length;
if (target.Length == 0) return source.Length;
int[,] dist = new int[source.Length + 1, target.Length + 1];
for (int i = 1; i <= source.Length; i++) dist[i, 0] = i;
for (int j = 1; j <= target.Length; ++j) dist[0, j] = j;
for (int j = 1; j <= target.Length; ++j)
for (int i = 1; i <= source.Length; ++i)
{
if (source[i - 1] == target[j - 1])
dist[i, j] = dist[i - 1, j - 1];
else //dist[i,j]=min(dist[i-1,j],dist[i,j-1],dist[i-1,j-1]);
{
dist[i, j] = dist[i - 1, j - 1] + 1;
if (dist[i, j] > dist[i - 1, j] + 1) dist[i, j] = dist[i - 1, j] + 1;
if (dist[i, j] > dist[i, j - 1] + 1) dist[i, j] = dist[i, j - 1] + 1;
}
}
return dist[source.Length, target.Length];
}
/// <summary>
/// Get minium change distance from source to target.(Use Lavenshtein Distance Algorithm)
/// </summary>
/// <param name="source">Source String</param>
/// <param name="target">Target String</param>
/// <returns>The steps of changes, it's from bigger index to smaller index so no need to recalculate index offset</returns>
public static List<EditLog> GetMinChangeStr(string source, string target)
{
List<EditLog> editLog = new List<EditLog>();
int[,] dist = new int[source.Length + 1, target.Length + 1];
EditLog[,] log = new EditLog[source.Length + 1, target.Length + 1];
dist[0, 0] = 0;
log[0, 0] = new EditLog(EditType.NONE, 0, 0);
for (int i = 1; i <= source.Length; i++)
{
dist[i, 0] = i;
log[i, 0] = new EditLog(EditType.REMOVE, i - 1, 0);
}
for (int j = 1; j <= target.Length; j++)
{
dist[0, j] = j;
log[0, j] = new EditLog(EditType.ADD, 0, j - 1);
}
for (int j = 1; j <= target.Length; j++)
for (int i = 1; i <= source.Length; i++)
{
if (source[i - 1] == target[j - 1])
{
dist[i, j] = dist[i - 1, j - 1];
log[i, j] = new EditLog(EditType.NONE, i - 1, j - 1);
}
else
{
int da = dist[i, j - 1] + 1; //Distange for add
int dc = dist[i - 1, j - 1] + 1;//Distance for change
int dr = dist[i - 1, j] + 1; //Distange for remove
if (dc <= da && dc <= dr)
{
dist[i, j] = dc;
log[i, j] = new EditLog(EditType.CHANGE, i - 1, j - 1);
}
else if (dr <= da && dr <= dc)
{
dist[i, j] = dr;
log[i, j] = new EditLog(EditType.REMOVE, i - 1, j - 1);
}
else
{
dist[i, j] = da;
log[i, j] = new EditLog(EditType.ADD, i - 1, j - 1);
}
}
}
int m = source.Length;
int n = target.Length;
while (m > 0 || n > 0)
{
switch (log[m, n].editType)
{
case EditType.NONE:
m--;
n--;
break;
case EditType.CHANGE:
editLog.Add(log[m, n]);
m--;
n--;
break;
case EditType.ADD:
editLog.Add(log[m, n]);
n--;
break;
case EditType.REMOVE:
editLog.Add(log[m, n]);
m--;
break;
}
}
return editLog;
}
/// <summary>
/// Get minium change distance from source to target.(Use Lavenshtein Distance Algorithm)
/// </summary>
/// <param name="source">souce string list</param>
/// <param name="target">target string list</param>
/// <returns>min distance.</returns>
public static int GetMinDistanceList(List<string> source, List<string> target)
{
source.Insert(0, ""); //Add an empty string in the begining, so i will start with 1
target.Insert(0, ""); //Add an empty string in the begining, so j will start with 1
int[,] dist = new int[source.Count, target.Count];
for (int i = 1; i < source.Count; ++i) dist[i, 0] = i;
for (int j = 1; j < target.Count; ++j) dist[0, j] = j;
for (int j = 1; j < target.Count; ++j)
for (int i = 1; i < source.Count; ++i)
{
if (source[i] == target[j])
dist[i, j] = dist[i - 1, j - 1];
else //dist[i,j]=min(dist[i-1,j],dist[i,j-1],dist[i-1,j-1]);
{
dist[i, j] = dist[i - 1, j - 1] + 1;
if (dist[i, j] > dist[i - 1, j] + 1) dist[i, j] = dist[i - 1, j] + 1;
if (dist[i, j] > dist[i, j - 1] + 1) dist[i, j] = dist[i, j - 1] + 1;
}
}
return dist[source.Count, target.Count];
}
/// <summary>
/// Get one of the optimal changes with minium change distance from source to target.(Use Lavenshtein Distance Algorithm);
/// </summary>
/// <param name="source">souce string list</param>
/// <param name="target">target string list</param>
/// <returns>The steps of changes, it's from bigger index to smaller index so no need to recalculate index offset</returns>
public static List<EditLog> GetMinChangeList(List<string> source, List<string> target)
{
source.Insert(0, ""); //Add an empty string in the begining, so i will start with 1
target.Insert(0, ""); //Add an empty string in the begining, so j will start with 1
List<EditLog> editLog = new List<EditLog>();
int[,] dist = new int[source.Count, target.Count];
EditLog[,] log = new EditLog[source.Count, target.Count];
dist[0, 0] = 0;
log[0, 0] = new EditLog(EditType.NONE, 0, 0);
for (int i = 1; i < source.Count; i++)
{
dist[i, 0] = i;
log[i, 0] = new EditLog(EditType.REMOVE, i - 1, 0);
}
for (int j = 1; j < target.Count; j++)
{
dist[0, j] = j;
log[0, j] = new EditLog(EditType.ADD, 0, j - 1);
}
for (int j = 1; j < target.Count; j++)
for (int i = 1; i < source.Count; i++)
{
if (source[i] == target[j])
{
dist[i, j] = dist[i - 1, j - 1];
log[i, j] = new EditLog(EditType.NONE, i - 1, j - 1);
}
else
{
int da = dist[i, j - 1] + 1; //Distange for add
int dc = dist[i - 1, j - 1] + 1;//Distance for change
int dr = dist[i - 1, j] + 1; //Distange for remove
if (dc <= da && dc <= dr)
{
dist[i, j] = dc;
log[i, j] = new EditLog(EditType.CHANGE, i - 1, j - 1);
}
else if (dr <= da && dr <= dc)
{
dist[i, j] = dr;
log[i, j] = new EditLog(EditType.REMOVE, i - 1, j - 1);
}
else
{
dist[i, j] = da;
log[i, j] = new EditLog(EditType.ADD, i - 1, j - 1);
}
}
}
int m = source.Count - 1;
int n = target.Count - 1;
while (m > 0 || n > 0)
{
switch (log[m, n].editType)
{
case EditType.NONE:
m--;
n--;
break;
case EditType.CHANGE:
editLog.Add(log[m, n]);
m--;
n--;
break;
case EditType.ADD:
editLog.Add(log[m, n]);
n--;
break;
case EditType.REMOVE:
editLog.Add(log[m, n]);
m--;
break;
}
}
source.RemoveAt(0); //Remove those empty strings inserted at the begining of the function.
target.RemoveAt(0);
return editLog;
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment