Last active
November 2, 2015 12:55
-
-
Save superlucky8848/20a490c7865e2f41f41d to your computer and use it in GitHub Desktop.
Compair two List<string> find minium changes from source to target. (Add, remove and modify count as a change), return one of the optimal change steps.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.Text; | |
namespace ProofReadingCorrect | |
{ | |
enum EditType | |
{ | |
NONE = 0, | |
CHANGE = 1, | |
ADD = 2, | |
REMOVE = 3 | |
} | |
class EditLog | |
{ | |
public EditType editType; //编辑类型 | |
public int fromId; //原串编辑位置(用于CHANGE,ADD,REMOVE) | |
public int toId; //目标串编辑位置(用于CHANEG,ADD) | |
public EditLog(EditType _editType, int _formId, int _toId) | |
{ | |
editType = _editType; | |
fromId = _formId; | |
toId = _toId; | |
} | |
} | |
class StringDistance | |
{ | |
/// <summary> | |
/// Get minium change distance from source to target.(Use Lavenshtein Distance Algorithm) | |
/// </summary> | |
/// <param name="source">Source String</param> | |
/// <param name="target">Target String</param> | |
/// <returns>min distance.</returns> | |
public static int GetMinDistanceStr(string source, string target) | |
{ | |
if (source.Length == 0) return target.Length; | |
if (target.Length == 0) return source.Length; | |
int[,] dist = new int[source.Length + 1, target.Length + 1]; | |
for (int i = 1; i <= source.Length; i++) dist[i, 0] = i; | |
for (int j = 1; j <= target.Length; ++j) dist[0, j] = j; | |
for (int j = 1; j <= target.Length; ++j) | |
for (int i = 1; i <= source.Length; ++i) | |
{ | |
if (source[i - 1] == target[j - 1]) | |
dist[i, j] = dist[i - 1, j - 1]; | |
else //dist[i,j]=min(dist[i-1,j],dist[i,j-1],dist[i-1,j-1]); | |
{ | |
dist[i, j] = dist[i - 1, j - 1] + 1; | |
if (dist[i, j] > dist[i - 1, j] + 1) dist[i, j] = dist[i - 1, j] + 1; | |
if (dist[i, j] > dist[i, j - 1] + 1) dist[i, j] = dist[i, j - 1] + 1; | |
} | |
} | |
return dist[source.Length, target.Length]; | |
} | |
/// <summary> | |
/// Get minium change distance from source to target.(Use Lavenshtein Distance Algorithm) | |
/// </summary> | |
/// <param name="source">Source String</param> | |
/// <param name="target">Target String</param> | |
/// <returns>The steps of changes, it's from bigger index to smaller index so no need to recalculate index offset</returns> | |
public static List<EditLog> GetMinChangeStr(string source, string target) | |
{ | |
List<EditLog> editLog = new List<EditLog>(); | |
int[,] dist = new int[source.Length + 1, target.Length + 1]; | |
EditLog[,] log = new EditLog[source.Length + 1, target.Length + 1]; | |
dist[0, 0] = 0; | |
log[0, 0] = new EditLog(EditType.NONE, 0, 0); | |
for (int i = 1; i <= source.Length; i++) | |
{ | |
dist[i, 0] = i; | |
log[i, 0] = new EditLog(EditType.REMOVE, i - 1, 0); | |
} | |
for (int j = 1; j <= target.Length; j++) | |
{ | |
dist[0, j] = j; | |
log[0, j] = new EditLog(EditType.ADD, 0, j - 1); | |
} | |
for (int j = 1; j <= target.Length; j++) | |
for (int i = 1; i <= source.Length; i++) | |
{ | |
if (source[i - 1] == target[j - 1]) | |
{ | |
dist[i, j] = dist[i - 1, j - 1]; | |
log[i, j] = new EditLog(EditType.NONE, i - 1, j - 1); | |
} | |
else | |
{ | |
int da = dist[i, j - 1] + 1; //Distange for add | |
int dc = dist[i - 1, j - 1] + 1;//Distance for change | |
int dr = dist[i - 1, j] + 1; //Distange for remove | |
if (dc <= da && dc <= dr) | |
{ | |
dist[i, j] = dc; | |
log[i, j] = new EditLog(EditType.CHANGE, i - 1, j - 1); | |
} | |
else if (dr <= da && dr <= dc) | |
{ | |
dist[i, j] = dr; | |
log[i, j] = new EditLog(EditType.REMOVE, i - 1, j - 1); | |
} | |
else | |
{ | |
dist[i, j] = da; | |
log[i, j] = new EditLog(EditType.ADD, i - 1, j - 1); | |
} | |
} | |
} | |
int m = source.Length; | |
int n = target.Length; | |
while (m > 0 || n > 0) | |
{ | |
switch (log[m, n].editType) | |
{ | |
case EditType.NONE: | |
m--; | |
n--; | |
break; | |
case EditType.CHANGE: | |
editLog.Add(log[m, n]); | |
m--; | |
n--; | |
break; | |
case EditType.ADD: | |
editLog.Add(log[m, n]); | |
n--; | |
break; | |
case EditType.REMOVE: | |
editLog.Add(log[m, n]); | |
m--; | |
break; | |
} | |
} | |
return editLog; | |
} | |
/// <summary> | |
/// Get minium change distance from source to target.(Use Lavenshtein Distance Algorithm) | |
/// </summary> | |
/// <param name="source">souce string list</param> | |
/// <param name="target">target string list</param> | |
/// <returns>min distance.</returns> | |
public static int GetMinDistanceList(List<string> source, List<string> target) | |
{ | |
source.Insert(0, ""); //Add an empty string in the begining, so i will start with 1 | |
target.Insert(0, ""); //Add an empty string in the begining, so j will start with 1 | |
int[,] dist = new int[source.Count, target.Count]; | |
for (int i = 1; i < source.Count; ++i) dist[i, 0] = i; | |
for (int j = 1; j < target.Count; ++j) dist[0, j] = j; | |
for (int j = 1; j < target.Count; ++j) | |
for (int i = 1; i < source.Count; ++i) | |
{ | |
if (source[i] == target[j]) | |
dist[i, j] = dist[i - 1, j - 1]; | |
else //dist[i,j]=min(dist[i-1,j],dist[i,j-1],dist[i-1,j-1]); | |
{ | |
dist[i, j] = dist[i - 1, j - 1] + 1; | |
if (dist[i, j] > dist[i - 1, j] + 1) dist[i, j] = dist[i - 1, j] + 1; | |
if (dist[i, j] > dist[i, j - 1] + 1) dist[i, j] = dist[i, j - 1] + 1; | |
} | |
} | |
return dist[source.Count, target.Count]; | |
} | |
/// <summary> | |
/// Get one of the optimal changes with minium change distance from source to target.(Use Lavenshtein Distance Algorithm); | |
/// </summary> | |
/// <param name="source">souce string list</param> | |
/// <param name="target">target string list</param> | |
/// <returns>The steps of changes, it's from bigger index to smaller index so no need to recalculate index offset</returns> | |
public static List<EditLog> GetMinChangeList(List<string> source, List<string> target) | |
{ | |
source.Insert(0, ""); //Add an empty string in the begining, so i will start with 1 | |
target.Insert(0, ""); //Add an empty string in the begining, so j will start with 1 | |
List<EditLog> editLog = new List<EditLog>(); | |
int[,] dist = new int[source.Count, target.Count]; | |
EditLog[,] log = new EditLog[source.Count, target.Count]; | |
dist[0, 0] = 0; | |
log[0, 0] = new EditLog(EditType.NONE, 0, 0); | |
for (int i = 1; i < source.Count; i++) | |
{ | |
dist[i, 0] = i; | |
log[i, 0] = new EditLog(EditType.REMOVE, i - 1, 0); | |
} | |
for (int j = 1; j < target.Count; j++) | |
{ | |
dist[0, j] = j; | |
log[0, j] = new EditLog(EditType.ADD, 0, j - 1); | |
} | |
for (int j = 1; j < target.Count; j++) | |
for (int i = 1; i < source.Count; i++) | |
{ | |
if (source[i] == target[j]) | |
{ | |
dist[i, j] = dist[i - 1, j - 1]; | |
log[i, j] = new EditLog(EditType.NONE, i - 1, j - 1); | |
} | |
else | |
{ | |
int da = dist[i, j - 1] + 1; //Distange for add | |
int dc = dist[i - 1, j - 1] + 1;//Distance for change | |
int dr = dist[i - 1, j] + 1; //Distange for remove | |
if (dc <= da && dc <= dr) | |
{ | |
dist[i, j] = dc; | |
log[i, j] = new EditLog(EditType.CHANGE, i - 1, j - 1); | |
} | |
else if (dr <= da && dr <= dc) | |
{ | |
dist[i, j] = dr; | |
log[i, j] = new EditLog(EditType.REMOVE, i - 1, j - 1); | |
} | |
else | |
{ | |
dist[i, j] = da; | |
log[i, j] = new EditLog(EditType.ADD, i - 1, j - 1); | |
} | |
} | |
} | |
int m = source.Count - 1; | |
int n = target.Count - 1; | |
while (m > 0 || n > 0) | |
{ | |
switch (log[m, n].editType) | |
{ | |
case EditType.NONE: | |
m--; | |
n--; | |
break; | |
case EditType.CHANGE: | |
editLog.Add(log[m, n]); | |
m--; | |
n--; | |
break; | |
case EditType.ADD: | |
editLog.Add(log[m, n]); | |
n--; | |
break; | |
case EditType.REMOVE: | |
editLog.Add(log[m, n]); | |
m--; | |
break; | |
} | |
} | |
source.RemoveAt(0); //Remove those empty strings inserted at the begining of the function. | |
target.RemoveAt(0); | |
return editLog; | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment