Skip to content

Instantly share code, notes, and snippets.

@windwp
Last active November 27, 2015 14:37
Show Gist options
  • Save windwp/b80cf8669b65f4cd74d5 to your computer and use it in GitHub Desktop.
Save windwp/b80cf8669b65f4cd74d5 to your computer and use it in GitHub Desktop.
Loại bỏ dấu tiếng Việt
/// <summary>
/// Helpers for stripping Vietnamese diacritics ("signs") from text, e.g. turning
/// "Tiếng Việt" into "Tieng Viet".
/// </summary>
/// <remarks>
/// Type names are fully qualified so the class compiles regardless of which
/// <c>using</c> directives the containing file declares.
/// </remarks>
public class StringUtil
{
    // Row 0 holds the plain replacement letters. Every following row i holds the
    // accented variants that collapse to VietnameseSigns[0][i - 1].
    private static readonly string[] VietnameseSigns = new string[]
    {
        "aAeEoOuUiIdDyY",
        "áàạảãâấầậẩẫăắằặẳẵ",
        "ÁÀẠẢÃÂẤẦẬẨẪĂẮẰẶẲẴ",
        "éèẹẻẽêếềệểễ",
        "ÉÈẸẺẼÊẾỀỆỂỄ",
        "óòọỏõôốồộổỗơớờợởỡ",
        "ÓÒỌỎÕÔỐỒỘỔỖƠỚỜỢỞỠ",
        "úùụủũưứừựửữ",
        "ÚÙỤỦŨƯỨỪỰỬỮ",
        "íìịỉĩ",
        "ÍÌỊỈĨ",
        "đ",
        "Đ",
        "ýỳỵỷỹ",
        "ÝỲỴỶỸ"
    };

    // Accented character -> plain letter lookup, built once from VietnameseSigns.
    // Must be declared after VietnameseSigns: static field initializers run in
    // textual order.
    private static readonly System.Collections.Generic.Dictionary<char, char> SignMap = BuildSignMap();

    // Matches runs of characters in the Unicode "Combining Diacritical Marks"
    // block (U+0300-U+036F). Cached so the pattern is parsed only once.
    private static readonly System.Text.RegularExpressions.Regex CombiningMarks =
        new System.Text.RegularExpressions.Regex(@"\p{IsCombiningDiacriticalMarks}+");

    /// <summary>
    /// Flattens <see cref="VietnameseSigns"/> into a per-character lookup table.
    /// </summary>
    /// <returns>A map from each accented character to its plain replacement.</returns>
    private static System.Collections.Generic.Dictionary<char, char> BuildSignMap()
    {
        var map = new System.Collections.Generic.Dictionary<char, char>();
        for (int row = 1; row < VietnameseSigns.Length; row++)
        {
            char plain = VietnameseSigns[0][row - 1];
            foreach (char accented in VietnameseSigns[row])
            {
                // First occurrence wins, which is what sequential replacement over
                // the table would produce if a character were ever listed twice.
                if (!map.ContainsKey(accented))
                {
                    map.Add(accented, plain);
                }
            }
        }

        return map;
    }

    /// <summary>
    /// Removes Vietnamese diacritics by replacing each accented character listed in
    /// the lookup table with its plain Latin letter. Characters that are not in the
    /// table are left untouched.
    /// </summary>
    /// <param name="str">The text to process; may be null or empty.</param>
    /// <returns>
    /// The text with table characters replaced, or <paramref name="str"/> itself when
    /// it is null, empty, or contains nothing to replace.
    /// </returns>
    public static string RemoveSign4VietnameseString(string str)
    {
        if (string.IsNullOrEmpty(str))
        {
            return str;
        }

        // Perform the replacement in a single pass; the buffer is only allocated
        // once the first accented character is found.
        char[] buffer = null;
        for (int i = 0; i < str.Length; i++)
        {
            char plain;
            if (SignMap.TryGetValue(str[i], out plain))
            {
                if (buffer == null)
                {
                    buffer = str.ToCharArray();
                }

                buffer[i] = plain;
            }
        }

        return buffer == null ? str : new string(buffer);
    }

    /// <summary>
    /// Removes diacritics by decomposing the text (Unicode form D), deleting the
    /// combining diacritical marks, and recomposing the remainder (form C). The
    /// letters "đ"/"Đ" have no decomposition and are mapped to "d"/"D" explicitly.
    /// </summary>
    /// <param name="vietnamString">The text to process; may be null or empty.</param>
    /// <returns>
    /// The text without combining diacritical marks, or
    /// <paramref name="vietnamString"/> itself when it is null or empty.
    /// </returns>
    public static string RemoveVietnameseSign(string vietnamString)
    {
        if (string.IsNullOrEmpty(vietnamString))
        {
            return vietnamString;
        }

        string decomposed = vietnamString.Normalize(System.Text.NormalizationForm.FormD);
        string stripped = CombiningMarks.Replace(decomposed, string.Empty);

        // Recompose so characters that were decomposed but carry no diacritical
        // marks (e.g. Hangul syllables) are returned in their original form.
        return stripped
            .Normalize(System.Text.NormalizationForm.FormC)
            .Replace('\u0111', 'd')
            .Replace('\u0110', 'D');
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment