Last active
December 19, 2024 07:43
-
Star
(128)
You must be signed in to star a gist -
Fork
(35)
You must be signed in to fork a gist
-
-
Save jarvisluong/f01e108e963092336f04c4b7dd6f7e45 to your computer and use it in GitHub Desktop.
Converting standard Vietnamese Characters to non-accent ones (Chuyển đổi ký tự tiếng Việt sang không dấu). Example: hải -> hai
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Lowercases a string and removes Vietnamese diacritics from it.
 *
 * Handles both precomposed letters (e.g. U+1EA3 "ả") and decomposed input in
 * which the tone / vowel marks are separate combining code points.
 *
 * @param {string} str - Text to convert.
 * @returns {string} Lowercased text with Vietnamese accents removed
 *   (e.g. "Hải" -> "hai").
 */
function toLowerCaseNonAccentVietnamese(str) {
  return str
    .toLowerCase()
    // Precomposed letters: map every accented variant to its base vowel.
    .replace(/à|á|ạ|ả|ã|â|ầ|ấ|ậ|ẩ|ẫ|ă|ằ|ắ|ặ|ẳ|ẵ/g, "a")
    .replace(/è|é|ẹ|ẻ|ẽ|ê|ề|ế|ệ|ể|ễ/g, "e")
    .replace(/ì|í|ị|ỉ|ĩ/g, "i")
    .replace(/ò|ó|ọ|ỏ|õ|ô|ồ|ố|ộ|ổ|ỗ|ơ|ờ|ớ|ợ|ở|ỡ/g, "o")
    .replace(/ù|ú|ụ|ủ|ũ|ư|ừ|ứ|ự|ử|ữ/g, "u")
    .replace(/ỳ|ý|ỵ|ỷ|ỹ/g, "y")
    .replace(/đ/g, "d")
    // Some systems encode Vietnamese accents as separate combining characters.
    // Tone marks: grave, acute, tilde, hook above, dot below.
    .replace(/\u0300|\u0301|\u0303|\u0309|\u0323/g, "")
    // Vowel marks: circumflex (combining U+0302, plus the spacing modifier
    // U+02C6 that was already stripped before), breve, horn.
    .replace(/\u0302|\u02C6|\u0306|\u031B/g, "");
}
/**
 * Removes Vietnamese diacritics from a string while keeping the original
 * letter casing.
 *
 * Handles both precomposed letters and decomposed input in which the
 * tone / vowel marks are separate combining code points.
 *
 * @param {string} str - Text to convert.
 * @returns {string} Text with Vietnamese accents removed
 *   (e.g. "Hải Phòng" -> "Hai Phong").
 */
function toNonAccentVietnamese(str) {
  return str
    // Precomposed letters. Every pattern needs the `g` flag so that all
    // occurrences are replaced, and the uppercase lists must include the
    // hook-above (hỏi) variants just like the lowercase ones.
    .replace(/Á|À|Ả|Ã|Ạ|Â|Ấ|Ầ|Ẩ|Ẫ|Ậ|Ă|Ắ|Ằ|Ẳ|Ẵ|Ặ/g, "A")
    .replace(/à|á|ạ|ả|ã|â|ầ|ấ|ậ|ẩ|ẫ|ă|ằ|ắ|ặ|ẳ|ẵ/g, "a")
    .replace(/É|È|Ẻ|Ẽ|Ẹ|Ê|Ế|Ề|Ể|Ễ|Ệ/g, "E")
    .replace(/è|é|ẹ|ẻ|ẽ|ê|ề|ế|ệ|ể|ễ/g, "e")
    .replace(/Í|Ì|Ỉ|Ĩ|Ị/g, "I")
    .replace(/ì|í|ị|ỉ|ĩ/g, "i")
    .replace(/Ó|Ò|Ỏ|Õ|Ọ|Ô|Ố|Ồ|Ổ|Ỗ|Ộ|Ơ|Ớ|Ờ|Ở|Ỡ|Ợ/g, "O")
    .replace(/ò|ó|ọ|ỏ|õ|ô|ồ|ố|ộ|ổ|ỗ|ơ|ờ|ớ|ợ|ở|ỡ/g, "o")
    .replace(/Ú|Ù|Ủ|Ũ|Ụ|Ư|Ứ|Ừ|Ử|Ữ|Ự/g, "U")
    .replace(/ù|ú|ụ|ủ|ũ|ư|ừ|ứ|ự|ử|ữ/g, "u")
    .replace(/Ý|Ỳ|Ỷ|Ỹ|Ỵ/g, "Y")
    .replace(/ỳ|ý|ỵ|ỷ|ỹ/g, "y")
    .replace(/Đ/g, "D")
    .replace(/đ/g, "d")
    // Some systems encode Vietnamese accents as separate combining characters.
    // Tone marks: grave, acute, tilde, hook above, dot below.
    .replace(/\u0300|\u0301|\u0303|\u0309|\u0323/g, "")
    // Vowel marks: circumflex (combining U+0302, plus the spacing modifier
    // U+02C6 that was already stripped before), breve, horn.
    .replace(/\u0302|\u02C6|\u0306|\u031B/g, "");
}
The gist has been updated with a new version that supports character casing, with credit to @doraeminemon
thank
Thank you so much!!
nicee
thank you so much!
có cách nào convert không dấu thành có dấu không
Thank you so much!
Thank you so much !
what about from english to vietnamese ?
@doraeminemon
cam on anh rat nhieu
It's not working:
input: MẪU BIÊN BẢN HỌP XEM LẠI BĂNG HÌNH.doc
output: MAU BIEN BẢN HOP XEM LAI BANG HINH.doc
thanks
MẪU BIÊN BẢN HỌP XEM LẠI BĂNG HÌNH.doc
function toNonAccentVietnamese(str) {
str = str.replace(/A|Á|À|Ả|Ã|Ạ|Â|Ấ|Ầ|Ẩ|Ẫ|Ậ|Ă|Ắ|Ằ|Ẳ|Ẵ|Ặ/g, "A");
str = str.replace(/à|á|ạ|ả|ã|â|ầ|ấ|ậ|ẩ|ẫ|ậ|ă|ằ|ắ|ặ|ẳ|ẵ/g, "a");
<same_for_remaining>
...
Thiếu mấy chữ dấu hỏi
/**
 * Removes Vietnamese diacritics from a string while keeping letter casing.
 *
 * The input is URI-decoded when it is a valid encoded URI, then normalized to
 * NFC so that decomposed accents are composed into the single code points
 * matched by the patterns below.
 *
 * @param {string} str - Text (optionally URI-encoded) to convert.
 * @returns {string} Text with Vietnamese accents removed.
 */
function toNonAccentVietnamese(str) {
  let decoded;
  try {
    decoded = decodeURI(str);
  } catch (err) {
    if (!(err instanceof URIError)) throw err;
    // Plain text containing a stray "%" (e.g. "100%") is not a valid encoded
    // URI; fall back to the text as given instead of failing.
    decoded = String(str);
  }
  return decoded.normalize('NFC')
    .replace(/Á|À|Ả|Ã|Ạ|Â|Ấ|Ầ|Ẫ|Ẩ|Ậ|Ă|Ắ|Ằ|Ẵ|Ặ|Ẳ/g, "A")
    .replace(/á|à|ả|ã|ạ|â|ấ|ầ|ẫ|ẩ|ậ|ă|ắ|ằ|ẵ|ặ|ẳ/g, "a")
    .replace(/É|È|Ẻ|Ẽ|Ẹ|Ê|Ế|Ề|Ể|Ễ|Ệ/g, "E")
    .replace(/é|è|ẻ|ẽ|ẹ|ê|ề|ế|ệ|ể|ễ/g, "e")
    .replace(/Í|Ì|Ỉ|Ĩ|Ị/g, "I")
    .replace(/ì|í|ị|ỉ|ĩ/g, "i")
    .replace(/Ó|Ò|Ỏ|Õ|Ọ|Ô|Ố|Ồ|Ỗ|Ộ|Ổ|Ơ|Ớ|Ờ|Ỡ|Ợ|Ở/g, "O")
    .replace(/ò|ó|ọ|ỏ|õ|ô|ồ|ố|ộ|ổ|ỗ|ơ|ờ|ớ|ợ|ở|ỡ/g, "o")
    .replace(/Ú|Ù|Ủ|Ũ|Ụ|Ư|Ứ|Ừ|Ữ|Ử|Ự/g, "U")
    .replace(/ù|ú|ụ|ủ|ũ|ư|ừ|ứ|ự|ử|ữ/g, "u")
    .replace(/Ý|Ỳ|Ỷ|Ỹ|Ỵ/g, "Y")
    .replace(/ỳ|ý|ỵ|ỷ|ỹ/g, "y")
    .replace(/Đ/g, "D")
    .replace(/đ/g, "d");
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thank you <3