Skip to content

Instantly share code, notes, and snippets.

@jarvisluong
Last active December 19, 2024 07:43

Revisions

  1. jarvisluong revised this gist May 31, 2022. No changes.
  2. jarvisluong revised this gist May 31, 2022. No changes.
  3. jarvisluong revised this gist May 31, 2022. 1 changed file with 2 additions and 0 deletions.
    2 changes: 2 additions & 0 deletions nonAccentVietnamese.js
    Original file line number Diff line number Diff line change
    @@ -1,3 +1,4 @@
    // This function converts the string to lowercase, then performs the conversion
    function toLowerCaseNonAccentVietnamese(str) {
    str = str.toLowerCase();
    // We can also use this instead of from line 11 to line 17
    @@ -21,6 +22,7 @@ function toLowerCaseNonAccentVietnamese(str) {
    return str;
    }

    // This function keeps the casing of str unchanged, then performs the conversion
    function toNonAccentVietnamese(str) {
    str = str.replace(/A|Á|À|Ã||Â|||||Ă||||/g, "A");
    str = str.replace(/à|á|||ã|â||||||ă|||||/g, "a");
  4. jarvisluong revised this gist May 31, 2022. 1 changed file with 22 additions and 1 deletion.
    23 changes: 22 additions & 1 deletion nonAccentVietnamese.js
    Original file line number Diff line number Diff line change
    @@ -1,4 +1,4 @@
    function nonAccentVietnamese(str) {
    function toLowerCaseNonAccentVietnamese(str) {
    str = str.toLowerCase();
    // We can also use this instead of from line 11 to line 17
    // str = str.replace(/\u00E0|\u00E1|\u1EA1|\u1EA3|\u00E3|\u00E2|\u1EA7|\u1EA5|\u1EAD|\u1EA9|\u1EAB|\u0103|\u1EB1|\u1EAF|\u1EB7|\u1EB3|\u1EB5/g, "a");
    @@ -19,4 +19,25 @@ function nonAccentVietnamese(str) {
    str = str.replace(/\u0300|\u0301|\u0303|\u0309|\u0323/g, ""); // Huyền sắc hỏi ngã nặng
    str = str.replace(/\u02C6|\u0306|\u031B/g, ""); // Â, Ê, Ă, Ơ, Ư
    return str;
    }

    /**
     * Removes Vietnamese diacritics from a string while keeping the original
     * letter casing (e.g. "Tiếng Việt" -> "Tieng Viet", "Đ" -> "D").
     *
     * Both precomposed letters and decomposed sequences (base letter followed
     * by combining marks) are handled. All accented characters are written as
     * \u escapes so the patterns cannot be damaged by file-encoding problems.
     *
     * @param {string} str - Text that may contain Vietnamese accented letters.
     * @returns {string} The text with accented letters replaced by their base
     *   Latin letters and stray combining marks removed.
     */
    function toNonAccentVietnamese(str) {
      return str
        // Precomposed vowels, upper case first and lower case second per letter.
        .replace(/[\u00C0-\u00C3\u0102\u1EA0\u1EA2\u1EA4\u1EA6\u1EA8\u1EAA\u1EAC\u1EAE\u1EB0\u1EB2\u1EB4\u1EB6]/g, "A")
        .replace(/[\u00E0-\u00E3\u0103\u1EA1\u1EA3\u1EA5\u1EA7\u1EA9\u1EAB\u1EAD\u1EAF\u1EB1\u1EB3\u1EB5\u1EB7]/g, "a")
        .replace(/[\u00C8-\u00CA\u1EB8\u1EBA\u1EBC\u1EBE\u1EC0\u1EC2\u1EC4\u1EC6]/g, "E")
        .replace(/[\u00E8-\u00EA\u1EB9\u1EBB\u1EBD\u1EBF\u1EC1\u1EC3\u1EC5\u1EC7]/g, "e")
        .replace(/[\u00CC\u00CD\u0128\u1EC8\u1ECA]/g, "I")
        .replace(/[\u00EC\u00ED\u0129\u1EC9\u1ECB]/g, "i")
        .replace(/[\u00D2-\u00D5\u01A0\u1ECC\u1ECE\u1ED0\u1ED2\u1ED4\u1ED6\u1ED8\u1EDA\u1EDC\u1EDE\u1EE0\u1EE2]/g, "O")
        .replace(/[\u00F2-\u00F5\u01A1\u1ECD\u1ECF\u1ED1\u1ED3\u1ED5\u1ED7\u1ED9\u1EDB\u1EDD\u1EDF\u1EE1\u1EE3]/g, "o")
        .replace(/[\u00D9\u00DA\u0168\u01AF\u1EE4\u1EE6\u1EE8\u1EEA\u1EEC\u1EEE\u1EF0]/g, "U")
        .replace(/[\u00F9\u00FA\u0169\u01B0\u1EE5\u1EE7\u1EE9\u1EEB\u1EED\u1EEF\u1EF1]/g, "u")
        .replace(/[\u00DD\u1EF2\u1EF4\u1EF6\u1EF8]/g, "Y")
        .replace(/[\u00FD\u1EF3\u1EF5\u1EF7\u1EF9]/g, "y")
        // D with stroke is a distinct letter, not a base letter plus a mark.
        .replace(/\u0110/g, "D")
        .replace(/\u0111/g, "d")
        // Some systems encode Vietnamese accents as separate combining characters.
        // Tone marks: grave, acute, tilde, hook above, dot below.
        .replace(/[\u0300\u0301\u0303\u0309\u0323]/g, "")
        // Vowel-quality marks: circumflex (spacing U+02C6 and combining U+0302),
        // breve, horn.
        .replace(/[\u02C6\u0302\u0306\u031B]/g, "");
    }
  5. jarvisluong revised this gist Apr 15, 2017. 1 changed file with 8 additions and 0 deletions.
    8 changes: 8 additions & 0 deletions nonAccentVietnamese.js
    Original file line number Diff line number Diff line change
    @@ -1,5 +1,13 @@
    function nonAccentVietnamese(str) {
    str = str.toLowerCase();
    // We can also use this instead of from line 11 to line 17
    // str = str.replace(/\u00E0|\u00E1|\u1EA1|\u1EA3|\u00E3|\u00E2|\u1EA7|\u1EA5|\u1EAD|\u1EA9|\u1EAB|\u0103|\u1EB1|\u1EAF|\u1EB7|\u1EB3|\u1EB5/g, "a");
    // str = str.replace(/\u00E8|\u00E9|\u1EB9|\u1EBB|\u1EBD|\u00EA|\u1EC1|\u1EBF|\u1EC7|\u1EC3|\u1EC5/g, "e");
    // str = str.replace(/\u00EC|\u00ED|\u1ECB|\u1EC9|\u0129/g, "i");
    // str = str.replace(/\u00F2|\u00F3|\u1ECD|\u1ECF|\u00F5|\u00F4|\u1ED3|\u1ED1|\u1ED9|\u1ED5|\u1ED7|\u01A1|\u1EDD|\u1EDB|\u1EE3|\u1EDF|\u1EE1/g, "o");
    // str = str.replace(/\u00F9|\u00FA|\u1EE5|\u1EE7|\u0169|\u01B0|\u1EEB|\u1EE9|\u1EF1|\u1EED|\u1EEF/g, "u");
    // str = str.replace(/\u1EF3|\u00FD|\u1EF5|\u1EF7|\u1EF9/g, "y");
    // str = str.replace(/\u0111/g, "d");
    str = str.replace(/à|á|||ã|â||||||ă|||||/g, "a");
    str = str.replace(/è|é||||ê||ế|||/g, "e");
    str = str.replace(/ì|í|||ĩ/g, "i");
  6. jarvisluong created this gist Apr 15, 2017.
    14 changes: 14 additions & 0 deletions nonAccentVietnamese.js
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,14 @@
    /**
     * Lower-cases a string and removes Vietnamese diacritics from it
     * (e.g. "Tiếng Việt" -> "tieng viet", "Đ" -> "d").
     *
     * Both precomposed letters and decomposed sequences (base letter followed
     * by combining marks) are handled. All accented characters are written as
     * \u escapes so the patterns cannot be damaged by file-encoding problems.
     *
     * @param {string} str - Text that may contain Vietnamese accented letters.
     * @returns {string} The lower-cased text with accented letters replaced by
     *   their base Latin letters and stray combining marks removed.
     */
    function nonAccentVietnamese(str) {
      return str
        .toLowerCase()
        // Precomposed lower-case vowels.
        .replace(/[\u00E0-\u00E3\u0103\u1EA1\u1EA3\u1EA5\u1EA7\u1EA9\u1EAB\u1EAD\u1EAF\u1EB1\u1EB3\u1EB5\u1EB7]/g, "a")
        .replace(/[\u00E8-\u00EA\u1EB9\u1EBB\u1EBD\u1EBF\u1EC1\u1EC3\u1EC5\u1EC7]/g, "e")
        .replace(/[\u00EC\u00ED\u0129\u1EC9\u1ECB]/g, "i")
        .replace(/[\u00F2-\u00F5\u01A1\u1ECD\u1ECF\u1ED1\u1ED3\u1ED5\u1ED7\u1ED9\u1EDB\u1EDD\u1EDF\u1EE1\u1EE3]/g, "o")
        .replace(/[\u00F9\u00FA\u0169\u01B0\u1EE5\u1EE7\u1EE9\u1EEB\u1EED\u1EEF\u1EF1]/g, "u")
        .replace(/[\u00FD\u1EF3\u1EF5\u1EF7\u1EF9]/g, "y")
        // d with stroke is a distinct letter, not a base letter plus a mark.
        .replace(/\u0111/g, "d")
        // Some systems encode Vietnamese accents as separate combining characters.
        // Tone marks: grave, acute, tilde, hook above, dot below.
        .replace(/[\u0300\u0301\u0303\u0309\u0323]/g, "")
        // Vowel-quality marks: circumflex (spacing U+02C6 and combining U+0302),
        // breve, horn.
        .replace(/[\u02C6\u0302\u0306\u031B]/g, "");
    }