enamoria/NlpUtils.java

## nlp_utils.py
"""
  Copyright @ nguyenvanhieu.vn
  Thằng code python này không giữ được lower/upper case
  Sẽ update khi rảnh
 """
import re
import os
import sys
# from Logger import LogEventSourcing
from datetime import datetime
import dateutil.parser
import traceback
import time
import requests


# logger = LogEventSourcing()

def call_api(data, url, method, timeout=3):
    """Send a form-encoded HTTP request and hand back the raw response.

    :param data: request body, passed to ``requests.request`` as ``data``.
    :param url: target URL.
    :param method: HTTP verb understood by ``requests.request``.
    :param timeout: forwarded unchanged as the ``timeout`` argument (default 3).
    :return: the object returned by ``requests.request``.
    """
    request_headers = {
        'content-type': "application/x-www-form-urlencoded",
        'cache-control': "no-cache",
        'postman-token': "6a410524-a8e2-79c7-bd9d-53e4b68c84c7"
    }
    return requests.request(
        method,
        url,
        data=data,
        headers=request_headers,
        timeout=timeout,
    )


# Vietnamese letters that carry a diacritic, lower case first, then upper case,
# then the six bare upper-case base letters once more.
uniChars = "àáảãạâầấẩẫậăằắẳẵặèéẻẽẹêềếểễệđìíỉĩịòóỏõọôồốổỗộơờớởỡợùúủũụưừứửữựỳýỷỹỵÀÁẢÃẠÂẦẤẨẪẬĂẰẮẲẴẶÈÉẺẼẸÊỀẾỂỄỆĐÌÍỈĨỊÒÓỎÕỌÔỒỐỔỖỘƠỜỚỞỠỢÙÚỦŨỤƯỪỨỬỮỰỲÝỶỸỴÂĂĐÔƠƯ"
# ASCII replacement for the character at the same position in uniChars.
# Written as run lengths so the alignment can be checked by eye: the previous
# hand-typed literal had only three "I" for the five letters "ÌÍỈĨỊ", which
# shifted every later upper-case entry against uniChars.
unsignChars = (
    "a" * 17 + "e" * 11 + "d" + "i" * 5 + "o" * 17 + "u" * 11 + "y" * 5
    + "A" * 17 + "E" * 11 + "D" + "I" * 5 + "O" * 17 + "U" * 11 + "Y" * 5
    + "AADOOU"
)


def loaddicchar():
    """Build the replacement table used by ``convertwindown1525toutf8``.

    Two pipe-separated lists are split and paired position by position.

    NOTE(review): both literals render the same on screen; judging by the
    variable names the first presumably holds legacy composite spellings and
    the second the precomposed UTF-8 ones -- confirm at byte level.

    :return: dict mapping each entry of the first list to the entry at the
        same position of the second list.
    """
    source_forms = 'à|á|ả|ã|ạ|ầ|ấ|ẩ|ẫ|ậ|ằ|ắ|ẳ|ẵ|ặ|è|é|ẻ|ẽ|ẹ|ề|ế|ể|ễ|ệ|ì|í|ỉ|ĩ|ị|ò|ó|ỏ|õ|ọ|ồ|ố|ổ|ỗ|ộ|ờ|ớ|ở|ỡ|ợ|ù|ú|ủ|ũ|ụ|ừ|ứ|ử|ữ|ự|ỳ|ý|ỷ|ỹ|ỵ|À|Á|Ả|Ã|Ạ|Ầ|Ấ|Ẩ|Ẫ|Ậ|Ằ|Ắ|Ẳ|Ẵ|Ặ|È|É|Ẻ|Ẽ|Ẹ|Ề|Ế|Ể|Ễ|Ệ|Ì|Í|Ỉ|Ĩ|Ị|Ò|Ó|Ỏ|Õ|Ọ|Ồ|Ố|Ổ|Ỗ|Ộ|Ờ|Ớ|Ở|Ỡ|Ợ|Ù|Ú|Ủ|Ũ|Ụ|Ừ|Ứ|Ử|Ữ|Ự|Ỳ|Ý|Ỷ|Ỹ|Ỵ'.split(
        '|')
    target_forms = "à|á|ả|ã|ạ|ầ|ấ|ẩ|ẫ|ậ|ằ|ắ|ẳ|ẵ|ặ|è|é|ẻ|ẽ|ẹ|ề|ế|ể|ễ|ệ|ì|í|ỉ|ĩ|ị|ò|ó|ỏ|õ|ọ|ồ|ố|ổ|ỗ|ộ|ờ|ớ|ở|ỡ|ợ|ù|ú|ủ|ũ|ụ|ừ|ứ|ử|ữ|ự|ỳ|ý|ỷ|ỹ|ỵ|À|Á|Ả|Ã|Ạ|Ầ|Ấ|Ẩ|Ẫ|Ậ|Ằ|Ắ|Ẳ|Ẵ|Ặ|È|É|Ẻ|Ẽ|Ẹ|Ề|Ế|Ể|Ễ|Ệ|Ì|Í|Ỉ|Ĩ|Ị|Ò|Ó|Ỏ|Õ|Ọ|Ồ|Ố|Ổ|Ỗ|Ộ|Ờ|Ớ|Ở|Ỡ|Ợ|Ù|Ú|Ủ|Ũ|Ụ|Ừ|Ứ|Ử|Ữ|Ự|Ỳ|Ý|Ỷ|Ỹ|Ỵ".split(
        '|')
    return dict(zip(source_forms, target_forms))


# Replacement table built once at import time; convertwindown1525toutf8 looks matches up in it.
dicchar = loaddicchar()


def convertwindown1525toutf8(txt):
    """Replace every key of ``dicchar`` found in *txt* with its mapped value.

    The search pattern is derived from ``dicchar`` instead of spelling the
    character list out a second time, so pattern and table cannot drift apart
    (an entry present in only one of them would raise ``KeyError`` in the
    replacement callback or be skipped silently).

    NOTE(review): judging by the names, the keys are legacy composite
    spellings and the values precomposed UTF-8 forms -- confirm on real input.

    :param txt: text to convert.
    :return: *txt* with each match substituted; everything else is untouched.
    """
    # Dicts keep insertion order, so the alternatives are tried in the same
    # order as the entries were listed when the table was built.
    pattern = '|'.join(re.escape(key) for key in dicchar)
    return re.sub(pattern, lambda match: dicchar[match.group()], txt)

"""
    Start section: Chuyển câu văn về kiểu gõ telex khi không bật Unikey
    Ví dụ: thủy = thuyr, tượng = tuwowngj
"""
# Vowel table. Each row: column 0 is the vowel without a tone mark, columns 1-5
# are the same vowel with each tone mark, and the last column is the Telex
# keystroke sequence for the unmarked vowel.
bang_nguyen_am = [
    ['a', 'à', 'á', 'ả', 'ã', 'ạ', 'a'],
    ['ă', 'ằ', 'ắ', 'ẳ', 'ẵ', 'ặ', 'aw'],
    ['â', 'ầ', 'ấ', 'ẩ', 'ẫ', 'ậ', 'aa'],
    ['e', 'è', 'é', 'ẻ', 'ẽ', 'ẹ', 'e'],
    ['ê', 'ề', 'ế', 'ể', 'ễ', 'ệ', 'ee'],
    ['i', 'ì', 'í', 'ỉ', 'ĩ', 'ị', 'i'],
    ['o', 'ò', 'ó', 'ỏ', 'õ', 'ọ', 'o'],
    ['ô', 'ồ', 'ố', 'ổ', 'ỗ', 'ộ', 'oo'],
    ['ơ', 'ờ', 'ớ', 'ở', 'ỡ', 'ợ', 'ow'],
    ['u', 'ù', 'ú', 'ủ', 'ũ', 'ụ', 'u'],
    ['ư', 'ừ', 'ứ', 'ử', 'ữ', 'ự', 'uw'],
    ['y', 'ỳ', 'ý', 'ỷ', 'ỹ', 'ỵ', 'y'],
]
# Telex key appended for the tone in column 0-5 of the table above.
bang_ky_tu_dau = ['', 'f', 's', 'r', 'x', 'j']

# Reverse index: vowel character -> (row, column) in bang_nguyen_am.
nguyen_am_to_ids = {}

for i, row in enumerate(bang_nguyen_am):
    # The last column holds Telex keystrokes, not a vowel, so it is not indexed.
    for j, vowel in enumerate(row[:-1]):
        nguyen_am_to_ids[vowel] = (i, j)


def vn_word_to_telex_type(word):
    """Spell one word as Telex keystrokes.

    Every vowel found in ``nguyen_am_to_ids`` is replaced by the Telex sequence
    in the last column of its ``bang_nguyen_am`` row; the key for the last tone
    mark seen is appended at the end of the word. Other characters are copied.

    :param word: a single word.
    :return: the Telex spelling, e.g. ``tượng`` becomes ``tuwowngj``.
    """
    tone = 0
    pieces = []
    for letter in word:
        row, col = nguyen_am_to_ids.get(letter, (-1, -1))
        if row == -1:
            pieces.append(letter)
        else:
            if col != 0:
                tone = col
            pieces.append(bang_nguyen_am[row][-1])
    pieces.append(bang_ky_tu_dau[tone])
    return ''.join(pieces)


def vn_sentence_to_telex_type(sentence):
    """Convert an accented Vietnamese sentence to its Telex spelling.

    The sentence is split on whitespace, each word goes through
    ``vn_word_to_telex_type`` and the results are joined with single spaces.

    :param sentence: text to convert.
    :return: the converted sentence.
    """
    return ' '.join(vn_word_to_telex_type(token) for token in sentence.split())


"""
    End section: Chuyển câu văn về kiểu gõ telex khi không bật Unikey
"""

"""
    Start section: Chuyển câu văn về cách gõ dấu kiểu cũ: dùng òa úy thay oà uý
    Xem tại đây: https://vi.wikipedia.org/wiki/Quy_t%E1%BA%AFc_%C4%91%E1%BA%B7t_d%E1%BA%A5u_thanh_trong_ch%E1%BB%AF_qu%E1%BB%91c_ng%E1%BB%AF
"""


def chuan_hoa_dau_tu_tieng_viet(word):
    """Move the tone mark of one word onto the vowel picked by the rules below.

    The word is returned untouched when ``is_valid_vietnam_word`` rejects it.
    Otherwise every tone-marked vowel is replaced by its unmarked form, the
    column of the last mark seen is remembered, and that mark is written back
    onto a single vowel:

    * a vowel from row 4 (ê) or row 8 (ơ) of ``bang_nguyen_am`` wins if present;
    * with two candidate vowels: the first one when the word ends on the
      second vowel, otherwise the second one;
    * with three or more candidate vowels: the second one.

    :param word: one whitespace-free token; the caller in this file passes it
        lower-cased.
    :return: the word with its tone mark repositioned, or the input unchanged.
    """
    if not is_valid_vietnam_word(word):
        return word

    chars = list(word)
    dau_cau = 0  # column of the tone mark found in the word (0 = none)
    nguyen_am_index = []  # positions of the vowels that may receive the mark
    qu_or_gi = False
    for index, char in enumerate(chars):
        x, y = nguyen_am_to_ids.get(char, (-1, -1))
        if x == -1:
            continue
        elif x == 9:  # row 9 is "u": check for a "qu" onset
            if index != 0 and chars[index - 1] == 'q':
                chars[index] = 'u'
                qu_or_gi = True
        elif x == 5:  # row 5 is "i": check for a "gi" onset
            if index != 0 and chars[index - 1] == 'g':
                chars[index] = 'i'
                qu_or_gi = True
        if y != 0:
            # Remember the tone and strip it from this vowel.
            dau_cau = y
            chars[index] = bang_nguyen_am[x][0]
        # Once a qu/gi onset has been seen, the vowel at index 1 is not a candidate.
        if not qu_or_gi or index != 1:
            nguyen_am_index.append(index)
    if len(nguyen_am_index) < 2:
        if qu_or_gi:
            if len(chars) == 2:
                # Two-letter word: the mark goes on the second letter.
                x, y = nguyen_am_to_ids.get(chars[1])
                chars[1] = bang_nguyen_am[x][dau_cau]
            else:
                x, y = nguyen_am_to_ids.get(chars[2], (-1, -1))
                if x != -1:
                    # Third letter is a vowel: it carries the mark.
                    chars[2] = bang_nguyen_am[x][dau_cau]
                else:
                    # Otherwise the mark goes back on the i (row 5) or u (row 9) at index 1.
                    chars[1] = bang_nguyen_am[5][dau_cau] if chars[1] == 'i' else bang_nguyen_am[9][dau_cau]
            return ''.join(chars)
        return word

    for index in nguyen_am_index:
        x, y = nguyen_am_to_ids[chars[index]]
        if x == 4 or x == 8:  # ê, ơ
            # The first ê/ơ among the candidates takes the mark.
            chars[index] = bang_nguyen_am[x][dau_cau]
            return ''.join(chars)

    if len(nguyen_am_index) == 2:
        if nguyen_am_index[-1] == len(chars) - 1:
            # The word ends on the second vowel: mark the first one.
            x, y = nguyen_am_to_ids[chars[nguyen_am_index[0]]]
            chars[nguyen_am_index[0]] = bang_nguyen_am[x][dau_cau]
        else:
            # A consonant follows the vowel pair: mark the second one.
            x, y = nguyen_am_to_ids[chars[nguyen_am_index[1]]]
            chars[nguyen_am_index[1]] = bang_nguyen_am[x][dau_cau]
    else:
        # Three or more vowels: mark the second one.
        x, y = nguyen_am_to_ids[chars[nguyen_am_index[1]]]
        chars[nguyen_am_index[1]] = bang_nguyen_am[x][dau_cau]
    return ''.join(chars)


def is_valid_vietnam_word(word):
    """Tell whether all vowels of *word* sit next to each other.

    :param word: the word to inspect.
    :return: ``False`` as soon as two vowels from ``nguyen_am_to_ids`` are
        separated by another character, ``True`` otherwise (including words
        without any vowel).
    """
    previous_vowel = -1
    for position, letter in enumerate(word):
        if letter not in nguyen_am_to_ids:
            continue
        if previous_vowel != -1 and position - previous_vowel != 1:
            return False
        previous_vowel = position
    return True


def chuan_hoa_dau_cau_tieng_viet(sentence):
    """Normalise tone-mark placement for every word of a sentence.

    The sentence is lower-cased, split on whitespace, each word is passed to
    ``chuan_hoa_dau_tu_tieng_viet`` and the results are joined with one space.

    :param sentence: text to normalise.
    :return: the lower-cased, normalised sentence.
    """
    tokens = sentence.lower().split()
    return ' '.join(chuan_hoa_dau_tu_tieng_viet(token) for token in tokens)


"""
    End section: Chuyển câu văn về cách gõ dấu kiểu cũ: dùng òa úy thay oà uý
    Xem tại đây: https://vi.wikipedia.org/wiki/Quy_t%E1%BA%AFc_%C4%91%E1%BA%B7t_d%E1%BA%A5u_thanh_trong_ch%E1%BB%AF_qu%E1%BB%91c_ng%E1%BB%AF
"""
if __name__ == '__main__':
    # Manual check: normalise one sample sentence and print the result.
    txt = chuan_hoa_dau_cau_tieng_viet('nếu ngày mai trời nắng')
    print(txt)

## NlpUtils.java
/*
  Copyright @ nguyenvanhieu.vn
  Thằng code java này vẫn giữ được lower/upper case
  Code này ngon hơn, check đúng trường hợp cần thêm dấu thì mới thêm
  Tuy nhiên code python ở dưới không check nhưng vẫn chưa thấy bugs nào hết :v
*/
package utils;

import java.util.*;


/**
 * Helpers for Vietnamese text: a character replacement table and a routine that
 * moves the tone mark of each word onto the vowel chosen by fixed placement rules
 * while keeping the original upper/lower case of every letter.
 */
public class NlpUtils {
    /** Replacement table built by loadDictChar(): spelling to look for -> spelling to write. */
    static Map<String, String> dictChar;
    /**
     * One row per base vowel. Column 0 is the vowel without a tone mark,
     * columns 1-5 are the same vowel with each of the five tone marks.
     */
    static Character[][] vowelTable = {
            {'a', 'à', 'á', 'ả', 'ã', 'ạ'},
            {'ă', 'ằ', 'ắ', 'ẳ', 'ẵ', 'ặ'},
            {'â', 'ầ', 'ấ', 'ẩ', 'ẫ', 'ậ'},
            {'e', 'è', 'é', 'ẻ', 'ẽ', 'ẹ'},
            {'ê', 'ề', 'ế', 'ể', 'ễ', 'ệ'},
            {'i', 'ì', 'í', 'ỉ', 'ĩ', 'ị'},
            {'o', 'ò', 'ó', 'ỏ', 'õ', 'ọ'},
            {'ô', 'ồ', 'ố', 'ổ', 'ỗ', 'ộ'},
            {'ơ', 'ờ', 'ớ', 'ở', 'ỡ', 'ợ'},
            {'u', 'ù', 'ú', 'ủ', 'ũ', 'ụ'},
            {'ư', 'ừ', 'ứ', 'ử', 'ữ', 'ự'},
            {'y', 'ỳ', 'ý', 'ỷ', 'ỹ', 'ỵ'}
    };
    /** Every character isVietnamWord accepts. */
    static Set<Character> vietnamChars;
    /** Vowel character -> its row in vowelTable. */
    static Map<Character, Integer> vowelLookupRow = new HashMap<>();
    /** Vowel character -> its column in vowelTable (0 = no tone mark). */
    static Map<Character, Integer> vowelLookupColumn = new HashMap<>();

    static {
        dictChar = loadDictChar();

        // The accepted alphabet is a-z, A-Z, đ/Đ and every vowelTable entry in both
        // cases. It is derived from vowelTable so the two cannot get out of step.
        vietnamChars = new HashSet<>();
        for (char c = 'a'; c <= 'z'; c++) {
            vietnamChars.add(c);
            vietnamChars.add(Character.toUpperCase(c));
        }
        vietnamChars.add('đ');
        vietnamChars.add('Đ');

        for (int i = 0; i < vowelTable.length; i++) {
            for (int j = 0; j < vowelTable[i].length; j++) {
                char vowel = vowelTable[i][j];
                vowelLookupRow.put(vowel, i);
                vowelLookupColumn.put(vowel, j);
                vietnamChars.add(vowel);
                vietnamChars.add(Character.toUpperCase(vowel));
            }
        }
    }

    /**
     * Builds the replacement table by pairing two pipe-separated lists position by position.
     * NOTE(review): both literals render the same on screen; judging by the names the first
     * presumably holds legacy composite spellings -- confirm at byte level.
     */
    private static Map<String, String> loadDictChar() {
        String[] char1252 = ("à|á|ả|ã|ạ|ầ|ấ|ẩ|ẫ|ậ|ằ|ắ|ẳ|ẵ|ặ|è|é|ẻ|ẽ|ẹ|ề|ế|ể|ễ|ệ|ì|í|ỉ|ĩ|ị|ò|ó|ỏ|õ|ọ|ồ|ố|ổ|ỗ|ộ|ờ|ớ|ở|ỡ|ợ|ù|" +
                "ú|ủ|ũ|ụ|ừ|ứ|ử|ữ|ự|ỳ|ý|ỷ|ỹ|ỵ|À|Á|Ả|Ã|Ạ|Ầ|Ấ|Ẩ|Ẫ|Ậ|Ằ|Ắ|Ẳ|Ẵ|Ặ|È|É|Ẻ|Ẽ|Ẹ|Ề|Ế|Ể|Ễ|Ệ|Ì|Í|Ỉ|Ĩ|Ị|Ò|Ó|Ỏ|Õ|Ọ|Ồ|Ố|Ổ|Ỗ" +
                "|Ộ|Ờ|Ớ|Ở|Ỡ|Ợ|Ù|Ú|Ủ|Ũ|Ụ|Ừ|Ứ|Ử|Ữ|Ự|Ỳ|Ý|Ỷ|Ỹ|Ỵ").split("\\|");
        String[] charUTF8 = ("à|á|ả|ã|ạ|ầ|ấ|ẩ|ẫ|ậ|ằ|ắ|ẳ|ẵ|ặ|è|é|ẻ|ẽ|ẹ|ề|ế|ể|ễ|ệ|ì|í|ỉ|ĩ|ị|ò|ó|ỏ|õ|ọ|ồ|ố|ổ|ỗ|ộ|ờ|ớ|ở|ỡ|ợ|ù|ú|" +
                "ủ|ũ|ụ|ừ|ứ|ử|ữ|ự|ỳ|ý|ỷ|ỹ|ỵ|À|Á|Ả|Ã|Ạ|Ầ|Ấ|Ẩ|Ẫ|Ậ|Ằ|Ắ|Ẳ|Ẵ|Ặ|È|É|Ẻ|Ẽ|Ẹ|Ề|Ế|Ể|Ễ|Ệ|Ì|Í|Ỉ|Ĩ|Ị|Ò|Ó|Ỏ|Õ|Ọ|Ồ|Ố|Ổ|Ỗ|" +
                "Ộ|Ờ|Ớ|Ở|Ỡ|Ợ|Ù|Ú|Ủ|Ũ|Ụ|Ừ|Ứ|Ử|Ữ|Ự|Ỳ|Ý|Ỷ|Ỹ|Ỵ").split("\\|");

        Map<String, String> table = new HashMap<>();
        for (int i = 0; i < char1252.length; i++) {
            table.put(char1252[i], charUTF8[i]);
        }
        return table;
    }


    /**
     * Replaces every key of dictChar that occurs in the sentence with its mapped value.
     *
     * @param sentence text to convert
     * @return the sentence after all replacements
     */
    public static String convertChar1252ToUtf8(String sentence) {
        for (Map.Entry<String, String> entry : dictChar.entrySet()) {
            // Literal replacement: the keys are plain text, not regular expressions.
            sentence = sentence.replace(entry.getKey(), entry.getValue());
        }
        return sentence;
    }

    /**
     * Checks whether the word looks like a tone-marked Vietnamese word: only characters from
     * vietnamChars, all vowels adjacent, and exactly one vowel carrying a tone mark.
     * The word is expected to be lower case already.
     */
    private static boolean isVietnamWord(String word) {
        boolean hasAccent = false;
        int currentVowel = -1;
        for (int i = 0; i < word.length(); i++) {
            if (!vietnamChars.contains(word.charAt(i))) return false;
            if (vowelLookupRow.containsKey(word.charAt(i))) {
                if (currentVowel == -1)
                    currentVowel = i;
                else {
                    if (i - currentVowel != 1) return false;
                    currentVowel = i;
                }
                if (vowelLookupColumn.get(word.charAt(i)) > 0) {
                    if (hasAccent) return false; // a second tone mark in the same word
                    hasAccent = true;
                }
            }
        }
        return hasAccent;
    }

    /**
     * Lower-cases the word and, when isVietnamWord accepts it, moves its tone mark:
     * an ê/ơ takes it if present; with two vowels the first one takes it when the word
     * ends on the second vowel, otherwise the second; with three or more, the second.
     * Words rejected by isVietnamWord are returned lower-cased but otherwise unchanged.
     */
    private static String correctVnAccentWord(String word) {
        // Locale.ROOT keeps the result independent of the JVM's default locale.
        word = word.toLowerCase(Locale.ROOT);
        if (!isVietnamWord(word)) return word;

        char[] chars = word.toCharArray();
        int accentPosition = 0, x, y;
        boolean isQuOrGi = false;

        List<Integer> vowelsIndex = new ArrayList<>();
        for (int i = 0; i < chars.length; i++) {
            x = vowelLookupRow.getOrDefault(chars[i], -1);
            y = vowelLookupColumn.getOrDefault(chars[i], -1);

            if (x == -1) continue;
            else if (x == 9) { // qu
                if (i != 0 && chars[i - 1] == 'q') {
                    chars[i] = 'u';
                    isQuOrGi = true;
                }
            } else if (x == 5) { // gi
                if (i != 0 && chars[i - 1] == 'g') {
                    chars[i] = 'i';
                    isQuOrGi = true;
                }
            }
            if (y != 0) {
                // Remember the tone and strip it from this vowel.
                accentPosition = y;
                chars[i] = vowelTable[x][0];
            }
            // Once a qu/gi onset has been seen, the vowel at index 1 is not a candidate.
            if (!isQuOrGi || i != 1) {
                vowelsIndex.add(i);
            }
        }
        if (vowelsIndex.size() < 2) {
            if (isQuOrGi) {
                if (chars.length == 2) {
                    x = vowelLookupRow.get(chars[1]);
                    chars[1] = vowelTable[x][accentPosition];
                } else {
                    x = vowelLookupRow.getOrDefault(chars[2], -1);
                    if (x != -1) {
                        chars[2] = vowelTable[x][accentPosition];
                    } else {
                        chars[1] = (chars[1] == 'i' ? vowelTable[5][accentPosition] : vowelTable[9][accentPosition]);
                    }
                }
                return String.copyValueOf(chars);
            }
            return word;
        }
        for (int index : vowelsIndex) {
            x = vowelLookupRow.get(chars[index]);
            if (x == 4 || x == 8) { // ê, ơ
                chars[index] = vowelTable[x][accentPosition];
                return String.copyValueOf(chars);
            }
        }
        if (vowelsIndex.size() == 2) {
            if (vowelsIndex.get(vowelsIndex.size() - 1) == chars.length - 1) {
                x = vowelLookupRow.get(chars[vowelsIndex.get(0)]);
                chars[vowelsIndex.get(0)] = vowelTable[x][accentPosition];
            } else {
                x = vowelLookupRow.get(chars[vowelsIndex.get(1)]);
                chars[vowelsIndex.get(1)] = vowelTable[x][accentPosition];
            }
        } else {
            x = vowelLookupRow.get(chars[vowelsIndex.get(1)]);
            chars[vowelsIndex.get(1)] = vowelTable[x][accentPosition];
        }
        return String.copyValueOf(chars);
    }

    /** Records, for each character of the word, whether it is upper case. */
    private static List<Boolean> getUpperState(String word) {
        List<Boolean> uppers = new ArrayList<>();
        for (char c : word.toCharArray()) {
            uppers.add(Character.isUpperCase(c));
        }
        return uppers;
    }

    /**
     * Upper-cases the characters whose position is flagged in uppers.
     * Positions beyond the end of uppers are left alone: lower-casing can lengthen a
     * word (U+0130 becomes two characters), which used to cause an
     * IndexOutOfBoundsException here.
     */
    private static String updateUpperState(String word, List<Boolean> uppers) {
        char[] chars = word.toCharArray();
        for (int i = 0; i < chars.length; i++) {
            if (i < uppers.size() && uppers.get(i)) {
                chars[i] = Character.toUpperCase(chars[i]);
            }
        }
        return String.copyValueOf(chars);
    }

    /**
     * Corrects the tone-mark placement of every whitespace-separated word and restores the
     * original letter case.
     *
     * @param sentence text to correct
     * @return the corrected words joined with single spaces
     */
    public static String correctVnAccentSentence(String sentence) {
        String[] words = sentence.split("\\s+");
        for (int i = 0; i < words.length; i++) {
            List<Boolean> uppers = getUpperState(words[i]);
            words[i] = updateUpperState(correctVnAccentWord(words[i]), uppers);
        }
        return String.join(" ", words);
    }

    public static void main(String[] args) {
        System.out.println(NlpUtils.correctVnAccentSentence("chuôí"));
    }
}
	"""
	Copyright @ nguyenvanhieu.vn
	Thằng code python này không giữ được lower/upper case
	Sẽ update khi rảnh
	"""
	import re
	import os
	import sys
	# from Logger import LogEventSourcing
	from datetime import datetime
	import dateutil.parser
	import traceback
	import time
	import requests


	# logger = LogEventSourcing()

	def call_api(data, url, method, timeout=3):
	    """Send a form-encoded HTTP request and hand back the raw response.

	    :param data: request body, passed to ``requests.request`` as ``data``.
	    :param url: target URL.
	    :param method: HTTP verb understood by ``requests.request``.
	    :param timeout: forwarded unchanged as the ``timeout`` argument (default 3).
	    :return: the object returned by ``requests.request``.
	    """
	    headers = {
	        'content-type': "application/x-www-form-urlencoded",
	        'cache-control': "no-cache",
	        'postman-token': "6a410524-a8e2-79c7-bd9d-53e4b68c84c7"
	    }
	    return requests.request(method, url, data=data, headers=headers, timeout=timeout)


	# Vietnamese letters that carry a diacritic, lower case first, then upper case,
	# then the six bare upper-case base letters once more.
	uniChars = "àáảãạâầấẩẫậăằắẳẵặèéẻẽẹêềếểễệđìíỉĩịòóỏõọôồốổỗộơờớởỡợùúủũụưừứửữựỳýỷỹỵÀÁẢÃẠÂẦẤẨẪẬĂẰẮẲẴẶÈÉẺẼẸÊỀẾỂỄỆĐÌÍỈĨỊÒÓỎÕỌÔỒỐỔỖỘƠỜỚỞỠỢÙÚỦŨỤƯỪỨỬỮỰỲÝỶỸỴÂĂĐÔƠƯ"
	# ASCII replacement for the character at the same position in uniChars.
	# Written as run lengths so the alignment can be checked by eye: the previous
	# hand-typed literal had only three "I" for the five letters "ÌÍỈĨỊ", which
	# shifted every later upper-case entry against uniChars.
	unsignChars = (
	    "a" * 17 + "e" * 11 + "d" + "i" * 5 + "o" * 17 + "u" * 11 + "y" * 5
	    + "A" * 17 + "E" * 11 + "D" + "I" * 5 + "O" * 17 + "U" * 11 + "Y" * 5
	    + "AADOOU"
	)


	def loaddicchar():
	    """Build the replacement table used by ``convertwindown1525toutf8``.

	    Two pipe-separated lists are split and paired position by position. The
	    separators are plain ``|`` again; this copy had them backslash-escaped
	    and had lost its indentation.

	    NOTE(review): both literals render the same on screen; judging by the
	    variable names the first presumably holds legacy composite spellings and
	    the second the precomposed UTF-8 ones -- confirm at byte level.

	    :return: dict mapping each entry of the first list to the entry at the
	        same position of the second list.
	    """
	    char1252 = 'à|á|ả|ã|ạ|ầ|ấ|ẩ|ẫ|ậ|ằ|ắ|ẳ|ẵ|ặ|è|é|ẻ|ẽ|ẹ|ề|ế|ể|ễ|ệ|ì|í|ỉ|ĩ|ị|ò|ó|ỏ|õ|ọ|ồ|ố|ổ|ỗ|ộ|ờ|ớ|ở|ỡ|ợ|ù|ú|ủ|ũ|ụ|ừ|ứ|ử|ữ|ự|ỳ|ý|ỷ|ỹ|ỵ|À|Á|Ả|Ã|Ạ|Ầ|Ấ|Ẩ|Ẫ|Ậ|Ằ|Ắ|Ẳ|Ẵ|Ặ|È|É|Ẻ|Ẽ|Ẹ|Ề|Ế|Ể|Ễ|Ệ|Ì|Í|Ỉ|Ĩ|Ị|Ò|Ó|Ỏ|Õ|Ọ|Ồ|Ố|Ổ|Ỗ|Ộ|Ờ|Ớ|Ở|Ỡ|Ợ|Ù|Ú|Ủ|Ũ|Ụ|Ừ|Ứ|Ử|Ữ|Ự|Ỳ|Ý|Ỷ|Ỹ|Ỵ'.split(
	        '|')
	    charutf8 = "à|á|ả|ã|ạ|ầ|ấ|ẩ|ẫ|ậ|ằ|ắ|ẳ|ẵ|ặ|è|é|ẻ|ẽ|ẹ|ề|ế|ể|ễ|ệ|ì|í|ỉ|ĩ|ị|ò|ó|ỏ|õ|ọ|ồ|ố|ổ|ỗ|ộ|ờ|ớ|ở|ỡ|ợ|ù|ú|ủ|ũ|ụ|ừ|ứ|ử|ữ|ự|ỳ|ý|ỷ|ỹ|ỵ|À|Á|Ả|Ã|Ạ|Ầ|Ấ|Ẩ|Ẫ|Ậ|Ằ|Ắ|Ẳ|Ẵ|Ặ|È|É|Ẻ|Ẽ|Ẹ|Ề|Ế|Ể|Ễ|Ệ|Ì|Í|Ỉ|Ĩ|Ị|Ò|Ó|Ỏ|Õ|Ọ|Ồ|Ố|Ổ|Ỗ|Ộ|Ờ|Ớ|Ở|Ỡ|Ợ|Ù|Ú|Ủ|Ũ|Ụ|Ừ|Ứ|Ử|Ữ|Ự|Ỳ|Ý|Ỷ|Ỹ|Ỵ".split(
	        '|')
	    return dict(zip(char1252, charutf8))


	# Replacement table built once at import time; convertwindown1525toutf8 looks matches up in it.
	dicchar = loaddicchar()


	def convertwindown1525toutf8(txt):
	    """Replace every key of ``dicchar`` found in *txt* with its mapped value.

	    The search pattern is derived from ``dicchar``. The hand-written pattern
	    in this copy had every ``|`` backslash-escaped, which turned the
	    alternation into one long literal that never matches ordinary text.

	    :param txt: text to convert.
	    :return: *txt* with each match substituted; everything else is untouched.
	    """
	    # Dicts keep insertion order, so the alternatives are tried in the same
	    # order as the entries were listed when the table was built.
	    pattern = '|'.join(re.escape(key) for key in dicchar)
	    return re.sub(pattern, lambda match: dicchar[match.group()], txt)

	"""
	Start section: Chuyển câu văn về kiểu gõ telex khi không bật Unikey
	Ví dụ: thủy = thuyr, tượng = tuwowngj
	"""
	# Vowel table. Each row: column 0 is the vowel without a tone mark, columns 1-5
	# are the same vowel with each tone mark, and the last column is the Telex
	# keystroke sequence for the unmarked vowel.
	bang_nguyen_am = [
	    ['a', 'à', 'á', 'ả', 'ã', 'ạ', 'a'],
	    ['ă', 'ằ', 'ắ', 'ẳ', 'ẵ', 'ặ', 'aw'],
	    ['â', 'ầ', 'ấ', 'ẩ', 'ẫ', 'ậ', 'aa'],
	    ['e', 'è', 'é', 'ẻ', 'ẽ', 'ẹ', 'e'],
	    ['ê', 'ề', 'ế', 'ể', 'ễ', 'ệ', 'ee'],
	    ['i', 'ì', 'í', 'ỉ', 'ĩ', 'ị', 'i'],
	    ['o', 'ò', 'ó', 'ỏ', 'õ', 'ọ', 'o'],
	    ['ô', 'ồ', 'ố', 'ổ', 'ỗ', 'ộ', 'oo'],
	    ['ơ', 'ờ', 'ớ', 'ở', 'ỡ', 'ợ', 'ow'],
	    ['u', 'ù', 'ú', 'ủ', 'ũ', 'ụ', 'u'],
	    ['ư', 'ừ', 'ứ', 'ử', 'ữ', 'ự', 'uw'],
	    ['y', 'ỳ', 'ý', 'ỷ', 'ỹ', 'ỵ', 'y'],
	]
	# Telex key appended for the tone in column 0-5 of the table above.
	bang_ky_tu_dau = ['', 'f', 's', 'r', 'x', 'j']

	# Reverse index: vowel character -> (row, column) in bang_nguyen_am.
	nguyen_am_to_ids = {}

	# The loop bodies had lost their indentation in this copy; restored here.
	for i in range(len(bang_nguyen_am)):
	    # The last column holds Telex keystrokes, not a vowel, so it is not indexed.
	    for j in range(len(bang_nguyen_am[i]) - 1):
	        nguyen_am_to_ids[bang_nguyen_am[i][j]] = (i, j)


	def vn_word_to_telex_type(word):
	    """Spell one word as Telex keystrokes (indentation restored in this copy).

	    Every vowel found in ``nguyen_am_to_ids`` is replaced by the Telex sequence
	    in the last column of its ``bang_nguyen_am`` row; the key for the last tone
	    mark seen is appended at the end of the word. Other characters are copied.

	    :param word: a single word.
	    :return: the Telex spelling, e.g. ``tượng`` becomes ``tuwowngj``.
	    """
	    dau_cau = 0
	    pieces = []
	    for char in word:
	        x, y = nguyen_am_to_ids.get(char, (-1, -1))
	        if x == -1:
	            pieces.append(char)
	            continue
	        if y != 0:
	            dau_cau = y
	        pieces.append(bang_nguyen_am[x][-1])
	    pieces.append(bang_ky_tu_dau[dau_cau])
	    return ''.join(pieces)


	def vn_sentence_to_telex_type(sentence):
	    """Convert an accented Vietnamese sentence to its Telex spelling.

	    The sentence is split on whitespace, each word goes through
	    ``vn_word_to_telex_type`` and the results are joined with single spaces.
	    (Indentation restored in this copy.)

	    :param sentence: text to convert.
	    :return: the converted sentence.
	    """
	    return ' '.join(vn_word_to_telex_type(word) for word in sentence.split())


	"""
	End section: Chuyển câu văn về kiểu gõ telex khi không bật Unikey
	"""

	"""
	Start section: Chuyển câu văn về cách gõ dấu kiểu cũ: dùng òa úy thay oà uý
	Xem tại đây: https://vi.wikipedia.org/wiki/Quy_t%E1%BA%AFc_%C4%91%E1%BA%B7t_d%E1%BA%A5u_thanh_trong_ch%E1%BB%AF_qu%E1%BB%91c_ng%E1%BB%AF
	"""


	def chuan_hoa_dau_tu_tieng_viet(word):
	    """Move the tone mark of one word onto the vowel picked by the rules below.

	    Indentation has been restored in this copy; the statements are unchanged.

	    The word is returned untouched when ``is_valid_vietnam_word`` rejects it.
	    Otherwise every tone-marked vowel is replaced by its unmarked form, the
	    column of the last mark seen is remembered, and that mark is written back
	    onto a single vowel:

	    * a vowel from row 4 (ê) or row 8 (ơ) of ``bang_nguyen_am`` wins if present;
	    * with two candidate vowels: the first one when the word ends on the
	      second vowel, otherwise the second one;
	    * with three or more candidate vowels: the second one.

	    :param word: one whitespace-free token; the caller in this file passes it
	        lower-cased.
	    :return: the word with its tone mark repositioned, or the input unchanged.
	    """
	    if not is_valid_vietnam_word(word):
	        return word

	    chars = list(word)
	    dau_cau = 0  # column of the tone mark found in the word (0 = none)
	    nguyen_am_index = []  # positions of the vowels that may receive the mark
	    qu_or_gi = False
	    for index, char in enumerate(chars):
	        x, y = nguyen_am_to_ids.get(char, (-1, -1))
	        if x == -1:
	            continue
	        elif x == 9:  # row 9 is "u": check for a "qu" onset
	            if index != 0 and chars[index - 1] == 'q':
	                chars[index] = 'u'
	                qu_or_gi = True
	        elif x == 5:  # row 5 is "i": check for a "gi" onset
	            if index != 0 and chars[index - 1] == 'g':
	                chars[index] = 'i'
	                qu_or_gi = True
	        if y != 0:
	            # Remember the tone and strip it from this vowel.
	            dau_cau = y
	            chars[index] = bang_nguyen_am[x][0]
	        # Once a qu/gi onset has been seen, the vowel at index 1 is not a candidate.
	        if not qu_or_gi or index != 1:
	            nguyen_am_index.append(index)
	    if len(nguyen_am_index) < 2:
	        if qu_or_gi:
	            if len(chars) == 2:
	                # Two-letter word: the mark goes on the second letter.
	                x, y = nguyen_am_to_ids.get(chars[1])
	                chars[1] = bang_nguyen_am[x][dau_cau]
	            else:
	                x, y = nguyen_am_to_ids.get(chars[2], (-1, -1))
	                if x != -1:
	                    # Third letter is a vowel: it carries the mark.
	                    chars[2] = bang_nguyen_am[x][dau_cau]
	                else:
	                    # Otherwise the mark goes back on the i (row 5) or u (row 9) at index 1.
	                    chars[1] = bang_nguyen_am[5][dau_cau] if chars[1] == 'i' else bang_nguyen_am[9][dau_cau]
	            return ''.join(chars)
	        return word

	    for index in nguyen_am_index:
	        x, y = nguyen_am_to_ids[chars[index]]
	        if x == 4 or x == 8:  # ê, ơ
	            # The first ê/ơ among the candidates takes the mark.
	            chars[index] = bang_nguyen_am[x][dau_cau]
	            return ''.join(chars)

	    if len(nguyen_am_index) == 2:
	        if nguyen_am_index[-1] == len(chars) - 1:
	            # The word ends on the second vowel: mark the first one.
	            x, y = nguyen_am_to_ids[chars[nguyen_am_index[0]]]
	            chars[nguyen_am_index[0]] = bang_nguyen_am[x][dau_cau]
	        else:
	            # A consonant follows the vowel pair: mark the second one.
	            x, y = nguyen_am_to_ids[chars[nguyen_am_index[1]]]
	            chars[nguyen_am_index[1]] = bang_nguyen_am[x][dau_cau]
	    else:
	        # Three or more vowels: mark the second one.
	        x, y = nguyen_am_to_ids[chars[nguyen_am_index[1]]]
	        chars[nguyen_am_index[1]] = bang_nguyen_am[x][dau_cau]
	    return ''.join(chars)


	def is_valid_vietnam_word(word):
	    """Tell whether all vowels of *word* sit next to each other.

	    (Indentation restored in this copy.)

	    :param word: the word to inspect.
	    :return: ``False`` as soon as two vowels from ``nguyen_am_to_ids`` are
	        separated by another character, ``True`` otherwise (including words
	        without any vowel).
	    """
	    nguyen_am_index = -1
	    for index, char in enumerate(word):
	        if char not in nguyen_am_to_ids:
	            continue
	        if nguyen_am_index != -1 and index - nguyen_am_index != 1:
	            return False
	        nguyen_am_index = index
	    return True


	def chuan_hoa_dau_cau_tieng_viet(sentence):
	    """Normalise tone-mark placement for every word of a sentence.

	    The sentence is lower-cased, split on whitespace, each word is passed to
	    ``chuan_hoa_dau_tu_tieng_viet`` and the results are joined with one space.
	    (Indentation restored in this copy.)

	    :param sentence: text to normalise.
	    :return: the lower-cased, normalised sentence.
	    """
	    words = sentence.lower().split()
	    return ' '.join(chuan_hoa_dau_tu_tieng_viet(word) for word in words)


	"""
	End section: Chuyển câu văn về cách gõ dấu kiểu cũ: dùng òa úy thay oà uý
	Xem tại đây: https://vi.wikipedia.org/wiki/Quy_t%E1%BA%AFc_%C4%91%E1%BA%B7t_d%E1%BA%A5u_thanh_trong_ch%E1%BB%AF_qu%E1%BB%91c_ng%E1%BB%AF
	"""
	if __name__ == '__main__':
	    # Manual check: normalise one sample sentence and print the result.
	    # (The body of this guard had lost its indentation in this copy.)
	    txt = 'nếu ngày mai trời nắng'
	    txt = chuan_hoa_dau_cau_tieng_viet(txt)
	    print(txt)
	/*
	Copyright @ nguyenvanhieu.vn
	Thằng code java này vẫn giữ được lower/upper case
	Code này ngon hơn, check đúng trường hợp cần thêm dấu thì mới thêm
	Tuy nhiên code python ở dưới không check nhưng vẫn chưa thấy bugs nào hết :v
	*/
	package utils;

	import java.util.*;


	/**
	 * Helpers for Vietnamese text: a character replacement table and a routine that
	 * moves the tone mark of each word onto the vowel chosen by fixed placement rules
	 * while keeping the original upper/lower case of every letter.
	 * This copy had backslash-escaped pipes in its string literals and || operators and
	 * no indentation; both are repaired here.
	 */
	public class NlpUtils {
	    /** Replacement table built by loadDictChar(): spelling to look for -> spelling to write. */
	    static Map<String, String> dictChar;
	    /**
	     * One row per base vowel. Column 0 is the vowel without a tone mark,
	     * columns 1-5 are the same vowel with each of the five tone marks.
	     */
	    static Character[][] vowelTable = {
	            {'a', 'à', 'á', 'ả', 'ã', 'ạ'},
	            {'ă', 'ằ', 'ắ', 'ẳ', 'ẵ', 'ặ'},
	            {'â', 'ầ', 'ấ', 'ẩ', 'ẫ', 'ậ'},
	            {'e', 'è', 'é', 'ẻ', 'ẽ', 'ẹ'},
	            {'ê', 'ề', 'ế', 'ể', 'ễ', 'ệ'},
	            {'i', 'ì', 'í', 'ỉ', 'ĩ', 'ị'},
	            {'o', 'ò', 'ó', 'ỏ', 'õ', 'ọ'},
	            {'ô', 'ồ', 'ố', 'ổ', 'ỗ', 'ộ'},
	            {'ơ', 'ờ', 'ớ', 'ở', 'ỡ', 'ợ'},
	            {'u', 'ù', 'ú', 'ủ', 'ũ', 'ụ'},
	            {'ư', 'ừ', 'ứ', 'ử', 'ữ', 'ự'},
	            {'y', 'ỳ', 'ý', 'ỷ', 'ỹ', 'ỵ'}
	    };
	    /** Every character isVietnamWord accepts. */
	    static Set<Character> vietnamChars;
	    /** Vowel character -> its row in vowelTable. */
	    static Map<Character, Integer> vowelLookupRow = new HashMap<>();
	    /** Vowel character -> its column in vowelTable (0 = no tone mark). */
	    static Map<Character, Integer> vowelLookupColumn = new HashMap<>();

	    static {
	        dictChar = loadDictChar();

	        // The accepted alphabet is a-z, A-Z, đ/Đ and every vowelTable entry in both
	        // cases. It is derived from vowelTable so the two cannot get out of step.
	        vietnamChars = new HashSet<>();
	        for (char c = 'a'; c <= 'z'; c++) {
	            vietnamChars.add(c);
	            vietnamChars.add(Character.toUpperCase(c));
	        }
	        vietnamChars.add('đ');
	        vietnamChars.add('Đ');

	        for (int i = 0; i < vowelTable.length; i++) {
	            for (int j = 0; j < vowelTable[i].length; j++) {
	                char vowel = vowelTable[i][j];
	                vowelLookupRow.put(vowel, i);
	                vowelLookupColumn.put(vowel, j);
	                vietnamChars.add(vowel);
	                vietnamChars.add(Character.toUpperCase(vowel));
	            }
	        }
	    }

	    /**
	     * Builds the replacement table by pairing two pipe-separated lists position by position.
	     * NOTE(review): both literals render the same on screen; judging by the names the first
	     * presumably holds legacy composite spellings -- confirm at byte level.
	     */
	    private static Map<String, String> loadDictChar() {
	        String[] char1252 = ("à|á|ả|ã|ạ|ầ|ấ|ẩ|ẫ|ậ|ằ|ắ|ẳ|ẵ|ặ|è|é|ẻ|ẽ|ẹ|ề|ế|ể|ễ|ệ|ì|í|ỉ|ĩ|ị|ò|ó|ỏ|õ|ọ|ồ|ố|ổ|ỗ|ộ|ờ|ớ|ở|ỡ|ợ|ù|" +
	                "ú|ủ|ũ|ụ|ừ|ứ|ử|ữ|ự|ỳ|ý|ỷ|ỹ|ỵ|À|Á|Ả|Ã|Ạ|Ầ|Ấ|Ẩ|Ẫ|Ậ|Ằ|Ắ|Ẳ|Ẵ|Ặ|È|É|Ẻ|Ẽ|Ẹ|Ề|Ế|Ể|Ễ|Ệ|Ì|Í|Ỉ|Ĩ|Ị|Ò|Ó|Ỏ|Õ|Ọ|Ồ|Ố|Ổ|Ỗ" +
	                "|Ộ|Ờ|Ớ|Ở|Ỡ|Ợ|Ù|Ú|Ủ|Ũ|Ụ|Ừ|Ứ|Ử|Ữ|Ự|Ỳ|Ý|Ỷ|Ỹ|Ỵ").split("\\|");
	        String[] charUTF8 = ("à|á|ả|ã|ạ|ầ|ấ|ẩ|ẫ|ậ|ằ|ắ|ẳ|ẵ|ặ|è|é|ẻ|ẽ|ẹ|ề|ế|ể|ễ|ệ|ì|í|ỉ|ĩ|ị|ò|ó|ỏ|õ|ọ|ồ|ố|ổ|ỗ|ộ|ờ|ớ|ở|ỡ|ợ|ù|ú|" +
	                "ủ|ũ|ụ|ừ|ứ|ử|ữ|ự|ỳ|ý|ỷ|ỹ|ỵ|À|Á|Ả|Ã|Ạ|Ầ|Ấ|Ẩ|Ẫ|Ậ|Ằ|Ắ|Ẳ|Ẵ|Ặ|È|É|Ẻ|Ẽ|Ẹ|Ề|Ế|Ể|Ễ|Ệ|Ì|Í|Ỉ|Ĩ|Ị|Ò|Ó|Ỏ|Õ|Ọ|Ồ|Ố|Ổ|Ỗ|" +
	                "Ộ|Ờ|Ớ|Ở|Ỡ|Ợ|Ù|Ú|Ủ|Ũ|Ụ|Ừ|Ứ|Ử|Ữ|Ự|Ỳ|Ý|Ỷ|Ỹ|Ỵ").split("\\|");

	        Map<String, String> table = new HashMap<>();
	        for (int i = 0; i < char1252.length; i++) {
	            table.put(char1252[i], charUTF8[i]);
	        }
	        return table;
	    }


	    /**
	     * Replaces every key of dictChar that occurs in the sentence with its mapped value.
	     *
	     * @param sentence text to convert
	     * @return the sentence after all replacements
	     */
	    public static String convertChar1252ToUtf8(String sentence) {
	        for (Map.Entry<String, String> entry : dictChar.entrySet()) {
	            // Literal replacement: the keys are plain text, not regular expressions.
	            sentence = sentence.replace(entry.getKey(), entry.getValue());
	        }
	        return sentence;
	    }

	    /**
	     * Checks whether the word looks like a tone-marked Vietnamese word: only characters from
	     * vietnamChars, all vowels adjacent, and exactly one vowel carrying a tone mark.
	     * The word is expected to be lower case already.
	     */
	    private static boolean isVietnamWord(String word) {
	        boolean hasAccent = false;
	        int currentVowel = -1;
	        for (int i = 0; i < word.length(); i++) {
	            if (!vietnamChars.contains(word.charAt(i))) return false;
	            if (vowelLookupRow.containsKey(word.charAt(i))) {
	                if (currentVowel == -1)
	                    currentVowel = i;
	                else {
	                    if (i - currentVowel != 1) return false;
	                    currentVowel = i;
	                }
	                if (vowelLookupColumn.get(word.charAt(i)) > 0) {
	                    if (hasAccent) return false; // a second tone mark in the same word
	                    hasAccent = true;
	                }
	            }
	        }
	        return hasAccent;
	    }

	    /**
	     * Lower-cases the word and, when isVietnamWord accepts it, moves its tone mark:
	     * an ê/ơ takes it if present; with two vowels the first one takes it when the word
	     * ends on the second vowel, otherwise the second; with three or more, the second.
	     * Words rejected by isVietnamWord are returned lower-cased but otherwise unchanged.
	     */
	    private static String correctVnAccentWord(String word) {
	        // Locale.ROOT keeps the result independent of the JVM's default locale.
	        word = word.toLowerCase(Locale.ROOT);
	        if (!isVietnamWord(word)) return word;

	        char[] chars = word.toCharArray();
	        int accentPosition = 0, x, y;
	        boolean isQuOrGi = false;

	        List<Integer> vowelsIndex = new ArrayList<>();
	        for (int i = 0; i < chars.length; i++) {
	            x = vowelLookupRow.getOrDefault(chars[i], -1);
	            y = vowelLookupColumn.getOrDefault(chars[i], -1);

	            if (x == -1) continue;
	            else if (x == 9) { // qu
	                if (i != 0 && chars[i - 1] == 'q') {
	                    chars[i] = 'u';
	                    isQuOrGi = true;
	                }
	            } else if (x == 5) { // gi
	                if (i != 0 && chars[i - 1] == 'g') {
	                    chars[i] = 'i';
	                    isQuOrGi = true;
	                }
	            }
	            if (y != 0) {
	                // Remember the tone and strip it from this vowel.
	                accentPosition = y;
	                chars[i] = vowelTable[x][0];
	            }
	            // Once a qu/gi onset has been seen, the vowel at index 1 is not a candidate.
	            if (!isQuOrGi || i != 1) {
	                vowelsIndex.add(i);
	            }
	        }
	        if (vowelsIndex.size() < 2) {
	            if (isQuOrGi) {
	                if (chars.length == 2) {
	                    x = vowelLookupRow.get(chars[1]);
	                    chars[1] = vowelTable[x][accentPosition];
	                } else {
	                    x = vowelLookupRow.getOrDefault(chars[2], -1);
	                    if (x != -1) {
	                        chars[2] = vowelTable[x][accentPosition];
	                    } else {
	                        chars[1] = (chars[1] == 'i' ? vowelTable[5][accentPosition] : vowelTable[9][accentPosition]);
	                    }
	                }
	                return String.copyValueOf(chars);
	            }
	            return word;
	        }
	        for (int index : vowelsIndex) {
	            x = vowelLookupRow.get(chars[index]);
	            if (x == 4 || x == 8) { // ê, ơ
	                chars[index] = vowelTable[x][accentPosition];
	                return String.copyValueOf(chars);
	            }
	        }
	        if (vowelsIndex.size() == 2) {
	            if (vowelsIndex.get(vowelsIndex.size() - 1) == chars.length - 1) {
	                x = vowelLookupRow.get(chars[vowelsIndex.get(0)]);
	                chars[vowelsIndex.get(0)] = vowelTable[x][accentPosition];
	            } else {
	                x = vowelLookupRow.get(chars[vowelsIndex.get(1)]);
	                chars[vowelsIndex.get(1)] = vowelTable[x][accentPosition];
	            }
	        } else {
	            x = vowelLookupRow.get(chars[vowelsIndex.get(1)]);
	            chars[vowelsIndex.get(1)] = vowelTable[x][accentPosition];
	        }
	        return String.copyValueOf(chars);
	    }

	    /** Records, for each character of the word, whether it is upper case. */
	    private static List<Boolean> getUpperState(String word) {
	        List<Boolean> uppers = new ArrayList<>();
	        for (char c : word.toCharArray()) {
	            uppers.add(Character.isUpperCase(c));
	        }
	        return uppers;
	    }

	    /**
	     * Upper-cases the characters whose position is flagged in uppers.
	     * Positions beyond the end of uppers are left alone: lower-casing can lengthen a
	     * word (U+0130 becomes two characters), which used to cause an
	     * IndexOutOfBoundsException here.
	     */
	    private static String updateUpperState(String word, List<Boolean> uppers) {
	        char[] chars = word.toCharArray();
	        for (int i = 0; i < chars.length; i++) {
	            if (i < uppers.size() && uppers.get(i)) {
	                chars[i] = Character.toUpperCase(chars[i]);
	            }
	        }
	        return String.copyValueOf(chars);
	    }

	    /**
	     * Corrects the tone-mark placement of every whitespace-separated word and restores the
	     * original letter case.
	     *
	     * @param sentence text to correct
	     * @return the corrected words joined with single spaces
	     */
	    public static String correctVnAccentSentence(String sentence) {
	        String[] words = sentence.split("\\s+");
	        for (int i = 0; i < words.length; i++) {
	            List<Boolean> uppers = getUpperState(words[i]);
	            words[i] = updateUpperState(correctVnAccentWord(words[i]), uppers);
	        }
	        return String.join(" ", words);
	    }

	    public static void main(String[] args) {
	        System.out.println(NlpUtils.correctVnAccentSentence("chuôí"));
	    }
	}