Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Chuẩn hóa cách gõ dấu câu về kiểu gõ cũ (Python + Java version)
# -*- coding: utf-8 -*-
import regex as re
# Parallel lookup tables: unsignChars[k] is the plain ASCII letter for the
# accented Vietnamese letter uniChars[k].  Both strings must have the same
# length and the same ordering.
uniChars = "àáảãạâầấẩẫậăằắẳẵặèéẻẽẹêềếểễệđìíỉĩịòóỏõọôồốổỗộơờớởỡợùúủũụưừứửữựỳýỷỹỵÀÁẢÃẠÂẦẤẨẪẬĂẰẮẲẴẶÈÉẺẼẸÊỀẾỂỄỆĐÌÍỈĨỊÒÓỎÕỌÔỒỐỔỖỘƠỜỚỞỠỢÙÚỦŨỤƯỪỨỬỮỰỲÝỶỸỴÂĂĐÔƠƯ"
# The run lengths mirror the groups in uniChars (17 a-forms, 11 e-forms, d,
# 5 i-forms, 17 o-forms, 11 u-forms, 5 y-forms, for each case) followed by the
# six trailing letters "ÂĂĐÔƠƯ".  Writing the runs as counts keeps the table
# aligned; the previous literal had only three "I" for the five "ÌÍỈĨỊ"
# entries, which shifted every later uppercase mapping.
unsignChars = (
    "a" * 17 + "e" * 11 + "d" + "i" * 5 + "o" * 17 + "u" * 11 + "y" * 5
    + "A" * 17 + "E" * 11 + "D" + "I" * 5 + "O" * 17 + "U" * 11 + "Y" * 5
    + "AADOOU"
)
def loaddicchar():
    """Build the table mapping decomposed Vietnamese vowels to precomposed ones.

    Two decomposed spellings are registered for every toned vowel, in both
    cases:

    * the "Windows" form: the base letter (a, ă, â, e, ê, i, o, ô, ơ, u, ư, y)
      followed by one combining tone mark;
    * the fully decomposed (NFD) form, where the circumflex/breve/horn is a
      combining mark as well.

    The keys are derived with ``unicodedata`` instead of being written as
    literals, because the decomposed and precomposed spellings look identical
    in an editor and are easily merged by copy/paste or re-encoding, which
    would silently turn the table into an identity mapping.

    :return: dict of decomposed sequence -> single precomposed character
    """
    import unicodedata

    # Combining grave, acute, hook above, tilde and dot below.
    tone_marks = '\u0300\u0301\u0309\u0303\u0323'
    # Normalise the literal so every base is one code point no matter how this
    # source file itself happens to be encoded.
    bases = unicodedata.normalize('NFC', 'aăâeêioôơuưy')
    dic = {}
    for base in bases + bases.upper():
        for tone in tone_marks:
            composed = unicodedata.normalize('NFC', base + tone)
            dic[base + tone] = composed
            dic[unicodedata.normalize('NFD', composed)] = composed
    return dic


dicchar = loaddicchar()
# Longest sequences first: a fully decomposed vowel can start with a shorter
# key (e.g. "a" + dot below is a prefix of decomposed "ậ"), and the alternation
# takes the first alternative that matches.  The keys contain only letters and
# combining marks, so no escaping is needed.
_dicchar_pattern = re.compile('|'.join(sorted(dicchar, key=len, reverse=True)))


def convert_unicode(txt):
    """Replace decomposed Vietnamese vowels in ``txt`` with precomposed ones.

    :param txt: text that may contain base-letter + combining-mark sequences
    :return: ``txt`` with every sequence listed in ``dicchar`` replaced by its
        single-character equivalent; all other text is returned unchanged
    """
    return _dicchar_pattern.sub(lambda found: dicchar[found.group()], txt)
"""
Start section: Chuyển câu văn về kiểu gõ telex khi không bật Unikey
Ví dụ: thủy = thuyr, tượng = tuwowngj
"""
# Vowel table, one row per base vowel.  Columns 0-5 hold the vowel with no
# tone, grave, acute, hook above, tilde and dot below; the last column is the
# telex key sequence that types the base vowel.
bang_nguyen_am = [
    ['a', 'à', 'á', 'ả', 'ã', 'ạ', 'a'],
    ['ă', 'ằ', 'ắ', 'ẳ', 'ẵ', 'ặ', 'aw'],
    ['â', 'ầ', 'ấ', 'ẩ', 'ẫ', 'ậ', 'aa'],
    ['e', 'è', 'é', 'ẻ', 'ẽ', 'ẹ', 'e'],
    ['ê', 'ề', 'ế', 'ể', 'ễ', 'ệ', 'ee'],
    ['i', 'ì', 'í', 'ỉ', 'ĩ', 'ị', 'i'],
    ['o', 'ò', 'ó', 'ỏ', 'õ', 'ọ', 'o'],
    ['ô', 'ồ', 'ố', 'ổ', 'ỗ', 'ộ', 'oo'],
    ['ơ', 'ờ', 'ớ', 'ở', 'ỡ', 'ợ', 'ow'],
    ['u', 'ù', 'ú', 'ủ', 'ũ', 'ụ', 'u'],
    ['ư', 'ừ', 'ứ', 'ử', 'ữ', 'ự', 'uw'],
    ['y', 'ỳ', 'ý', 'ỷ', 'ỹ', 'ỵ', 'y'],
]

# Telex tone key for each tone column of the table above (column 0 = no tone).
bang_ky_tu_dau = ['', 'f', 's', 'r', 'x', 'j']

# Reverse lookup: vowel character -> (row, tone column) in bang_nguyen_am.
# The trailing telex column is not a vowel and is left out.
nguyen_am_to_ids = {}
for i, _row in enumerate(bang_nguyen_am):
    for j, _vowel in enumerate(_row[:-1]):
        nguyen_am_to_ids[_vowel] = (i, j)
def vn_word_to_telex_type(word):
    """Spell one Vietnamese word as the telex keystrokes that type it.

    Each character found in ``nguyen_am_to_ids`` is replaced by the telex key
    sequence stored in the last column of ``bang_nguyen_am``; every other
    character is copied unchanged.  The key of the last tone mark seen in the
    word is appended once at the end (nothing is appended when the word has no
    tone), e.g. ``thủy`` -> ``thuyr`` and ``tượng`` -> ``tuwowngj``.

    :param word: a single word
    :return: the telex spelling of ``word``
    """
    tone = 0
    pieces = []
    for letter in word:
        row, column = nguyen_am_to_ids.get(letter, (-1, -1))
        if row == -1:
            pieces.append(letter)
        else:
            if column != 0:
                tone = column
            pieces.append(bang_nguyen_am[row][-1])
    pieces.append(bang_ky_tu_dau[tone])
    return ''.join(pieces)
def vn_sentence_to_telex_type(sentence):
    """Convert an accented Vietnamese sentence to its telex spelling.

    The sentence is split on whitespace, every word is converted with
    ``vn_word_to_telex_type`` and the results are joined with single spaces.

    :param sentence: the sentence to convert
    :return: the converted sentence
    """
    return ' '.join(vn_word_to_telex_type(word) for word in sentence.split())
"""
End section: Chuyển câu văn về kiểu gõ telex khi không bật Unikey
"""
"""
Start section: Chuyển câu văn về cách gõ dấu kiểu cũ: dùng òa úy thay oà uý
Xem tại đây: https://vi.wikipedia.org/wiki/Quy_t%E1%BA%AFc_%C4%91%E1%BA%B7t_d%E1%BA%A5u_thanh_trong_ch%E1%BB%AF_qu%E1%BB%91c_ng%E1%BB%AF
"""
def chuan_hoa_dau_tu_tieng_viet(word):
    """Move the tone mark of one word to its old-style position.

    The word is returned unchanged when ``is_valid_vietnam_word`` rejects it.
    Otherwise every vowel is reduced to its toneless form, the last tone seen
    is remembered, and that tone is put back on a single vowel:

    * the ``u`` of a leading ``qu`` and the ``i`` of a leading ``gi`` are not
      counted as vowels;
    * if fewer than two vowels remain, only ``qu``/``gi`` words are rewritten;
    * otherwise the first ``ê`` or ``ơ`` takes the tone if there is one;
    * otherwise, with exactly two vowels ending the word, the first vowel
      takes the tone (``hoà`` -> ``hòa``); in every other case the second does.

    :param word: a lower-case word without surrounding punctuation
    :return: the word with its tone mark repositioned
    """
    if not is_valid_vietnam_word(word):
        return word

    letters = list(word)
    tone = 0
    vowel_positions = []
    qu_or_gi = False
    for pos, letter in enumerate(letters):
        row, column = nguyen_am_to_ids.get(letter, (-1, -1))
        if row == -1:
            continue
        if pos != 0:
            previous = letters[pos - 1]
            if row == 9 and previous == 'q':  # the "u" of "qu"
                letters[pos] = 'u'
                qu_or_gi = True
            elif row == 5 and previous == 'g':  # the "i" of "gi"
                letters[pos] = 'i'
                qu_or_gi = True
        if column != 0:
            tone = column
            letters[pos] = bang_nguyen_am[row][0]
        # The glide of a leading "qu"/"gi" sits at position 1 and is skipped.
        if not (qu_or_gi and pos == 1):
            vowel_positions.append(pos)

    if len(vowel_positions) < 2:
        if not qu_or_gi:
            return word
        if len(letters) == 2:
            row, column = nguyen_am_to_ids.get(letters[1])
            letters[1] = bang_nguyen_am[row][tone]
        else:
            row, column = nguyen_am_to_ids.get(letters[2], (-1, -1))
            if row != -1:
                letters[2] = bang_nguyen_am[row][tone]
            elif letters[1] == 'i':
                letters[1] = bang_nguyen_am[5][tone]
            else:
                letters[1] = bang_nguyen_am[9][tone]
        return ''.join(letters)

    # "ê" (row 4) and "ơ" (row 8) always carry the tone when present.
    for pos in vowel_positions:
        row, column = nguyen_am_to_ids[letters[pos]]
        if row in (4, 8):
            letters[pos] = bang_nguyen_am[row][tone]
            return ''.join(letters)

    ends_with_vowel_pair = (len(vowel_positions) == 2
                            and vowel_positions[-1] == len(letters) - 1)
    target = vowel_positions[0] if ends_with_vowel_pair else vowel_positions[1]
    row, column = nguyen_am_to_ids[letters[target]]
    letters[target] = bang_nguyen_am[row][tone]
    return ''.join(letters)
def is_valid_vietnam_word(word):
    """Tell whether all vowels of ``word`` form one contiguous run.

    Only the positions of characters listed in ``nguyen_am_to_ids`` are
    examined; a word with no such character, or with a single one, is accepted.

    :param word: the word to check
    :return: ``False`` if two vowels are separated by another character,
        ``True`` otherwise
    """
    last_vowel = -1
    for pos, letter in enumerate(word):
        if letter not in nguyen_am_to_ids:
            continue
        if last_vowel != -1 and pos - last_vowel != 1:
            return False
        last_vowel = pos
    return True
def chuan_hoa_dau_cau_tieng_viet(sentence):
    """Normalise the tone-mark placement of a sentence to the old style.

    The sentence is lower-cased and split on whitespace.  Each token made of
    optional leading punctuation, a core of letters (dots are allowed inside
    the core) and optional trailing punctuation has its core passed through
    ``chuan_hoa_dau_tu_tieng_viet``; the punctuation is kept as is.  Tokens of
    any other shape are left untouched.  Tokens are re-joined with single
    spaces.

    :param sentence: the sentence to normalise
    :return: the lower-cased sentence with normalised tone marks
    """
    sentence = sentence.lower()
    words = sentence.split()
    for index, word in enumerate(words):
        # Capture the three parts directly.  The previous implementation
        # joined them with '/' and split again, which removed every '/' that
        # was part of the token itself ("và/hoặc" became "vàhoặc").
        # NOTE: \p{L} / \p{P} need the `regex` module this file imports as `re`.
        parts = re.match(r'^(\p{P}*)([\p{L}.]*\p{L}+)(\p{P}*)$', word)
        if parts:
            head, core, tail = parts.groups()
            words[index] = head + chuan_hoa_dau_tu_tieng_viet(core) + tail
    return ' '.join(words)
"""
End section: Chuyển câu văn về cách gõ dấu kiểu cũ: dùng òa úy thay oà uý
Xem tại đây: https://vi.wikipedia.org/wiki/Quy_tắc_đặt_dấu_thanh_trong_chữ_quốc_ngữ
"""
if __name__ == '__main__':
    # Smoke test: a word with attached punctuation ("hoà,") and a word
    # followed by repeated dots ("làm..").
    print(chuan_hoa_dau_cau_tieng_viet('anh hoà, đang làm.. gì'))
    # Disabled example: normalise a corpus file line by line and append the
    # result to "<input>.out".
    # NOTE(review): convertwindown1525toutf8 is not defined in this file;
    # presumably convert_unicode is meant - confirm before re-enabling.
    # f = open('/home/lap60313/data/corpus-full.txt', encoding='utf8')
    # sentence = f.readline()
    # current_line = 0
    # while sentence:
    #     current_line += 1
    #     if current_line % 1000 == 0:
    #         print('Current line', str(current_line))
    #     sentence = sentence.lower().strip()
    #     sentence = convertwindown1525toutf8(sentence)
    #     sentence = chuan_hoa_dau_cau_tieng_viet(sentence)
    #     with open('/home/lap60313/data/corpus-full.txt.out', 'a+', encoding='utf8') as fp:
    #         fp.write(sentence + "\n")
    #     sentence = f.readline()
package utils;
import java.util.*;
/**
 * Helpers for normalising Vietnamese text: converting decomposed vowels to
 * precomposed characters and moving tone marks to their old-style position
 * (e.g. "hoà" becomes "hòa", "thuỳ" becomes "thùy").
 */
public class NlpUtils {
    /**
     * Decomposed vowel sequence (or the look-alike capital Eth) mapped to the
     * precomposed character. Iteration order is longest key first.
     */
    static Map<String, String> dictChar;

    /**
     * One row per base vowel. Column 0 is the vowel without tone; columns 1-5
     * carry grave, acute, hook above, tilde and dot below.
     */
    static Character[][] vowelTable = {
            {'a', 'à', 'á', 'ả', 'ã', 'ạ'},
            {'ă', 'ằ', 'ắ', 'ẳ', 'ẵ', 'ặ'},
            {'â', 'ầ', 'ấ', 'ẩ', 'ẫ', 'ậ'},
            {'e', 'è', 'é', 'ẻ', 'ẽ', 'ẹ'},
            {'ê', 'ề', 'ế', 'ể', 'ễ', 'ệ'},
            {'i', 'ì', 'í', 'ỉ', 'ĩ', 'ị'},
            {'o', 'ò', 'ó', 'ỏ', 'õ', 'ọ'},
            {'ô', 'ồ', 'ố', 'ổ', 'ỗ', 'ộ'},
            {'ơ', 'ờ', 'ớ', 'ở', 'ỡ', 'ợ'},
            {'u', 'ù', 'ú', 'ủ', 'ũ', 'ụ'},
            {'ư', 'ừ', 'ứ', 'ử', 'ữ', 'ự'},
            {'y', 'ỳ', 'ý', 'ỷ', 'ỹ', 'ỵ'}
    };

    /** Every character allowed in a word that gets its tone mark moved. */
    static Set<Character> vietnamChars;
    /** Lower-case vowel mapped to its row in {@link #vowelTable}. */
    static Map<Character, Integer> vowelLookupRow = new HashMap<>();
    /** Lower-case vowel mapped to its tone column in {@link #vowelTable}. */
    static Map<Character, Integer> vowelLookupColumn = new HashMap<>();

    /** Splits a token into leading non-letters, a run of letters and trailing non-letters. */
    private static final java.util.regex.Pattern WORD_PARTS =
            java.util.regex.Pattern.compile("^([^\\p{L}]*)(\\p{L}+)([^\\p{L}]*)$");

    static {
        dictChar = loadDictChar();
        // ASCII letters, every vowel of the table in both cases, and the letter d with stroke.
        vietnamChars = new HashSet<>();
        for (char c = 'a'; c <= 'z'; c++) {
            vietnamChars.add(c);
            vietnamChars.add(Character.toUpperCase(c));
        }
        for (int i = 0; i < vowelTable.length; i++) {
            for (int j = 0; j < vowelTable[i].length; j++) {
                Character vowel = vowelTable[i][j];
                vowelLookupRow.put(vowel, i);
                vowelLookupColumn.put(vowel, j);
                vietnamChars.add(vowel);
                vietnamChars.add(Character.toUpperCase(vowel));
            }
        }
        vietnamChars.add('đ');
        vietnamChars.add('Đ');
    }

    /**
     * Builds {@link #dictChar} from {@link #vowelTable}.
     *
     * For every toned vowel, in both cases, two decomposed spellings are
     * registered: the base letter followed by one combining tone mark, and the
     * fully decomposed (NFD) form. The keys are derived in code because the
     * decomposed and precomposed spellings look identical in an editor and are
     * easily merged when the file is copied or re-encoded. The capital Eth is
     * also mapped to the capital D with stroke.
     *
     * @return the mapping, ordered longest key first so that a long sequence
     *         is replaced before a shorter key that is its prefix
     */
    private static Map<String, String> loadDictChar() {
        // Combining marks for table columns 1-5.
        char[] toneMarks = {'\u0300', '\u0301', '\u0309', '\u0303', '\u0323'};
        Map<String, String> unordered = new HashMap<>();
        for (Character[] row : vowelTable) {
            String base = String.valueOf(row[0]);
            for (int j = 1; j < row.length; j++) {
                String composed = String.valueOf(row[j]);
                String[][] pairs = {
                        {base + toneMarks[j - 1], composed},
                        {base.toUpperCase(Locale.ROOT) + toneMarks[j - 1], composed.toUpperCase(Locale.ROOT)}
                };
                for (String[] pair : pairs) {
                    unordered.put(pair[0], pair[1]);
                    unordered.put(java.text.Normalizer.normalize(pair[1], java.text.Normalizer.Form.NFD), pair[1]);
                }
            }
        }
        unordered.put("\u00D0", "\u0110");

        List<String> keys = new ArrayList<>(unordered.keySet());
        keys.sort((a, b) -> a.length() != b.length()
                ? Integer.compare(b.length(), a.length())
                : a.compareTo(b));
        Map<String, String> ordered = new LinkedHashMap<>();
        for (String key : keys) {
            ordered.put(key, unordered.get(key));
        }
        return ordered;
    }

    /**
     * Replaces every key of {@link #dictChar} found in the sentence with its
     * precomposed value. (The method name keeps its historical spelling.)
     *
     * @param sentence text that may contain decomposed vowels
     * @return the text with those sequences replaced
     */
    public static String convertUnicde(String sentence) {
        for (Map.Entry<String, String> entry : dictChar.entrySet()) {
            // Keys are literal text, not regular expressions.
            sentence = sentence.replace(entry.getKey(), entry.getValue());
        }
        return sentence;
    }

    /**
     * Checks that a lower-case word uses only characters of
     * {@link #vietnamChars}, that its vowels form one contiguous run and that
     * exactly one vowel carries a tone mark.
     *
     * @param word the word, already lower-cased
     * @return true only if all three conditions hold
     */
    private static boolean isVietnamWord(String word) {
        boolean hasAccent = false;
        int currentVowel = -1;
        for (int i = 0; i < word.length(); i++) {
            char c = word.charAt(i);
            if (!vietnamChars.contains(c)) return false;
            if (!vowelLookupRow.containsKey(c)) continue;
            if (currentVowel != -1 && i - currentVowel != 1) return false;
            currentVowel = i;
            if (vowelLookupColumn.get(c) > 0) {
                if (hasAccent) return false; // two tone marks in one word
                hasAccent = true;
            }
        }
        return hasAccent;
    }

    /**
     * Moves the tone mark of one token to its old-style position.
     *
     * Leading and trailing non-letters are kept unchanged. The result is
     * lower-case; the caller restores the original capitalisation.
     *
     * @param word one whitespace-free token
     * @return the token with its tone mark repositioned, or the lower-cased
     *         token when {@link #isVietnamWord} rejects its letter part
     */
    private static String correctVnAccentWord(String word) {
        String head = "", tail = "";
        java.util.regex.Matcher parts = WORD_PARTS.matcher(word);
        if (parts.matches()) {
            head = parts.group(1);
            word = parts.group(2);
            tail = parts.group(3);
        }
        word = word.toLowerCase(Locale.ROOT);
        if (!isVietnamWord(word)) return head + word + tail;

        char[] chars = word.toCharArray();
        int accentPosition = 0;
        boolean isQuOrGi = false;
        List<Integer> vowelsIndex = new ArrayList<>();
        for (int i = 0; i < chars.length; i++) {
            int x = vowelLookupRow.getOrDefault(chars[i], -1);
            if (x == -1) continue;
            int y = vowelLookupColumn.get(chars[i]);
            if (x == 9) { // the "u" of "qu"
                if (i != 0 && chars[i - 1] == 'q') {
                    chars[i] = 'u';
                    isQuOrGi = true;
                }
            } else if (x == 5) { // the "i" of "gi"
                if (i != 0 && chars[i - 1] == 'g') {
                    chars[i] = 'i';
                    isQuOrGi = true;
                }
            }
            if (y != 0) {
                accentPosition = y;
                chars[i] = vowelTable[x][0];
            }
            // The glide of a leading "qu"/"gi" sits at index 1 and is not counted.
            if (!isQuOrGi || i != 1) {
                vowelsIndex.add(i);
            }
        }

        if (vowelsIndex.size() < 2) {
            if (!isQuOrGi) return head + word + tail;
            if (chars.length == 2) {
                int x = vowelLookupRow.get(chars[1]);
                chars[1] = vowelTable[x][accentPosition];
            } else {
                int x = vowelLookupRow.getOrDefault(chars[2], -1);
                if (x != -1) {
                    chars[2] = vowelTable[x][accentPosition];
                } else {
                    chars[1] = (chars[1] == 'i' ? vowelTable[5][accentPosition] : vowelTable[9][accentPosition]);
                }
            }
            return head + String.copyValueOf(chars) + tail;
        }

        // Rows 4 and 8 (e with circumflex, o with horn) always carry the tone when present.
        for (int index : vowelsIndex) {
            int x = vowelLookupRow.get(chars[index]);
            if (x == 4 || x == 8) {
                chars[index] = vowelTable[x][accentPosition];
                return head + String.copyValueOf(chars) + tail;
            }
        }

        // Two vowels ending the word: tone on the first. Otherwise: tone on the second.
        boolean endsWithVowelPair = vowelsIndex.size() == 2 && vowelsIndex.get(1) == chars.length - 1;
        int target = endsWithVowelPair ? vowelsIndex.get(0) : vowelsIndex.get(1);
        int x = vowelLookupRow.get(chars[target]);
        chars[target] = vowelTable[x][accentPosition];
        return head + String.copyValueOf(chars) + tail;
    }

    /** Records, for every character of the word, whether it is upper-case. */
    private static List<Boolean> getUpperState(String word) {
        List<Boolean> uppers = new ArrayList<>();
        for (char c : word.toCharArray()) {
            uppers.add(Character.isUpperCase(c));
        }
        return uppers;
    }

    /**
     * Upper-cases the characters whose recorded state is upper-case.
     * Positions beyond the recorded states are left as they are.
     */
    private static String updateUpperState(String word, List<Boolean> uppers) {
        char[] chars = word.toCharArray();
        for (int i = 0; i < chars.length && i < uppers.size(); i++) {
            if (uppers.get(i)) {
                chars[i] = Character.toUpperCase(chars[i]);
            }
        }
        return String.copyValueOf(chars);
    }

    /**
     * Normalises a sentence: converts decomposed vowels to precomposed ones,
     * then moves the tone mark of every whitespace-separated token to its
     * old-style position while keeping capitalisation and attached
     * punctuation. Tokens are re-joined with single spaces.
     *
     * @param sentence the sentence to normalise
     * @return the normalised sentence
     */
    public static String correctVnAccentSentence(String sentence) {
        sentence = convertUnicde(sentence);
        String[] words = sentence.split("\\s+");
        for (int i = 0; i < words.length; i++) {
            List<Boolean> uppers = getUpperState(words[i]);
            try {
                words[i] = updateUpperState(correctVnAccentWord(words[i]), uppers);
            } catch (RuntimeException e) {
                // Deliberate best effort: a token that cannot be processed is kept unchanged.
            }
        }
        return String.join(" ", words);
    }

    /** Inserts a space between a listed punctuation character and an adjacent letter. */
    private static String addExtraSpace(String sent) {
        sent = sent.replaceAll("([\"',.:'!?/”“\\(])(\\p{L})", "$1 $2")
                .replaceAll("(\\p{L})([\"',.:'!?/”“\\)])", "$1 $2");
        return sent;
    }

    /** Removes whitespace between a listed punctuation character and an adjacent letter. */
    private static String removeExtraSpace(String sent) {
        sent = sent.replaceAll("([/“\\(])\\s+(\\p{L})", "$1$2")
                .replaceAll("(\\p{L})\\s+([,.:!?/\\)”])", "$1$2");
        return sent;
    }

    public static void main(String[] args) {
        System.out.println(NlpUtils.correctVnAccentSentence("Cái kiểu so sánh quận Nhất, U Minh gây tranh cãi vừa rồi, khiến dân chơi thể thao nhớ tới một người: Jose Mourinho."));
    }
}
@nguyenvanhieuvn

This comment has been minimized.

Copy link
Owner Author

@nguyenvanhieuvn nguyenvanhieuvn commented Feb 13, 2020

13/02/2020: Java version, Bổ sung chuẩn hóa dấu cho các từ đi kèm ký tự đặc biệt nhưng vẫn giữ nguyên các ký tự này, ví dụ: (thuỳ) => (thùy)

@nguyenvanhieuvn

This comment has been minimized.

Copy link
Owner Author

@nguyenvanhieuvn nguyenvanhieuvn commented May 28, 2020

Cập nhật phiên bản code Python, sửa lỗi chuẩn hóa dấu với các từ dính dấu ngắt câu: anh hoà, đang làm.. gì; không thay đổi cấu trúc ban đầu của câu.

@nguyenvanlinh1808

This comment has been minimized.

Copy link

@nguyenvanlinh1808 nguyenvanlinh1808 commented Sep 3, 2020

Cảm ơn nguyenvanhieuvn, cái này sẽ giúp mình rất nhiều

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment